2007-06-26 19:15:18

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 01/12] bring driver version number to 47

These patches were made against kernel 2.6.22-rc4.

Bring driver version number to 47.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoe.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 1d84668..2ce5ce9 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "32"
+#define VERSION "47"
#define AOE_MAJOR 152
#define DEVICE_NAME "aoe"

--
1.5.2.1


2007-06-26 19:05:21

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 03/12] mac_addr: avoid 64-bit arch compiler warnings

By returning unsigned long long, mac_addr does not generate compiler
warnings on 64-bit architectures.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoe.h | 2 +-
drivers/block/aoe/aoeblk.c | 3 +--
drivers/block/aoe/aoecmd.c | 10 +++++-----
drivers/block/aoe/aoenet.c | 4 ++--
4 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 069f04c..f085465 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -208,4 +208,4 @@ void aoenet_xmit(struct sk_buff *);
int is_aoe_netif(struct net_device *ifp);
int set_aoe_iflist(const char __user *str, size_t size);

-u64 mac_addr(char addr[6]);
+unsigned long long mac_addr(char addr[6]);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index f6773ab..c9cf576 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -32,8 +32,7 @@ static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)

if (t == NULL)
return snprintf(page, PAGE_SIZE, "none\n");
- return snprintf(page, PAGE_SIZE, "%012llx\n",
- (unsigned long long)mac_addr(t->addr));
+ return snprintf(page, PAGE_SIZE, "%012llx\n", mac_addr(t->addr));
}
static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
{
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8a3a973..62ba58c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -305,7 +305,8 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
snprintf(buf, sizeof buf,
"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%012llx d=%012llx nout=%d\n",
"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
- mac_addr(h->src), mac_addr(h->dst), t->nout);
+ mac_addr(h->src),
+ mac_addr(h->dst), t->nout);
aoechr_error(buf);

f->tag = n;
@@ -616,7 +617,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)

if (d->ssize != ssize)
printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
- (unsigned long long)mac_addr(t->addr),
+ mac_addr(t->addr),
d->aoemajor, d->aoeminor,
d->fw_ver, (long long)ssize);
d->ssize = ssize;
@@ -710,8 +711,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
t = gettgt(d, hin->src);
if (t == NULL) {
printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
- d->aoemajor, d->aoeminor,
- (unsigned long long) mac_addr(hin->src));
+ d->aoemajor, d->aoeminor, mac_addr(hin->src));
spin_unlock_irqrestore(&d->lock, flags);
return;
}
@@ -988,7 +988,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
printk(KERN_INFO
"aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n",
d->aoemajor, d->aoeminor, n, ifp->nd->name,
- (unsigned long long) mac_addr(t->addr));
+ mac_addr(t->addr));
ifp->maxbcnt = n;
}
}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index f335099..d2c1a47 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -82,7 +82,7 @@ set_aoe_iflist(const char __user *user_str, size_t size)
return 0;
}

-u64
+unsigned long long
mac_addr(char addr[6])
{
__be64 n = 0;
@@ -90,7 +90,7 @@ mac_addr(char addr[6])

memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */

- return __be64_to_cpu(n);
+ return (unsigned long long) __be64_to_cpu(n);
}

void
--
1.5.2.1

2007-06-26 19:05:38

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 04/12] clean up udev configuration example

This patch adds a known default location for the udev configuration
file and uses the more recent "==" syntax for SUBSYSTEM and KERNEL.

Signed-off-by: Ed L. Cashin <[email protected]>
---
Documentation/aoe/udev-install.sh | 5 ++++-
Documentation/aoe/udev.txt | 15 ++++++++-------
2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
index 6449911..15e86f5 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/aoe/udev-install.sh
@@ -23,7 +23,10 @@ fi
# /etc/udev/rules.d
#
rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-if test -z "$rules_d" || test ! -d "$rules_d"; then
+if test -z "$rules_d" ; then
+ rules_d=/etc/udev/rules.d
+fi
+if test ! -d "$rules_d"; then
echo "$me Error: cannot find udev rules directory" 1>&2
exit 1
fi
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index a7ed1dc..17e76c4 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -1,6 +1,7 @@
# These rules tell udev what device nodes to create for aoe support.
-# They may be installed along the following lines (adjusted to what
-# you see on your system).
+# They may be installed along the following lines. Check the section
+# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
+# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
#
# ecashin@makki ~$ su
# Password:
@@ -15,10 +16,10 @@
#

# aoe char devices
-SUBSYSTEM="aoe", KERNEL="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
-SUBSYSTEM="aoe", KERNEL="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
-SUBSYSTEM="aoe", KERNEL="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
+SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"

# aoe block devices
-KERNEL="etherd*", NAME="%k", GROUP="disk"
+KERNEL=="etherd*", NAME="%k", GROUP="disk"
--
1.5.2.1

2007-06-26 19:05:55

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 05/12] eliminate goto and improve readability

Adam Richter suggested eliminating this goto.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoechr.c | 69 +++++++++++++++++++++----------------------
1 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 9026c44..511f995 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -191,52 +191,51 @@ aoechr_read(struct file *filp, char __user *buf, size_t cnt, loff_t *off)
ulong flags;

n = (unsigned long) filp->private_data;
- switch (n) {
- case MINOR_ERR:
- spin_lock_irqsave(&emsgs_lock, flags);
-loop:
- em = emsgs + emsgs_head_idx;
- if ((em->flags & EMFL_VALID) == 0) {
- if (filp->f_flags & O_NDELAY) {
- spin_unlock_irqrestore(&emsgs_lock, flags);
- return -EAGAIN;
- }
- nblocked_emsgs_readers++;
+ if (n != MINOR_ERR)
+ return -EFAULT;
+
+ spin_lock_irqsave(&emsgs_lock, flags);

+ for (;;) {
+ em = emsgs + emsgs_head_idx;
+ if ((em->flags & EMFL_VALID) != 0)
+ break;
+ if (filp->f_flags & O_NDELAY) {
spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ nblocked_emsgs_readers++;
+
+ spin_unlock_irqrestore(&emsgs_lock, flags);

- n = down_interruptible(&emsgs_sema);
+ n = down_interruptible(&emsgs_sema);

- spin_lock_irqsave(&emsgs_lock, flags);
+ spin_lock_irqsave(&emsgs_lock, flags);

- nblocked_emsgs_readers--;
+ nblocked_emsgs_readers--;

- if (n) {
- spin_unlock_irqrestore(&emsgs_lock, flags);
- return -ERESTARTSYS;
- }
- goto loop;
- }
- if (em->len > cnt) {
+ if (n) {
spin_unlock_irqrestore(&emsgs_lock, flags);
- return -EAGAIN;
+ return -ERESTARTSYS;
}
- mp = em->msg;
- len = em->len;
- em->msg = NULL;
- em->flags &= ~EMFL_VALID;
+ }
+ if (em->len > cnt) {
+ spin_unlock_irqrestore(&emsgs_lock, flags);
+ return -EAGAIN;
+ }
+ mp = em->msg;
+ len = em->len;
+ em->msg = NULL;
+ em->flags &= ~EMFL_VALID;

- emsgs_head_idx++;
- emsgs_head_idx %= ARRAY_SIZE(emsgs);
+ emsgs_head_idx++;
+ emsgs_head_idx %= ARRAY_SIZE(emsgs);

- spin_unlock_irqrestore(&emsgs_lock, flags);
+ spin_unlock_irqrestore(&emsgs_lock, flags);

- n = copy_to_user(buf, mp, len);
- kfree(mp);
- return n == 0 ? len : -EFAULT;
- default:
- return -EFAULT;
- }
+ n = copy_to_user(buf, mp, len);
+ kfree(mp);
+ return n == 0 ? len : -EFAULT;
}

static const struct file_operations aoe_fops = {
--
1.5.2.1

2007-06-26 19:06:15

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 06/12] user can ask driver to forget previously detected devices

When an AoE device is detected, the kernel is informed, and a new
block device is created. If the device is unused, the block device
corresponding to remote device that is no longer available may be
removed from the system by telling the aoe driver to "flush" its list
of devices.

Signed-off-by: Ed L. Cashin <[email protected]>
---
Documentation/aoe/mkdevs.sh | 2 +
Documentation/aoe/udev.txt | 1 +
drivers/block/aoe/aoe.h | 1 +
drivers/block/aoe/aoechr.c | 5 ++
drivers/block/aoe/aoedev.c | 87 +++++++++++++++++++++++++++++++++---------
5 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh
index 97374aa..44c0ab7 100644
--- a/Documentation/aoe/mkdevs.sh
+++ b/Documentation/aoe/mkdevs.sh
@@ -29,6 +29,8 @@ rm -f $dir/interfaces
mknod -m 0200 $dir/interfaces c $MAJOR 4
rm -f $dir/revalidate
mknod -m 0200 $dir/revalidate c $MAJOR 5
+rm -f $dir/flush
+mknod -m 0200 $dir/flush c $MAJOR 6

export n_partitions
mkshelf=`echo $0 | sed 's!mkdevs!mkshelf!'`
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
index 17e76c4..8686e78 100644
--- a/Documentation/aoe/udev.txt
+++ b/Documentation/aoe/udev.txt
@@ -20,6 +20,7 @@ SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220
SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"

# aoe block devices
KERNEL=="etherd*", NAME="%k", GROUP="disk"
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index f085465..7fa86dd 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -201,6 +201,7 @@ struct aoedev *aoedev_by_aoeaddr(int maj, int min);
struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
void aoedev_downdev(struct aoedev *d);
int aoedev_isbusy(struct aoedev *d);
+int aoedev_flush(const char __user *str, size_t size);

int aoenet_init(void);
void aoenet_exit(void);
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 511f995..10b38a7 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -15,6 +15,7 @@ enum {
MINOR_DISCOVER,
MINOR_INTERFACES,
MINOR_REVALIDATE,
+ MINOR_FLUSH,
MSGSZ = 2048,
NMSG = 100, /* message backlog to retain */
};
@@ -43,6 +44,7 @@ static struct aoe_chardev chardevs[] = {
{ MINOR_DISCOVER, "discover" },
{ MINOR_INTERFACES, "interfaces" },
{ MINOR_REVALIDATE, "revalidate" },
+ { MINOR_FLUSH, "flush" },
};

static int
@@ -155,6 +157,9 @@ aoechr_write(struct file *filp, const char __user *buf, size_t cnt, loff_t *offp
break;
case MINOR_REVALIDATE:
ret = revalidate(buf, cnt);
+ break;
+ case MINOR_FLUSH:
+ ret = aoedev_flush(buf, cnt);
}
if (ret == 0)
ret = cnt;
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 04c75b4..8659796 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -9,6 +9,10 @@
#include <linux/netdevice.h>
#include "aoe.h"

+static void dummy_timer(ulong);
+static void aoedev_freedev(struct aoedev *);
+static void freetgt(struct aoetgt *t);
+
static struct aoedev *devlist;
static spinlock_t devlist_lock;

@@ -108,6 +112,70 @@ aoedev_downdev(struct aoedev *d)
d->flags &= ~DEVFL_UP;
}

+static void
+aoedev_freedev(struct aoedev *d)
+{
+ struct aoetgt **t, **e;
+
+ if (d->gd) {
+ aoedisk_rm_sysfs(d);
+ del_gendisk(d->gd);
+ put_disk(d->gd);
+ }
+ t = d->targets;
+ e = t + NTARGETS;
+ for (; t<e && *t; t++)
+ freetgt(*t);
+ if (d->bufpool)
+ mempool_destroy(d->bufpool);
+ kfree(d);
+}
+
+int
+aoedev_flush(const char __user *str, size_t cnt)
+{
+ ulong flags;
+ struct aoedev *d, **dd;
+ struct aoedev *rmd = NULL;
+ char buf[16];
+ int all = 0;
+
+ if (cnt >= 3) {
+ if (cnt > sizeof buf)
+ cnt = sizeof buf;
+ if (copy_from_user(buf, str, cnt))
+ return -EFAULT;
+ all = !strncmp(buf, "all", 3);
+ }
+
+ flush_scheduled_work();
+ spin_lock_irqsave(&devlist_lock, flags);
+ dd = &devlist;
+ while ((d=*dd)) {
+ spin_lock(&d->lock);
+ if ((!all && (d->flags & DEVFL_UP))
+ || (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+ || d->nopen) {
+ spin_unlock(&d->lock);
+ dd = &d->next;
+ continue;
+ }
+ *dd = d->next;
+ aoedev_downdev(d);
+ d->flags |= DEVFL_TKILL;
+ spin_unlock(&d->lock);
+ d->next = rmd;
+ rmd = d;
+ }
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ while ((d=rmd)) {
+ rmd = d->next;
+ del_timer_sync(&d->timer);
+ aoedev_freedev(d); // must be able to sleep
+ }
+ return 0;
+}
+
/* find it or malloc it */
struct aoedev *
aoedev_by_sysminor_m(ulong sysminor)
@@ -160,25 +228,6 @@ freetgt(struct aoetgt *t)
kfree(t);
}

-static void
-aoedev_freedev(struct aoedev *d)
-{
- struct aoetgt **t, **e;
-
- if (d->gd) {
- aoedisk_rm_sysfs(d);
- del_gendisk(d->gd);
- put_disk(d->gd);
- }
- t = d->targets;
- e = t + NTARGETS;
- for (; t<e && *t; t++)
- freetgt(*t);
- if (d->bufpool)
- mempool_destroy(d->bufpool);
- kfree(d);
-}
-
void
aoedev_exit(void)
{
--
1.5.2.1

2007-06-26 19:06:33

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 07/12] use a dynamic pool of sk_buffs to keep up with fast targets

Use a dynamic pool of sk_buffs to keep up with fast targets.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoe.h | 5 ++
drivers/block/aoe/aoecmd.c | 129 +++++++++++++++++++++++++++++---------------
drivers/block/aoe/aoedev.c | 51 +++++++++++++++---
3 files changed, 134 insertions(+), 51 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 7fa86dd..55c2f08 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -98,6 +98,7 @@ enum {
MIN_BUFS = 16,
NTARGETS = 8,
NAOEIFS = 8,
+ NSKBPOOLMAX = 128,

TIMERTICK = HZ / 10,
MINTIMER = HZ >> 2,
@@ -147,6 +148,7 @@ struct aoetgt {
u16 useme;
ulong lastwadj; /* last window adjustment */
int wpkts, rpkts;
+int dataref;
};

struct aoedev {
@@ -168,6 +170,9 @@ struct aoedev {
spinlock_t lock;
struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
struct sk_buff *sendq_tl;
+ struct sk_buff *skbpool_hd;
+ struct sk_buff *skbpool_tl;
+ int nskbpool;
mempool_t *bufpool; /* for deadlock-free Buf allocation */
struct list_head bufq; /* queue of bios to work on */
struct buf *inprocess; /* the one we're currently working on */
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 62ba58c..89df9de 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -105,43 +105,102 @@ ifrotate(struct aoetgt *t)
}
}

+static void
+skb_pool_put(struct aoedev *d, struct sk_buff *skb)
+{
+ if (!d->skbpool_hd)
+ d->skbpool_hd = skb;
+ else
+ d->skbpool_tl->next = skb;
+ d->skbpool_tl = skb;
+}
+
+static struct sk_buff *
+skb_pool_get(struct aoedev *d)
+{
+ struct sk_buff *skb;
+
+ skb = d->skbpool_hd;
+ if (skb)
+ if (atomic_read(&skb_shinfo(skb)->dataref) == 1) {
+ d->skbpool_hd = skb->next;
+ skb->next = NULL;
+ return skb;
+ }
+ if (d->nskbpool < NSKBPOOLMAX)
+ if ((skb = new_skb(ETH_ZLEN))) {
+ d->nskbpool++;
+ return skb;
+ }
+ return NULL;
+}
+
+/* freeframe is where we do our load balancing so it's a little hairy. */
static struct frame *
freeframe(struct aoedev *d)
{
- struct frame *f, *e;
+ struct frame *f, *e, *rf;
struct aoetgt **t;
- ulong n;
+ struct sk_buff *skb;

if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
printk(KERN_ERR "aoe: NULL TARGETS!\n");
return NULL;
}
- t = d->targets;
- do {
+ t = d->tgt;
+ t++;
+ if (t >= &d->targets[NTARGETS] || !*t)
+ t = d->targets;
+ for (;;) {
+ if ((*t)->nout < (*t)->maxout)
if (t != d->htgt)
- if ((*t)->ifp->nd)
- if ((*t)->nout < (*t)->maxout) {
- n = (*t)->nframes;
+ if ((*t)->ifp->nd) {
+ rf = NULL;
f = (*t)->frames;
- e = f + n;
+ e = f + (*t)->nframes;
for (; f<e; f++) {
if (f->tag != FREETAG)
continue;
- if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) {
- n--;
+ skb = f->skb;
+ if (!skb)
+ if (!(f->skb = skb = new_skb(ETH_ZLEN)))
+ continue;
+ if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
+ if (!rf)
+ rf = f;
continue;
}
- skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
- skb_trim(f->skb, 0);
+gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+ skb_trim(skb, 0);
d->tgt = t;
ifrotate(*t);
return f;
}
- if (n == 0) /* slow polling network card */
+ /* Work can be done, but the network layer is
+ holding our precious packets. Try to grab
+ one from the pool. */
+ f = rf;
+ if (f == NULL) { /* more paranoia */
+ printk(KERN_ERR "aoe: freeframe: unexpected null rf.\n");
+ d->flags |= DEVFL_KICKME;
+ return NULL;
+ }
+ skb = skb_pool_get(d);
+ if (skb) {
+ skb_pool_put(d, f->skb);
+ f->skb = skb;
+ goto gotone;
+ }
+ (*t)->dataref++;
+ if ((*t)->nout == 0)
d->flags |= DEVFL_KICKME;
}
+ if (t == d->tgt) /* we've looped and found nada */
+ break;
t++;
- } while (t < &d->targets[NTARGETS] && *t);
+ if (t >= &d->targets[NTARGETS] || !*t)
+ t = d->targets;
+ }
return NULL;
}

@@ -870,44 +929,26 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)

tt = d->targets;
te = tt + NTARGETS;
- for (; tt<te; tt++) {
- if (*tt == NULL)
- break;
- else if (memcmp((*tt)->addr, addr, 6) > 0) {
- memmove(tt+1, tt, (void *)te-(void *)(tt+1));
- break;
- }
- }
+ for (; tt<te && *tt; tt++)
+ ;
+
if (tt == te)
return NULL;

t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+ if (!t)
+ return NULL;
f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
- switch (!t || !f) {
- case 0:
- t->nframes = nframes;
- t->frames = f;
- e = f + nframes;
- for (; f<e; f++) {
- f->tag = FREETAG;
- f->skb = new_skb(ETH_ZLEN);
- if (!f->skb)
- break;
- }
- if (f == e)
- break;
- while (f > t->frames) {
- f--;
- dev_kfree_skb(f->skb);
- }
- default:
- if (f)
- kfree(f);
- if (t)
- kfree(t);
+ if (!f) {
+ kfree(t);
return NULL;
}

+ t->nframes = nframes;
+ t->frames = f;
+ e = f + nframes;
+ for (; f<e; f++)
+ f->tag = FREETAG;
memcpy(t->addr, addr, sizeof t->addr);
t->ifp = t->ifs;
t->maxout = t->nframes;
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 8659796..cf4e58d 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -7,11 +7,13 @@
#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/netdevice.h>
+#include <linux/delay.h>
#include "aoe.h"

static void dummy_timer(ulong);
static void aoedev_freedev(struct aoedev *);
-static void freetgt(struct aoetgt *t);
+static void freetgt(struct aoedev *d, struct aoetgt *t);
+static void skbpoolfree(struct aoedev *d);

static struct aoedev *devlist;
static spinlock_t devlist_lock;
@@ -125,9 +127,10 @@ aoedev_freedev(struct aoedev *d)
t = d->targets;
e = t + NTARGETS;
for (; t<e && *t; t++)
- freetgt(*t);
+ freetgt(d, *t);
if (d->bufpool)
mempool_destroy(d->bufpool);
+ skbpoolfree(d);
kfree(d);
}

@@ -176,6 +179,42 @@ aoedev_flush(const char __user *str, size_t cnt)
return 0;
}

+/* I'm not really sure that this is a realistic problem, but if the
+network driver goes gonzo let's just leak memory after complaining. */
+static void
+skbfree(struct sk_buff *skb)
+{
+ enum { Sms= 100, Tms= 3*1000};
+ int i = Tms / Sms;
+
+ if (skb == NULL)
+ return;
+ while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
+ msleep_interruptible(Sms);
+ if (i <= 0) {
+ printk(KERN_ERR
+ "aoe: %s holds ref: cannot free skb -- memory leaked.\n",
+ skb->dev ? skb->dev->name : "netif");
+ return;
+ }
+ skb_shinfo(skb)->nr_frags = skb->data_len = 0;
+ skb_trim(skb, 0);
+ dev_kfree_skb(skb);
+}
+
+static void
+skbpoolfree(struct aoedev *d)
+{
+ struct sk_buff *skb;
+
+ while ((skb = d->skbpool_hd)) {
+ d->skbpool_hd = skb->next;
+ skb->next = NULL;
+ skbfree(skb);
+ }
+ d->skbpool_tl = NULL;
+}
+
/* find it or malloc it */
struct aoedev *
aoedev_by_sysminor_m(ulong sysminor)
@@ -214,16 +253,14 @@ aoedev_by_sysminor_m(ulong sysminor)
}

static void
-freetgt(struct aoetgt *t)
+freetgt(struct aoedev *d, struct aoetgt *t)
{
struct frame *f, *e;

f = t->frames;
e = f + t->nframes;
- for (; f<e; f++) {
- skb_shinfo(f->skb)->nr_frags = 0;
- dev_kfree_skb(f->skb);
- }
+ for (; f<e; f++)
+ skbfree(f->skb);
kfree(t->frames);
kfree(t);
}
--
1.5.2.1

2007-06-26 19:06:46

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 10/12] add module parameter for users who need more outstanding I/O

Add module parameter for users who need more outstanding I/O.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoecmd.c | 10 +++++++---
1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8d6540b..9de0024 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -17,6 +17,11 @@ static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

+static int aoe_maxout = 16;
+module_param(aoe_maxout, int, 0644);
+MODULE_PARM_DESC(aoe_maxout,
+ "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
+
static struct sk_buff *
new_skb(ulong len)
{
@@ -967,7 +972,6 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
struct aoeif *ifp;
ulong flags, sysminor, aoemajor;
struct sk_buff *sl;
- enum { MAXFRAMES = 16 };
u16 n;

h = aoe_hdr(skb);
@@ -992,8 +996,8 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
}

n = be16_to_cpu(ch->bufcnt);
- if (n > MAXFRAMES) /* keep it reasonable */
- n = MAXFRAMES;
+ if (n > aoe_maxout) /* keep it reasonable */
+ n = aoe_maxout;

d = aoedev_by_sysminor_m(sysminor);
if (d == NULL) {
--
1.5.2.1

2007-06-26 19:07:11

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 08/12] only schedule work once

Only schedule work once.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoecmd.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 89df9de..8d6540b 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -681,6 +681,8 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->fw_ver, (long long)ssize);
d->ssize = ssize;
d->geo.start = 0;
+ if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
+ return;
if (d->gd != NULL) {
d->gd->capacity = ssize;
d->flags |= DEVFL_NEWSIZE;
--
1.5.2.1

2007-06-26 19:07:51

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 09/12] remove race between use and initialization of locks

This change was originally submitted by Alexey Dobriyan in an email
with ...

Message-ID: <[email protected]>

and the comment,

Some drivers do register_chrdev() before lock or semaphore used in
corresponding file_operations is initialized.

Andrew Morton commented that these locks should be initialized at
compile time, but Alexey Debriyan pointed out that the Documentation
tells us to use dynamic initialization whenever possible, and then the
discussion petered out.

http://preview.tinyurl.com/2pxq6p

I believe we made these locks dynamic because of the notice in
Documentation/spinlocks.txt, which says that static initializers are
deprecated:

UPDATE March 21 2005 Amit Gud <[email protected]>

Macros SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated and will be
removed soon. So for any new code dynamic initialization should be used:
...

In any case, the patch below makes the code correct and in keeping
with the existing documentation. If the existing docs are wrong, I'd
be happy to follow up with a patch that corrects them and makes these
aoechr.c locks static.

Signed-off-by: Alexey Dobriyan <[email protected]>
Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoechr.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 10b38a7..2b4f873 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -256,13 +256,13 @@ aoechr_init(void)
{
int n, i;

+ sema_init(&emsgs_sema, 0);
+ spin_lock_init(&emsgs_lock);
n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
if (n < 0) {
printk(KERN_ERR "aoe: can't register char device\n");
return n;
}
- sema_init(&emsgs_sema, 0);
- spin_lock_init(&emsgs_lock);
aoe_class = class_create(THIS_MODULE, "aoe");
if (IS_ERR(aoe_class)) {
unregister_chrdev(AOE_MAJOR, "aoechr");
--
1.5.2.1

2007-06-26 19:08:15

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 11/12] remove extra space in prototypes for consistency

Remove extra space in prototypes for consistency.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoeblk.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index c9cf576..ccadd9a 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -14,7 +14,7 @@

static struct kmem_cache *buf_pool_cache;

-static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_state(struct gendisk *disk, char *page)
{
struct aoedev *d = disk->private_data;

@@ -25,7 +25,7 @@ static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
/* I'd rather see nopen exported so we can ditch closewait */
}
-static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_mac(struct gendisk *disk, char *page)
{
struct aoedev *d = disk->private_data;
struct aoetgt *t = d->targets[0];
@@ -34,7 +34,7 @@ static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
return snprintf(page, PAGE_SIZE, "none\n");
return snprintf(page, PAGE_SIZE, "%012llx\n", mac_addr(t->addr));
}
-static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_netif(struct gendisk *disk, char *page)
{
struct aoedev *d = disk->private_data;
struct net_device *nds[8], **nd, **nnd, **ne;
@@ -71,7 +71,7 @@ static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
return p-page;
}
/* firmware version */
-static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page)
+static ssize_t aoedisk_show_fwver(struct gendisk *disk, char *page)
{
struct aoedev *d = disk->private_data;

--
1.5.2.1

2007-06-26 19:08:36

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 12/12] the aoeminor doesn't need a long format

The aoedev aoeminor member doesn't need a long format.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoeblk.c | 6 +++---
drivers/block/aoe/aoecmd.c | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ccadd9a..e216fe0 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -203,7 +203,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
spin_lock_irqsave(&d->lock, flags);

if ((d->flags & DEVFL_UP) == 0) {
- printk(KERN_INFO "aoe: device %ld.%ld is not up\n",
+ printk(KERN_INFO "aoe: device %ld.%d is not up\n",
d->aoemajor, d->aoeminor);
spin_unlock_irqrestore(&d->lock, flags);
mempool_free(buf, d->bufpool);
@@ -256,7 +256,7 @@ aoeblk_gdalloc(void *vp)

gd = alloc_disk(AOE_PARTITIONS);
if (gd == NULL) {
- printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%ld\n",
+ printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%d\n",
d->aoemajor, d->aoeminor);
spin_lock_irqsave(&d->lock, flags);
d->flags &= ~DEVFL_GDALLOC;
@@ -266,7 +266,7 @@ aoeblk_gdalloc(void *vp)

d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
if (d->bufpool == NULL) {
- printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%ld\n",
+ printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
d->aoemajor, d->aoeminor);
put_disk(gd);
spin_lock_irqsave(&d->lock, flags);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9de0024..dfb1184 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -680,7 +680,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
}

if (d->ssize != ssize)
- printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
+ printk(KERN_INFO "aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
mac_addr(t->addr),
d->aoemajor, d->aoeminor,
d->fw_ver, (long long)ssize);
@@ -805,7 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)

if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
printk(KERN_ERR
- "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
+ "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
ahout->cmdstat, ahin->cmdstat,
d->aoemajor, d->aoeminor);
if (buf)
--
1.5.2.1

2007-06-26 19:15:42

by Ed L. Cashin

[permalink] [raw]
Subject: [PATCH 02/12] handle multiple network paths to AoE device

Handle multiple network paths to AoE device.

Signed-off-by: Ed L. Cashin <[email protected]>
---
drivers/block/aoe/aoe.h | 58 +++--
drivers/block/aoe/aoeblk.c | 63 ++++-
drivers/block/aoe/aoechr.c | 14 +-
drivers/block/aoe/aoecmd.c | 660 +++++++++++++++++++++++++++++--------------
drivers/block/aoe/aoedev.c | 163 +++++------
drivers/block/aoe/aoenet.c | 4 +-
6 files changed, 630 insertions(+), 332 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 2ce5ce9..069f04c 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -85,10 +85,8 @@ enum {
DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */
DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */
- DEVFL_PAUSE = (1<<5),
+ DEVFL_KICKME = (1<<5), /* slow polling network card catch */
DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
- DEVFL_MAXBCNT = (1<<7), /* d->maxbcnt is not changeable */
- DEVFL_KICKME = (1<<8),

BUFFL_FAIL = 1,
};
@@ -97,17 +95,24 @@ enum {
DEFAULTBCNT = 2 * 512, /* 2 sectors */
NPERSHELF = 16, /* number of slots per shelf address */
FREETAG = -1,
- MIN_BUFS = 8,
+ MIN_BUFS = 16,
+ NTARGETS = 8,
+ NAOEIFS = 8,
+
+ TIMERTICK = HZ / 10,
+ MINTIMER = HZ >> 2,
+ MAXTIMER = HZ << 1,
+ HELPWAIT = 20,
};

struct buf {
struct list_head bufs;
- ulong start_time; /* for disk stats */
+ ulong stime; /* for disk stats */
ulong flags;
ulong nframesout;
- char *bufaddr;
ulong resid;
ulong bv_resid;
+ ulong bv_off;
sector_t sector;
struct bio *bio;
struct bio_vec *bv;
@@ -123,19 +128,37 @@ struct frame {
struct sk_buff *skb;
};

+struct aoeif {
+ struct net_device *nd;
+ unsigned char lost;
+ unsigned char lostjumbo;
+ ushort maxbcnt;
+};
+
+struct aoetgt {
+ unsigned char addr[6];
+ ushort nframes;
+ struct frame *frames;
+ struct aoeif ifs[NAOEIFS];
+ struct aoeif *ifp; /* current aoeif in use */
+ ushort nout;
+ ushort maxout;
+ u16 lasttag; /* last tag sent */
+ u16 useme;
+ ulong lastwadj; /* last window adjustment */
+int wpkts, rpkts;
+};
+
struct aoedev {
struct aoedev *next;
- unsigned char addr[6]; /* remote mac addr */
- ushort flags;
ulong sysminor;
ulong aoemajor;
- ulong aoeminor;
+ u16 aoeminor;
+ u16 flags;
u16 nopen; /* (bd_openers isn't available without sleeping) */
- u16 lasttag; /* last tag sent */
u16 rttavg; /* round trip average of requests/responses */
u16 mintimer;
u16 fw_ver; /* version of blade's firmware */
- u16 maxbcnt;
struct work_struct work;/* disk create work struct */
struct gendisk *gd;
request_queue_t blkq;
@@ -143,15 +166,15 @@ struct aoedev {
sector_t ssize;
struct timer_list timer;
spinlock_t lock;
- struct net_device *ifp; /* interface ed is attached to */
struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
struct sk_buff *sendq_tl;
mempool_t *bufpool; /* for deadlock-free Buf allocation */
struct list_head bufq; /* queue of bios to work on */
struct buf *inprocess; /* the one we're currently working on */
- ushort lostjumbo;
- ushort nframes; /* number of frames below */
- struct frame *frames;
+ struct aoetgt *targets[NTARGETS];
+ struct aoetgt **tgt; /* target in use when working */
+ struct aoetgt **htgt; /* target needing rexmit assistance */
+//int ios[64];
};


@@ -169,12 +192,13 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
void aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(struct work_struct *);
-struct sk_buff *new_skb(ulong);
+void aoecmd_cleanslate(struct aoedev *);
+struct sk_buff *aoecmd_ata_id(struct aoedev *);

int aoedev_init(void);
void aoedev_exit(void);
struct aoedev *aoedev_by_aoeaddr(int maj, int min);
-struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt);
+struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
void aoedev_downdev(struct aoedev *d);
int aoedev_isbusy(struct aoedev *d);

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 478489c..f6773ab 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -21,22 +21,55 @@ static ssize_t aoedisk_show_state(struct gendisk * disk, char *page)
return snprintf(page, PAGE_SIZE,
"%s%s\n",
(d->flags & DEVFL_UP) ? "up" : "down",
- (d->flags & DEVFL_PAUSE) ? ",paused" :
+ (d->flags & DEVFL_KICKME) ? ",kickme" :
(d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
/* I'd rather see nopen exported so we can ditch closewait */
}
static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page)
{
struct aoedev *d = disk->private_data;
+ struct aoetgt *t = d->targets[0];

+ if (t == NULL)
+ return snprintf(page, PAGE_SIZE, "none\n");
return snprintf(page, PAGE_SIZE, "%012llx\n",
- (unsigned long long)mac_addr(d->addr));
+ (unsigned long long)mac_addr(t->addr));
}
static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page)
{
struct aoedev *d = disk->private_data;
+ struct net_device *nds[8], **nd, **nnd, **ne;
+ struct aoetgt **t, **te;
+ struct aoeif *ifp, *e;
+ char *p;
+
+ memset(nds, 0, ARRAY_SIZE(nds));
+ nd = nds;
+ ne = nd + ARRAY_SIZE(nds);
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t<te && *t; t++) {
+ ifp = (*t)->ifs;
+ e = ifp + NAOEIFS;
+ for (; ifp<e && ifp->nd; ifp++) {
+ for (nnd=nds; nnd<nd; nnd++)
+ if (*nnd == ifp->nd)
+ break;
+ if (nnd == nd)
+ if (nd != ne)
+ *nd++ = ifp->nd;
+ }
+ }

- return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name);
+ ne = nd;
+ nd = nds;
+ if (*nd == NULL)
+ return snprintf(page, PAGE_SIZE, "none\n");
+ for (p=page; nd<ne; nd++)
+ p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
+ p == page ? "" : ",", (*nd)->name);
+ p += snprintf(p, PAGE_SIZE - (p-page), "\n");
+ return p-page;
}
/* firmware version */
static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page)
@@ -134,7 +167,23 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)

blk_queue_bounce(q, &bio);

+ if (bio == NULL) {
+ printk(KERN_ERR "aoe: bio is NULL\n");
+ BUG();
+ return 0;
+ }
d = bio->bi_bdev->bd_disk->private_data;
+ if (d == NULL) {
+ printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
+ BUG();
+ bio_endio(bio, bio->bi_size, -ENXIO);
+ return 0;
+ } else if (bio->bi_io_vec == NULL) {
+ printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
+ BUG();
+ bio_endio(bio, bio->bi_size, -ENXIO);
+ return 0;
+ }
buf = mempool_alloc(d->bufpool, GFP_NOIO);
if (buf == NULL) {
printk(KERN_INFO "aoe: buf allocation failure\n");
@@ -143,14 +192,14 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
}
memset(buf, 0, sizeof(*buf));
INIT_LIST_HEAD(&buf->bufs);
- buf->start_time = jiffies;
+ buf->stime = jiffies;
buf->bio = bio;
buf->resid = bio->bi_size;
buf->sector = bio->bi_sector;
buf->bv = &bio->bi_io_vec[bio->bi_idx];
- WARN_ON(buf->bv->bv_len == 0);
buf->bv_resid = buf->bv->bv_len;
- buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+ WARN_ON(buf->bv_resid == 0);
+ buf->bv_off = buf->bv->bv_offset;

spin_lock_irqsave(&d->lock, flags);

@@ -234,7 +283,7 @@ aoeblk_gdalloc(void *vp)
gd->fops = &aoe_bdops;
gd->private_data = d;
gd->capacity = d->ssize;
- snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld",
+ snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
d->aoemajor, d->aoeminor);

gd->queue = &d->blkq;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 39e563e..9026c44 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@

#include <linux/hdreg.h>
#include <linux/blkdev.h>
+#include <linux/delay.h>
#include "aoe.h"

enum {
@@ -68,6 +69,7 @@ revalidate(const char __user *str, size_t size)
int major, minor, n;
ulong flags;
struct aoedev *d;
+ struct sk_buff *skb;
char buf[16];

if (size >= sizeof buf)
@@ -85,13 +87,17 @@ revalidate(const char __user *str, size_t size)
d = aoedev_by_aoeaddr(major, minor);
if (!d)
return -EINVAL;
-
spin_lock_irqsave(&d->lock, flags);
- d->flags &= ~DEVFL_MAXBCNT;
- d->flags |= DEVFL_PAUSE;
+ aoecmd_cleanslate(d);
+loop:
+ skb = aoecmd_ata_id(d);
spin_unlock_irqrestore(&d->lock, flags);
+ if (!skb && !msleep_interruptible(200)) {
+ spin_lock_irqsave(&d->lock, flags);
+ goto loop;
+ }
+ aoenet_xmit(skb);
aoecmd_cfg(major, minor);
-
return 0;
}

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 01fbdd3..8a3a973 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -9,18 +9,15 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/genhd.h>
+#include <linux/moduleparam.h>
#include <asm/unaligned.h>
#include "aoe.h"

-#define TIMERTICK (HZ / 10)
-#define MINTIMER (2 * TIMERTICK)
-#define MAXTIMER (HZ << 1)
-
static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

-struct sk_buff *
+static struct sk_buff *
new_skb(ulong len)
{
struct sk_buff *skb;
@@ -42,12 +39,12 @@ new_skb(ulong len)
}

static struct frame *
-getframe(struct aoedev *d, int tag)
+getframe(struct aoetgt *t, int tag)
{
struct frame *f, *e;

- f = d->frames;
- e = f + d->nframes;
+ f = t->frames;
+ e = f + t->nframes;
for (; f<e; f++)
if (f->tag == tag)
return f;
@@ -60,21 +57,21 @@ getframe(struct aoedev *d, int tag)
* This driver reserves tag -1 to mean "unused frame."
*/
static int
-newtag(struct aoedev *d)
+newtag(struct aoetgt *t)
{
register ulong n;

n = jiffies & 0xffff;
- return n |= (++d->lasttag & 0x7fff) << 16;
+ return n |= (++t->lasttag & 0x7fff) << 16;
}

static int
-aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
+aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
{
- u32 host_tag = newtag(d);
+ u32 host_tag = newtag(t);

- memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
- memcpy(h->dst, d->addr, sizeof h->dst);
+ memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+ memcpy(h->dst, t->addr, sizeof h->dst);
h->type = __constant_cpu_to_be16(ETH_P_AOE);
h->verfl = AOE_HVER;
h->major = cpu_to_be16(d->aoemajor);
@@ -97,42 +94,101 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
}

static void
-aoecmd_ata_rw(struct aoedev *d, struct frame *f)
+ifrotate(struct aoetgt *t)
+{
+ t->ifp++;
+ if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
+ t->ifp = t->ifs;
+ if (t->ifp->nd == NULL) {
+ printk(KERN_INFO "aoe: no interface to rotate to\n");
+ BUG();
+ }
+}
+
+static struct frame *
+freeframe(struct aoedev *d)
{
+ struct frame *f, *e;
+ struct aoetgt **t;
+ ulong n;
+
+ if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
+ printk(KERN_ERR "aoe: NULL TARGETS!\n");
+ return NULL;
+ }
+ t = d->targets;
+ do {
+ if (t != d->htgt)
+ if ((*t)->ifp->nd)
+ if ((*t)->nout < (*t)->maxout) {
+ n = (*t)->nframes;
+ f = (*t)->frames;
+ e = f + n;
+ for (; f<e; f++) {
+ if (f->tag != FREETAG)
+ continue;
+ if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) {
+ n--;
+ continue;
+ }
+ skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
+ skb_trim(f->skb, 0);
+ d->tgt = t;
+ ifrotate(*t);
+ return f;
+ }
+ if (n == 0) /* slow polling network card */
+ d->flags |= DEVFL_KICKME;
+ }
+ t++;
+ } while (t < &d->targets[NTARGETS] && *t);
+ return NULL;
+}
+
+static int
+aoecmd_ata_rw(struct aoedev *d)
+{
+ struct frame *f;
struct aoe_hdr *h;
struct aoe_atahdr *ah;
struct buf *buf;
+ struct bio_vec *bv;
+ struct aoetgt *t;
struct sk_buff *skb;
ulong bcnt;
- register sector_t sector;
char writebit, extbit;

writebit = 0x10;
extbit = 0x4;

+ f = freeframe(d);
+ if (f == NULL)
+ return 0;
+ t = *d->tgt;
buf = d->inprocess;
-
- sector = buf->sector;
- bcnt = buf->bv_resid;
- if (bcnt > d->maxbcnt)
- bcnt = d->maxbcnt;
-
+ bv = buf->bv;
+ bcnt = t->ifp->maxbcnt;
+ if (bcnt == 0)
+ bcnt = DEFAULTBCNT;
+ if (bcnt > buf->bv_resid)
+ bcnt = buf->bv_resid;
/* initialize the headers & frame */
skb = f->skb;
h = aoe_hdr(skb);
ah = (struct aoe_atahdr *) (h+1);
skb_put(skb, sizeof *h + sizeof *ah);
memset(h, 0, skb->len);
- f->tag = aoehdr_atainit(d, h);
+ f->tag = aoehdr_atainit(d, t, h);
+ t->nout++;
f->waited = 0;
f->buf = buf;
- f->bufaddr = buf->bufaddr;
+ f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
f->bcnt = bcnt;
- f->lba = sector;
+ f->lba = buf->sector;

/* set up ata header */
ah->scnt = bcnt >> 9;
- put_lba(ah, sector);
+ put_lba(ah, buf->sector);
if (d->flags & DEVFL_EXT) {
ah->aflags |= AOEAFL_EXT;
} else {
@@ -140,14 +196,14 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
ah->lba3 &= 0x0f;
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
}
-
if (bio_data_dir(buf->bio) == WRITE) {
- skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
- offset_in_page(f->bufaddr), bcnt);
+ skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
ah->aflags |= AOEAFL_WRITE;
skb->len += bcnt;
skb->data_len = bcnt;
+ t->wpkts++;
} else {
+ t->rpkts++;
writebit = 0;
}

@@ -155,29 +211,29 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)

/* mark all tracking fields and load out */
buf->nframesout += 1;
- buf->bufaddr += bcnt;
+ buf->bv_off += bcnt;
buf->bv_resid -= bcnt;
-/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */
buf->resid -= bcnt;
buf->sector += bcnt >> 9;
if (buf->resid == 0) {
d->inprocess = NULL;
} else if (buf->bv_resid == 0) {
- buf->bv++;
- WARN_ON(buf->bv->bv_len == 0);
- buf->bv_resid = buf->bv->bv_len;
- buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
+ buf->bv = ++bv;
+ buf->bv_resid = bv->bv_len;
+ WARN_ON(buf->bv_resid == 0);
+ buf->bv_off = bv->bv_offset;
}

- skb->dev = d->ifp;
+ skb->dev = t->ifp->nd;
skb = skb_clone(skb, GFP_ATOMIC);
- if (skb == NULL)
- return;
- if (d->sendq_hd)
- d->sendq_tl->next = skb;
- else
- d->sendq_hd = skb;
- d->sendq_tl = skb;
+ if (skb) {
+ if (d->sendq_hd)
+ d->sendq_tl->next = skb;
+ else
+ d->sendq_hd = skb;
+ d->sendq_tl = skb;
+ }
+ return 1;
}

/* some callers cannot sleep, and they can call this function,
@@ -231,62 +287,8 @@ cont:
return sl;
}

-static struct frame *
-freeframe(struct aoedev *d)
-{
- struct frame *f, *e;
- int n = 0;
-
- f = d->frames;
- e = f + d->nframes;
- for (; f<e; f++) {
- if (f->tag != FREETAG)
- continue;
- if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
- skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
- skb_trim(f->skb, 0);
- return f;
- }
- n++;
- }
- if (n == d->nframes) /* wait for network layer */
- d->flags |= DEVFL_KICKME;
-
- return NULL;
-}
-
-/* enters with d->lock held */
-void
-aoecmd_work(struct aoedev *d)
-{
- struct frame *f;
- struct buf *buf;
-
- if (d->flags & DEVFL_PAUSE) {
- if (!aoedev_isbusy(d))
- d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
- d->aoeminor, &d->sendq_tl);
- return;
- }
-
-loop:
- f = freeframe(d);
- if (f == NULL)
- return;
- if (d->inprocess == NULL) {
- if (list_empty(&d->bufq))
- return;
- buf = container_of(d->bufq.next, struct buf, bufs);
- list_del(d->bufq.next);
-/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */
- d->inprocess = buf;
- }
- aoecmd_ata_rw(d, f);
- goto loop;
-}
-
static void
-rexmit(struct aoedev *d, struct frame *f)
+resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
{
struct sk_buff *skb;
struct aoe_hdr *h;
@@ -294,41 +296,44 @@ rexmit(struct aoedev *d, struct frame *f)
char buf[128];
u32 n;

- n = newtag(d);
+ ifrotate(t);
+ n = newtag(t);
+ skb = f->skb;
+ h = aoe_hdr(skb);
+ ah = (struct aoe_atahdr *) (h+1);

snprintf(buf, sizeof buf,
- "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
- "retransmit",
- d->aoemajor, d->aoeminor, f->tag, jiffies, n);
+ "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%012llx d=%012llx nout=%d\n",
+ "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
+ mac_addr(h->src), mac_addr(h->dst), t->nout);
aoechr_error(buf);

- skb = f->skb;
- h = aoe_hdr(skb);
- ah = (struct aoe_atahdr *) (h+1);
f->tag = n;
h->tag = cpu_to_be32(n);
- memcpy(h->dst, d->addr, sizeof h->dst);
- memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
-
- n = DEFAULTBCNT / 512;
- if (ah->scnt > n) {
- ah->scnt = n;
+ memcpy(h->dst, t->addr, sizeof h->dst);
+ memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
+
+ switch (ah->cmdstat) {
+ default:
+ break;
+ case WIN_READ:
+ case WIN_READ_EXT:
+ case WIN_WRITE:
+ case WIN_WRITE_EXT:
+ put_lba(ah, f->lba);
+
+ n = f->bcnt;
+ if (n > DEFAULTBCNT)
+ n = DEFAULTBCNT;
+ ah->scnt = n >> 9;
if (ah->aflags & AOEAFL_WRITE) {
skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
- offset_in_page(f->bufaddr), DEFAULTBCNT);
- skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
- skb->data_len = DEFAULTBCNT;
- }
- if (++d->lostjumbo > (d->nframes << 1))
- if (d->maxbcnt != DEFAULTBCNT) {
- printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
- d->aoemajor, d->aoeminor, d->ifp->name);
- d->maxbcnt = DEFAULTBCNT;
- d->flags |= DEVFL_MAXBCNT;
+ offset_in_page(f->bufaddr), n);
+ skb->len = sizeof *h + sizeof *ah + n;
+ skb->data_len = n;
}
}
-
- skb->dev = d->ifp;
+ skb->dev = t->ifp->nd;
skb = skb_clone(skb, GFP_ATOMIC);
if (skb == NULL)
return;
@@ -351,10 +356,83 @@ tsince(int tag)
return n;
}

+static struct aoeif *
+getif(struct aoetgt *t, struct net_device *nd)
+{
+ struct aoeif *p, *e;
+
+ p = t->ifs;
+ e = p + NAOEIFS;
+ for (; p<e; p++)
+ if (p->nd == nd)
+ return p;
+ return NULL;
+}
+
+static struct aoeif *
+addif(struct aoetgt *t, struct net_device *nd)
+{
+ struct aoeif *p;
+
+ p = getif(t, NULL);
+ if (!p)
+ return NULL;
+ p->nd = nd;
+ p->maxbcnt = DEFAULTBCNT;
+ p->lost = p->lostjumbo = 0;
+ return p;
+}
+
+static void
+ejectif(struct aoetgt *t, struct aoeif *ifp)
+{
+ struct aoeif *e;
+ ulong n;
+
+ e = t->ifs + NAOEIFS - 1;
+ n = (e - ifp) * sizeof *ifp;
+ memmove(ifp, ifp+1, n);
+ e->nd = NULL;
+}
+
+static int
+sthtith(struct aoedev *d)
+{
+ struct frame *f, *e, *nf;
+ struct sk_buff *skb;
+ struct aoetgt *ht = *d->htgt;
+
+ f = ht->frames;
+ e = f + ht->nframes;
+ for (; f<e; f++) {
+ if (f->tag == FREETAG)
+ continue;
+ nf = freeframe(d);
+ if (!nf)
+ return 0;
+ skb = nf->skb;
+ *nf = *f;
+ f->skb = skb;
+ f->tag = FREETAG;
+ nf->waited = 0;
+ ht->nout--;
+ (*d->tgt)->nout++;
+ resend(d, *d->tgt, nf);
+ }
+ /* he's clean, he's useless. take away his interfaces */
+ memset(ht->ifs, 0, sizeof ht->ifs);
+ d->htgt = NULL;
+ return 1;
+}
+
+#define ATASCNT(raw) (((struct aoe_atahdr *)(((struct aoe_hdr *)raw)+1))->scnt)
+
static void
rexmit_timer(ulong vp)
{
struct aoedev *d;
+ struct aoetgt *t, **tt, **te;
+ struct aoeif *ifp;
struct frame *f, *e;
struct sk_buff *sl;
register long timeout;
@@ -373,31 +451,75 @@ rexmit_timer(ulong vp)
spin_unlock_irqrestore(&d->lock, flags);
return;
}
- f = d->frames;
- e = f + d->nframes;
- for (; f<e; f++) {
- if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt<te && *tt; tt++) {
+ t = *tt;
+ f = t->frames;
+ e = f + t->nframes;
+ for (; f<e; f++) {
+ if (f->tag == FREETAG
+ || tsince(f->tag) < timeout)
+ continue;
n = f->waited += timeout;
n /= HZ;
- if (n > aoe_deadsecs) { /* waited too long for response */
+ if (n > aoe_deadsecs) { /* waited too long. device failure. */
aoedev_downdev(d);
break;
}
- rexmit(d, f);
+
+ if (n > HELPWAIT) /* see if another target can help */
+ if (tt != d->targets || d->targets[1])
+ d->htgt = tt;
+
+ if (t->nout == t->maxout) {
+ if (t->maxout > 1)
+ t->maxout--;
+ t->lastwadj = jiffies;
+ }
+
+ ifp = getif(t, f->skb->dev);
+ if (ifp && ++ifp->lost > (t->nframes << 1))
+ if (ifp != t->ifs || t->ifs[1].nd) {
+ ejectif(t, ifp);
+ ifp = NULL;
+ }
+
+ if (ATASCNT(aoe_hdr(f->skb)) > DEFAULTBCNT / 512)
+ if (ifp && ++ifp->lostjumbo > (t->nframes << 1))
+ if (ifp->maxbcnt != DEFAULTBCNT) {
+ printk(KERN_INFO "aoe: e%ld.%d: too many lost jumbo on %s:%012llx - "
+ "falling back to %d frames.\n",
+ d->aoemajor, d->aoeminor,
+ ifp->nd->name, mac_addr(t->addr),
+ DEFAULTBCNT);
+ ifp->maxbcnt = 0;
+ }
+ resend(d, t, f);
+ }
+
+ /* window check */
+ if (t->nout == t->maxout)
+ if (t->maxout < t->nframes)
+ if ((jiffies - t->lastwadj)/HZ > 10) {
+ t->maxout++;
+ t->lastwadj = jiffies;
}
}
- if (d->flags & DEVFL_KICKME) {
+
+ if (d->sendq_hd) {
+ n = d->rttavg <<= 1;
+ if (n > MAXTIMER)
+ d->rttavg = MAXTIMER;
+ }
+
+ if (d->flags & DEVFL_KICKME || d->htgt) {
d->flags &= ~DEVFL_KICKME;
aoecmd_work(d);
}

sl = d->sendq_hd;
d->sendq_hd = d->sendq_tl = NULL;
- if (sl) {
- n = d->rttavg <<= 1;
- if (n > MAXTIMER)
- d->rttavg = MAXTIMER;
- }

d->timer.expires = jiffies + TIMERTICK;
add_timer(&d->timer);
@@ -407,6 +529,25 @@ rexmit_timer(ulong vp)
aoenet_xmit(sl);
}

+/* enters with d->lock held */
+void
+aoecmd_work(struct aoedev *d)
+{
+ struct buf *buf;
+loop:
+ if (d->htgt && !sthtith(d))
+ return;
+ if (d->inprocess == NULL) {
+ if (list_empty(&d->bufq))
+ return;
+ buf = container_of(d->bufq.next, struct buf, bufs);
+ list_del(d->bufq.next);
+ d->inprocess = buf;
+ }
+ if (aoecmd_ata_rw(d))
+ goto loop;
+}
+
/* this function performs work that has been deferred until sleeping is OK
*/
void
@@ -439,7 +580,7 @@ aoecmd_sleepwork(struct work_struct *work)
}

static void
-ataid_complete(struct aoedev *d, unsigned char *id)
+ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
u64 ssize;
u16 n;
@@ -475,7 +616,7 @@ ataid_complete(struct aoedev *d, unsigned char *id)

if (d->ssize != ssize)
printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
- (unsigned long long)mac_addr(d->addr),
+ (unsigned long long)mac_addr(t->addr),
d->aoemajor, d->aoeminor,
d->fw_ver, (long long)ssize);
d->ssize = ssize;
@@ -483,15 +624,8 @@ ataid_complete(struct aoedev *d, unsigned char *id)
if (d->gd != NULL) {
d->gd->capacity = ssize;
d->flags |= DEVFL_NEWSIZE;
- } else {
- if (d->flags & DEVFL_GDALLOC) {
- printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n",
- d->aoemajor, d->aoeminor,
- "it's already on! This shouldn't happen.\n");
- return;
- }
+ } else
d->flags |= DEVFL_GDALLOC;
- }
schedule_work(&d->work);
}

@@ -518,6 +652,31 @@ calc_rttavg(struct aoedev *d, int rtt)
d->rttavg += n >> 2;
}

+static struct aoetgt *
+gettgt(struct aoedev *d, char *addr)
+{
+ struct aoetgt **t, **e;
+
+ t = d->targets;
+ e = t + NTARGETS;
+ for(; t<e && *t; t++)
+ if (memcmp((*t)->addr, addr, sizeof (*t)->addr) == 0)
+ return *t;
+ return NULL;
+}
+
+static inline void
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
+{
+ unsigned long n_sect = bio->bi_size >> 9;
+ const int rw = bio_data_dir(bio);
+
+ disk_stat_inc(disk, ios[rw]);
+ disk_stat_add(disk, ticks[rw], duration);
+ disk_stat_add(disk, sectors[rw], n_sect);
+ disk_stat_add(disk, io_ticks, duration);
+}
+
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
@@ -527,6 +686,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
struct frame *f;
struct buf *buf;
struct sk_buff *sl;
+ struct aoetgt *t;
+ struct aoeif *ifp;
register long n;
ulong flags;
char ebuf[128];
@@ -546,7 +707,15 @@ aoecmd_ata_rsp(struct sk_buff *skb)
spin_lock_irqsave(&d->lock, flags);

n = be32_to_cpu(get_unaligned(&hin->tag));
- f = getframe(d, n);
+ t = gettgt(d, hin->src);
+ if (t == NULL) {
+ printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
+ d->aoemajor, d->aoeminor,
+ (unsigned long long) mac_addr(hin->src));
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ f = getframe(t, n);
if (f == NULL) {
calc_rttavg(d, -tsince(n));
spin_unlock_irqrestore(&d->lock, flags);
@@ -568,8 +737,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
ahout = (struct aoe_atahdr *) (hout+1);
buf = f->buf;

- if (ahout->cmdstat == WIN_IDENTIFY)
- d->flags &= ~DEVFL_PAUSE;
if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
printk(KERN_ERR
"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
@@ -578,14 +745,16 @@ aoecmd_ata_rsp(struct sk_buff *skb)
if (buf)
buf->flags |= BUFFL_FAIL;
} else {
+ if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
+ d->htgt = NULL;
n = ahout->scnt << 9;
switch (ahout->cmdstat) {
case WIN_READ:
case WIN_READ_EXT:
if (skb->len - sizeof *hin - sizeof *ahin < n) {
printk(KERN_ERR
- "aoe: runt data size in read. skb->len=%d\n",
- skb->len);
+ "aoe: %s. skb->len=%d need=%ld\n",
+ "runt data size in read", skb->len, n);
/* fail frame f? just returning will rexmit. */
spin_unlock_irqrestore(&d->lock, flags);
return;
@@ -593,32 +762,18 @@ aoecmd_ata_rsp(struct sk_buff *skb)
memcpy(f->bufaddr, ahin+1, n);
case WIN_WRITE:
case WIN_WRITE_EXT:
+ ifp = getif(t, skb->dev);
+ if (ifp) {
+ ifp->lost = 0;
+ if (n > DEFAULTBCNT)
+ ifp->lostjumbo = 0;
+ }
if (f->bcnt -= n) {
- skb = f->skb;
+ f->lba += n >> 9;
f->bufaddr += n;
- put_lba(ahout, f->lba += ahout->scnt);
- n = f->bcnt;
- if (n > DEFAULTBCNT)
- n = DEFAULTBCNT;
- ahout->scnt = n >> 9;
- if (ahout->aflags & AOEAFL_WRITE) {
- skb_fill_page_desc(skb, 0,
- virt_to_page(f->bufaddr),
- offset_in_page(f->bufaddr), n);
- skb->len = sizeof *hout + sizeof *ahout + n;
- skb->data_len = n;
- }
- f->tag = newtag(d);
- hout->tag = cpu_to_be32(f->tag);
- skb->dev = d->ifp;
- skb = skb_clone(skb, GFP_ATOMIC);
- spin_unlock_irqrestore(&d->lock, flags);
- if (skb)
- aoenet_xmit(skb);
- return;
+ resend(d, t, f);
+ goto xmit;
}
- if (n > DEFAULTBCNT)
- d->lostjumbo = 0;
break;
case WIN_IDENTIFY:
if (skb->len - sizeof *hin - sizeof *ahin < 512) {
@@ -628,7 +783,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
spin_unlock_irqrestore(&d->lock, flags);
return;
}
- ataid_complete(d, (char *) (ahin+1));
+ ataid_complete(d, t, (char *) (ahin+1));
break;
default:
printk(KERN_INFO
@@ -639,28 +794,19 @@ aoecmd_ata_rsp(struct sk_buff *skb)
}
}

- if (buf) {
- buf->nframesout -= 1;
- if (buf->nframesout == 0 && buf->resid == 0) {
- unsigned long duration = jiffies - buf->start_time;
- unsigned long n_sect = buf->bio->bi_size >> 9;
- struct gendisk *disk = d->gd;
- const int rw = bio_data_dir(buf->bio);
-
- disk_stat_inc(disk, ios[rw]);
- disk_stat_add(disk, ticks[rw], duration);
- disk_stat_add(disk, sectors[rw], n_sect);
- disk_stat_add(disk, io_ticks, duration);
- n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
- bio_endio(buf->bio, buf->bio->bi_size, n);
- mempool_free(buf, d->bufpool);
- }
+ if (buf && --buf->nframesout == 0 && buf->resid == 0) {
+ diskstats(d->gd, buf->bio, jiffies - buf->stime);
+ n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
+ bio_endio(buf->bio, buf->bio->bi_size, n);
+ mempool_free(buf, d->bufpool);
}

f->buf = NULL;
f->tag = FREETAG;
+ t->nout--;

aoecmd_work(d);
+xmit:
sl = d->sendq_hd;
d->sendq_hd = d->sendq_tl = NULL;

@@ -678,23 +824,20 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
aoenet_xmit(sl);
}

-/*
- * Since we only call this in one place (and it only prepares one frame)
- * we just return the skb. Usually we'd chain it up to the aoedev sendq.
- */
-static struct sk_buff *
+struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
struct aoe_hdr *h;
struct aoe_atahdr *ah;
struct frame *f;
struct sk_buff *skb;
+ struct aoetgt *t;

f = freeframe(d);
- if (f == NULL) {
- printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n");
+ if (f == NULL)
return NULL;
- }
+
+ t = *d->tgt;

/* initialize the headers & frame */
skb = f->skb;
@@ -702,7 +845,8 @@ aoecmd_ata_id(struct aoedev *d)
ah = (struct aoe_atahdr *) (h+1);
skb_put(skb, sizeof *h + sizeof *ah);
memset(h, 0, skb->len);
- f->tag = aoehdr_atainit(d, h);
+ f->tag = aoehdr_atainit(d, t, h);
+ t->nout++;
f->waited = 0;

/* set up ata header */
@@ -710,7 +854,7 @@ aoecmd_ata_id(struct aoedev *d)
ah->cmdstat = WIN_IDENTIFY;
ah->lba3 = 0xa0;

- skb->dev = d->ifp;
+ skb->dev = t->ifp->nd;

d->rttavg = MAXTIMER;
d->timer.function = rexmit_timer;
@@ -718,12 +862,66 @@ aoecmd_ata_id(struct aoedev *d)
return skb_clone(skb, GFP_ATOMIC);
}

+static struct aoetgt *
+addtgt(struct aoedev *d, char *addr, ulong nframes)
+{
+ struct aoetgt *t, **tt, **te;
+ struct frame *f, *e;
+
+ tt = d->targets;
+ te = tt + NTARGETS;
+ for (; tt<te; tt++) {
+ if (*tt == NULL)
+ break;
+ else if (memcmp((*tt)->addr, addr, 6) > 0) {
+ memmove(tt+1, tt, (void *)te-(void *)(tt+1));
+ break;
+ }
+ }
+ if (tt == te)
+ return NULL;
+
+ t = kcalloc(1, sizeof *t, GFP_ATOMIC);
+ f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
+ switch (!t || !f) {
+ case 0:
+ t->nframes = nframes;
+ t->frames = f;
+ e = f + nframes;
+ for (; f<e; f++) {
+ f->tag = FREETAG;
+ f->skb = new_skb(ETH_ZLEN);
+ if (!f->skb)
+ break;
+ }
+ if (f == e)
+ break;
+ while (f > t->frames) {
+ f--;
+ dev_kfree_skb(f->skb);
+ }
+ default:
+ if (f)
+ kfree(f);
+ if (t)
+ kfree(t);
+ return NULL;
+ }
+
+ memcpy(t->addr, addr, sizeof t->addr);
+ t->ifp = t->ifs;
+ t->maxout = t->nframes;
+ return *tt = t;
+}
+
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
struct aoedev *d;
struct aoe_hdr *h;
struct aoe_cfghdr *ch;
+ struct aoetgt *t;
+ struct aoeif *ifp;
ulong flags, sysminor, aoemajor;
struct sk_buff *sl;
enum { MAXFRAMES = 16 };
@@ -754,7 +952,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
if (n > MAXFRAMES) /* keep it reasonable */
n = MAXFRAMES;

- d = aoedev_by_sysminor_m(sysminor, n);
+ d = aoedev_by_sysminor_m(sysminor);
if (d == NULL) {
printk(KERN_INFO "aoe: device sysminor_m failure\n");
return;
@@ -762,38 +960,70 @@ aoecmd_cfg_rsp(struct sk_buff *skb)

spin_lock_irqsave(&d->lock, flags);

- /* permit device to migrate mac and network interface */
- d->ifp = skb->dev;
- memcpy(d->addr, h->src, sizeof d->addr);
- if (!(d->flags & DEVFL_MAXBCNT)) {
- n = d->ifp->mtu;
+ t = gettgt(d, h->src);
+ if (!t) {
+ t = addtgt(d, h->src, n);
+ if (!t) {
+ printk(KERN_INFO "aoe: device addtgt failure; too many targets?\n");
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ }
+ ifp = getif(t, skb->dev);
+ if (!ifp) {
+ if (!(ifp = addif(t, skb->dev))) {
+ printk(KERN_INFO "aoe: device addif failure; too many interfaces?\n");
+ spin_unlock_irqrestore(&d->lock, flags);
+ return;
+ }
+ }
+ if (ifp->maxbcnt) {
+ n = ifp->nd->mtu;
n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
n /= 512;
if (n > ch->scnt)
n = ch->scnt;
n = n ? n * 512 : DEFAULTBCNT;
- if (n != d->maxbcnt) {
+ if (n != ifp->maxbcnt) {
printk(KERN_INFO
- "aoe: e%ld.%ld: setting %d byte data frames on %s\n",
- d->aoemajor, d->aoeminor, n, d->ifp->name);
- d->maxbcnt = n;
+ "aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n",
+ d->aoemajor, d->aoeminor, n, ifp->nd->name,
+ (unsigned long long) mac_addr(t->addr));
+ ifp->maxbcnt = n;
}
}

/* don't change users' perspective */
- if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
+ if (d->nopen) {
spin_unlock_irqrestore(&d->lock, flags);
return;
}
- d->flags |= DEVFL_PAUSE; /* force pause */
- d->mintimer = MINTIMER;
d->fw_ver = be16_to_cpu(ch->fwver);

- /* check for already outstanding ataid */
- sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
+ sl = aoecmd_ata_id(d);

spin_unlock_irqrestore(&d->lock, flags);

aoenet_xmit(sl);
}

+void
+aoecmd_cleanslate(struct aoedev *d)
+{
+ struct aoetgt **t, **te;
+ struct aoeif *p, *e;
+
+ d->mintimer = MINTIMER;
+
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t<te && *t; t++) {
+ (*t)->maxout = (*t)->nframes;
+ p = (*t)->ifs;
+ e = p + NAOEIFS;
+ for (; p<e; p++) {
+ p->lostjumbo = p->lost = 0;
+ p->maxbcnt = DEFAULTBCNT;
+ }
+ }
+}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 05a9719..04c75b4 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,15 +15,18 @@ static spinlock_t devlist_lock;
int
aoedev_isbusy(struct aoedev *d)
{
+ struct aoetgt **t, **te;
struct frame *f, *e;

- f = d->frames;
- e = f + d->nframes;
- do {
- if (f->tag != FREETAG)
- return 1;
- } while (++f < e);
-
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t<te && *t; t++) {
+ f = (*t)->frames;
+ e = f + (*t)->nframes;
+ for (; f<e; f++)
+ if (f->tag != FREETAG)
+ return 1;
+ }
return 0;
}

@@ -55,75 +58,41 @@ dummy_timer(ulong vp)
add_timer(&d->timer);
}

-/* called with devlist lock held */
-static struct aoedev *
-aoedev_newdev(ulong nframes)
-{
- struct aoedev *d;
- struct frame *f, *e;
-
- d = kzalloc(sizeof *d, GFP_ATOMIC);
- f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
- switch (!d || !f) {
- case 0:
- d->nframes = nframes;
- d->frames = f;
- e = f + nframes;
- for (; f<e; f++) {
- f->tag = FREETAG;
- f->skb = new_skb(ETH_ZLEN);
- if (!f->skb)
- break;
- }
- if (f == e)
- break;
- while (f > d->frames) {
- f--;
- dev_kfree_skb(f->skb);
- }
- default:
- if (f)
- kfree(f);
- if (d)
- kfree(d);
- return NULL;
- }
- INIT_WORK(&d->work, aoecmd_sleepwork);
- spin_lock_init(&d->lock);
- init_timer(&d->timer);
- d->timer.data = (ulong) d;
- d->timer.function = dummy_timer;
- d->timer.expires = jiffies + HZ;
- add_timer(&d->timer);
- d->bufpool = NULL; /* defer to aoeblk_gdalloc */
- INIT_LIST_HEAD(&d->bufq);
- d->next = devlist;
- devlist = d;
-
- return d;
-}
-
void
aoedev_downdev(struct aoedev *d)
{
+ struct aoetgt **t, **te;
struct frame *f, *e;
struct buf *buf;
struct bio *bio;

- f = d->frames;
- e = f + d->nframes;
- for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) {
- if (f->tag == FREETAG || f->buf == NULL)
- continue;
- buf = f->buf;
- bio = buf->bio;
- if (--buf->nframesout == 0) {
- mempool_free(buf, d->bufpool);
- bio_endio(bio, bio->bi_size, -EIO);
+ t = d->targets;
+ te = t + NTARGETS;
+ for (; t<te && *t; t++) {
+ f = (*t)->frames;
+ e = f + (*t)->nframes;
+ for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) {
+ if (f->tag == FREETAG || f->buf == NULL)
+ continue;
+ buf = f->buf;
+ bio = buf->bio;
+ if (--buf->nframesout == 0)
+ if (buf != d->inprocess) {
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, bio->bi_size, -EIO);
+ }
}
- skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
+ (*t)->maxout = (*t)->nframes;
+ (*t)->nout = 0;
+ }
+ buf = d->inprocess;
+ if (buf) {
+ bio = buf->bio;
+ mempool_free(buf, d->bufpool);
+ bio_endio(bio, bio->bi_size, -EIO);
}
d->inprocess = NULL;
+ d->htgt = NULL;

while (!list_empty(&d->bufq)) {
buf = container_of(d->bufq.next, struct buf, bufs);
@@ -136,12 +105,12 @@ aoedev_downdev(struct aoedev *d)
if (d->gd)
d->gd->capacity = 0;

- d->flags &= ~(DEVFL_UP | DEVFL_PAUSE);
+ d->flags &= ~DEVFL_UP;
}

/* find it or malloc it */
struct aoedev *
-aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt)
+aoedev_by_sysminor_m(ulong sysminor)
{
struct aoedev *d;
ulong flags;
@@ -151,40 +120,60 @@ aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt)
for (d=devlist; d; d=d->next)
if (d->sysminor == sysminor)
break;
-
- if (d == NULL) {
- d = aoedev_newdev(bufcnt);
- if (d == NULL) {
- spin_unlock_irqrestore(&devlist_lock, flags);
- printk(KERN_INFO "aoe: aoedev_newdev failure.\n");
- return NULL;
- }
- d->sysminor = sysminor;
- d->aoemajor = AOEMAJOR(sysminor);
- d->aoeminor = AOEMINOR(sysminor);
+ if (d || !(d = kcalloc(1, sizeof *d, GFP_ATOMIC))) {
+ spin_unlock_irqrestore(&devlist_lock, flags);
+ return d;
}
+ INIT_WORK(&d->work, aoecmd_sleepwork);
+ spin_lock_init(&d->lock);
+ init_timer(&d->timer);
+ d->timer.data = (ulong) d;
+ d->timer.function = dummy_timer;
+ d->timer.expires = jiffies + HZ;
+ add_timer(&d->timer);
+ d->bufpool = NULL; /* defer to aoeblk_gdalloc */
+ d->tgt = d->targets;
+ INIT_LIST_HEAD(&d->bufq);
+ d->sysminor = sysminor;
+ d->aoemajor = AOEMAJOR(sysminor);
+ d->aoeminor = AOEMINOR(sysminor);
+ d->mintimer = MINTIMER;
+ d->next = devlist;
+ devlist = d;

spin_unlock_irqrestore(&devlist_lock, flags);
return d;
}

static void
-aoedev_freedev(struct aoedev *d)
+freetgt(struct aoetgt *t)
{
struct frame *f, *e;

+ f = t->frames;
+ e = f + t->nframes;
+ for (; f<e; f++) {
+ skb_shinfo(f->skb)->nr_frags = 0;
+ dev_kfree_skb(f->skb);
+ }
+ kfree(t->frames);
+ kfree(t);
+}
+
+static void
+aoedev_freedev(struct aoedev *d)
+{
+ struct aoetgt **t, **e;
+
if (d->gd) {
aoedisk_rm_sysfs(d);
del_gendisk(d->gd);
put_disk(d->gd);
}
- f = d->frames;
- e = f + d->nframes;
- for (; f<e; f++) {
- skb_shinfo(f->skb)->nr_frags = 0;
- dev_kfree_skb(f->skb);
- }
- kfree(d->frames);
+ t = d->targets;
+ e = t + NTARGETS;
+ for (; t<e && *t; t++)
+ freetgt(*t);
if (d->bufpool)
mempool_destroy(d->bufpool);
kfree(d);
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index f9ddfda..f335099 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -133,8 +133,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
if (n > NECODES)
n = 0;
if (net_ratelimit())
- printk(KERN_ERR "aoe: error packet from %d.%d; ecode=%d '%s'\n",
- be16_to_cpu(get_unaligned(&h->major)), h->minor,
+ printk(KERN_ERR "aoe: error packet from %d.%d@%s; ecode=%d '%s'\n",
+ be16_to_cpu(get_unaligned(&h->major)), h->minor, skb->dev->name,
h->err, aoe_errlist[n]);
goto exit;
}
--
1.5.2.1

2007-06-26 19:50:08

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH 12/12] the aoeminor doesn't need a long format

On Tue, 26 Jun 2007 14:50:12 -0400 Ed L. Cashin wrote:

> The aoedev aoeminor member doesn't need a long format.

Was there a patch that changed aoeminor to an int?
Last I see is:
ulong aoeminor;
in linux-2.6.22-rc6/drivers/block/aoe/aoe.h.

If it's still ulong, you shouldn't change the printk format.

> Signed-off-by: Ed L. Cashin <[email protected]>
> ---
> drivers/block/aoe/aoeblk.c | 6 +++---
> drivers/block/aoe/aoecmd.c | 4 ++--
> 2 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
> index ccadd9a..e216fe0 100644
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -203,7 +203,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
> spin_lock_irqsave(&d->lock, flags);
>
> if ((d->flags & DEVFL_UP) == 0) {
> - printk(KERN_INFO "aoe: device %ld.%ld is not up\n",
> + printk(KERN_INFO "aoe: device %ld.%d is not up\n",
> d->aoemajor, d->aoeminor);
> spin_unlock_irqrestore(&d->lock, flags);
> mempool_free(buf, d->bufpool);
> @@ -256,7 +256,7 @@ aoeblk_gdalloc(void *vp)
>
> gd = alloc_disk(AOE_PARTITIONS);
> if (gd == NULL) {
> - printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%ld\n",
> + printk(KERN_ERR "aoe: cannot allocate disk structure for %ld.%d\n",
> d->aoemajor, d->aoeminor);
> spin_lock_irqsave(&d->lock, flags);
> d->flags &= ~DEVFL_GDALLOC;
> @@ -266,7 +266,7 @@ aoeblk_gdalloc(void *vp)
>
> d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
> if (d->bufpool == NULL) {
> - printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%ld\n",
> + printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
> d->aoemajor, d->aoeminor);
> put_disk(gd);
> spin_lock_irqsave(&d->lock, flags);
> diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
> index 9de0024..dfb1184 100644
> --- a/drivers/block/aoe/aoecmd.c
> +++ b/drivers/block/aoe/aoecmd.c
> @@ -680,7 +680,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
> }
>
> if (d->ssize != ssize)
> - printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
> + printk(KERN_INFO "aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
> mac_addr(t->addr),
> d->aoemajor, d->aoeminor,
> d->fw_ver, (long long)ssize);
> @@ -805,7 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
>
> if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
> printk(KERN_ERR
> - "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
> + "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
> ahout->cmdstat, ahin->cmdstat,
> d->aoemajor, d->aoeminor);
> if (buf)
> --

---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***

2007-06-26 20:00:42

by Ed L. Cashin

[permalink] [raw]
Subject: Re: [PATCH 12/12] the aoeminor doesn't need a long format

On Tue, Jun 26, 2007 at 12:51:07PM -0700, Randy Dunlap wrote:
> On Tue, 26 Jun 2007 14:50:12 -0400 Ed L. Cashin wrote:
>
> > The aoedev aoeminor member doesn't need a long format.
>
> Was there a patch that changed aoeminor to an int?
> Last I see is:
> ulong aoeminor;
> in linux-2.6.22-rc6/drivers/block/aoe/aoe.h.
>
> If it's still ulong, you shouldn't change the printk format.

Yes, thanks for checking. Patch 2 of 12 did change it to a u16.

--
Ed L Cashin <[email protected]>