Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760249AbXFZTPm (ORCPT ); Tue, 26 Jun 2007 15:15:42 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1759650AbXFZTPM (ORCPT ); Tue, 26 Jun 2007 15:15:12 -0400 Received: from ns1.coraid.com ([65.14.39.133]:25156 "EHLO coraid.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1759396AbXFZTPH (ORCPT ); Tue, 26 Jun 2007 15:15:07 -0400 Message-Id: <60bd316bfa72fb225cefad39569999e583f4f72e.1182883861.git.ecashin@coraid.com> In-Reply-To: <1d8423c28c48a6d26516cdc707dbcdf015a4e347.1182883861.git.ecashin@coraid.com> References: <1d8423c28c48a6d26516cdc707dbcdf015a4e347.1182883861.git.ecashin@coraid.com> From: "Ed L. Cashin" Date: Tue, 26 Jun 2007 14:50:10 -0400 Subject: [PATCH 02/12] handle multiple network paths to AoE device To: linux-kernel@vger.kernel.org Cc: Greg K-H , ecashin@coraid.com Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 38375 Lines: 1534 Handle multiple network paths to AoE device. Signed-off-by: Ed L. 
Cashin --- drivers/block/aoe/aoe.h | 58 +++-- drivers/block/aoe/aoeblk.c | 63 ++++- drivers/block/aoe/aoechr.c | 14 +- drivers/block/aoe/aoecmd.c | 660 +++++++++++++++++++++++++++++-------------- drivers/block/aoe/aoedev.c | 163 +++++------ drivers/block/aoe/aoenet.c | 4 +- 6 files changed, 630 insertions(+), 332 deletions(-) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 2ce5ce9..069f04c 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -85,10 +85,8 @@ enum { DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */ - DEVFL_PAUSE = (1<<5), + DEVFL_KICKME = (1<<5), /* slow polling network card catch */ DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ - DEVFL_MAXBCNT = (1<<7), /* d->maxbcnt is not changeable */ - DEVFL_KICKME = (1<<8), BUFFL_FAIL = 1, }; @@ -97,17 +95,24 @@ enum { DEFAULTBCNT = 2 * 512, /* 2 sectors */ NPERSHELF = 16, /* number of slots per shelf address */ FREETAG = -1, - MIN_BUFS = 8, + MIN_BUFS = 16, + NTARGETS = 8, + NAOEIFS = 8, + + TIMERTICK = HZ / 10, + MINTIMER = HZ >> 2, + MAXTIMER = HZ << 1, + HELPWAIT = 20, }; struct buf { struct list_head bufs; - ulong start_time; /* for disk stats */ + ulong stime; /* for disk stats */ ulong flags; ulong nframesout; - char *bufaddr; ulong resid; ulong bv_resid; + ulong bv_off; sector_t sector; struct bio *bio; struct bio_vec *bv; @@ -123,19 +128,37 @@ struct frame { struct sk_buff *skb; }; +struct aoeif { + struct net_device *nd; + unsigned char lost; + unsigned char lostjumbo; + ushort maxbcnt; +}; + +struct aoetgt { + unsigned char addr[6]; + ushort nframes; + struct frame *frames; + struct aoeif ifs[NAOEIFS]; + struct aoeif *ifp; /* current aoeif in use */ + ushort nout; + ushort maxout; + u16 lasttag; /* last tag sent */ + u16 useme; + ulong lastwadj; /* last window adjustment */ +int wpkts, rpkts; +}; + struct 
aoedev { struct aoedev *next; - unsigned char addr[6]; /* remote mac addr */ - ushort flags; ulong sysminor; ulong aoemajor; - ulong aoeminor; + u16 aoeminor; + u16 flags; u16 nopen; /* (bd_openers isn't available without sleeping) */ - u16 lasttag; /* last tag sent */ u16 rttavg; /* round trip average of requests/responses */ u16 mintimer; u16 fw_ver; /* version of blade's firmware */ - u16 maxbcnt; struct work_struct work;/* disk create work struct */ struct gendisk *gd; request_queue_t blkq; @@ -143,15 +166,15 @@ struct aoedev { sector_t ssize; struct timer_list timer; spinlock_t lock; - struct net_device *ifp; /* interface ed is attached to */ struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ struct sk_buff *sendq_tl; mempool_t *bufpool; /* for deadlock-free Buf allocation */ struct list_head bufq; /* queue of bios to work on */ struct buf *inprocess; /* the one we're currently working on */ - ushort lostjumbo; - ushort nframes; /* number of frames below */ - struct frame *frames; + struct aoetgt *targets[NTARGETS]; + struct aoetgt **tgt; /* target in use when working */ + struct aoetgt **htgt; /* target needing rexmit assistance */ +//int ios[64]; }; @@ -169,12 +192,13 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor); void aoecmd_ata_rsp(struct sk_buff *); void aoecmd_cfg_rsp(struct sk_buff *); void aoecmd_sleepwork(struct work_struct *); -struct sk_buff *new_skb(ulong); +void aoecmd_cleanslate(struct aoedev *); +struct sk_buff *aoecmd_ata_id(struct aoedev *); int aoedev_init(void); void aoedev_exit(void); struct aoedev *aoedev_by_aoeaddr(int maj, int min); -struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt); +struct aoedev *aoedev_by_sysminor_m(ulong sysminor); void aoedev_downdev(struct aoedev *d); int aoedev_isbusy(struct aoedev *d); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 478489c..f6773ab 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -21,22 +21,55 
@@ static ssize_t aoedisk_show_state(struct gendisk * disk, char *page) return snprintf(page, PAGE_SIZE, "%s%s\n", (d->flags & DEVFL_UP) ? "up" : "down", - (d->flags & DEVFL_PAUSE) ? ",paused" : + (d->flags & DEVFL_KICKME) ? ",kickme" : (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : ""); /* I'd rather see nopen exported so we can ditch closewait */ } static ssize_t aoedisk_show_mac(struct gendisk * disk, char *page) { struct aoedev *d = disk->private_data; + struct aoetgt *t = d->targets[0]; + if (t == NULL) + return snprintf(page, PAGE_SIZE, "none\n"); return snprintf(page, PAGE_SIZE, "%012llx\n", - (unsigned long long)mac_addr(d->addr)); + (unsigned long long)mac_addr(t->addr)); } static ssize_t aoedisk_show_netif(struct gendisk * disk, char *page) { struct aoedev *d = disk->private_data; + struct net_device *nds[8], **nd, **nnd, **ne; + struct aoetgt **t, **te; + struct aoeif *ifp, *e; + char *p; + + memset(nds, 0, ARRAY_SIZE(nds)); + nd = nds; + ne = nd + ARRAY_SIZE(nds); + t = d->targets; + te = t + NTARGETS; + for (; tifs; + e = ifp + NAOEIFS; + for (; ifpnd; ifp++) { + for (nnd=nds; nndnd) + break; + if (nnd == nd) + if (nd != ne) + *nd++ = ifp->nd; + } + } - return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name); + ne = nd; + nd = nds; + if (*nd == NULL) + return snprintf(page, PAGE_SIZE, "none\n"); + for (p=page; ndname); + p += snprintf(p, PAGE_SIZE - (p-page), "\n"); + return p-page; } /* firmware version */ static ssize_t aoedisk_show_fwver(struct gendisk * disk, char *page) @@ -134,7 +167,23 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) blk_queue_bounce(q, &bio); + if (bio == NULL) { + printk(KERN_ERR "aoe: bio is NULL\n"); + BUG(); + return 0; + } d = bio->bi_bdev->bd_disk->private_data; + if (d == NULL) { + printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n"); + BUG(); + bio_endio(bio, bio->bi_size, -ENXIO); + return 0; + } else if (bio->bi_io_vec == NULL) { + printk(KERN_ERR "aoe: bi_io_vec is NULL\n"); + BUG(); + 
bio_endio(bio, bio->bi_size, -ENXIO); + return 0; + } buf = mempool_alloc(d->bufpool, GFP_NOIO); if (buf == NULL) { printk(KERN_INFO "aoe: buf allocation failure\n"); @@ -143,14 +192,14 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) } memset(buf, 0, sizeof(*buf)); INIT_LIST_HEAD(&buf->bufs); - buf->start_time = jiffies; + buf->stime = jiffies; buf->bio = bio; buf->resid = bio->bi_size; buf->sector = bio->bi_sector; buf->bv = &bio->bi_io_vec[bio->bi_idx]; - WARN_ON(buf->bv->bv_len == 0); buf->bv_resid = buf->bv->bv_len; - buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + WARN_ON(buf->bv_resid == 0); + buf->bv_off = buf->bv->bv_offset; spin_lock_irqsave(&d->lock, flags); @@ -234,7 +283,7 @@ aoeblk_gdalloc(void *vp) gd->fops = &aoe_bdops; gd->private_data = d; gd->capacity = d->ssize; - snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld", + snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); gd->queue = &d->blkq; diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c index 39e563e..9026c44 100644 --- a/drivers/block/aoe/aoechr.c +++ b/drivers/block/aoe/aoechr.c @@ -6,6 +6,7 @@ #include #include +#include #include "aoe.h" enum { @@ -68,6 +69,7 @@ revalidate(const char __user *str, size_t size) int major, minor, n; ulong flags; struct aoedev *d; + struct sk_buff *skb; char buf[16]; if (size >= sizeof buf) @@ -85,13 +87,17 @@ revalidate(const char __user *str, size_t size) d = aoedev_by_aoeaddr(major, minor); if (!d) return -EINVAL; - spin_lock_irqsave(&d->lock, flags); - d->flags &= ~DEVFL_MAXBCNT; - d->flags |= DEVFL_PAUSE; + aoecmd_cleanslate(d); +loop: + skb = aoecmd_ata_id(d); spin_unlock_irqrestore(&d->lock, flags); + if (!skb && !msleep_interruptible(200)) { + spin_lock_irqsave(&d->lock, flags); + goto loop; + } + aoenet_xmit(skb); aoecmd_cfg(major, minor); - return 0; } diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 01fbdd3..8a3a973 100644 
--- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -9,18 +9,15 @@ #include #include #include +#include #include #include "aoe.h" -#define TIMERTICK (HZ / 10) -#define MINTIMER (2 * TIMERTICK) -#define MAXTIMER (HZ << 1) - static int aoe_deadsecs = 60 * 3; module_param(aoe_deadsecs, int, 0644); MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); -struct sk_buff * +static struct sk_buff * new_skb(ulong len) { struct sk_buff *skb; @@ -42,12 +39,12 @@ new_skb(ulong len) } static struct frame * -getframe(struct aoedev *d, int tag) +getframe(struct aoetgt *t, int tag) { struct frame *f, *e; - f = d->frames; - e = f + d->nframes; + f = t->frames; + e = f + t->nframes; for (; ftag == tag) return f; @@ -60,21 +57,21 @@ getframe(struct aoedev *d, int tag) * This driver reserves tag -1 to mean "unused frame." */ static int -newtag(struct aoedev *d) +newtag(struct aoetgt *t) { register ulong n; n = jiffies & 0xffff; - return n |= (++d->lasttag & 0x7fff) << 16; + return n |= (++t->lasttag & 0x7fff) << 16; } static int -aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) +aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h) { - u32 host_tag = newtag(d); + u32 host_tag = newtag(t); - memcpy(h->src, d->ifp->dev_addr, sizeof h->src); - memcpy(h->dst, d->addr, sizeof h->dst); + memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); + memcpy(h->dst, t->addr, sizeof h->dst); h->type = __constant_cpu_to_be16(ETH_P_AOE); h->verfl = AOE_HVER; h->major = cpu_to_be16(d->aoemajor); @@ -97,42 +94,101 @@ put_lba(struct aoe_atahdr *ah, sector_t lba) } static void -aoecmd_ata_rw(struct aoedev *d, struct frame *f) +ifrotate(struct aoetgt *t) +{ + t->ifp++; + if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL) + t->ifp = t->ifs; + if (t->ifp->nd == NULL) { + printk(KERN_INFO "aoe: no interface to rotate to\n"); + BUG(); + } +} + +static struct frame * +freeframe(struct aoedev *d) { + struct frame *f, *e; + struct aoetgt **t; + 
ulong n; + + if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ + printk(KERN_ERR "aoe: NULL TARGETS!\n"); + return NULL; + } + t = d->targets; + do { + if (t != d->htgt) + if ((*t)->ifp->nd) + if ((*t)->nout < (*t)->maxout) { + n = (*t)->nframes; + f = (*t)->frames; + e = f + n; + for (; ftag != FREETAG) + continue; + if (atomic_read(&skb_shinfo(f->skb)->dataref) != 1) { + n--; + continue; + } + skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; + skb_trim(f->skb, 0); + d->tgt = t; + ifrotate(*t); + return f; + } + if (n == 0) /* slow polling network card */ + d->flags |= DEVFL_KICKME; + } + t++; + } while (t < &d->targets[NTARGETS] && *t); + return NULL; +} + +static int +aoecmd_ata_rw(struct aoedev *d) +{ + struct frame *f; struct aoe_hdr *h; struct aoe_atahdr *ah; struct buf *buf; + struct bio_vec *bv; + struct aoetgt *t; struct sk_buff *skb; ulong bcnt; - register sector_t sector; char writebit, extbit; writebit = 0x10; extbit = 0x4; + f = freeframe(d); + if (f == NULL) + return 0; + t = *d->tgt; buf = d->inprocess; - - sector = buf->sector; - bcnt = buf->bv_resid; - if (bcnt > d->maxbcnt) - bcnt = d->maxbcnt; - + bv = buf->bv; + bcnt = t->ifp->maxbcnt; + if (bcnt == 0) + bcnt = DEFAULTBCNT; + if (bcnt > buf->bv_resid) + bcnt = buf->bv_resid; /* initialize the headers & frame */ skb = f->skb; h = aoe_hdr(skb); ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, h); + f->tag = aoehdr_atainit(d, t, h); + t->nout++; f->waited = 0; f->buf = buf; - f->bufaddr = buf->bufaddr; + f->bufaddr = page_address(bv->bv_page) + buf->bv_off; f->bcnt = bcnt; - f->lba = sector; + f->lba = buf->sector; /* set up ata header */ ah->scnt = bcnt >> 9; - put_lba(ah, sector); + put_lba(ah, buf->sector); if (d->flags & DEVFL_EXT) { ah->aflags |= AOEAFL_EXT; } else { @@ -140,14 +196,14 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) ah->lba3 &= 0x0f; ah->lba3 |= 0xe0; /* LBA bit + obsolete 
0xa0 */ } - if (bio_data_dir(buf->bio) == WRITE) { - skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), bcnt); + skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt); ah->aflags |= AOEAFL_WRITE; skb->len += bcnt; skb->data_len = bcnt; + t->wpkts++; } else { + t->rpkts++; writebit = 0; } @@ -155,29 +211,29 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) /* mark all tracking fields and load out */ buf->nframesout += 1; - buf->bufaddr += bcnt; + buf->bv_off += bcnt; buf->bv_resid -= bcnt; -/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */ buf->resid -= bcnt; buf->sector += bcnt >> 9; if (buf->resid == 0) { d->inprocess = NULL; } else if (buf->bv_resid == 0) { - buf->bv++; - WARN_ON(buf->bv->bv_len == 0); - buf->bv_resid = buf->bv->bv_len; - buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; + buf->bv = ++bv; + buf->bv_resid = bv->bv_len; + WARN_ON(buf->bv_resid == 0); + buf->bv_off = bv->bv_offset; } - skb->dev = d->ifp; + skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); - if (skb == NULL) - return; - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; + if (skb) { + if (d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; + } + return 1; } /* some callers cannot sleep, and they can call this function, @@ -231,62 +287,8 @@ cont: return sl; } -static struct frame * -freeframe(struct aoedev *d) -{ - struct frame *f, *e; - int n = 0; - - f = d->frames; - e = f + d->nframes; - for (; ftag != FREETAG) - continue; - if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) { - skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; - skb_trim(f->skb, 0); - return f; - } - n++; - } - if (n == d->nframes) /* wait for network layer */ - d->flags |= DEVFL_KICKME; - - return NULL; -} - -/* enters with d->lock held */ -void -aoecmd_work(struct aoedev *d) -{ - struct frame *f; - struct buf *buf; - - if (d->flags & DEVFL_PAUSE) { - if 
(!aoedev_isbusy(d)) - d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor, - d->aoeminor, &d->sendq_tl); - return; - } - -loop: - f = freeframe(d); - if (f == NULL) - return; - if (d->inprocess == NULL) { - if (list_empty(&d->bufq)) - return; - buf = container_of(d->bufq.next, struct buf, bufs); - list_del(d->bufq.next); -/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */ - d->inprocess = buf; - } - aoecmd_ata_rw(d, f); - goto loop; -} - static void -rexmit(struct aoedev *d, struct frame *f) +resend(struct aoedev *d, struct aoetgt *t, struct frame *f) { struct sk_buff *skb; struct aoe_hdr *h; @@ -294,41 +296,44 @@ rexmit(struct aoedev *d, struct frame *f) char buf[128]; u32 n; - n = newtag(d); + ifrotate(t); + n = newtag(t); + skb = f->skb; + h = aoe_hdr(skb); + ah = (struct aoe_atahdr *) (h+1); snprintf(buf, sizeof buf, - "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", - "retransmit", - d->aoemajor, d->aoeminor, f->tag, jiffies, n); + "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%012llx d=%012llx nout=%d\n", + "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, + mac_addr(h->src), mac_addr(h->dst), t->nout); aoechr_error(buf); - skb = f->skb; - h = aoe_hdr(skb); - ah = (struct aoe_atahdr *) (h+1); f->tag = n; h->tag = cpu_to_be32(n); - memcpy(h->dst, d->addr, sizeof h->dst); - memcpy(h->src, d->ifp->dev_addr, sizeof h->src); - - n = DEFAULTBCNT / 512; - if (ah->scnt > n) { - ah->scnt = n; + memcpy(h->dst, t->addr, sizeof h->dst); + memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src); + + switch (ah->cmdstat) { + default: + break; + case WIN_READ: + case WIN_READ_EXT: + case WIN_WRITE: + case WIN_WRITE_EXT: + put_lba(ah, f->lba); + + n = f->bcnt; + if (n > DEFAULTBCNT) + n = DEFAULTBCNT; + ah->scnt = n >> 9; if (ah->aflags & AOEAFL_WRITE) { skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), DEFAULTBCNT); - skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT; - skb->data_len = DEFAULTBCNT; - } - if (++d->lostjumbo > 
(d->nframes << 1)) - if (d->maxbcnt != DEFAULTBCNT) { - printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n", - d->aoemajor, d->aoeminor, d->ifp->name); - d->maxbcnt = DEFAULTBCNT; - d->flags |= DEVFL_MAXBCNT; + offset_in_page(f->bufaddr), n); + skb->len = sizeof *h + sizeof *ah + n; + skb->data_len = n; } } - - skb->dev = d->ifp; + skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; @@ -351,10 +356,83 @@ tsince(int tag) return n; } +static struct aoeif * +getif(struct aoetgt *t, struct net_device *nd) +{ + struct aoeif *p, *e; + + p = t->ifs; + e = p + NAOEIFS; + for (; pnd == nd) + return p; + return NULL; +} + +static struct aoeif * +addif(struct aoetgt *t, struct net_device *nd) +{ + struct aoeif *p; + + p = getif(t, NULL); + if (!p) + return NULL; + p->nd = nd; + p->maxbcnt = DEFAULTBCNT; + p->lost = p->lostjumbo = 0; + return p; +} + +static void +ejectif(struct aoetgt *t, struct aoeif *ifp) +{ + struct aoeif *e; + ulong n; + + e = t->ifs + NAOEIFS - 1; + n = (e - ifp) * sizeof *ifp; + memmove(ifp, ifp+1, n); + e->nd = NULL; +} + +static int +sthtith(struct aoedev *d) +{ + struct frame *f, *e, *nf; + struct sk_buff *skb; + struct aoetgt *ht = *d->htgt; + + f = ht->frames; + e = f + ht->nframes; + for (; ftag == FREETAG) + continue; + nf = freeframe(d); + if (!nf) + return 0; + skb = nf->skb; + *nf = *f; + f->skb = skb; + f->tag = FREETAG; + nf->waited = 0; + ht->nout--; + (*d->tgt)->nout++; + resend(d, *d->tgt, nf); + } + /* he's clean, he's useless. 
take away his interfaces */ + memset(ht->ifs, 0, sizeof ht->ifs); + d->htgt = NULL; + return 1; +} + +#define ATASCNT(raw) (((struct aoe_atahdr *)(((struct aoe_hdr *)raw)+1))->scnt) + static void rexmit_timer(ulong vp) { struct aoedev *d; + struct aoetgt *t, **tt, **te; + struct aoeif *ifp; struct frame *f, *e; struct sk_buff *sl; register long timeout; @@ -373,31 +451,75 @@ rexmit_timer(ulong vp) spin_unlock_irqrestore(&d->lock, flags); return; } - f = d->frames; - e = f + d->nframes; - for (; ftag != FREETAG && tsince(f->tag) >= timeout) { + tt = d->targets; + te = tt + NTARGETS; + for (; ttframes; + e = f + t->nframes; + for (; ftag == FREETAG + || tsince(f->tag) < timeout) + continue; n = f->waited += timeout; n /= HZ; - if (n > aoe_deadsecs) { /* waited too long for response */ + if (n > aoe_deadsecs) { /* waited too long. device failure. */ aoedev_downdev(d); break; } - rexmit(d, f); + + if (n > HELPWAIT) /* see if another target can help */ + if (tt != d->targets || d->targets[1]) + d->htgt = tt; + + if (t->nout == t->maxout) { + if (t->maxout > 1) + t->maxout--; + t->lastwadj = jiffies; + } + + ifp = getif(t, f->skb->dev); + if (ifp && ++ifp->lost > (t->nframes << 1)) + if (ifp != t->ifs || t->ifs[1].nd) { + ejectif(t, ifp); + ifp = NULL; + } + + if (ATASCNT(aoe_hdr(f->skb)) > DEFAULTBCNT / 512) + if (ifp && ++ifp->lostjumbo > (t->nframes << 1)) + if (ifp->maxbcnt != DEFAULTBCNT) { + printk(KERN_INFO "aoe: e%ld.%d: too many lost jumbo on %s:%012llx - " + "falling back to %d frames.\n", + d->aoemajor, d->aoeminor, + ifp->nd->name, mac_addr(t->addr), + DEFAULTBCNT); + ifp->maxbcnt = 0; + } + resend(d, t, f); + } + + /* window check */ + if (t->nout == t->maxout) + if (t->maxout < t->nframes) + if ((jiffies - t->lastwadj)/HZ > 10) { + t->maxout++; + t->lastwadj = jiffies; } } - if (d->flags & DEVFL_KICKME) { + + if (d->sendq_hd) { + n = d->rttavg <<= 1; + if (n > MAXTIMER) + d->rttavg = MAXTIMER; + } + + if (d->flags & DEVFL_KICKME || d->htgt) { d->flags &= 
~DEVFL_KICKME; aoecmd_work(d); } sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; - if (sl) { - n = d->rttavg <<= 1; - if (n > MAXTIMER) - d->rttavg = MAXTIMER; - } d->timer.expires = jiffies + TIMERTICK; add_timer(&d->timer); @@ -407,6 +529,25 @@ rexmit_timer(ulong vp) aoenet_xmit(sl); } +/* enters with d->lock held */ +void +aoecmd_work(struct aoedev *d) +{ + struct buf *buf; +loop: + if (d->htgt && !sthtith(d)) + return; + if (d->inprocess == NULL) { + if (list_empty(&d->bufq)) + return; + buf = container_of(d->bufq.next, struct buf, bufs); + list_del(d->bufq.next); + d->inprocess = buf; + } + if (aoecmd_ata_rw(d)) + goto loop; +} + /* this function performs work that has been deferred until sleeping is OK */ void @@ -439,7 +580,7 @@ aoecmd_sleepwork(struct work_struct *work) } static void -ataid_complete(struct aoedev *d, unsigned char *id) +ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) { u64 ssize; u16 n; @@ -475,7 +616,7 @@ ataid_complete(struct aoedev *d, unsigned char *id) if (d->ssize != ssize) printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n", - (unsigned long long)mac_addr(d->addr), + (unsigned long long)mac_addr(t->addr), d->aoemajor, d->aoeminor, d->fw_ver, (long long)ssize); d->ssize = ssize; @@ -483,15 +624,8 @@ ataid_complete(struct aoedev *d, unsigned char *id) if (d->gd != NULL) { d->gd->capacity = ssize; d->flags |= DEVFL_NEWSIZE; - } else { - if (d->flags & DEVFL_GDALLOC) { - printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n", - d->aoemajor, d->aoeminor, - "it's already on! 
This shouldn't happen.\n"); - return; - } + } else d->flags |= DEVFL_GDALLOC; - } schedule_work(&d->work); } @@ -518,6 +652,31 @@ calc_rttavg(struct aoedev *d, int rtt) d->rttavg += n >> 2; } +static struct aoetgt * +gettgt(struct aoedev *d, char *addr) +{ + struct aoetgt **t, **e; + + t = d->targets; + e = t + NTARGETS; + for(; taddr, addr, sizeof (*t)->addr) == 0) + return *t; + return NULL; +} + +static inline void +diskstats(struct gendisk *disk, struct bio *bio, ulong duration) +{ + unsigned long n_sect = bio->bi_size >> 9; + const int rw = bio_data_dir(bio); + + disk_stat_inc(disk, ios[rw]); + disk_stat_add(disk, ticks[rw], duration); + disk_stat_add(disk, sectors[rw], n_sect); + disk_stat_add(disk, io_ticks, duration); +} + void aoecmd_ata_rsp(struct sk_buff *skb) { @@ -527,6 +686,8 @@ aoecmd_ata_rsp(struct sk_buff *skb) struct frame *f; struct buf *buf; struct sk_buff *sl; + struct aoetgt *t; + struct aoeif *ifp; register long n; ulong flags; char ebuf[128]; @@ -546,7 +707,15 @@ aoecmd_ata_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); n = be32_to_cpu(get_unaligned(&hin->tag)); - f = getframe(d, n); + t = gettgt(d, hin->src); + if (t == NULL) { + printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n", + d->aoemajor, d->aoeminor, + (unsigned long long) mac_addr(hin->src)); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + f = getframe(t, n); if (f == NULL) { calc_rttavg(d, -tsince(n)); spin_unlock_irqrestore(&d->lock, flags); @@ -568,8 +737,6 @@ aoecmd_ata_rsp(struct sk_buff *skb) ahout = (struct aoe_atahdr *) (hout+1); buf = f->buf; - if (ahout->cmdstat == WIN_IDENTIFY) - d->flags &= ~DEVFL_PAUSE; if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ printk(KERN_ERR "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n", @@ -578,14 +745,16 @@ aoecmd_ata_rsp(struct sk_buff *skb) if (buf) buf->flags |= BUFFL_FAIL; } else { + if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. 
*/ + d->htgt = NULL; n = ahout->scnt << 9; switch (ahout->cmdstat) { case WIN_READ: case WIN_READ_EXT: if (skb->len - sizeof *hin - sizeof *ahin < n) { printk(KERN_ERR - "aoe: runt data size in read. skb->len=%d\n", - skb->len); + "aoe: %s. skb->len=%d need=%ld\n", + "runt data size in read", skb->len, n); /* fail frame f? just returning will rexmit. */ spin_unlock_irqrestore(&d->lock, flags); return; @@ -593,32 +762,18 @@ aoecmd_ata_rsp(struct sk_buff *skb) memcpy(f->bufaddr, ahin+1, n); case WIN_WRITE: case WIN_WRITE_EXT: + ifp = getif(t, skb->dev); + if (ifp) { + ifp->lost = 0; + if (n > DEFAULTBCNT) + ifp->lostjumbo = 0; + } if (f->bcnt -= n) { - skb = f->skb; + f->lba += n >> 9; f->bufaddr += n; - put_lba(ahout, f->lba += ahout->scnt); - n = f->bcnt; - if (n > DEFAULTBCNT) - n = DEFAULTBCNT; - ahout->scnt = n >> 9; - if (ahout->aflags & AOEAFL_WRITE) { - skb_fill_page_desc(skb, 0, - virt_to_page(f->bufaddr), - offset_in_page(f->bufaddr), n); - skb->len = sizeof *hout + sizeof *ahout + n; - skb->data_len = n; - } - f->tag = newtag(d); - hout->tag = cpu_to_be32(f->tag); - skb->dev = d->ifp; - skb = skb_clone(skb, GFP_ATOMIC); - spin_unlock_irqrestore(&d->lock, flags); - if (skb) - aoenet_xmit(skb); - return; + resend(d, t, f); + goto xmit; } - if (n > DEFAULTBCNT) - d->lostjumbo = 0; break; case WIN_IDENTIFY: if (skb->len - sizeof *hin - sizeof *ahin < 512) { @@ -628,7 +783,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) spin_unlock_irqrestore(&d->lock, flags); return; } - ataid_complete(d, (char *) (ahin+1)); + ataid_complete(d, t, (char *) (ahin+1)); break; default: printk(KERN_INFO @@ -639,28 +794,19 @@ aoecmd_ata_rsp(struct sk_buff *skb) } } - if (buf) { - buf->nframesout -= 1; - if (buf->nframesout == 0 && buf->resid == 0) { - unsigned long duration = jiffies - buf->start_time; - unsigned long n_sect = buf->bio->bi_size >> 9; - struct gendisk *disk = d->gd; - const int rw = bio_data_dir(buf->bio); - - disk_stat_inc(disk, ios[rw]); - disk_stat_add(disk, ticks[rw], 
duration); - disk_stat_add(disk, sectors[rw], n_sect); - disk_stat_add(disk, io_ticks, duration); - n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; - bio_endio(buf->bio, buf->bio->bi_size, n); - mempool_free(buf, d->bufpool); - } + if (buf && --buf->nframesout == 0 && buf->resid == 0) { + diskstats(d->gd, buf->bio, jiffies - buf->stime); + n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; + bio_endio(buf->bio, buf->bio->bi_size, n); + mempool_free(buf, d->bufpool); } f->buf = NULL; f->tag = FREETAG; + t->nout--; aoecmd_work(d); +xmit: sl = d->sendq_hd; d->sendq_hd = d->sendq_tl = NULL; @@ -678,23 +824,20 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) aoenet_xmit(sl); } -/* - * Since we only call this in one place (and it only prepares one frame) - * we just return the skb. Usually we'd chain it up to the aoedev sendq. - */ -static struct sk_buff * +struct sk_buff * aoecmd_ata_id(struct aoedev *d) { struct aoe_hdr *h; struct aoe_atahdr *ah; struct frame *f; struct sk_buff *skb; + struct aoetgt *t; f = freeframe(d); - if (f == NULL) { - printk(KERN_ERR "aoe: can't get a frame. 
This shouldn't happen.\n"); + if (f == NULL) return NULL; - } + + t = *d->tgt; /* initialize the headers & frame */ skb = f->skb; @@ -702,7 +845,8 @@ aoecmd_ata_id(struct aoedev *d) ah = (struct aoe_atahdr *) (h+1); skb_put(skb, sizeof *h + sizeof *ah); memset(h, 0, skb->len); - f->tag = aoehdr_atainit(d, h); + f->tag = aoehdr_atainit(d, t, h); + t->nout++; f->waited = 0; /* set up ata header */ @@ -710,7 +854,7 @@ aoecmd_ata_id(struct aoedev *d) ah->cmdstat = WIN_IDENTIFY; ah->lba3 = 0xa0; - skb->dev = d->ifp; + skb->dev = t->ifp->nd; d->rttavg = MAXTIMER; d->timer.function = rexmit_timer; @@ -718,12 +862,66 @@ aoecmd_ata_id(struct aoedev *d) return skb_clone(skb, GFP_ATOMIC); } +static struct aoetgt * +addtgt(struct aoedev *d, char *addr, ulong nframes) +{ + struct aoetgt *t, **tt, **te; + struct frame *f, *e; + + tt = d->targets; + te = tt + NTARGETS; + for (; ttaddr, addr, 6) > 0) { + memmove(tt+1, tt, (void *)te-(void *)(tt+1)); + break; + } + } + if (tt == te) + return NULL; + + t = kcalloc(1, sizeof *t, GFP_ATOMIC); + f = kcalloc(nframes, sizeof *f, GFP_ATOMIC); + switch (!t || !f) { + case 0: + t->nframes = nframes; + t->frames = f; + e = f + nframes; + for (; ftag = FREETAG; + f->skb = new_skb(ETH_ZLEN); + if (!f->skb) + break; + } + if (f == e) + break; + while (f > t->frames) { + f--; + dev_kfree_skb(f->skb); + } + default: + if (f) + kfree(f); + if (t) + kfree(t); + return NULL; + } + + memcpy(t->addr, addr, sizeof t->addr); + t->ifp = t->ifs; + t->maxout = t->nframes; + return *tt = t; +} + void aoecmd_cfg_rsp(struct sk_buff *skb) { struct aoedev *d; struct aoe_hdr *h; struct aoe_cfghdr *ch; + struct aoetgt *t; + struct aoeif *ifp; ulong flags, sysminor, aoemajor; struct sk_buff *sl; enum { MAXFRAMES = 16 }; @@ -754,7 +952,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb) if (n > MAXFRAMES) /* keep it reasonable */ n = MAXFRAMES; - d = aoedev_by_sysminor_m(sysminor, n); + d = aoedev_by_sysminor_m(sysminor); if (d == NULL) { printk(KERN_INFO "aoe: device 
sysminor_m failure\n"); return; @@ -762,38 +960,70 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_lock_irqsave(&d->lock, flags); - /* permit device to migrate mac and network interface */ - d->ifp = skb->dev; - memcpy(d->addr, h->src, sizeof d->addr); - if (!(d->flags & DEVFL_MAXBCNT)) { - n = d->ifp->mtu; + t = gettgt(d, h->src); + if (!t) { + t = addtgt(d, h->src, n); + if (!t) { + printk(KERN_INFO "aoe: device addtgt failure; too many targets?\n"); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + } + ifp = getif(t, skb->dev); + if (!ifp) { + if (!(ifp = addif(t, skb->dev))) { + printk(KERN_INFO "aoe: device addif failure; too many interfaces?\n"); + spin_unlock_irqrestore(&d->lock, flags); + return; + } + } + if (ifp->maxbcnt) { + n = ifp->nd->mtu; n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr); n /= 512; if (n > ch->scnt) n = ch->scnt; n = n ? n * 512 : DEFAULTBCNT; - if (n != d->maxbcnt) { + if (n != ifp->maxbcnt) { printk(KERN_INFO - "aoe: e%ld.%ld: setting %d byte data frames on %s\n", - d->aoemajor, d->aoeminor, n, d->ifp->name); - d->maxbcnt = n; + "aoe: e%ld.%d: setting %d byte data frames on %s:%012llx\n", + d->aoemajor, d->aoeminor, n, ifp->nd->name, + (unsigned long long) mac_addr(t->addr)); + ifp->maxbcnt = n; } } /* don't change users' perspective */ - if (d->nopen && !(d->flags & DEVFL_PAUSE)) { + if (d->nopen) { spin_unlock_irqrestore(&d->lock, flags); return; } - d->flags |= DEVFL_PAUSE; /* force pause */ - d->mintimer = MINTIMER; d->fw_ver = be16_to_cpu(ch->fwver); - /* check for already outstanding ataid */ - sl = aoedev_isbusy(d) == 0 ? 
aoecmd_ata_id(d) : NULL; + sl = aoecmd_ata_id(d); spin_unlock_irqrestore(&d->lock, flags); aoenet_xmit(sl); } +void +aoecmd_cleanslate(struct aoedev *d) +{ + struct aoetgt **t, **te; + struct aoeif *p, *e; + + d->mintimer = MINTIMER; + + t = d->targets; + te = t + NTARGETS; + for (; tmaxout = (*t)->nframes; + p = (*t)->ifs; + e = p + NAOEIFS; + for (; plostjumbo = p->lost = 0; + p->maxbcnt = DEFAULTBCNT; + } + } +} diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 05a9719..04c75b4 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -15,15 +15,18 @@ static spinlock_t devlist_lock; int aoedev_isbusy(struct aoedev *d) { + struct aoetgt **t, **te; struct frame *f, *e; - f = d->frames; - e = f + d->nframes; - do { - if (f->tag != FREETAG) - return 1; - } while (++f < e); - + t = d->targets; + te = t + NTARGETS; + for (; tframes; + e = f + (*t)->nframes; + for (; ftag != FREETAG) + return 1; + } return 0; } @@ -55,75 +58,41 @@ dummy_timer(ulong vp) add_timer(&d->timer); } -/* called with devlist lock held */ -static struct aoedev * -aoedev_newdev(ulong nframes) -{ - struct aoedev *d; - struct frame *f, *e; - - d = kzalloc(sizeof *d, GFP_ATOMIC); - f = kcalloc(nframes, sizeof *f, GFP_ATOMIC); - switch (!d || !f) { - case 0: - d->nframes = nframes; - d->frames = f; - e = f + nframes; - for (; ftag = FREETAG; - f->skb = new_skb(ETH_ZLEN); - if (!f->skb) - break; - } - if (f == e) - break; - while (f > d->frames) { - f--; - dev_kfree_skb(f->skb); - } - default: - if (f) - kfree(f); - if (d) - kfree(d); - return NULL; - } - INIT_WORK(&d->work, aoecmd_sleepwork); - spin_lock_init(&d->lock); - init_timer(&d->timer); - d->timer.data = (ulong) d; - d->timer.function = dummy_timer; - d->timer.expires = jiffies + HZ; - add_timer(&d->timer); - d->bufpool = NULL; /* defer to aoeblk_gdalloc */ - INIT_LIST_HEAD(&d->bufq); - d->next = devlist; - devlist = d; - - return d; -} - void aoedev_downdev(struct aoedev *d) { + struct aoetgt **t, 
**te; struct frame *f, *e; struct buf *buf; struct bio *bio; - f = d->frames; - e = f + d->nframes; - for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) { - if (f->tag == FREETAG || f->buf == NULL) - continue; - buf = f->buf; - bio = buf->bio; - if (--buf->nframesout == 0) { - mempool_free(buf, d->bufpool); - bio_endio(bio, bio->bi_size, -EIO); + t = d->targets; + te = t + NTARGETS; + for (; t<te && *t; t++) { + f = (*t)->frames; + e = f + (*t)->nframes; + for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) { + if (f->tag == FREETAG || f->buf == NULL) + continue; + buf = f->buf; + bio = buf->bio; + if (--buf->nframesout == 0) + if (buf != d->inprocess) { + mempool_free(buf, d->bufpool); + bio_endio(bio, bio->bi_size, -EIO); + } } - skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; + (*t)->maxout = (*t)->nframes; + (*t)->nout = 0; + } + buf = d->inprocess; + if (buf) { + bio = buf->bio; + mempool_free(buf, d->bufpool); + bio_endio(bio, bio->bi_size, -EIO); } d->inprocess = NULL; + d->htgt = NULL; while (!list_empty(&d->bufq)) { buf = container_of(d->bufq.next, struct buf, bufs); @@ -136,12 +105,12 @@ aoedev_downdev(struct aoedev *d) if (d->gd) d->gd->capacity = 0; - d->flags &= ~(DEVFL_UP | DEVFL_PAUSE); + d->flags &= ~DEVFL_UP; } /* find it or malloc it */ struct aoedev * -aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) +aoedev_by_sysminor_m(ulong sysminor) { struct aoedev *d; ulong flags; @@ -151,40 +120,60 @@ aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) for (d=devlist; d; d=d->next) if (d->sysminor == sysminor) break; - - if (d == NULL) { - d = aoedev_newdev(bufcnt); - if (d == NULL) { - spin_unlock_irqrestore(&devlist_lock, flags); - printk(KERN_INFO "aoe: aoedev_newdev failure.\n"); - return NULL; - } - d->sysminor = sysminor; - d->aoemajor = AOEMAJOR(sysminor); - d->aoeminor = AOEMINOR(sysminor); + if (d || !(d = kcalloc(1, sizeof *d, GFP_ATOMIC))) { + spin_unlock_irqrestore(&devlist_lock, flags); + return d; } + INIT_WORK(&d->work, aoecmd_sleepwork); + spin_lock_init(&d->lock); + 
init_timer(&d->timer); + d->timer.data = (ulong) d; + d->timer.function = dummy_timer; + d->timer.expires = jiffies + HZ; + add_timer(&d->timer); + d->bufpool = NULL; /* defer to aoeblk_gdalloc */ + d->tgt = d->targets; + INIT_LIST_HEAD(&d->bufq); + d->sysminor = sysminor; + d->aoemajor = AOEMAJOR(sysminor); + d->aoeminor = AOEMINOR(sysminor); + d->mintimer = MINTIMER; + d->next = devlist; + devlist = d; spin_unlock_irqrestore(&devlist_lock, flags); return d; } static void -aoedev_freedev(struct aoedev *d) +freetgt(struct aoetgt *t) { struct frame *f, *e; + f = t->frames; + e = f + t->nframes; + for (; f<e; f++) { + skb_shinfo(f->skb)->nr_frags = 0; + dev_kfree_skb(f->skb); + } + kfree(t->frames); + kfree(t); +} + +static void +aoedev_freedev(struct aoedev *d) +{ + struct aoetgt **t, **e; + if (d->gd) { aoedisk_rm_sysfs(d); del_gendisk(d->gd); put_disk(d->gd); } - f = d->frames; - e = f + d->nframes; - for (; f<e; f++) { - skb_shinfo(f->skb)->nr_frags = 0; - dev_kfree_skb(f->skb); - } - kfree(d->frames); + t = d->targets; + e = t + NTARGETS; + for (; t<e && *t; t++) + freetgt(*t); if (d->bufpool) mempool_destroy(d->bufpool); kfree(d); diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index f9ddfda..f335099 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -133,8 +133,8 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, if (n > NECODES) n = 0; if (net_ratelimit()) - printk(KERN_ERR "aoe: error packet from %d.%d; ecode=%d '%s'\n", - be16_to_cpu(get_unaligned(&h->major)), h->minor, + printk(KERN_ERR "aoe: error packet from %d.%d@%s; ecode=%d '%s'\n", + be16_to_cpu(get_unaligned(&h->major)), h->minor, skb->dev->name, h->err, aoe_errlist[n]); goto exit; } -- 1.5.2.1 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/