Date: Mon, 4 Mar 2013 15:41:54 -0500
From: Konrad Rzeszutek Wilk
To: Roger Pau Monne
Cc: linux-kernel@vger.kernel.org, xen-devel@lists.xen.org
Subject: Re: [PATCH RFC 12/12] xen-block: implement indirect descriptors

On Thu, Feb 28, 2013 at 11:28:55AM +0100, Roger Pau Monne wrote:
> Indirect descriptors introduce a new block operation
> (BLKIF_OP_INDIRECT) that passes grant references instead of segments
> in the request. This grant references are filled with arrays of
> blkif_request_segment_aligned, this way we can send more segments in a
> request.
>
> The proposed implementation sets the maximum number of indirect grefs
> (frames filled with blkif_request_segment_aligned) to 256 in the
> backend and 64 in the frontend. The value in the frontend has been
> chosen experimentally, and the backend value has been set to a sane
> value that allows expanding the maximum number of indirect descriptors
> in the frontend if needed.

So we are still using a similar format of the form: , etc. Why not
utilize a layout that fits with the bio sg? That way we might not even
have to do the bio_alloc call and instead can set up a bio (and
bio-list) with the appropriate offsets/list?

Meaning that the format of the indirect descriptors is: We already know
what the first_sect and last_sect are - they are basically:

    sector_number + nr_segments * (whatever the sector size is) + offset

>
> The migration code has changed from the previous implementation, in
> which we simply remapped the segments on the shared ring. Now the
> maximum number of segments allowed in a request can change depending
> on the backend, so we have to requeue all the requests in the ring and
> in the queue and split the bios in them if they are bigger than the
> new maximum number of segments.
>
> Signed-off-by: Roger Pau Monné
> Cc: Konrad Rzeszutek Wilk
> Cc: xen-devel@lists.xen.org
> ---
>  drivers/block/xen-blkback/blkback.c | 129 +++++++---
>  drivers/block/xen-blkback/common.h | 80 ++++++-
>  drivers/block/xen-blkback/xenbus.c | 8 +
>  drivers/block/xen-blkfront.c | 498 +++++++++++++++++++++++++++++------
>  include/xen/interface/io/blkif.h | 25 ++
>  5 files changed, 622 insertions(+), 118 deletions(-)
>
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index 0fa30db..98eb16b 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -70,7 +70,7 @@ MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate per backend");
>  * algorithm.
> */ > > -static int xen_blkif_max_pgrants = 352; > +static int xen_blkif_max_pgrants = 1024; > module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644); > MODULE_PARM_DESC(max_persistent_grants, > "Maximum number of grants to map persistently"); > @@ -578,10 +578,6 @@ purge_gnt_list: > return 0; > } > > -struct seg_buf { > - unsigned long buf; > - unsigned int nsec; > -}; > /* > * Unmap the grant references, and also remove the M2P over-rides > * used in the 'pending_req'. > @@ -761,32 +757,79 @@ out_of_memory: > return -ENOMEM; > } > > -static int xen_blkbk_map_seg(struct blkif_request *req, > - struct pending_req *pending_req, > +static int xen_blkbk_map_seg(struct pending_req *pending_req, > struct seg_buf seg[], > struct page *pages[]) > { > int i, rc; > - grant_ref_t grefs[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > - for (i = 0; i < req->u.rw.nr_segments; i++) > - grefs[i] = req->u.rw.seg[i].gref; > - > - rc = xen_blkbk_map(pending_req->blkif, grefs, > + rc = xen_blkbk_map(pending_req->blkif, pending_req->grefs, > pending_req->persistent_gnts, > pending_req->grant_handles, pending_req->pages, > - req->u.rw.nr_segments, > + pending_req->nr_pages, > (pending_req->operation != BLKIF_OP_READ)); > if (rc) > return rc; > > - for (i = 0; i < req->u.rw.nr_segments; i++) > - seg[i].buf = pfn_to_mfn(page_to_pfn(pending_req->pages[i])) > - << PAGE_SHIFT | (req->u.rw.seg[i].first_sect << 9); > + for (i = 0; i < pending_req->nr_pages; i++) > + seg[i].buf |= pfn_to_mfn(page_to_pfn(pending_req->pages[i])) > + << PAGE_SHIFT; > > return 0; > } > > +static int xen_blkbk_parse_indirect(struct blkif_request *req, > + struct pending_req *pending_req, > + struct seg_buf seg[], > + struct phys_req *preq) > +{ > + struct persistent_gnt **persistent = > + pending_req->indirect_persistent_gnts; > + struct page **pages = pending_req->indirect_pages; > + struct xen_blkif *blkif = pending_req->blkif; > + int indirect_grefs, rc, n, nseg, i; > + struct blkif_request_segment_aligned *segments = NULL; > + > + nseg = pending_req->nr_pages; > + indirect_grefs = (nseg + SEGS_PER_INDIRECT_FRAME - 1) / > + SEGS_PER_INDIRECT_FRAME; > + > + rc = xen_blkbk_map(blkif, req->u.indirect.indirect_grefs, > + persistent, pending_req->indirect_handles, > + pages, indirect_grefs, true); > + if (rc) > + goto unmap; > + > + for (n = 0, i = 0; n < nseg; n++) { > + if ((n % SEGS_PER_INDIRECT_FRAME) == 0) { > + /* Map indirect segments */ > + if (segments) > + kunmap_atomic(segments); > + segments = > + kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]); > + } > + i = n % SEGS_PER_INDIRECT_FRAME; > + pending_req->grefs[n] = segments[i].gref; > + seg[n].nsec = segments[i].last_sect - > + segments[i].first_sect + 1; > + seg[n].buf = segments[i].first_sect << 9; > + if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) || > + (segments[i].last_sect < > + segments[i].first_sect)) { > + rc = -EINVAL; > + goto unmap; > + } > + preq->nr_sects += seg[n].nsec; > + } > + > +unmap: > + if (segments) > + kunmap_atomic(segments); > + xen_blkbk_unmap(blkif, pending_req->indirect_handles, > + pages, persistent, indirect_grefs); > + return rc; > +} > + > static int dispatch_discard_io(struct xen_blkif *blkif, > struct blkif_request *req) > { > @@ -980,17 +1023,21 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, > struct pending_req *pending_req) > { > struct phys_req preq; > - struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct seg_buf *seg = pending_req->seg; > unsigned int nseg; > struct bio *bio = NULL; > - struct bio 
*biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct bio **biolist = pending_req->biolist; > int i, nbio = 0; > int operation; > struct blk_plug plug; > bool drain = false; > struct page **pages = pending_req->pages; > + unsigned short req_operation; > + > + req_operation = req->operation == BLKIF_OP_INDIRECT ? > + req->u.indirect.indirect_op : req->operation; > > - switch (req->operation) { > + switch (req_operation) { > case BLKIF_OP_READ: > blkif->st_rd_req++; > operation = READ; > @@ -1012,33 +1059,49 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, > } > > /* Check that the number of segments is sane. */ > - nseg = req->u.rw.nr_segments; > + nseg = req->operation == BLKIF_OP_INDIRECT ? > + req->u.indirect.nr_segments : req->u.rw.nr_segments; > > if (unlikely(nseg == 0 && operation != WRITE_FLUSH) || > - unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { > + unlikely((req->operation != BLKIF_OP_INDIRECT) && > + (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || > + unlikely((req->operation == BLKIF_OP_INDIRECT) && > + (nseg > MAX_INDIRECT_SEGMENTS))) { > pr_debug(DRV_PFX "Bad number of segments in request (%d)\n", > nseg); > /* Haven't submitted any bio's yet. */ > goto fail_response; > } > > - preq.sector_number = req->u.rw.sector_number; > preq.nr_sects = 0; > > pending_req->blkif = blkif; > - pending_req->id = req->u.rw.id; > - pending_req->operation = req->operation; > pending_req->status = BLKIF_RSP_OKAY; > pending_req->nr_pages = nseg; > > - for (i = 0; i < nseg; i++) { > - seg[i].nsec = req->u.rw.seg[i].last_sect - > - req->u.rw.seg[i].first_sect + 1; > - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || > - (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) > + if (req->operation != BLKIF_OP_INDIRECT) { > + preq.dev = req->u.rw.handle; > + preq.sector_number = req->u.rw.sector_number; > + pending_req->id = req->u.rw.id; > + pending_req->operation = req->operation; > + for (i = 0; i < nseg; i++) { > + pending_req->grefs[i] = req->u.rw.seg[i].gref; > + seg[i].nsec = req->u.rw.seg[i].last_sect - > + req->u.rw.seg[i].first_sect + 1; > + seg[i].buf = req->u.rw.seg[i].first_sect << 9; > + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || > + (req->u.rw.seg[i].last_sect < > + req->u.rw.seg[i].first_sect)) > + goto fail_response; > + preq.nr_sects += seg[i].nsec; > + } > + } else { > + preq.dev = req->u.indirect.handle; > + preq.sector_number = req->u.indirect.sector_number; > + pending_req->id = req->u.indirect.id; > + pending_req->operation = req->u.indirect.indirect_op; > + if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq)) > goto fail_response; > - preq.nr_sects += seg[i].nsec; > - > } > > if (xen_vbd_translate(&preq, blkif, operation) != 0) { > @@ -1074,7 +1137,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, > * the hypercall to unmap the grants - that is all done in > * xen_blkbk_unmap. > */ > - if (xen_blkbk_map_seg(req, pending_req, seg, pages)) > + if (xen_blkbk_map_seg(pending_req, seg, pages)) > goto fail_flush; > > /* > @@ -1146,7 +1209,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, > pending_req->nr_pages); > fail_response: > /* Haven't submitted any bio's yet. 
*/ > - make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR); > + make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR); > free_req(blkif, pending_req); > msleep(1); /* back off a bit */ > return -EIO; > diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h > index 0b0ad3f..d3656d2 100644 > --- a/drivers/block/xen-blkback/common.h > +++ b/drivers/block/xen-blkback/common.h > @@ -50,6 +50,17 @@ > __func__, __LINE__, ##args) > > > +/* > + * This is the maximum number of segments that would be allowed in indirect > + * requests. This value will also be passed to the frontend. > + */ > +#define MAX_INDIRECT_SEGMENTS 256 > + > +#define SEGS_PER_INDIRECT_FRAME \ > +(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) > +#define MAX_INDIRECT_GREFS \ > +((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) > + > /* Not a real protocol. Used to generate ring structs which contain > * the elements common to all protocols only. This way we get a > * compiler-checkable way to use common struct elements, so we can > @@ -77,11 +88,21 @@ struct blkif_x86_32_request_discard { > uint64_t nr_sectors; > } __attribute__((__packed__)); > > +struct blkif_x86_32_request_indirect { > + uint8_t indirect_op; > + uint16_t nr_segments; > + uint64_t id; > + blkif_vdev_t handle; > + blkif_sector_t sector_number; > + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_GREFS_PER_REQUEST]; > +} __attribute__((__packed__)); > + > struct blkif_x86_32_request { > uint8_t operation; /* BLKIF_OP_??? */ > union { > struct blkif_x86_32_request_rw rw; > struct blkif_x86_32_request_discard discard; > + struct blkif_x86_32_request_indirect indirect; > } u; > } __attribute__((__packed__)); > > @@ -113,11 +134,22 @@ struct blkif_x86_64_request_discard { > uint64_t nr_sectors; > } __attribute__((__packed__)); > > +struct blkif_x86_64_request_indirect { > + uint8_t indirect_op; > + uint16_t nr_segments; > + uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */ > + uint64_t id; > + blkif_vdev_t handle; > + blkif_sector_t sector_number; > + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_GREFS_PER_REQUEST]; > +} __attribute__((__packed__)); > + > struct blkif_x86_64_request { > uint8_t operation; /* BLKIF_OP_??? */ > union { > struct blkif_x86_64_request_rw rw; > struct blkif_x86_64_request_discard discard; > + struct blkif_x86_64_request_indirect indirect; > } u; > } __attribute__((__packed__)); > > @@ -235,6 +267,11 @@ struct xen_blkif { > wait_queue_head_t waiting_to_free; > }; > > +struct seg_buf { > + unsigned long buf; > + unsigned int nsec; > +}; > + > /* > * Each outstanding request that we've passed to the lower device layers has a > * 'pending_req' allocated to it. 
Each buffer_head that completes decrements > @@ -249,9 +286,16 @@ struct pending_req { > unsigned short operation; > int status; > struct list_head free_list; > - struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > - struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > - grant_handle_t grant_handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct persistent_gnt *persistent_gnts[MAX_INDIRECT_SEGMENTS]; > + struct page *pages[MAX_INDIRECT_SEGMENTS]; > + grant_handle_t grant_handles[MAX_INDIRECT_SEGMENTS]; > + grant_ref_t grefs[MAX_INDIRECT_SEGMENTS]; > + /* Indirect descriptors */ > + struct persistent_gnt *indirect_persistent_gnts[MAX_INDIRECT_GREFS]; > + struct page *indirect_pages[MAX_INDIRECT_GREFS]; > + grant_handle_t indirect_handles[MAX_INDIRECT_GREFS]; > + struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; > + struct bio *biolist[MAX_INDIRECT_SEGMENTS]; > }; > > > @@ -289,7 +333,7 @@ struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); > static inline void blkif_get_x86_32_req(struct blkif_request *dst, > struct blkif_x86_32_request *src) > { > - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; > + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j = MAX_INDIRECT_GREFS; > dst->operation = src->operation; > switch (src->operation) { > case BLKIF_OP_READ: > @@ -312,6 +356,19 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, > dst->u.discard.sector_number = src->u.discard.sector_number; > dst->u.discard.nr_sectors = src->u.discard.nr_sectors; > break; > + case BLKIF_OP_INDIRECT: > + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; > + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; > + dst->u.indirect.handle = src->u.indirect.handle; > + dst->u.indirect.id = src->u.indirect.id; > + dst->u.indirect.sector_number = src->u.indirect.sector_number; > + barrier(); > + if (j > dst->u.indirect.nr_segments) > + j = dst->u.indirect.nr_segments; > + for (i = 0; i < j; i++) > + dst->u.indirect.indirect_grefs[i] = > + src->u.indirect.indirect_grefs[i]; > + break; > default: > break; > } > @@ -320,7 +377,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, > static inline void blkif_get_x86_64_req(struct blkif_request *dst, > struct blkif_x86_64_request *src) > { > - int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; > + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j = MAX_INDIRECT_GREFS; > dst->operation = src->operation; > switch (src->operation) { > case BLKIF_OP_READ: > @@ -343,6 +400,19 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, > dst->u.discard.sector_number = src->u.discard.sector_number; > dst->u.discard.nr_sectors = src->u.discard.nr_sectors; > break; > + case BLKIF_OP_INDIRECT: > + dst->u.indirect.indirect_op = src->u.indirect.indirect_op; > + dst->u.indirect.nr_segments = src->u.indirect.nr_segments; > + dst->u.indirect.handle = src->u.indirect.handle; > + dst->u.indirect.id = src->u.indirect.id; > + dst->u.indirect.sector_number = src->u.indirect.sector_number; > + barrier(); > + if (j > dst->u.indirect.nr_segments) > + j = dst->u.indirect.nr_segments; > + for (i = 0; i < j; i++) > + dst->u.indirect.indirect_grefs[i] = > + src->u.indirect.indirect_grefs[i]; > + break; > default: > break; > } > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 8f929cb..9e16abb 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -700,6 +700,14 @@ again: > goto abort; > } > > + err = xenbus_printf(xbt, dev->nodename, 
"max-indirect-segments", "%u", > + MAX_INDIRECT_SEGMENTS); > + if (err) { > + xenbus_dev_fatal(dev, err, "writing %s/max-indirect-segments", > + dev->nodename); > + goto abort; > + } > + > err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", > (unsigned long long)vbd_sz(&be->blkif->vbd)); > if (err) { > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 4d81fcc..074d302 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -74,12 +74,30 @@ struct grant { > struct blk_shadow { > struct blkif_request req; > struct request *request; > - struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct grant **grants_used; > + struct grant **indirect_grants; > +}; > + > +struct split_bio { > + struct bio *bio; > + atomic_t pending; > + int err; > }; > > static DEFINE_MUTEX(blkfront_mutex); > static const struct block_device_operations xlvbd_block_fops; > > +/* > + * Maximum number of segments in indirect requests, the actual value used by > + * the frontend driver is the minimum of this value and the value provided > + * by the backend driver. > + */ > + > +static int xen_blkif_max_segments = 64; > +module_param_named(max_segments, xen_blkif_max_segments, int, 0); > +MODULE_PARM_DESC(max_segments, > +"Maximum number of segments in indirect requests"); > + > #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) > > /* > @@ -98,7 +116,7 @@ struct blkfront_info > enum blkif_state connected; > int ring_ref; > struct blkif_front_ring ring; > - struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > + struct scatterlist *sg; > unsigned int evtchn, irq; > struct request_queue *rq; > struct work_struct work; > @@ -114,6 +132,8 @@ struct blkfront_info > unsigned int discard_granularity; > unsigned int discard_alignment; > unsigned int feature_persistent:1; > + unsigned int max_indirect_segments; > + unsigned int sector_size; > int is_ready; > }; > > @@ -142,6 +162,14 @@ static DEFINE_SPINLOCK(minor_lock); > > #define DEV_NAME "xvd" /* name in /dev */ > > +#define SEGS_PER_INDIRECT_FRAME \ > + (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) > +#define INDIRECT_GREFS(_segs) \ > + ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) > +#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b)) > + > +static int blkfront_setup_indirect(struct blkfront_info *info); > + > static int get_id_from_freelist(struct blkfront_info *info) > { > unsigned long free = info->shadow_free; > @@ -358,7 +386,8 @@ static int blkif_queue_request(struct request *req) > struct blkif_request *ring_req; > unsigned long id; > unsigned int fsect, lsect; > - int i, ref; > + int i, ref, n; > + struct blkif_request_segment_aligned *segments = NULL; > > /* > * Used to store if we are able to queue the request by just using > @@ -369,21 +398,27 @@ static int blkif_queue_request(struct request *req) > grant_ref_t gref_head; > struct grant *gnt_list_entry = NULL; > struct scatterlist *sg; > + int nseg, max_grefs; > > if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) > return 1; > > - /* Check if we have enought grants to allocate a requests */ > - if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) { > + max_grefs = info->max_indirect_segments ? 
> + info->max_indirect_segments + > + INDIRECT_GREFS(info->max_indirect_segments) : > + BLKIF_MAX_SEGMENTS_PER_REQUEST; > + > + /* Check if we have enough grants to allocate a requests */ > + if (info->persistent_gnts_c < max_grefs) { > new_persistent_gnts = 1; > if (gnttab_alloc_grant_references( > - BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c, > + max_grefs - info->persistent_gnts_c, > &gref_head) < 0) { > gnttab_request_free_callback( > &info->callback, > blkif_restart_queue_callback, > info, > - BLKIF_MAX_SEGMENTS_PER_REQUEST); > + max_grefs); > return 1; > } > } else > @@ -394,42 +429,82 @@ static int blkif_queue_request(struct request *req) > id = get_id_from_freelist(info); > info->shadow[id].request = req; > > - ring_req->u.rw.id = id; > - ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); > - ring_req->u.rw.handle = info->handle; > - > - ring_req->operation = rq_data_dir(req) ? > - BLKIF_OP_WRITE : BLKIF_OP_READ; > - > - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { > - /* > - * Ideally we can do an unordered flush-to-disk. In case the > - * backend onlysupports barriers, use that. A barrier request > - * a superset of FUA, so we can implement it the same > - * way. (It's also a FLUSH+FUA, since it is > - * guaranteed ordered WRT previous writes.) > - */ > - ring_req->operation = info->flush_op; > - } > - > if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { > /* id, sector_number and handle are set above. */ > ring_req->operation = BLKIF_OP_DISCARD; > ring_req->u.discard.nr_sectors = blk_rq_sectors(req); > + ring_req->u.discard.id = id; > + ring_req->u.discard.sector_number = > + (blkif_sector_t)blk_rq_pos(req); > if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) > ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; > else > ring_req->u.discard.flag = 0; > } else { > - ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req, > - info->sg); > - BUG_ON(ring_req->u.rw.nr_segments > > - BLKIF_MAX_SEGMENTS_PER_REQUEST); > - > - for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) { > + BUG_ON(info->max_indirect_segments == 0 && > + req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); > + BUG_ON(info->max_indirect_segments && > + req->nr_phys_segments > info->max_indirect_segments); > + nseg = blk_rq_map_sg(req->q, req, info->sg); > + if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { > + /* Indirect OP */ > + ring_req->operation = BLKIF_OP_INDIRECT; > + ring_req->u.indirect.indirect_op = rq_data_dir(req) ? > + BLKIF_OP_WRITE : BLKIF_OP_READ; > + ring_req->u.indirect.id = id; > + ring_req->u.indirect.sector_number = > + (blkif_sector_t)blk_rq_pos(req); > + ring_req->u.indirect.handle = info->handle; > + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { > + /* > + * Ideally we can do an unordered flush-to-disk. In case the > + * backend onlysupports barriers, use that. A barrier request > + * a superset of FUA, so we can implement it the same > + * way. (It's also a FLUSH+FUA, since it is > + * guaranteed ordered WRT previous writes.) > + */ > + ring_req->u.indirect.indirect_op = > + info->flush_op; > + } > + ring_req->u.indirect.nr_segments = nseg; > + } else { > + ring_req->u.rw.id = id; > + ring_req->u.rw.sector_number = > + (blkif_sector_t)blk_rq_pos(req); > + ring_req->u.rw.handle = info->handle; > + ring_req->operation = rq_data_dir(req) ? > + BLKIF_OP_WRITE : BLKIF_OP_READ; > + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { > + /* > + * Ideally we can do an unordered flush-to-disk. In case the > + * backend onlysupports barriers, use that. 
A barrier request > + * a superset of FUA, so we can implement it the same > + * way. (It's also a FLUSH+FUA, since it is > + * guaranteed ordered WRT previous writes.) > + */ > + ring_req->operation = info->flush_op; > + } > + ring_req->u.rw.nr_segments = nseg; > + } > + for_each_sg(info->sg, sg, nseg, i) { > fsect = sg->offset >> 9; > lsect = fsect + (sg->length >> 9) - 1; > > + if ((ring_req->operation == BLKIF_OP_INDIRECT) && > + (i % SEGS_PER_INDIRECT_FRAME == 0)) { > + if (segments) > + kunmap_atomic(segments); > + > + n = i / SEGS_PER_INDIRECT_FRAME; > + gnt_list_entry = get_grant(&gref_head, info); > + info->shadow[id].indirect_grants[n] = > + gnt_list_entry; > + segments = kmap_atomic( > + pfn_to_page(gnt_list_entry->pfn)); > + ring_req->u.indirect.indirect_grefs[n] = > + gnt_list_entry->gref; > + } > + > gnt_list_entry = get_grant(&gref_head, info); > ref = gnt_list_entry->gref; > > @@ -461,13 +536,23 @@ static int blkif_queue_request(struct request *req) > kunmap_atomic(bvec_data); > kunmap_atomic(shared_data); > } > - > - ring_req->u.rw.seg[i] = > - (struct blkif_request_segment) { > - .gref = ref, > - .first_sect = fsect, > - .last_sect = lsect }; > + if (ring_req->operation != BLKIF_OP_INDIRECT) { > + ring_req->u.rw.seg[i] = > + (struct blkif_request_segment) { > + .gref = ref, > + .first_sect = fsect, > + .last_sect = lsect }; > + } else { > + n = i % SEGS_PER_INDIRECT_FRAME; > + segments[n] = > + (struct blkif_request_segment_aligned) { > + .gref = ref, > + .first_sect = fsect, > + .last_sect = lsect }; > + } > } > + if (segments) > + kunmap_atomic(segments); > } > > info->ring.req_prod_pvt++; > @@ -542,7 +627,8 @@ wait: > flush_requests(info); > } > > -static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, > + unsigned int segments) > { > struct request_queue *rq; > struct blkfront_info *info = gd->private_data; > @@ -571,7 +657,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > blk_queue_max_segment_size(rq, PAGE_SIZE); > > /* Ensure a merged request will fit in a single I/O ring slot. */ > - blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); > + blk_queue_max_segments(rq, segments); > > /* Make sure buffer addresses are sector-aligned. */ > blk_queue_dma_alignment(rq, 511); > @@ -588,13 +674,14 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) > static void xlvbd_flush(struct blkfront_info *info) > { > blk_queue_flush(info->rq, info->feature_flush); > - printk(KERN_INFO "blkfront: %s: %s: %s %s\n", > + printk(KERN_INFO "blkfront: %s: %s: %s %s %s\n", > info->gd->disk_name, > info->flush_op == BLKIF_OP_WRITE_BARRIER ? > "barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ? > "flush diskcache" : "barrier or flush"), > info->feature_flush ? "enabled" : "disabled", > - info->feature_persistent ? "using persistent grants" : ""); > + info->feature_persistent ? "using persistent grants" : "", > + info->max_indirect_segments ? "using indirect descriptors" : ""); > } > > static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) > @@ -734,7 +821,9 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, > gd->driverfs_dev = &(info->xbdev->dev); > set_capacity(gd, capacity); > > - if (xlvbd_init_blk_queue(gd, sector_size)) { > + if (xlvbd_init_blk_queue(gd, sector_size, > + info->max_indirect_segments ? 
: > + BLKIF_MAX_SEGMENTS_PER_REQUEST)) { > del_gendisk(gd); > goto release; > } > @@ -818,6 +907,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) > { > struct grant *persistent_gnt; > struct grant *n; > + int i, j, segs; > > /* Prevent new requests being issued until we fix things up. */ > spin_lock_irq(&info->io_lock); > @@ -843,6 +933,47 @@ static void blkif_free(struct blkfront_info *info, int suspend) > } > BUG_ON(info->persistent_gnts_c != 0); > > + kfree(info->sg); > + info->sg = NULL; > + for (i = 0; i < BLK_RING_SIZE; i++) { > + /* > + * Clear persistent grants present in requests already > + * on the shared ring > + */ > + if (!info->shadow[i].request) > + goto free_shadow; > + > + segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ? > + info->shadow[i].req.u.indirect.nr_segments : > + info->shadow[i].req.u.rw.nr_segments; > + for (j = 0; j < segs; j++) { > + persistent_gnt = info->shadow[i].grants_used[j]; > + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); > + __free_page(pfn_to_page(persistent_gnt->pfn)); > + kfree(persistent_gnt); > + } > + > + if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT) > + /* > + * If this is not an indirect operation don't try to > + * free indirect segments > + */ > + goto free_shadow; > + > + for (j = 0; j < INDIRECT_GREFS(segs); j++) { > + persistent_gnt = info->shadow[i].indirect_grants[j]; > + gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); > + __free_page(pfn_to_page(persistent_gnt->pfn)); > + kfree(persistent_gnt); > + } > + > +free_shadow: > + kfree(info->shadow[i].grants_used); > + info->shadow[i].grants_used = NULL; > + kfree(info->shadow[i].indirect_grants); > + info->shadow[i].indirect_grants = NULL; > + } > + > /* No more gnttab callback work. */ > gnttab_cancel_free_callback(&info->callback); > spin_unlock_irq(&info->io_lock); > @@ -873,6 +1004,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, > char *bvec_data; > void *shared_data; > unsigned int offset = 0; > + int nseg; > + > + nseg = s->req.operation == BLKIF_OP_INDIRECT ? 
> + s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments; > > if (bret->operation == BLKIF_OP_READ) { > /* > @@ -885,7 +1020,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, > BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); > if (bvec->bv_offset < offset) > i++; > - BUG_ON(i >= s->req.u.rw.nr_segments); > + BUG_ON(i >= nseg); > shared_data = kmap_atomic( > pfn_to_page(s->grants_used[i]->pfn)); > bvec_data = bvec_kmap_irq(bvec, &flags); > @@ -897,10 +1032,17 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, > } > } > /* Add the persistent grant into the list of free grants */ > - for (i = 0; i < s->req.u.rw.nr_segments; i++) { > + for (i = 0; i < nseg; i++) { > list_add(&s->grants_used[i]->node, &info->persistent_gnts); > info->persistent_gnts_c++; > } > + if (s->req.operation == BLKIF_OP_INDIRECT) { > + for (i = 0; i < INDIRECT_GREFS(nseg); i++) { > + list_add(&s->indirect_grants[i]->node, > + &info->persistent_gnts); > + info->persistent_gnts_c++; > + } > + } > } > > static irqreturn_t blkif_interrupt(int irq, void *dev_id) > @@ -1034,8 +1176,6 @@ static int setup_blkring(struct xenbus_device *dev, > SHARED_RING_INIT(sring); > FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); > > - sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); > - > err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); > if (err < 0) { > free_page((unsigned long)sring); > @@ -1116,12 +1256,6 @@ again: > goto destroy_blkring; > } > > - /* Allocate memory for grants */ > - err = fill_grant_buffer(info, BLK_RING_SIZE * > - BLKIF_MAX_SEGMENTS_PER_REQUEST); > - if (err) > - goto out; > - > xenbus_switch_state(dev, XenbusStateInitialised); > > return 0; > @@ -1223,13 +1357,84 @@ static int blkfront_probe(struct xenbus_device *dev, > return 0; > } > > +/* > + * This is a clone of md_trim_bio, used to split a bio into smaller ones > + */ > +static void trim_bio(struct bio *bio, int offset, int size) > +{ > + /* 'bio' is a cloned bio which we need to trim to match > + * the given offset and size. 
> + * This requires adjusting bi_sector, bi_size, and bi_io_vec > + */ > + int i; > + struct bio_vec *bvec; > + int sofar = 0; > + > + size <<= 9; > + if (offset == 0 && size == bio->bi_size) > + return; > + > + bio->bi_sector += offset; > + bio->bi_size = size; > + offset <<= 9; > + clear_bit(BIO_SEG_VALID, &bio->bi_flags); > + > + while (bio->bi_idx < bio->bi_vcnt && > + bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { > + /* remove this whole bio_vec */ > + offset -= bio->bi_io_vec[bio->bi_idx].bv_len; > + bio->bi_idx++; > + } > + if (bio->bi_idx < bio->bi_vcnt) { > + bio->bi_io_vec[bio->bi_idx].bv_offset += offset; > + bio->bi_io_vec[bio->bi_idx].bv_len -= offset; > + } > + /* avoid any complications with bi_idx being non-zero*/ > + if (bio->bi_idx) { > + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, > + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); > + bio->bi_vcnt -= bio->bi_idx; > + bio->bi_idx = 0; > + } > + /* Make sure vcnt and last bv are not too big */ > + bio_for_each_segment(bvec, bio, i) { > + if (sofar + bvec->bv_len > size) > + bvec->bv_len = size - sofar; > + if (bvec->bv_len == 0) { > + bio->bi_vcnt = i; > + break; > + } > + sofar += bvec->bv_len; > + } > +} > + > +static void split_bio_end(struct bio *bio, int error) > +{ > + struct split_bio *split_bio = bio->bi_private; > + > + if (error) > + split_bio->err = error; > + > + if (atomic_dec_and_test(&split_bio->pending)) { > + split_bio->bio->bi_phys_segments = 0; > + bio_endio(split_bio->bio, split_bio->err); > + kfree(split_bio); > + } > + bio_put(bio); > +} > > static int blkif_recover(struct blkfront_info *info) > { > int i; > - struct blkif_request *req; > + struct request *req, *n; > struct blk_shadow *copy; > - int j; > + int rc; > + struct bio *bio, *cloned_bio; > + struct bio_list bio_list, merge_bio; > + unsigned int segs; > + int pending, offset, size; > + struct split_bio *split_bio; > + struct list_head requests; > > /* Stage 1: Make a safe copy of the shadow state. */ > copy = kmalloc(sizeof(info->shadow), > @@ -1245,36 +1450,64 @@ static int blkif_recover(struct blkfront_info *info) > info->shadow_free = info->ring.req_prod_pvt; > info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > - /* Stage 3: Find pending requests and requeue them. */ > + rc = blkfront_setup_indirect(info); > + if (rc) { > + kfree(copy); > + return rc; > + } > + > + segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; > + blk_queue_max_segments(info->rq, segs); > + bio_list_init(&bio_list); > + INIT_LIST_HEAD(&requests); > for (i = 0; i < BLK_RING_SIZE; i++) { > /* Not in use? */ > if (!copy[i].request) > continue; > > - /* Grab a request slot and copy shadow state into it. */ > - req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); > - *req = copy[i].req; > - > - /* We get a new request id, and must reset the shadow state. */ > - req->u.rw.id = get_id_from_freelist(info); > - memcpy(&info->shadow[req->u.rw.id], ©[i], sizeof(copy[i])); > - > - if (req->operation != BLKIF_OP_DISCARD) { > - /* Rewrite any grant references invalidated by susp/resume. */ > - for (j = 0; j < req->u.rw.nr_segments; j++) > - gnttab_grant_foreign_access_ref( > - req->u.rw.seg[j].gref, > - info->xbdev->otherend_id, > - pfn_to_mfn(copy[i].grants_used[j]->pfn), > - 0); > + /* > + * Get the bios in the request so we can re-queue them. 
> + */ > + if (copy[i].request->cmd_flags & > + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { > + /* > + * Flush operations don't contain bios, so > + * we need to requeue the whole request > + */ > + list_add(©[i].request->queuelist, &requests); > + continue; > } > - info->shadow[req->u.rw.id].req = *req; > - > - info->ring.req_prod_pvt++; > + merge_bio.head = copy[i].request->bio; > + merge_bio.tail = copy[i].request->biotail; > + bio_list_merge(&bio_list, &merge_bio); > + copy[i].request->bio = NULL; > + blk_put_request(copy[i].request); > } > > kfree(copy); > > + /* > + * Empty the queue, this is important because we might have > + * requests in the queue with more segments than what we > + * can handle now. > + */ > + spin_lock_irq(&info->io_lock); > + while ((req = blk_fetch_request(info->rq)) != NULL) { > + if (req->cmd_flags & > + (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) { > + list_add(&req->queuelist, &requests); > + continue; > + } > + merge_bio.head = req->bio; > + merge_bio.tail = req->biotail; > + bio_list_merge(&bio_list, &merge_bio); > + req->bio = NULL; > + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) > + pr_alert("diskcache flush request found!\n"); > + __blk_put_request(info->rq, req); > + } > + spin_unlock_irq(&info->io_lock); > + > xenbus_switch_state(info->xbdev, XenbusStateConnected); > > spin_lock_irq(&info->io_lock); > @@ -1282,14 +1515,50 @@ static int blkif_recover(struct blkfront_info *info) > /* Now safe for us to use the shared ring */ > info->connected = BLKIF_STATE_CONNECTED; > > - /* Send off requeued requests */ > - flush_requests(info); > - > /* Kick any other new requests queued since we resumed */ > kick_pending_request_queues(info); > > + list_for_each_entry_safe(req, n, &requests, queuelist) { > + /* Requeue pending requests (flush or discard) */ > + list_del_init(&req->queuelist); > + BUG_ON(req->nr_phys_segments > segs); > + blk_requeue_request(info->rq, req); > + } > spin_unlock_irq(&info->io_lock); > > + while ((bio = bio_list_pop(&bio_list)) != NULL) { > + /* Traverse the list of pending bios and re-queue them */ > + if (bio_segments(bio) > segs) { > + /* > + * This bio has more segments than what we can > + * handle, we have to split it. > + */ > + pending = (bio_segments(bio) + segs - 1) / segs; > + split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO); > + BUG_ON(split_bio == NULL); > + atomic_set(&split_bio->pending, pending); > + split_bio->bio = bio; > + for (i = 0; i < pending; i++) { > + offset = (i * segs * PAGE_SIZE) >> 9; > + size = MIN((segs * PAGE_SIZE) >> 9, > + (bio->bi_size >> 9) - offset); > + cloned_bio = bio_clone(bio, GFP_NOIO); > + BUG_ON(cloned_bio == NULL); > + trim_bio(cloned_bio, offset, size); > + cloned_bio->bi_private = split_bio; > + cloned_bio->bi_end_io = split_bio_end; > + submit_bio(cloned_bio->bi_rw, cloned_bio); > + } > + /* > + * Now we have to wait for all those smaller bios to > + * end, so we can also end the "parent" bio. > + */ > + continue; > + } > + /* We don't need to split this bio */ > + submit_bio(bio->bi_rw, bio); > + } > + > return 0; > } > > @@ -1309,8 +1578,12 @@ static int blkfront_resume(struct xenbus_device *dev) > blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); > > err = talk_to_blkback(dev, info); > - if (info->connected == BLKIF_STATE_SUSPENDED && !err) > - err = blkif_recover(info); > + > + /* > + * We have to wait for the backend to switch to > + * connected state, since we want to read which > + * features it supports. 
> + */ > > return err; > } > @@ -1388,6 +1661,62 @@ static void blkfront_setup_discard(struct blkfront_info *info) > kfree(type); > } > > +static int blkfront_setup_indirect(struct blkfront_info *info) > +{ > + unsigned int indirect_segments, segs; > + int err, i; > + > + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, > + "max-indirect-segments", "%u", &indirect_segments, > + NULL); > + if (err) { > + info->max_indirect_segments = 0; > + segs = BLKIF_MAX_SEGMENTS_PER_REQUEST; > + } else { > + info->max_indirect_segments = MIN(indirect_segments, > + xen_blkif_max_segments); > + segs = info->max_indirect_segments; > + } > + info->sg = kzalloc(sizeof(info->sg[0]) * segs, GFP_KERNEL); > + if (info->sg == NULL) > + goto out_of_memory; > + sg_init_table(info->sg, segs); > + > + err = fill_grant_buffer(info, > + (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE); > + if (err) > + goto out_of_memory; > + > + for (i = 0; i < BLK_RING_SIZE; i++) { > + info->shadow[i].grants_used = kzalloc( > + sizeof(info->shadow[i].grants_used[0]) * segs, > + GFP_NOIO); > + if (info->max_indirect_segments) > + info->shadow[i].indirect_grants = kzalloc( > + sizeof(info->shadow[i].indirect_grants[0]) * > + INDIRECT_GREFS(segs), > + GFP_NOIO); > + if ((info->shadow[i].grants_used == NULL) || > + (info->max_indirect_segments && > + (info->shadow[i].indirect_grants == NULL))) > + goto out_of_memory; > + } > + > + > + return 0; > + > +out_of_memory: > + kfree(info->sg); > + info->sg = NULL; > + for (i = 0; i < BLK_RING_SIZE; i++) { > + kfree(info->shadow[i].grants_used); > + info->shadow[i].grants_used = NULL; > + kfree(info->shadow[i].indirect_grants); > + info->shadow[i].indirect_grants = NULL; > + } > + return -ENOMEM; > +} > + > /* > * Invoked when the backend is finally 'ready' (and has told produced > * the details about the physical device - #sectors, size, etc). > @@ -1415,8 +1744,9 @@ static void blkfront_connect(struct blkfront_info *info) > set_capacity(info->gd, sectors); > revalidate_disk(info->gd); > > - /* fall through */ > + return; > case BLKIF_STATE_SUSPENDED: > + blkif_recover(info); > return; > > default: > @@ -1437,6 +1767,7 @@ static void blkfront_connect(struct blkfront_info *info) > info->xbdev->otherend); > return; > } > + info->sector_size = sector_size; > > info->feature_flush = 0; > info->flush_op = 0; > @@ -1484,6 +1815,13 @@ static void blkfront_connect(struct blkfront_info *info) > else > info->feature_persistent = persistent; > > + err = blkfront_setup_indirect(info); > + if (err) { > + xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s", > + info->xbdev->otherend); > + return; > + } > + > err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); > if (err) { > xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", > diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h > index 01c3d62..6d99849 100644 > --- a/include/xen/interface/io/blkif.h > +++ b/include/xen/interface/io/blkif.h > @@ -102,6 +102,8 @@ typedef uint64_t blkif_sector_t; > */ > #define BLKIF_OP_DISCARD 5 > > +#define BLKIF_OP_INDIRECT 6 > + > /* > * Maximum scatter/gather segments per request. > * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. 
> @@ -109,6 +111,16 @@ typedef uint64_t blkif_sector_t;
>  */
> #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
>
> +#define BLKIF_MAX_INDIRECT_GREFS_PER_REQUEST 8
> +
> +struct blkif_request_segment_aligned {
> +	grant_ref_t gref;	/* reference to I/O buffer frame */
> +	/* @first_sect: first sector in frame to transfer (inclusive). */
> +	/* @last_sect: last sector in frame to transfer (inclusive). */
> +	uint8_t first_sect, last_sect;
> +	uint16_t _pad;	/* padding to make it 8 bytes, so it's cache-aligned */
> +} __attribute__((__packed__));
> +
>  struct blkif_request_rw {
>  	uint8_t nr_segments;	/* number of segments */
>  	blkif_vdev_t handle;	/* only for read/write requests */
> @@ -138,11 +150,24 @@ struct blkif_request_discard {
>  	uint8_t _pad3;
>  } __attribute__((__packed__));
>
> +struct blkif_request_indirect {
> +	uint8_t indirect_op;
> +	uint16_t nr_segments;
> +#ifdef CONFIG_X86_64
> +	uint32_t _pad1;	/* offsetof(blkif_...,u.indirect.id) == 8 */
> +#endif
> +	uint64_t id;
> +	blkif_vdev_t handle;
> +	blkif_sector_t sector_number;
> +	grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_GREFS_PER_REQUEST];
> +} __attribute__((__packed__));
> +
>  struct blkif_request {
>  	uint8_t operation;	/* BLKIF_OP_??? */
>  	union {
>  		struct blkif_request_rw rw;
>  		struct blkif_request_discard discard;
> +		struct blkif_request_indirect indirect;
>  	} u;
>  } __attribute__((__packed__));
>
> --
> 1.7.7.5 (Apple Git-26)
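
For reference, a quick back-of-the-envelope check of the segment limits
discussed above (an illustrative standalone sketch, assuming 4 KiB pages
and the 8-byte packed blkif_request_segment_aligned layout from the
patch; it is not built against the Xen headers, and the limit names are
just taken from the patch for labelling):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096	/* assumption: 4 KiB pages */

/* Mirrors the packed layout from blkif.h above: 4 + 1 + 1 + 2 = 8 bytes. */
struct blkif_request_segment_aligned {
	uint32_t gref;			/* grant_ref_t */
	uint8_t  first_sect, last_sect;
	uint16_t _pad;
} __attribute__((__packed__));

#define SEGS_PER_INDIRECT_FRAME \
	(PAGE_SIZE / sizeof(struct blkif_request_segment_aligned))
#define INDIRECT_GREFS(segs) \
	(((segs) + SEGS_PER_INDIRECT_FRAME - 1) / SEGS_PER_INDIRECT_FRAME)

static void report(const char *who, unsigned long segs)
{
	/* Each segment covers at most one page of data. */
	printf("%s: %lu segments -> %lu indirect frame(s), up to %lu KiB per request\n",
	       who, segs, (unsigned long)INDIRECT_GREFS(segs),
	       segs * PAGE_SIZE / 1024);
}

int main(void)
{
	printf("SEGS_PER_INDIRECT_FRAME = %lu\n",
	       (unsigned long)SEGS_PER_INDIRECT_FRAME);
	printf("direct request limit: 11 segments, up to %d KiB per request\n",
	       11 * PAGE_SIZE / 1024);
	report("frontend default (xen_blkif_max_segments)", 64);
	report("backend limit (MAX_INDIRECT_SEGMENTS)", 256);
	return 0;
}

With those assumptions a single indirect frame already holds 512
segments, so both the frontend default of 64 and the backend cap of 256
fit in one indirect gref, and a 64-segment request can cover up to
256 KiB of data versus 44 KiB with the current 11-segment limit.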