Subject: Re: [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware queues/rings
From: Konrad Rzeszutek Wilk
Date: Wed, 04 Nov 2015 22:24:41 -0500
To: Bob Liu
CC: xen-devel@lists.xen.org, linux-kernel@vger.kernel.org, roger.pau@citrix.com,
 felipe.franciosi@citrix.com, axboe@fb.com, avanzini.arianna@gmail.com,
 rafal.mielniczuk@citrix.com, jonathan.davies@citrix.com, david.vrabel@citrix.com

On November 4, 2015 10:02:26 PM EST, Bob Liu wrote:
>
>On 11/05/2015 10:30 AM, Konrad Rzeszutek Wilk wrote:
>> On Mon, Nov 02, 2015 at 12:21:43PM +0800, Bob Liu wrote:
>>> Preparatory patch for multiple hardware queues (rings). The number of
>>> rings is unconditionally set to 1; a larger number will be enabled in
>>> the next patch, so as to keep every single patch small and readable.
>>
>> Instead of saying 'next patch' - spell out the title of the patch.
>>
>>> Signed-off-by: Arianna Avanzini
>>> Signed-off-by: Bob Liu
>>> ---
>>>  drivers/block/xen-blkback/common.h |   3 +-
>>>  drivers/block/xen-blkback/xenbus.c | 292 +++++++++++++++++++++++--------------
>>>  2 files changed, 185 insertions(+), 110 deletions(-)
>>>
>>> diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
>>> index f0dd69a..4de1326 100644
>>> --- a/drivers/block/xen-blkback/common.h
>>> +++ b/drivers/block/xen-blkback/common.h
>>> @@ -341,7 +341,8 @@ struct xen_blkif {
>>>  	struct work_struct	free_work;
>>>  	unsigned int		nr_ring_pages;
>>>  	/* All rings for this device */
>>> -	struct xen_blkif_ring	ring;
>>> +	struct xen_blkif_ring	*rings;
>>> +	unsigned int		nr_rings;
>>>  };
>>>
>>>  struct seg_buf {
>>> diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
>>> index 7bdd5fd..ac4b458 100644
>>> --- a/drivers/block/xen-blkback/xenbus.c
>>> +++ b/drivers/block/xen-blkback/xenbus.c
>>> @@ -84,11 +84,12 @@ static int blkback_name(struct xen_blkif *blkif, char *buf)
>>>
>>>  static void xen_update_blkif_status(struct xen_blkif *blkif)
>>>  {
>>> -	int err;
>>> +	int err, i;
>>
>> unsigned int.
>>
>>>  	char name[BLKBACK_NAME_LEN];
>>> +	struct xen_blkif_ring *ring;
>>>
>>>  	/* Not ready to connect? */
>>> -	if (!blkif->ring.irq || !blkif->vbd.bdev)
>>> +	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
>>>  		return;
>>>
>>>  	/* Already connected? */
>>> @@ -113,19 +114,57 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
>>>  	}
>>>  	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
>>>
>>> -	blkif->ring.xenblkd = kthread_run(xen_blkif_schedule, &blkif->ring, "%s", name);
>>> -	if (IS_ERR(blkif->ring.xenblkd)) {
>>> -		err = PTR_ERR(blkif->ring.xenblkd);
>>> -		blkif->ring.xenblkd = NULL;
>>> -		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> -		return;
>>> +	if (blkif->nr_rings == 1) {
>>> +		blkif->rings[0].xenblkd = kthread_run(xen_blkif_schedule, &blkif->rings[0], "%s", name);
>>> +		if (IS_ERR(blkif->rings[0].xenblkd)) {
>>> +			err = PTR_ERR(blkif->rings[0].xenblkd);
>>> +			blkif->rings[0].xenblkd = NULL;
>>> +			xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> +			return;
>>> +		}
>>> +	} else {
>>> +		for (i = 0; i < blkif->nr_rings; i++) {
>>> +			ring = &blkif->rings[i];
>>> +			ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
>>> +			if (IS_ERR(ring->xenblkd)) {
>>> +				err = PTR_ERR(ring->xenblkd);
>>> +				ring->xenblkd = NULL;
>>> +				xenbus_dev_error(blkif->be->dev, err,
>>> +						 "start %s-%d xenblkd", name, i);
>>> +				return;
>>> +			}
>>> +		}
>>
>> Please collapse this together and just have one loop.
>>
>> And just use the same name throughout ('%s-%d').
>>
>> Also, this does not take care of actually stopping the already-started
>> ring->xenblkd threads if a later one fails to start.
>>
>> Hmm, looking at this function... we seem to forget to change the
>> state if something fails.
>>
>> As in, connect() switches the state to Connected. And if anything blows
>> up after the connect() we don't switch the state. We do report an error
>> on the XenBus, but the state stays the same.
>>
>> We should be using xenbus_dev_fatal, I believe - so at least the state
>> changes to Closing (and then the frontend can switch itself from
>> Closing to Closed) - and we will call 'xen_blkif_disconnect' on Closed.
>>
>
>Will update..
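
Something along these lines is what I had in mind - a rough, untested
sketch that would drop into xen_update_blkif_status() (the 'fail' label
and the unwind loop are my own, not from the patch):

	unsigned int i;

	for (i = 0; i < blkif->nr_rings; i++) {
		ring = &blkif->rings[i];
		ring->xenblkd = kthread_run(xen_blkif_schedule, ring,
					    "%s-%d", name, i);
		if (IS_ERR(ring->xenblkd)) {
			err = PTR_ERR(ring->xenblkd);
			ring->xenblkd = NULL;
			/* xenbus_dev_fatal moves the state to Closing. */
			xenbus_dev_fatal(blkif->be->dev, err,
					 "start %s-%d xenblkd", name, i);
			goto fail;
		}
	}
	return;

fail:
	/* Stop the threads that did start before the failure. */
	while (i--) {
		kthread_stop(blkif->rings[i].xenblkd);
		blkif->rings[i].xenblkd = NULL;
	}

That way the single-ring case is just one iteration of the same loop
(with the '%s-0' name), the frontend sees Closing instead of a stale
Connected, and the threads that were already started do not leak.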
>
>>> +	}
>>> +}
>>> +
>>> +static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
>>> +{
>>> +	int r;
>>
>> unsigned int i;
>>
>>> +
>>> +	blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct xen_blkif_ring), GFP_KERNEL);
>>> +	if (!blkif->rings)
>>> +		return -ENOMEM;
>>> +
>>> +	for (r = 0; r < blkif->nr_rings; r++) {
>>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +
>>> +		spin_lock_init(&ring->blk_ring_lock);
>>> +		init_waitqueue_head(&ring->wq);
>>> +		INIT_LIST_HEAD(&ring->pending_free);
>>> +
>>> +		spin_lock_init(&ring->pending_free_lock);
>>> +		init_waitqueue_head(&ring->pending_free_wq);
>>> +		init_waitqueue_head(&ring->shutdown_wq);
>>> +		ring->blkif = blkif;
>>> +		xen_blkif_get(blkif);
>>>  	}
>>> +
>>> +	return 0;
>>>  }
>>>
>>>  static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>>  {
>>>  	struct xen_blkif *blkif;
>>> -	struct xen_blkif_ring *ring;
>>>
>>>  	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
>>>
>>> @@ -136,27 +175,17 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>>  	blkif->domid = domid;
>>>  	atomic_set(&blkif->refcnt, 1);
>>>  	init_completion(&blkif->drain_complete);
>>> -	atomic_set(&blkif->drain, 0);
>>>  	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>>>  	spin_lock_init(&blkif->free_pages_lock);
>>>  	INIT_LIST_HEAD(&blkif->free_pages);
>>> -	blkif->free_pages_num = 0;
>>> -	blkif->persistent_gnts.rb_node = NULL;
>>>  	INIT_LIST_HEAD(&blkif->persistent_purge_list);
>>> -	atomic_set(&blkif->persistent_gnt_in_use, 0);
>>>  	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
>>>
>>> -	ring = &blkif->ring;
>>> -	ring->blkif = blkif;
>>> -	spin_lock_init(&ring->blk_ring_lock);
>>> -	init_waitqueue_head(&ring->wq);
>>> -	ring->st_print = jiffies;
>>> -	atomic_set(&ring->inflight, 0);
>>> -
>>> -	INIT_LIST_HEAD(&ring->pending_free);
>>> -	spin_lock_init(&ring->pending_free_lock);
>>> -	init_waitqueue_head(&ring->pending_free_wq);
>>> -	init_waitqueue_head(&ring->shutdown_wq);
>>> +	blkif->nr_rings = 1;
>>> +	if (xen_blkif_alloc_rings(blkif)) {
>>> +		kmem_cache_free(xen_blkif_cachep, blkif);
>>> +		return ERR_PTR(-ENOMEM);
>>> +	}
>>>
>>>  	return blkif;
>>>  }
>>> @@ -218,50 +247,54 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
>>>  static int xen_blkif_disconnect(struct xen_blkif *blkif)
>>>  {
>>>  	struct pending_req *req, *n;
>>> -	int i = 0, j;
>>> -	struct xen_blkif_ring *ring = &blkif->ring;
>>> +	int j, r;
>>>
>>
>> unsigned int i;
>>
>>> -	if (ring->xenblkd) {
>>> -		kthread_stop(ring->xenblkd);
>>> -		wake_up(&ring->shutdown_wq);
>>> -		ring->xenblkd = NULL;
>>> -	}
>>> +	for (r = 0; r < blkif->nr_rings; r++) {
>>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +		int i = 0;
>>
>> unsigned int
>>
>>> -	/* The above kthread_stop() guarantees that at this point we
>>> -	 * don't have any discard_io or other_io requests. So, checking
>>> -	 * for inflight IO is enough.
>>> -	 */
>>> -	if (atomic_read(&ring->inflight) > 0)
>>> -		return -EBUSY;
>>> +		if (ring->xenblkd) {
>>> +			kthread_stop(ring->xenblkd);
>>> +			wake_up(&ring->shutdown_wq);
>>> +			ring->xenblkd = NULL;
>>> +		}
>>>
>>> -	if (ring->irq) {
>>> -		unbind_from_irqhandler(ring->irq, ring);
>>> -		ring->irq = 0;
>>> -	}
>>> +		/* The above kthread_stop() guarantees that at this point we
>>> +		 * don't have any discard_io or other_io requests. So, checking
>>> +		 * for inflight IO is enough.
>>> +		 */
>>> +		if (atomic_read(&ring->inflight) > 0)
>>> +			return -EBUSY;
>>>
>>> -	if (ring->blk_rings.common.sring) {
>>> -		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> -		ring->blk_rings.common.sring = NULL;
>>> -	}
>>> +		if (ring->irq) {
>>> +			unbind_from_irqhandler(ring->irq, ring);
>>> +			ring->irq = 0;
>>> +		}
>>>
>>> -	/* Remove all persistent grants and the cache of ballooned pages. */
>>> -	xen_blkbk_free_caches(ring);
>>> +		if (ring->blk_rings.common.sring) {
>>> +			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> +			ring->blk_rings.common.sring = NULL;
>>> +		}
>>>
>>> -	/* Check that there is no request in use */
>>> -	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>>> -		list_del(&req->free_list);
>>> +		/* Remove all persistent grants and the cache of ballooned pages. */
>>> +		xen_blkbk_free_caches(ring);
>>>
>>> -		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> -			kfree(req->segments[j]);
>>> +		/* Check that there is no request in use */
>>> +		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>>> +			list_del(&req->free_list);
>>>
>>> -		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> -			kfree(req->indirect_pages[j]);
>>> +			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> +				kfree(req->segments[j]);
>>>
>>> -		kfree(req);
>>> -		i++;
>>> -	}
>>> +			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> +				kfree(req->indirect_pages[j]);
>>>
>>> -	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +			kfree(req);
>>> +			i++;
>>> +		}
>>> +
>>> +		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +	}
>>>  	blkif->nr_ring_pages = 0;
>>>
>>>  	return 0;
>>> @@ -281,6 +314,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
>>>  	BUG_ON(!list_empty(&blkif->free_pages));
>>>  	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
>>>
>>> +	kfree(blkif->rings);
>>>  	kmem_cache_free(xen_blkif_cachep, blkif);
>>>  }
>>>
>>> @@ -307,15 +341,19 @@ int __init xen_blkif_interface_init(void)
>>>  	struct xenbus_device *dev = to_xenbus_device(_dev);		\
>>>  	struct backend_info *be = dev_get_drvdata(&dev->dev);		\
>>>  	struct xen_blkif *blkif = be->blkif;				\
>>> -	struct xen_blkif_ring *ring = &blkif->ring;			\
>>> +	int i;								\
>>> +									\
>>> +	for (i = 0; i < blkif->nr_rings; i++) {				\
>>> +		struct xen_blkif_ring *ring = &blkif->rings[i];		\
>>>  									\
>>> -	blkif->st_oo_req = ring->st_oo_req;				\
>>> -	blkif->st_rd_req = ring->st_rd_req;				\
>>> -	blkif->st_wr_req = ring->st_wr_req;				\
>>> -	blkif->st_f_req = ring->st_f_req;				\
>>> -	blkif->st_ds_req = ring->st_ds_req;				\
>>> -	blkif->st_rd_sect = ring->st_rd_sect;				\
>>> -	blkif->st_wr_sect = ring->st_wr_sect;				\
>>> +		blkif->st_oo_req += ring->st_oo_req;			\
>>> +		blkif->st_rd_req += ring->st_rd_req;			\
>>> +		blkif->st_wr_req += ring->st_wr_req;			\
>>> +		blkif->st_f_req += ring->st_f_req;			\
>>> +		blkif->st_ds_req += ring->st_ds_req;			\
>>> +		blkif->st_rd_sect += ring->st_rd_sect;			\
>>> +		blkif->st_wr_sect += ring->st_wr_sect;			\
>>> +	}								\
>>
>> That needs fixing - the '+=' accumulates into the blkif fields on
>> every sysfs read. You could alter the macro to only read the values
>> from the proper member.
>
>Do you think we still need these debug values per ring?
>I'm thinking of just leaving them per device (blkif), but that
>requires an extra lock to protect them?

It should be per device. However, I think you are OK with regards to
locking - as long as the sysfs entries are torn down before we
deallocate the rings.
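
To illustrate what I mean by altering the macro - a rough, untested
sketch (the VBD_SHOW_ALLRING name and the unsigned long long local are
mine, not from the patch; the format string passed in would have to
match):

#define VBD_SHOW_ALLRING(name, format)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
		struct xen_blkif *blkif = be->blkif;			\
		unsigned int i;						\
		unsigned long long result = 0;				\
									\
		/* Sum into a local on every read - nothing is		\
		 * accumulated in the blkif, so repeated reads stay	\
		 * correct and no extra lock is needed. */		\
		for (i = 0; i < blkif->nr_rings; i++)			\
			result += blkif->rings[i].st_##name;		\
									\
		return sprintf(buf, format, result);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW_ALLRING(oo_req, "%llu\n");

The reads can still race with a ring incrementing its counter, but for
debug statistics a slightly stale value is fine; the only hard
requirement is the one above - tear down the sysfs entries before
kfree(blkif->rings).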