Date: Tue, 11 Nov 2008 13:53:33 +0100
From: Fabio Checconi
To: jens.axboe@oracle.com
Cc: linux-kernel@vger.kernel.org, paolo.valente@unimore.it, fabio@gandalf.sssup.it
Subject: [PATCH 01/03] block: prepare I/O context code for BFQ
Message-ID: <20081111125333.GE9508@gandalf.sssup.it>
References: <20081111125148.GD9508@gandalf.sssup.it>
In-Reply-To: <20081111125148.GD9508@gandalf.sssup.it>

BFQ uses struct cfq_io_context to store its per-process, per-device data,
reusing CFQ's cic handling code.  The code is not actually shared at the
moment, to minimize the impact of these patches.

This patch adds a new hlist to each io_context, storing all the cics
allocated by BFQ, so that the right destructor can be called on module
unload; the radix tree used for cic lookup has to be duplicated, because it
can contain dead keys inserted by one scheduler and later retrieved by the
other one.  The io_context exit and free paths are updated to handle the
BFQ cics as well.

The type of cfqq inside struct cfq_io_context is changed to void * so that
it can also hold BFQ per-queue data.  A BFQ-specific ioprio_changed flag is
needed too, to avoid clobbering CFQ's one, so ioprio_changed becomes a
bitmap with one bit per scheduler.
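To make the ioprio_changed change easier to follow, here is a minimal,
self-contained sketch (not part of the patch) of the per-scheduler bitmap
scheme described above.  The IOC_* indexes and the DECLARE_BITMAP() usage
mirror the hunks below; struct example_ioc and the example_* helpers are
made-up stand-ins for the real io_context paths:

/*
 * Illustrative sketch only.  Writer side flags every scheduler; each
 * scheduler then consumes only its own bit, so CFQ and BFQ each observe
 * the priority change exactly once.
 */
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/types.h>

enum {
	IOC_CFQ_IOPRIO_CHANGED,
	IOC_BFQ_IOPRIO_CHANGED,
	IOC_IOPRIO_CHANGED_BITS
};

struct example_ioc {
	unsigned short ioprio;
	DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
};

/* Writer side (ioprio_set() path): publish the new value, then flag everyone. */
static void example_set_ioprio(struct example_ioc *ioc, unsigned short ioprio)
{
	int i;

	ioc->ioprio = ioprio;
	smp_wmb();	/* order the ioprio store before the bitmap update */
	for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
		set_bit(i, ioc->ioprio_changed);
}

/* Reader side, one per scheduler: consume only this scheduler's bit. */
static bool example_cfq_sees_ioprio_change(struct example_ioc *ioc)
{
	return test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED, ioc->ioprio_changed);
}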
Signed-off-by: Fabio Checconi
Signed-off-by: Paolo Valente
---
 block/blk-ioc.c           |   27 ++++++++++++++++-----------
 block/cfq-iosched.c       |   10 +++++++---
 fs/ioprio.c               |    9 +++++++--
 include/linux/iocontext.h |   18 +++++++++++++++---
 4 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 012f065..c18bb82 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 	/* for max_pfn/max_low_pfn */
 
@@ -15,13 +16,12 @@
  */
 static struct kmem_cache *iocontext_cachep;
 
-static void cfq_dtor(struct io_context *ioc)
+static void hlist_sched_dtor(struct io_context *ioc, struct hlist_head *list)
 {
-	if (!hlist_empty(&ioc->cic_list)) {
+	if (!hlist_empty(list)) {
 		struct cfq_io_context *cic;
 
-		cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
+		cic = list_entry(list->first, struct cfq_io_context, cic_list);
 		cic->dtor(ioc);
 	}
 }
@@ -41,7 +41,9 @@ int put_io_context(struct io_context *ioc)
 		rcu_read_lock();
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
-		cfq_dtor(ioc);
+
+		hlist_sched_dtor(ioc, &ioc->cic_list);
+		hlist_sched_dtor(ioc, &ioc->bfq_cic_list);
 		rcu_read_unlock();
 
 		kmem_cache_free(iocontext_cachep, ioc);
@@ -51,18 +53,18 @@ int put_io_context(struct io_context *ioc)
 }
 EXPORT_SYMBOL(put_io_context);
 
-static void cfq_exit(struct io_context *ioc)
+static void hlist_sched_exit(struct io_context *ioc, struct hlist_head *list)
 {
 	rcu_read_lock();
 
-	if (!hlist_empty(&ioc->cic_list)) {
+	if (!hlist_empty(list)) {
 		struct cfq_io_context *cic;
 
-		cic = list_entry(ioc->cic_list.first, struct cfq_io_context,
-								cic_list);
+		cic = list_entry(list->first, struct cfq_io_context, cic_list);
 		cic->exit(ioc);
 	}
 	rcu_read_unlock();
+
 }
 
 /* Called by the exitting task */
@@ -78,7 +80,8 @@ void exit_io_context(void)
 	if (atomic_dec_and_test(&ioc->nr_tasks)) {
 		if (ioc->aic && ioc->aic->exit)
 			ioc->aic->exit(ioc->aic);
-		cfq_exit(ioc);
+		hlist_sched_exit(ioc, &ioc->cic_list);
+		hlist_sched_exit(ioc, &ioc->bfq_cic_list);
 
 		put_io_context(ioc);
 	}
@@ -93,13 +96,15 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		atomic_set(&ret->refcount, 1);
 		atomic_set(&ret->nr_tasks, 1);
 		spin_lock_init(&ret->lock);
-		ret->ioprio_changed = 0;
+		bitmap_zero(ret->ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
 		ret->ioprio = 0;
 		ret->last_waited = jiffies; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		ret->aic = NULL;
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
+		INIT_RADIX_TREE(&ret->bfq_radix_root, GFP_ATOMIC | __GFP_HIGH);
+		INIT_HLIST_HEAD(&ret->bfq_cic_list);
 		ret->ioc_data = NULL;
 	}
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1e2aff8..ab503ec 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1417,7 +1417,6 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
 static void cfq_ioc_set_ioprio(struct io_context *ioc)
 {
 	call_for_each_cic(ioc, changed_ioprio);
-	ioc->ioprio_changed = 0;
 }
 
 static struct cfq_queue *
@@ -1664,8 +1663,13 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 		goto err_free;
 
 out:
-	smp_read_barrier_depends();
-	if (unlikely(ioc->ioprio_changed))
+	/*
+	 * test_and_clear_bit() implies a memory barrier, paired with
+	 * the wmb() in fs/ioprio.c, so the value seen for ioprio is the
+	 * new one.
+	 */
+	if (unlikely(test_and_clear_bit(IOC_CFQ_IOPRIO_CHANGED,
+					ioc->ioprio_changed)))
 		cfq_ioc_set_ioprio(ioc);
 
 	return cic;
 
diff --git a/fs/ioprio.c b/fs/ioprio.c
index da3cc46..9296ac3 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -29,7 +29,7 @@
 
 static int set_task_ioprio(struct task_struct *task, int ioprio)
 {
-	int err;
+	int err, i;
 	struct io_context *ioc;
 
 	if (task->uid != current->euid &&
@@ -53,12 +53,17 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
 			err = -ENOMEM;
 			break;
 		}
+		/* let other ioc users see the new values */
+		smp_wmb();
 		task->io_context = ioc;
 	} while (1);
 
 	if (!err) {
 		ioc->ioprio = ioprio;
-		ioc->ioprio_changed = 1;
+		/* make sure schedulers see the new ioprio value */
+		wmb();
+		for (i = 0; i < IOC_IOPRIO_CHANGED_BITS; i++)
+			set_bit(i, ioc->ioprio_changed);
 	}
 
 	task_unlock(task);
diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
index 08b987b..335bbb9 100644
--- a/include/linux/iocontext.h
+++ b/include/linux/iocontext.h
@@ -1,6 +1,7 @@
 #ifndef IOCONTEXT_H
 #define IOCONTEXT_H
 
+#include 
 #include 
 #include 
 
@@ -30,12 +31,11 @@ struct as_io_context {
 	sector_t seek_mean;
 };
 
-struct cfq_queue;
 struct cfq_io_context {
 	void *key;
 	unsigned long dead_key;
 
-	struct cfq_queue *cfqq[2];
+	void *cfqq[2];
 
 	struct io_context *ioc;
 
@@ -60,6 +60,16 @@ struct cfq_io_context {
 };
 
 /*
+ * Indexes into the ioprio_changed bitmap.  A bit set indicates that
+ * the corresponding I/O scheduler needs to see a ioprio update.
+ */
+enum {
+	IOC_CFQ_IOPRIO_CHANGED,
+	IOC_BFQ_IOPRIO_CHANGED,
+	IOC_IOPRIO_CHANGED_BITS
+};
+
+/*
  * I/O subsystem state of the associated processes.  It is refcounted
  * and kmalloc'ed. These could be shared between processes.
  */
@@ -71,7 +81,7 @@ struct io_context {
 	spinlock_t lock;
 
 	unsigned short ioprio;
-	unsigned short ioprio_changed;
+	DECLARE_BITMAP(ioprio_changed, IOC_IOPRIO_CHANGED_BITS);
 
 	/*
 	 * For request batching
@@ -82,6 +92,8 @@ struct io_context {
 	struct as_io_context *aic;
 	struct radix_tree_root radix_root;
 	struct hlist_head cic_list;
+	struct radix_tree_root bfq_radix_root;
+	struct hlist_head bfq_cic_list;
 	void *ioc_data;
 };
 
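For reference, a rough sketch of how the BFQ side is expected to consume the
new fields.  This is not part of the patch: the bfq_example_* names are
hypothetical placeholders, and only the io_context members (bfq_radix_root,
bfq_cic_list, ioprio_changed) and the IOC_BFQ_IOPRIO_CHANGED index come from
the patch itself:

#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/iocontext.h>
#include <linux/radix-tree.h>

/* Hypothetical BFQ-side handler, mirroring cfq_ioc_set_ioprio(). */
static void bfq_example_set_ioprio(struct io_context *ioc)
{
	/* walk ioc->bfq_cic_list and update each cic, as CFQ does */
}

static struct cfq_io_context *
bfq_example_lookup_cic(struct io_context *ioc, void *bfqd)
{
	struct cfq_io_context *cic;

	/* BFQ keeps its cics in its own radix tree, keyed by its queue data. */
	cic = radix_tree_lookup(&ioc->bfq_radix_root, (unsigned long)bfqd);
	if (!cic)
		return NULL;

	/* Consume only BFQ's notification bit; CFQ's bit is left untouched. */
	if (unlikely(test_and_clear_bit(IOC_BFQ_IOPRIO_CHANGED,
					ioc->ioprio_changed)))
		bfq_example_set_ioprio(ioc);

	return cic;
}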