From: Nauman Rafique
Date: Thu, 12 Aug 2010 18:24:49 -0700
Subject: Re: [RFC][PATCH 10/11] blkiocg async: Async queue per cfq_group
To: Munehiro Ikeda
Cc: linux-kernel@vger.kernel.org, jens.axboe@oracle.com, Vivek Goyal,
    Ryo Tsuruta, taka@valinux.co.jp, kamezawa.hiroyu@jp.fujitsu.com,
    Andrea Righi, Gui Jianfeng, akpm@linux-foundation.org,
    balbir@linux.vnet.ibm.com
In-Reply-To: <4C3695D9.1060203@ds.jp.nec.com>
References: <4C369009.80503@ds.jp.nec.com> <4C3695D9.1060203@ds.jp.nec.com>

Muuhh, I do not understand the motivation behind making cfq_io_context
per cgroup.  We can extract the cgroup id from the bio and use that to
look up the cgroup and its async queues.  What am I missing?

On Thu, Jul 8, 2010 at 8:22 PM, Munehiro Ikeda wrote:
> This is the main body of the async capability for the blkio controller.
> The basic ideas are
>  - to move the async queues from cfq_data to cfq_group, and
>  - to associate cfq_io_context with cfq_group.
>
> Each cfq_io_context, which used to be created per io_context per
> device, is now created per io_context per cfq_group.  Since each
> cfq_group is created per cgroup per device, a cfq_io_context now ends
> up being created per io_context per cgroup per device.
>
> To protect the link between cfq_io_context and cfq_group, the locking
> code is modified in several places.
>
> ToDo:
> - Lock protection might still be imperfect; more investigation is
>   needed.
>
> - The cic_index of the root cfq_group (cfqd->root_group.cic_index) is
>   not removed and lasts beyond elevator switching.  This issue should
>   be solved.
>
> Signed-off-by: Munehiro "Muuhh" Ikeda
> ---
>  block/cfq-iosched.c       |  277 ++++++++++++++++++++++++++++-----------------
>  include/linux/iocontext.h |    2 +-
>  2 files changed, 175 insertions(+), 104 deletions(-)
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 68f76e9..4186c30 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -191,10 +191,23 @@ struct cfq_group {
>         struct cfq_rb_root service_trees[2][3];
>         struct cfq_rb_root service_tree_idle;
>
> +       /*
> +        * async queue for each priority case
> +        */
> +       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
> +       struct cfq_queue *async_idle_cfqq;
> +
>         unsigned long saved_workload_slice;
>         enum wl_type_t saved_workload;
>         enum wl_prio_t saved_serving_prio;
>         struct blkio_group blkg;
> +       struct cfq_data *cfqd;
> +
> +       /* lock for cic_list */
> +       spinlock_t lock;
> +       unsigned int cic_index;
> +       struct list_head cic_list;
> +
>  #ifdef CONFIG_CFQ_GROUP_IOSCHED
>         struct hlist_node cfqd_node;
>         atomic_t ref;
> @@ -254,12 +267,6 @@ struct cfq_data {
>         struct cfq_queue *active_queue;
>         struct cfq_io_context *active_cic;
>
> -       /*
> -        * async queue for each priority case
> -        */
> -       struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
> -       struct cfq_queue *async_idle_cfqq;
> -
>         sector_t last_position;
>
>         /*
> @@ -275,8 +282,6 @@ struct cfq_data {
>         unsigned int cfq_latency;
>         unsigned int cfq_group_isolation;
>
> -       unsigned int cic_index;
> -       struct list_head cic_list;
>
>         /*
>          * Fallback dummy cfqq for extreme OOM conditions
> @@ -418,10 +423,16 @@ static inline int cfqg_busy_async_queues(struct cfq_data *cfqd,
>  }
>
>  static void cfq_dispatch_insert(struct request_queue *, struct request *);
> +static void __cfq_exit_single_io_context(struct cfq_data *,
> +                                         struct cfq_group *,
> +                                         struct cfq_io_context *);
>  static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
> -                                       struct io_context *, gfp_t);
> +                                       struct cfq_io_context *, gfp_t);
>  static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
> -                                             struct io_context *);
> +                                             struct cfq_group *,
> +                                             struct io_context *);
> +static void cfq_put_async_queues(struct cfq_group *);
> +static int cfq_alloc_cic_index(void);
>
>  static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
>                                              bool is_sync)
> @@ -438,17 +449,28 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
>  #define CIC_DEAD_KEY    1ul
>  #define CIC_DEAD_INDEX_SHIFT    1
>
> -static inline void *cfqd_dead_key(struct cfq_data *cfqd)
> +static inline void *cfqg_dead_key(struct cfq_group *cfqg)
>  {
> -       return (void *)(cfqd->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
> +       return (void *)(cfqg->cic_index << CIC_DEAD_INDEX_SHIFT | CIC_DEAD_KEY);
> +}
> +
> +static inline struct cfq_group *cic_to_cfqg(struct cfq_io_context *cic)
> +{
> +       struct cfq_group *cfqg = cic->key;
> +
> +       if (unlikely((unsigned long) cfqg & CIC_DEAD_KEY))
> +               cfqg = NULL;
> +
> +       return cfqg;
>  }
>
>  static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
>  {
> -       struct cfq_data *cfqd = cic->key;
> +       struct cfq_data *cfqd = NULL;
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>
> -       if (unlikely((unsigned long) cfqd & CIC_DEAD_KEY))
> -               return NULL;
> +       if (likely(cfqg))
> +               cfqd = cfqg->cfqd;
>
>         return cfqd;
>  }
> @@ -959,12 +981,12 @@ cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight)
>  }
>
>  static struct cfq_group *
> -cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
> +cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg,
> +                   int create)
>  {
> -       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
>         struct cfq_group *cfqg = NULL;
>         void *key = cfqd;
> -       int i, j;
> +       int i, j, idx;
>         struct cfq_rb_root *st;
>         struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
>         unsigned int major, minor;
> @@ -978,14 +1000,21 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>         if (cfqg || !create)
>                 goto done;
>
> +       idx = cfq_alloc_cic_index();
> +       if (idx < 0)
> +               goto done;
> +
>         cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
>         if (!cfqg)
> -               goto done;
> +               goto err;
>
>         for_each_cfqg_st(cfqg, i, j, st)
>                 *st = CFQ_RB_ROOT;
>         RB_CLEAR_NODE(&cfqg->rb_node);
>
> +       spin_lock_init(&cfqg->lock);
> +       INIT_LIST_HEAD(&cfqg->cic_list);
> +
>         /*
>          * Take the initial reference that will be released on destroy
>          * This can be thought of a joint reference by cgroup and
> @@ -1002,7 +1031,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
>
>         /* Add group on cfqd list */
>         hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
> +       cfqg->cfqd = cfqd;
> +       cfqg->cic_index = idx;
> +       goto done;
>
> +err:
> +       spin_lock(&cic_index_lock);
> +       ida_remove(&cic_index_ida, idx);
> +       spin_unlock(&cic_index_lock);
>  done:
>         return cfqg;
>  }
> @@ -1095,10 +1131,6 @@ static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
>
>  static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
>  {
> -       /* Currently, all async queues are mapped to root group */
> -       if (!cfq_cfqq_sync(cfqq))
> -               cfqg = &cfqq->cfqd->root_group;
> -
>         cfqq->cfqg = cfqg;
>         /* cfqq reference on cfqg */
>         atomic_inc(&cfqq->cfqg->ref);
> @@ -1114,6 +1146,16 @@ static void cfq_put_cfqg(struct cfq_group *cfqg)
>                 return;
>         for_each_cfqg_st(cfqg, i, j, st)
>                 BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
> +
> +       /**
> +        * ToDo:
> +        * root_group never reaches here and cic_index is never
> +        * removed.  Unused cic_index lasts by elevator switching.
> +        */
> +       spin_lock(&cic_index_lock);
> +       ida_remove(&cic_index_ida, cfqg->cic_index);
> +       spin_unlock(&cic_index_lock);
> +
>         kfree(cfqg);
>  }
>
> @@ -1122,6 +1164,15 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
>         /* Something wrong if we are trying to remove same group twice */
>         BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
>
> +       while (!list_empty(&cfqg->cic_list)) {
> +               struct cfq_io_context *cic = list_entry(cfqg->cic_list.next,
> +                                                       struct cfq_io_context,
> +                                                       group_list);
> +
> +               __cfq_exit_single_io_context(cfqd, cfqg, cic);
> +       }
> +
> +       cfq_put_async_queues(cfqg);
>         hlist_del_init(&cfqg->cfqd_node);
>
>         /*
> @@ -1497,10 +1548,12 @@ static struct request *
>  cfq_find_rq_fmerge(struct cfq_data *cfqd, struct bio *bio)
>  {
>         struct task_struct *tsk = current;
> +       struct cfq_group *cfqg;
>         struct cfq_io_context *cic;
>         struct cfq_queue *cfqq;
>
> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>         if (!cic)
>                 return NULL;
>
> @@ -1611,6 +1664,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>                            struct bio *bio)
>  {
>         struct cfq_data *cfqd = q->elevator->elevator_data;
> +       struct cfq_group *cfqg;
>         struct cfq_io_context *cic;
>         struct cfq_queue *cfqq;
>
> @@ -1624,7 +1678,8 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
>          * Lookup the cfqq that this bio will be queued with. Allow
>          * merge only if rq is queued there.
>          */
> -       cic = cfq_cic_lookup(cfqd, current->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, current->io_context);
>         if (!cic)
>                 return false;
>
> @@ -2667,17 +2722,18 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
>  }
>
>  static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
> +                                         struct cfq_group *cfqg,
>                                           struct cfq_io_context *cic)
>  {
>         struct io_context *ioc = cic->ioc;
>
> -       list_del_init(&cic->queue_list);
> +       list_del_init(&cic->group_list);
>
>         /*
>          * Make sure dead mark is seen for dead queues
>          */
>         smp_wmb();
> -       cic->key = cfqd_dead_key(cfqd);
> +       cic->key = cfqg_dead_key(cfqg);
>
>         if (ioc->ioc_data == cic)
>                 rcu_assign_pointer(ioc->ioc_data, NULL);
> @@ -2696,23 +2752,23 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
>  static void cfq_exit_single_io_context(struct io_context *ioc,
>                                         struct cfq_io_context *cic)
>  {
> -       struct cfq_data *cfqd = cic_to_cfqd(cic);
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>
> -       if (cfqd) {
> -               struct request_queue *q = cfqd->queue;
> +       if (cfqg) {
> +               struct cfq_data *cfqd = cfqg->cfqd;
>                 unsigned long flags;
>
> -               spin_lock_irqsave(q->queue_lock, flags);
> +               spin_lock_irqsave(&cfqg->lock, flags);
>
>                 /*
>                  * Ensure we get a fresh copy of the ->key to prevent
>                  * race between exiting task and queue
>                  */
>                 smp_read_barrier_depends();
> -               if (cic->key == cfqd)
> -                       __cfq_exit_single_io_context(cfqd, cic);
> +               if (cic->key == cfqg)
> +                       __cfq_exit_single_io_context(cfqd, cfqg, cic);
>
> -               spin_unlock_irqrestore(q->queue_lock, flags);
> +               spin_unlock_irqrestore(&cfqg->lock, flags);
>         }
>  }
>
> @@ -2734,7 +2790,7 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>                                                         cfqd->queue->node);
>         if (cic) {
>                 cic->last_end_request = jiffies;
> -               INIT_LIST_HEAD(&cic->queue_list);
> +               INIT_LIST_HEAD(&cic->group_list);
>                 INIT_HLIST_NODE(&cic->cic_list);
>                 cic->dtor = cfq_free_io_context;
>                 cic->exit = cfq_exit_io_context;
> @@ -2801,8 +2857,7 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
>         cfqq = cic->cfqq[BLK_RW_ASYNC];
>         if (cfqq) {
>                 struct cfq_queue *new_cfqq;
> -               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc,
> -                                               GFP_ATOMIC);
> +               new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic, GFP_ATOMIC);
>                 if (new_cfqq) {
>                         cic->cfqq[BLK_RW_ASYNC] = new_cfqq;
>                         cfq_put_queue(cfqq);
> @@ -2879,16 +2934,14 @@ static void cfq_ioc_set_cgroup(struct io_context *ioc)
>
>  static struct cfq_queue *
>  cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
> -                    struct io_context *ioc, gfp_t gfp_mask)
> +                    struct cfq_io_context *cic, gfp_t gfp_mask)
>  {
>         struct cfq_queue *cfqq, *new_cfqq = NULL;
> -       struct cfq_io_context *cic;
> +       struct io_context *ioc = cic->ioc;
>         struct cfq_group *cfqg;
>
>  retry:
> -       cfqg = cfq_get_cfqg(cfqd, NULL, 1);
> -       cic = cfq_cic_lookup(cfqd, ioc);
> -       /* cic always exists here */
> +       cfqg = cic_to_cfqg(cic);
>         cfqq = cic_to_cfqq(cic, is_sync);
>
>         /*
> @@ -2930,36 +2983,38 @@ retry:
>  }
>
>  static struct cfq_queue **
> -cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
> +cfq_async_queue_prio(struct cfq_group *cfqg, int ioprio_class, int ioprio)
>  {
>         switch (ioprio_class) {
>         case IOPRIO_CLASS_RT:
> -               return &cfqd->async_cfqq[0][ioprio];
> +               return &cfqg->async_cfqq[0][ioprio];
>         case IOPRIO_CLASS_BE:
> -               return &cfqd->async_cfqq[1][ioprio];
> +               return &cfqg->async_cfqq[1][ioprio];
>         case IOPRIO_CLASS_IDLE:
> -               return &cfqd->async_idle_cfqq;
> +               return &cfqg->async_idle_cfqq;
>         default:
>                 BUG();
>         }
>  }
>
>  static struct cfq_queue *
> -cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
> +cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_context *cic,
>               gfp_t gfp_mask)
>  {
> +       struct io_context *ioc = cic->ioc;
>         const int ioprio = task_ioprio(ioc);
>         const int ioprio_class = task_ioprio_class(ioc);
> +       struct cfq_group *cfqg = cic_to_cfqg(cic);
>         struct cfq_queue **async_cfqq = NULL;
>         struct cfq_queue *cfqq = NULL;
>
>         if (!is_sync) {
> -               async_cfqq = cfq_async_queue_prio(cfqd, ioprio_class, ioprio);
> +               async_cfqq = cfq_async_queue_prio(cfqg, ioprio_class, ioprio);
>                 cfqq = *async_cfqq;
>         }
>
>         if (!cfqq)
> -               cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
> +               cfqq = cfq_find_alloc_queue(cfqd, is_sync, cic, gfp_mask);
>
>         /*
>          * pin the queue now that it's allocated, scheduler exit will prune it
> @@ -2977,19 +3032,19 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
>  * We drop cfq io contexts lazily, so we may find a dead one.
>  */
>  static void
> -cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
> -                 struct cfq_io_context *cic)
> +cfq_drop_dead_cic(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +                 struct io_context *ioc, struct cfq_io_context *cic)
>  {
>         unsigned long flags;
>
> -       WARN_ON(!list_empty(&cic->queue_list));
> -       BUG_ON(cic->key != cfqd_dead_key(cfqd));
> +       WARN_ON(!list_empty(&cic->group_list));
> +       BUG_ON(cic->key != cfqg_dead_key(cfqg));
>
>         spin_lock_irqsave(&ioc->lock, flags);
>
>         BUG_ON(ioc->ioc_data == cic);
>
> -       radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
> +       radix_tree_delete(&ioc->radix_root, cfqg->cic_index);
>         hlist_del_rcu(&cic->cic_list);
>         spin_unlock_irqrestore(&ioc->lock, flags);
>
> @@ -2997,11 +3052,14 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
>  }
>
>  static struct cfq_io_context *
> -cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
> +cfq_cic_lookup(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +              struct io_context *ioc)
>  {
>         struct cfq_io_context *cic;
>         unsigned long flags;
>
> +       if (!cfqg)
> +               return NULL;
>         if (unlikely(!ioc))
>                 return NULL;
>
> @@ -3011,18 +3069,18 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>          * we maintain a last-hit cache, to avoid browsing over the tree
>          */
>         cic = rcu_dereference(ioc->ioc_data);
> -       if (cic && cic->key == cfqd) {
> +       if (cic && cic->key == cfqg) {
>                 rcu_read_unlock();
>                 return cic;
>         }
>
>         do {
> -               cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
> +               cic = radix_tree_lookup(&ioc->radix_root, cfqg->cic_index);
>                 rcu_read_unlock();
>                 if (!cic)
>                         break;
> -               if (unlikely(cic->key != cfqd)) {
> -                       cfq_drop_dead_cic(cfqd, ioc, cic);
> +               if (unlikely(cic->key != cfqg)) {
> +                       cfq_drop_dead_cic(cfqd, cfqg, ioc, cic);
>                         rcu_read_lock();
>                         continue;
>                 }
> @@ -3037,24 +3095,25 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
>  }
>
>  /*
> - * Add cic into ioc, using cfqd as the search key. This enables us to lookup
> + * Add cic into ioc, using cfqg as the search key. This enables us to lookup
>  * the process specific cfq io context when entered from the block layer.
> - * Also adds the cic to a per-cfqd list, used when this queue is removed.
> + * Also adds the cic to a per-cfqg list, used when the group is removed.
> + * request_queue lock must be held.
>  */
> -static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
> -                       struct cfq_io_context *cic, gfp_t gfp_mask)
> +static int cfq_cic_link(struct cfq_data *cfqd, struct cfq_group *cfqg,
> +                       struct io_context *ioc, struct cfq_io_context *cic)
>  {
>         unsigned long flags;
>         int ret;
>
> -       ret = radix_tree_preload(gfp_mask);
> +       ret = radix_tree_preload(GFP_ATOMIC);
>         if (!ret) {
>                 cic->ioc = ioc;
> -               cic->key = cfqd;
> +               cic->key = cfqg;
>
>                 spin_lock_irqsave(&ioc->lock, flags);
>                 ret = radix_tree_insert(&ioc->radix_root,
> -                                               cfqd->cic_index, cic);
> +                                               cfqg->cic_index, cic);
>                 if (!ret)
>                         hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
>                 spin_unlock_irqrestore(&ioc->lock, flags);
> @@ -3062,9 +3121,9 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>                 radix_tree_preload_end();
>
>                 if (!ret) {
> -                       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> -                       list_add(&cic->queue_list, &cfqd->cic_list);
> -                       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
> +                       spin_lock_irqsave(&cfqg->lock, flags);
> +                       list_add(&cic->group_list, &cfqg->cic_list);
> +                       spin_unlock_irqrestore(&cfqg->lock, flags);
>                 }
>         }
>
> @@ -3080,10 +3139,12 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
>  * than one device managed by cfq.
>  */
>  static struct cfq_io_context *
> -cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
> +cfq_get_io_context(struct cfq_data *cfqd, struct bio *bio, gfp_t gfp_mask)
>  {
>         struct io_context *ioc = NULL;
>         struct cfq_io_context *cic;
> +       struct cfq_group *cfqg, *cfqg2;
> +       unsigned long flags;
>
>         might_sleep_if(gfp_mask & __GFP_WAIT);
>
> @@ -3091,18 +3152,38 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
>         if (!ioc)
>                 return NULL;
>
> -       cic = cfq_cic_lookup(cfqd, ioc);
> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> +retry_cfqg:
> +       cfqg = cfq_get_cfqg(cfqd, bio, 1);
> +retry_cic:
> +       cic = cfq_cic_lookup(cfqd, cfqg, ioc);
>         if (cic)
>                 goto out;
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>
>         cic = cfq_alloc_io_context(cfqd, gfp_mask);
>         if (cic == NULL)
>                 goto err;
>
> -       if (cfq_cic_link(cfqd, ioc, cic, gfp_mask))
> +       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
> +
> +       /* check the consistency breakage during unlock period */
> +       cfqg2 = cfq_get_cfqg(cfqd, bio, 0);
> +       if (cfqg != cfqg2) {
> +               cfq_cic_free(cic);
> +               if (!cfqg2)
> +                       goto retry_cfqg;
> +               else {
> +                       cfqg = cfqg2;
> +                       goto retry_cic;
> +               }
> +       }
> +
> +       if (cfq_cic_link(cfqd, cfqg, ioc, cic))
>                 goto err_free;
>
>  out:
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>         smp_read_barrier_depends();
>         if (unlikely(ioc->ioprio_changed))
>                 cfq_ioc_set_ioprio(ioc);
> @@ -3113,6 +3194,7 @@ out:
>  #endif
>         return cic;
>  err_free:
> +       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
>         cfq_cic_free(cic);
>  err:
>         put_io_context(ioc);
> @@ -3537,6 +3619,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
>  static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>  {
>         struct cfq_data *cfqd = q->elevator->elevator_data;
> +       struct cfq_group *cfqg;
>         struct task_struct *tsk = current;
>         struct cfq_io_context *cic;
>         struct cfq_queue *cfqq;
> @@ -3547,7 +3630,8 @@ static int cfq_may_queue(struct request_queue *q, struct bio *bio, int rw)
>          * so just lookup a possibly existing queue, or return 'may queue'
>          * if that fails
>          */
> -       cic = cfq_cic_lookup(cfqd, tsk->io_context);
> +       cfqg = cfq_get_cfqg(cfqd, bio, 0);
> +       cic = cfq_cic_lookup(cfqd, cfqg, tsk->io_context);
>         if (!cic)
>                 return ELV_MQUEUE_MAY;
>
> @@ -3636,7 +3720,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>
>         might_sleep_if(gfp_mask & __GFP_WAIT);
>
> -       cic = cfq_get_io_context(cfqd, gfp_mask);
> +       cic = cfq_get_io_context(cfqd, bio, gfp_mask);
>
>         spin_lock_irqsave(q->queue_lock, flags);
>
> @@ -3646,7 +3730,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, struct bio *bio,
>  new_queue:
>         cfqq = cic_to_cfqq(cic, is_sync);
>         if (!cfqq || cfqq == &cfqd->oom_cfqq) {
> -               cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
> +               cfqq = cfq_get_queue(cfqd, is_sync, cic, gfp_mask);
>                 cic_set_cfqq(cic, cfqq, is_sync);
>         } else {
>                 /*
> @@ -3762,19 +3846,19 @@ static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
>         cancel_work_sync(&cfqd->unplug_work);
>  }
>
> -static void cfq_put_async_queues(struct cfq_data *cfqd)
> +static void cfq_put_async_queues(struct cfq_group *cfqg)
>  {
>         int i;
>
>         for (i = 0; i < IOPRIO_BE_NR; i++) {
> -               if (cfqd->async_cfqq[0][i])
> -                       cfq_put_queue(cfqd->async_cfqq[0][i]);
> -               if (cfqd->async_cfqq[1][i])
> -                       cfq_put_queue(cfqd->async_cfqq[1][i]);
> +               if (cfqg->async_cfqq[0][i])
> +                       cfq_put_queue(cfqg->async_cfqq[0][i]);
> +               if (cfqg->async_cfqq[1][i])
> +                       cfq_put_queue(cfqg->async_cfqq[1][i]);
>         }
>
> -       if (cfqd->async_idle_cfqq)
> -               cfq_put_queue(cfqd->async_idle_cfqq);
> +       if (cfqg->async_idle_cfqq)
> +               cfq_put_queue(cfqg->async_idle_cfqq);
>  }
>
>  static void cfq_cfqd_free(struct rcu_head *head)
> @@ -3794,15 +3878,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>         if (cfqd->active_queue)
>                 __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
>
> -       while (!list_empty(&cfqd->cic_list)) {
> -               struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
> -                                                       struct cfq_io_context,
> -                                                       queue_list);
> -
> -               __cfq_exit_single_io_context(cfqd, cic);
> -       }
> -
> -       cfq_put_async_queues(cfqd);
>         cfq_release_cfq_groups(cfqd);
>         cfq_blkiocg_del_blkio_group(&cfqd->root_group.blkg);
>
> @@ -3810,10 +3885,6 @@ static void cfq_exit_queue(struct elevator_queue *e)
>
>         cfq_shutdown_timer_wq(cfqd);
>
> -       spin_lock(&cic_index_lock);
> -       ida_remove(&cic_index_ida, cfqd->cic_index);
> -       spin_unlock(&cic_index_lock);
> -
>         /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
>         call_rcu(&cfqd->rcu, cfq_cfqd_free);
>  }
> @@ -3823,7 +3894,7 @@ static int cfq_alloc_cic_index(void)
>         int index, error;
>
>         do {
> -               if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
> +               if (!ida_pre_get(&cic_index_ida, GFP_ATOMIC))
>                         return -ENOMEM;
>
>                 spin_lock(&cic_index_lock);
> @@ -3839,20 +3910,18 @@ static int cfq_alloc_cic_index(void)
>  static void *cfq_init_queue(struct request_queue *q)
>  {
>         struct cfq_data *cfqd;
> -       int i, j;
> +       int i, j, idx;
>         struct cfq_group *cfqg;
>         struct cfq_rb_root *st;
>
> -       i = cfq_alloc_cic_index();
> -       if (i < 0)
> +       idx = cfq_alloc_cic_index();
> +       if (idx < 0)
>                 return NULL;
>
>         cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
>         if (!cfqd)
>                 return NULL;
>
> -       cfqd->cic_index = i;
> -
>         /* Init root service tree */
>         cfqd->grp_service_tree = CFQ_RB_ROOT;
>
> @@ -3861,6 +3930,9 @@ static void *cfq_init_queue(struct request_queue *q)
>         for_each_cfqg_st(cfqg, i, j, st)
>                 *st = CFQ_RB_ROOT;
>         RB_CLEAR_NODE(&cfqg->rb_node);
> +       cfqg->cfqd = cfqd;
> +       cfqg->cic_index = idx;
> +       INIT_LIST_HEAD(&cfqg->cic_list);
>
>         /* Give preference to root group over other groups */
>         cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
> @@ -3874,6 +3946,7 @@ static void *cfq_init_queue(struct request_queue *q)
>         rcu_read_lock();
>         cfq_blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg,
>                                         (void *)cfqd, 0);
> +       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
>         rcu_read_unlock();
>  #endif
>         /*
> @@ -3893,8 +3966,6 @@ static void *cfq_init_queue(struct request_queue *q)
>         atomic_inc(&cfqd->oom_cfqq.ref);
>         cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, &cfqd->root_group);
>
> -       INIT_LIST_HEAD(&cfqd->cic_list);
> -
>         cfqd->queue = q;
>
>         init_timer(&cfqd->idle_slice_timer);
> diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
> index 64d5291..6c05b54 100644
> --- a/include/linux/iocontext.h
> +++ b/include/linux/iocontext.h
> @@ -18,7 +18,7 @@ struct cfq_io_context {
>         unsigned long ttime_samples;
>         unsigned long ttime_mean;
>
> -       struct list_head queue_list;
> +       struct list_head group_list;
>         struct hlist_node cic_list;
>
>         void (*dtor)(struct io_context *); /* destructor */
> --
> 1.6.2.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
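
For illustration, a minimal sketch of the lookup suggested at the top of this
mail: resolve the cfq_group from the bio and pick the per-priority async queue
slot that this patch moves into the group.  It uses only helpers that the
patch itself introduces or reworks (cfq_get_cfqg(), cfq_async_queue_prio(),
task_ioprio(), task_ioprio_class()); the wrapper name async_cfqq_for_bio() is
hypothetical and not part of the series.

/*
 * Illustrative sketch only, not part of this patch: reach an async queue
 * starting from a bio.  cfq_get_cfqg() maps the bio's cgroup to the
 * cfq_group for this device; cfq_async_queue_prio() returns the
 * per-priority slot stored in that group.
 */
static struct cfq_queue *async_cfqq_for_bio(struct cfq_data *cfqd,
                                            struct bio *bio,
                                            struct io_context *ioc)
{
        struct cfq_group *cfqg = cfq_get_cfqg(cfqd, bio, 0);

        if (!cfqg)
                return NULL;

        /* may still be NULL if the async queue has not been allocated yet */
        return *cfq_async_queue_prio(cfqg, task_ioprio_class(ioc),
                                     task_ioprio(ioc));
}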