Subject: Re: [PATCH 3/4] blk-mq: establish new mapping before cpu starts handling requests
From: Ming Lei
To: Akinobu Mita
Cc: Linux Kernel Mailing List, Jens Axboe
Date: Wed, 24 Jun 2015 17:46:00 +0800
In-Reply-To: <1434894751-6877-4-git-send-email-akinobu.mita@gmail.com>
References: <1434894751-6877-1-git-send-email-akinobu.mita@gmail.com>
 <1434894751-6877-4-git-send-email-akinobu.mita@gmail.com>

On Sun, Jun 21, 2015 at 9:52 PM, Akinobu Mita wrote:
> ctx->index_hw is zero for the CPUs which have never been onlined since
> the block queue was initialized. If one of those CPUs is hotadded and
> starts handling request before new mappings are established, pending

Could you explain a bit what "handling requests" means here?  The fact is
that blk_mq_queue_reinit() is run only after all queues have been frozen.
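If I am reading the commit log correctly, the failure mode is roughly the
one in the much-simplified userspace model below (plain C, not the real
blk-mq code; the structures and helpers here are made up purely for
illustration): a software queue whose index_hw was never updated sets the
wrong bit in the hardware queue's pending bitmap, so the flush path drains
a different software queue and never finds the stuck request.

#include <stdio.h>

#define NR_CTX 4

struct sw_ctx {
        unsigned int index_hw;  /* bit index in the hw queue's pending map */
        int nr_pending;         /* stand-in for the ctx->rq_list contents  */
};

struct hw_ctx {
        unsigned long ctx_map;          /* one "pending" bit per sw queue   */
        struct sw_ctx *ctxs[NR_CTX];    /* sw queues, indexed by index_hw   */
};

/* Insert path: queue a request and mark this software queue as busy. */
static void insert_request(struct hw_ctx *hctx, struct sw_ctx *ctx)
{
        ctx->nr_pending++;
        hctx->ctx_map |= 1UL << ctx->index_hw;
}

/* Dispatch path: roughly what flush_busy_ctxs() does - walk the set bits
 * and drain the software queues they point at. */
static void flush_busy_ctxs(struct hw_ctx *hctx)
{
        unsigned int bit;

        for (bit = 0; bit < NR_CTX; bit++) {
                if (!(hctx->ctx_map & (1UL << bit)))
                        continue;
                printf("flushing ctxs[%u]: %d request(s)\n",
                       bit, hctx->ctxs[bit]->nr_pending);
                hctx->ctxs[bit]->nr_pending = 0;
                hctx->ctx_map &= ~(1UL << bit);
        }
}

int main(void)
{
        struct sw_ctx ctx[NR_CTX] = {
                { .index_hw = 0 }, { .index_hw = 1 }, { .index_hw = 2 },
                /* ctx[3] models the hot-added CPU: the mapping has not
                 * been rebuilt yet, so index_hw is still the initial 0. */
                { .index_hw = 0 },
        };
        struct hw_ctx hctx = {
                .ctxs = { &ctx[0], &ctx[1], &ctx[2], &ctx[3] },
        };

        insert_request(&hctx, &ctx[3]);  /* request submitted on the new CPU */
        flush_busy_ctxs(&hctx);          /* drains ctxs[0], not ctx[3]       */

        printf("requests still stuck on ctx[3]: %d\n", ctx[3].nr_pending);
        return 0;
}

Running it should show ctxs[0] being flushed while the request stays
queued on ctx[3], which is the "flush_busy_ctxs can't find the requests"
symptom described in the commit log.  What is not clear to me is when this
can actually happen, given that the queues are frozen across
blk_mq_queue_reinit().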
> bitmap is not correctly marked for inserted requests as ctx->index_hw
> is still zero. So flush_busy_ctxs can't find the requests on the
> software queue.
>
> Fix it by establishing new mapping before hotadded cpu starts handling
> requests.
>
> Signed-off-by: Akinobu Mita
> Cc: Jens Axboe
> ---
>  block/blk-mq-cpumap.c |  9 +++++----
>  block/blk-mq.c        | 41 ++++++++++++++++++++++++++---------------
>  block/blk-mq.h        |  3 ++-
>  3 files changed, 33 insertions(+), 20 deletions(-)
>
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 5f13f4d..99f65c0 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
>          return cpu;
>  }
>
> -int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
> +int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> +                            const struct cpumask *online_mask)
>  {
>          unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
>          cpumask_var_t cpus;
> @@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
>          cpumask_clear(cpus);
>          nr_cpus = nr_uniq_cpus = 0;
> -        for_each_online_cpu(i) {
> +        for_each_cpu(i, online_mask) {
>                  nr_cpus++;
>                  first_sibling = get_first_sibling(i);
>                  if (!cpumask_test_cpu(first_sibling, cpus))
> @@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
>          queue = 0;
>          for_each_possible_cpu(i) {
> -                if (!cpu_online(i)) {
> +                if (!cpumask_test_cpu(i, online_mask)) {
>                          map[i] = 0;
>                          continue;
>                  }
> @@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
>          if (!map)
>                  return NULL;
>
> -        if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
> +        if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
>                  return map;
>
>          kfree(map);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 6487710..64d93e4 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1771,7 +1771,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
>          }
>  }
>
> -static void blk_mq_map_swqueue(struct request_queue *q)
> +static void blk_mq_map_swqueue(struct request_queue *q,
> +                               const struct cpumask *online_mask)
>  {
>          unsigned int i;
>          struct blk_mq_hw_ctx *hctx;
> @@ -1788,7 +1789,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
>           */
>          queue_for_each_ctx(q, ctx, i) {
>                  /* If the cpu isn't online, the cpu is mapped to first hctx */
> -                if (!cpu_online(i))
> +                if (!cpumask_test_cpu(i, online_mask))
>                          continue;
>
>                  hctx = q->mq_ops->map_queue(q, i);
> @@ -2014,7 +2015,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
>
>          blk_mq_add_queue_tag_set(set, q);
>
> -        blk_mq_map_swqueue(q);
> +        get_online_cpus();
> +        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, cpu_online_mask);
> +        blk_mq_map_swqueue(q, cpu_online_mask);
> +        put_online_cpus();
>
>          return q;
>
> @@ -2055,13 +2059,14 @@ void blk_mq_free_queue(struct request_queue *q)
>  }
>
>  /* Basically redo blk_mq_init_queue with queue frozen */
> -static void blk_mq_queue_reinit(struct request_queue *q)
> +static void blk_mq_queue_reinit(struct request_queue *q,
> +                                const struct cpumask *online_mask)
>  {
>          WARN_ON_ONCE(!q->mq_freeze_depth);
>
>          blk_mq_sysfs_unregister(q);
>
> -        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
> +        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
>
>          /*
>           * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
> @@ -2069,25 +2074,31 @@ static void blk_mq_queue_reinit(struct request_queue *q)
>           * involves free and re-allocate memory, worthy doing?)
>           */
>
> -        blk_mq_map_swqueue(q);
> +        blk_mq_map_swqueue(q, online_mask);
>
>          blk_mq_sysfs_register(q);
>  }
>
> +static struct cpumask online_new;
> +
>  static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>                                        unsigned long action, void *hcpu)
>  {
>          struct request_queue *q;
> +        int cpu = (unsigned long)hcpu;
>
> -        /*
> -         * Before new mappings are established, hotadded cpu might already
> -         * start handling requests. This doesn't break anything as we map
> -         * offline CPUs to first hardware queue. We will re-init the queue
> -         * below to get optimal settings.
> -         */
> -        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
> -            action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
> +        switch (action & ~CPU_TASKS_FROZEN) {
> +        case CPU_DEAD:
> +        case CPU_UP_CANCELED:
> +                cpumask_copy(&online_new, cpu_online_mask);
> +                break;
> +        case CPU_UP_PREPARE:
> +                cpumask_copy(&online_new, cpu_online_mask);
> +                cpumask_set_cpu(cpu, &online_new);
> +                break;
> +        default:
>                  return NOTIFY_OK;
> +        }
>
>          mutex_lock(&all_q_mutex);
>
> @@ -2111,7 +2122,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>          }
>
>          list_for_each_entry(q, &all_q_list, all_q_node)
> -                blk_mq_queue_reinit(q);
> +                blk_mq_queue_reinit(q, &online_new);
>
>          list_for_each_entry(q, &all_q_list, all_q_node)
>                  blk_mq_unfreeze_queue(q);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 6a48c4c..f4fea79 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
>   * CPU -> queue mappings
>   */
>  extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
> -extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
> +extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> +                                   const struct cpumask *online_mask);
>  extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
>
>  /*
> --
> 1.9.1
>

--
Ming Lei