From: Akinobu Mita
To: linux-kernel@vger.kernel.org, Jens Axboe
Cc: Akinobu Mita
Subject: [PATCH 3/4] blk-mq: establish new mapping before cpu starts handling requests
Date: Sun, 21 Jun 2015 22:52:30 +0900
Message-Id: <1434894751-6877-4-git-send-email-akinobu.mita@gmail.com>
X-Mailer: git-send-email 1.9.1
In-Reply-To: <1434894751-6877-1-git-send-email-akinobu.mita@gmail.com>
References: <1434894751-6877-1-git-send-email-akinobu.mita@gmail.com>

ctx->index_hw is zero for CPUs that have never been onlined since the
block queue was initialized.  If one of those CPUs is hot-added and
starts handling requests before the new mappings are established, the
pending bitmap is not marked correctly for the inserted requests,
because ctx->index_hw is still zero, and flush_busy_ctxs() therefore
cannot find the requests on the software queue.

Fix this by establishing the new mapping before the hot-added CPU
starts handling requests.

Signed-off-by: Akinobu Mita
Cc: Jens Axboe
---
 block/blk-mq-cpumap.c |  9 +++++----
 block/blk-mq.c        | 41 ++++++++++++++++++++++++++---------------
 block/blk-mq.h        |  3 ++-
 3 files changed, 33 insertions(+), 20 deletions(-)
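(Note for reviewers, not part of the changelog: the stand-alone program
below is a simplified model of the failure mode, using made-up names
rather than the real blk-mq structures.  It only illustrates how a
pending bit derived from a stale index_hw == 0 points the flush at the
wrong software queue, so the hot-added CPU's request is never found.)

/*
 * Stand-alone model (NOT kernel code) of the failure described above.
 * Names are made up for illustration.  The hardware queue's pending
 * bitmap is indexed by ctx->index_hw; a hot-added CPU whose ctx still
 * has index_hw == 0 marks the wrong bit, so a flush that walks only
 * the mapped contexts never finds its request.
 */
#include <stdio.h>

#define NR_CTX 4				/* one software queue per CPU */

struct ctx {
	unsigned int index_hw;			/* slot inside the hctx's map */
	int queued;				/* requests waiting on this ctx */
};

struct hctx {
	unsigned long pending;			/* bit i: ctx_map[i] has work */
	unsigned int nr_ctx;			/* number of mapped ctxs */
	unsigned int ctx_map[NR_CTX];		/* index_hw -> ctx number */
};

static void insert_request(struct hctx *h, struct ctx *c)
{
	c->queued++;
	h->pending |= 1UL << c->index_hw;	/* mark the pending bit */
}

static void flush_busy_ctxs(struct hctx *h, struct ctx *ctxs)
{
	unsigned int i;

	for (i = 0; i < h->nr_ctx; i++) {
		struct ctx *c = &ctxs[h->ctx_map[i]];

		if (!(h->pending & (1UL << i)))
			continue;
		h->pending &= ~(1UL << i);
		printf("dispatched %d request(s) from ctx %u\n",
		       c->queued, h->ctx_map[i]);
		c->queued = 0;
	}
}

int main(void)
{
	struct ctx ctxs[NR_CTX] = { { 0 } };
	/* Only CPU0 was online at init time, so only ctx 0 is mapped. */
	struct hctx h = { .pending = 0, .nr_ctx = 1, .ctx_map = { 0 } };

	/*
	 * CPU3 is hot-added and queues a request before remapping:
	 * ctxs[3].index_hw is still 0, so bit 0 gets marked instead of
	 * a bit that maps back to ctx 3.
	 */
	insert_request(&h, &ctxs[3]);

	flush_busy_ctxs(&h, ctxs);		/* finds only the empty ctx 0 */
	printf("requests still stuck on ctx 3: %d\n", ctxs[3].queued);
	return 0;
}

Compiled with a plain cc, it reports zero requests dispatched from
ctx 0 while the request queued via ctx 3 stays stuck, which is the
situation the remapping in this patch avoids.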
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 5f13f4d..99f65c0 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
 	return cpu;
 }
-int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
+int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+			    const struct cpumask *online_mask)
 {
 	unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
 	cpumask_var_t cpus;
@@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 	cpumask_clear(cpus);
 	nr_cpus = nr_uniq_cpus = 0;
-	for_each_online_cpu(i) {
+	for_each_cpu(i, online_mask) {
 		nr_cpus++;
 		first_sibling = get_first_sibling(i);
 		if (!cpumask_test_cpu(first_sibling, cpus))
@@ -51,7 +52,7 @@
 	queue = 0;
 	for_each_possible_cpu(i) {
-		if (!cpu_online(i)) {
+		if (!cpumask_test_cpu(i, online_mask)) {
 			map[i] = 0;
 			continue;
 		}
@@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 	if (!map)
 		return NULL;
-	if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
+	if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
 		return map;
 	kfree(map);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6487710..64d93e4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1771,7 +1771,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	}
 }
-static void blk_mq_map_swqueue(struct request_queue *q)
+static void blk_mq_map_swqueue(struct request_queue *q,
+			       const struct cpumask *online_mask)
 {
 	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
@@ -1788,7 +1789,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 */
 	queue_for_each_ctx(q, ctx, i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		if (!cpumask_test_cpu(i, online_mask))
 			continue;
 		hctx = q->mq_ops->map_queue(q, i);
@@ -2014,7 +2015,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	blk_mq_add_queue_tag_set(set, q);
-	blk_mq_map_swqueue(q);
+	get_online_cpus();
+	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, cpu_online_mask);
+	blk_mq_map_swqueue(q, cpu_online_mask);
+	put_online_cpus();
 	return q;
@@ -2055,13 +2059,14 @@ void blk_mq_free_queue(struct request_queue *q)
 }
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q,
+				const struct cpumask *online_mask)
 {
 	WARN_ON_ONCE(!q->mq_freeze_depth);
 	blk_mq_sysfs_unregister(q);
-	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
 	/*
 	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
@@ -2069,25 +2074,31 @@ static void blk_mq_queue_reinit(struct request_queue *q)
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
-	blk_mq_map_swqueue(q);
+	blk_mq_map_swqueue(q, online_mask);
 	blk_mq_sysfs_register(q);
 }
+static struct cpumask online_new;
+
 static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 				      unsigned long action, void *hcpu)
 {
 	struct request_queue *q;
+	int cpu = (unsigned long)hcpu;
-	/*
-	 * Before new mappings are established, hotadded cpu might already
-	 * start handling requests. This doesn't break anything as we map
-	 * offline CPUs to first hardware queue. We will re-init the queue
-	 * below to get optimal settings.
-	 */
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
-	    action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		cpumask_copy(&online_new, cpu_online_mask);
+		break;
+	case CPU_UP_PREPARE:
+		cpumask_copy(&online_new, cpu_online_mask);
+		cpumask_set_cpu(cpu, &online_new);
+		break;
+	default:
 		return NOTIFY_OK;
+	}
 	mutex_lock(&all_q_mutex);
@@ -2111,7 +2122,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	}
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q);
+		blk_mq_queue_reinit(q, &online_new);
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_unfreeze_queue(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 6a48c4c..f4fea79 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
 * CPU -> queue mappings
 */
 extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
-extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
+				   const struct cpumask *online_mask);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 /*
-- 
1.9.1