Subject: Re: [PATCH 3/4] blk-mq: establish new mapping before cpu starts handling requests
From: Ming Lei
To: Akinobu Mita
Cc: Linux Kernel Mailing List, Jens Axboe
Date: Wed, 24 Jun 2015 17:46:00 +0800
In-Reply-To: <1434894751-6877-4-git-send-email-akinobu.mita@gmail.com>
References: <1434894751-6877-1-git-send-email-akinobu.mita@gmail.com>
 <1434894751-6877-4-git-send-email-akinobu.mita@gmail.com>

On Sun, Jun 21, 2015 at 9:52 PM, Akinobu Mita wrote:
> ctx->index_hw is zero for the CPUs which have never been onlined since
> the block queue was initialized. If one of those CPUs is hotadded and
> starts handling request before new mappings are established, pending

Could you explain a bit what "handling requests" means here?  The fact is
that blk_mq_queue_reinit() is run only after all queues have been frozen.
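If I am reading the commit log correctly, the failure mode is roughly the
one in the much-simplified userspace model below (plain C, not the real
blk-mq code; the structures and helpers here are made up purely for
illustration): a software queue whose index_hw was never updated sets the
wrong bit in the hardware queue's pending bitmap, so the flush path drains
a different software queue and never finds the stuck request.

#include <stdio.h>

#define NR_CTX 4

struct sw_ctx {
        unsigned int index_hw;  /* bit index in the hw queue's pending map */
        int nr_pending;         /* stand-in for the ctx->rq_list contents  */
};

struct hw_ctx {
        unsigned long ctx_map;          /* one "pending" bit per sw queue   */
        struct sw_ctx *ctxs[NR_CTX];    /* sw queues, indexed by index_hw   */
};

/* Insert path: queue a request and mark this software queue as busy. */
static void insert_request(struct hw_ctx *hctx, struct sw_ctx *ctx)
{
        ctx->nr_pending++;
        hctx->ctx_map |= 1UL << ctx->index_hw;
}

/* Dispatch path: roughly what flush_busy_ctxs() does - walk the set bits
 * and drain the software queues they point at. */
static void flush_busy_ctxs(struct hw_ctx *hctx)
{
        unsigned int bit;

        for (bit = 0; bit < NR_CTX; bit++) {
                if (!(hctx->ctx_map & (1UL << bit)))
                        continue;
                printf("flushing ctxs[%u]: %d request(s)\n",
                       bit, hctx->ctxs[bit]->nr_pending);
                hctx->ctxs[bit]->nr_pending = 0;
                hctx->ctx_map &= ~(1UL << bit);
        }
}

int main(void)
{
        struct sw_ctx ctx[NR_CTX] = {
                { .index_hw = 0 }, { .index_hw = 1 }, { .index_hw = 2 },
                /* ctx[3] models the hot-added CPU: the mapping has not
                 * been rebuilt yet, so index_hw is still the initial 0. */
                { .index_hw = 0 },
        };
        struct hw_ctx hctx = {
                .ctxs = { &ctx[0], &ctx[1], &ctx[2], &ctx[3] },
        };

        insert_request(&hctx, &ctx[3]);  /* request submitted on the new CPU */
        flush_busy_ctxs(&hctx);          /* drains ctxs[0], not ctx[3]       */

        printf("requests still stuck on ctx[3]: %d\n", ctx[3].nr_pending);
        return 0;
}

Running it should show ctxs[0] being flushed while the request stays
queued on ctx[3], which is the "flush_busy_ctxs can't find the requests"
symptom described in the commit log.  What is not clear to me is when this
can actually happen, given that the queues are frozen across
blk_mq_queue_reinit().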
> bitmap is not correctly marked for inserted requests as ctx->index_hw
> is still zero. So flush_busy_ctxs can't find the requests on the
> software queue.
>
> Fix it by establishing new mapping before hotadded cpu starts handling
> requests.
>
> Signed-off-by: Akinobu Mita
> Cc: Jens Axboe
> ---
>  block/blk-mq-cpumap.c |  9 +++++----
>  block/blk-mq.c        | 41 ++++++++++++++++++++++++++---------------
>  block/blk-mq.h        |  3 ++-
>  3 files changed, 33 insertions(+), 20 deletions(-)
>
> diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
> index 5f13f4d..99f65c0 100644
> --- a/block/blk-mq-cpumap.c
> +++ b/block/blk-mq-cpumap.c
> @@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu)
>          return cpu;
>  }
>
> -int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
> +int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> +                            const struct cpumask *online_mask)
>  {
>          unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
>          cpumask_var_t cpus;
> @@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
>          cpumask_clear(cpus);
>          nr_cpus = nr_uniq_cpus = 0;
> -        for_each_online_cpu(i) {
> +        for_each_cpu(i, online_mask) {
>                  nr_cpus++;
>                  first_sibling = get_first_sibling(i);
>                  if (!cpumask_test_cpu(first_sibling, cpus))
> @@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
>
>          queue = 0;
>          for_each_possible_cpu(i) {
> -                if (!cpu_online(i)) {
> +                if (!cpumask_test_cpu(i, online_mask)) {
>                          map[i] = 0;
>                          continue;
>                  }
> @@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
>          if (!map)
>                  return NULL;
>
> -        if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
> +        if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask))
>                  return map;
>
>          kfree(map);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 6487710..64d93e4 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1771,7 +1771,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
>          }
>  }
>
> -static void blk_mq_map_swqueue(struct request_queue *q)
> +static void blk_mq_map_swqueue(struct request_queue *q,
> +                               const struct cpumask *online_mask)
>  {
>          unsigned int i;
>          struct blk_mq_hw_ctx *hctx;
> @@ -1788,7 +1789,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
>           */
>          queue_for_each_ctx(q, ctx, i) {
>                  /* If the cpu isn't online, the cpu is mapped to first hctx */
> -                if (!cpu_online(i))
> +                if (!cpumask_test_cpu(i, online_mask))
>                          continue;
>
>                  hctx = q->mq_ops->map_queue(q, i);
> @@ -2014,7 +2015,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
>
>          blk_mq_add_queue_tag_set(set, q);
>
> -        blk_mq_map_swqueue(q);
> +        get_online_cpus();
> +        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, cpu_online_mask);
> +        blk_mq_map_swqueue(q, cpu_online_mask);
> +        put_online_cpus();
>
>          return q;
>
> @@ -2055,13 +2059,14 @@ void blk_mq_free_queue(struct request_queue *q)
>  }
>
>  /* Basically redo blk_mq_init_queue with queue frozen */
> -static void blk_mq_queue_reinit(struct request_queue *q)
> +static void blk_mq_queue_reinit(struct request_queue *q,
> +                                const struct cpumask *online_mask)
>  {
>          WARN_ON_ONCE(!q->mq_freeze_depth);
>
>          blk_mq_sysfs_unregister(q);
>
> -        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
> +        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
>
>          /*
>           * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
> @@ -2069,25 +2074,31 @@ static void blk_mq_queue_reinit(struct request_queue *q)
>           * involves free and re-allocate memory, worthy doing?)
>           */
>
> -        blk_mq_map_swqueue(q);
> +        blk_mq_map_swqueue(q, online_mask);
>
>          blk_mq_sysfs_register(q);
>  }
>
> +static struct cpumask online_new;
> +
>  static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>                                        unsigned long action, void *hcpu)
>  {
>          struct request_queue *q;
> +        int cpu = (unsigned long)hcpu;
>
> -        /*
> -         * Before new mappings are established, hotadded cpu might already
> -         * start handling requests. This doesn't break anything as we map
> -         * offline CPUs to first hardware queue. We will re-init the queue
> -         * below to get optimal settings.
> -         */
> -        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
> -            action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
> +        switch (action & ~CPU_TASKS_FROZEN) {
> +        case CPU_DEAD:
> +        case CPU_UP_CANCELED:
> +                cpumask_copy(&online_new, cpu_online_mask);
> +                break;
> +        case CPU_UP_PREPARE:
> +                cpumask_copy(&online_new, cpu_online_mask);
> +                cpumask_set_cpu(cpu, &online_new);
> +                break;
> +        default:
>                  return NOTIFY_OK;
> +        }
>
>          mutex_lock(&all_q_mutex);
>
> @@ -2111,7 +2122,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
>          }
>
>          list_for_each_entry(q, &all_q_list, all_q_node)
> -                blk_mq_queue_reinit(q);
> +                blk_mq_queue_reinit(q, &online_new);
>
>          list_for_each_entry(q, &all_q_list, all_q_node)
>                  blk_mq_unfreeze_queue(q);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 6a48c4c..f4fea79 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void);
>   * CPU -> queue mappings
>   */
>  extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
> -extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
> +extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
> +                                   const struct cpumask *online_mask);
>  extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
>
>  /*
> --
> 1.9.1
>

--
Ming Lei