Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S933823AbbGHLsa (ORCPT ); Wed, 8 Jul 2015 07:48:30 -0400 Received: from mail-ig0-f181.google.com ([209.85.213.181]:37298 "EHLO mail-ig0-f181.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S933000AbbGHLs1 (ORCPT ); Wed, 8 Jul 2015 07:48:27 -0400 MIME-Version: 1.0 In-Reply-To: <1435847397-724-2-git-send-email-akinobu.mita@gmail.com> References: <1435847397-724-1-git-send-email-akinobu.mita@gmail.com> <1435847397-724-2-git-send-email-akinobu.mita@gmail.com> Date: Wed, 8 Jul 2015 19:48:26 +0800 Message-ID: Subject: Re: [PATCH v2 1/6] blk-mq: fix sysfs registration/unregistration race From: Ming Lei To: Akinobu Mita Cc: Linux Kernel Mailing List , Jens Axboe Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7047 Lines: 230 On Thu, Jul 2, 2015 at 10:29 PM, Akinobu Mita wrote: > There is a race between cpu hotplug handling and adding/deleting > gendisk for blk-mq, where both are trying to register and unregister > the same sysfs entries. > > CPU hotplug handling for blk-mq (blk_mq_queue_reinit) does unregister > and register sysfs entries of hctx for all request queues in > all_q_list. On the other hand, these entries for a request queue may > have already been unregistered or in the middle of registration when > CPU hotplug event occurs. Because those sysfs entries are registered > by blk_mq_register_disk() after the request queue is added to > all_q_list, and similarily the request queue is deleted from > all_q_list after those are unregistered by blk_mq_unregister_disk(). > > So we need proper mutual exclusion for those registration and > unregistration. But that may not be enough, and you miss another point in which blk_mq_sysfs_register() has to be called after the remapping is built, so the introduced lock should have been held in blk_mq_queue_reinit(). > > Signed-off-by: Akinobu Mita > Cc: Jens Axboe > Cc: Ming Lei > --- > block/blk-core.c | 1 + > block/blk-mq-sysfs.c | 44 ++++++++++++++++++++++++++++++++++---------- > include/linux/blkdev.h | 3 +++ > 3 files changed, 38 insertions(+), 10 deletions(-) > > diff --git a/block/blk-core.c b/block/blk-core.c > index 82819e6..bbf67cd 100644 > --- a/block/blk-core.c > +++ b/block/blk-core.c > @@ -687,6 +687,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) > __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); > > init_waitqueue_head(&q->mq_freeze_wq); > + mutex_init(&q->mq_sysfs_lock); The above should be put into blk_mq_init_allocated_queue(). > > if (blkcg_init_queue(q)) > goto fail_bdi; > diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c > index b79685e..79a3e8d 100644 > --- a/block/blk-mq-sysfs.c > +++ b/block/blk-mq-sysfs.c > @@ -332,13 +332,15 @@ static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) > struct blk_mq_ctx *ctx; > int i; > > - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) > + if (!(hctx->flags & BLK_MQ_F_SYSFS_UP)) > return; It isn't needed to introduce above change. > > hctx_for_each_ctx(hctx, ctx, i) > kobject_del(&ctx->kobj); > > kobject_del(&hctx->kobj); > + > + hctx->flags &= ~BLK_MQ_F_SYSFS_UP; I guess you misunderstand the flag, which just means the syfs stuff for the hcts has been setup, and you needn't clear it. > } > > static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) > @@ -347,7 +349,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) > struct blk_mq_ctx *ctx; > int i, ret; > > - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) > + if (!hctx->nr_ctx) > return 0; > > ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num); > @@ -357,10 +359,12 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) > hctx_for_each_ctx(hctx, ctx, i) { > ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); > if (ret) > - break; > + return ret; > } > > - return ret; > + hctx->flags |= BLK_MQ_F_SYSFS_UP; Same with above. > + > + return 0; > } > > void blk_mq_unregister_disk(struct gendisk *disk) > @@ -370,6 +374,8 @@ void blk_mq_unregister_disk(struct gendisk *disk) > struct blk_mq_ctx *ctx; > int i, j; > > + mutex_lock(&q->mq_sysfs_lock); > + > queue_for_each_hw_ctx(q, hctx, i) { > blk_mq_unregister_hctx(hctx); > > @@ -384,6 +390,9 @@ void blk_mq_unregister_disk(struct gendisk *disk) > kobject_put(&q->mq_kobj); > > kobject_put(&disk_to_dev(disk)->kobj); > + > + q->mq_sysfs_init_done = false; The flag isn't needed, but you should hold q->mq_sysfs_lock inside blk_mq_queue_reinit(). > + mutex_unlock(&q->mq_sysfs_lock); > } > > static void blk_mq_sysfs_init(struct request_queue *q) > @@ -414,27 +423,30 @@ int blk_mq_register_disk(struct gendisk *disk) > struct blk_mq_hw_ctx *hctx; > int ret, i; > > + mutex_lock(&q->mq_sysfs_lock); > + > blk_mq_sysfs_init(q); > > ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); > if (ret < 0) > - return ret; > + goto out; > > kobject_uevent(&q->mq_kobj, KOBJ_ADD); > > queue_for_each_hw_ctx(q, hctx, i) { > - hctx->flags |= BLK_MQ_F_SYSFS_UP; Same with above. > ret = blk_mq_register_hctx(hctx); > if (ret) > break; > } > > - if (ret) { > + if (ret) > blk_mq_unregister_disk(disk); > - return ret; > - } > + else > + q->mq_sysfs_init_done = true; > +out: > + mutex_unlock(&q->mq_sysfs_lock); > > - return 0; > + return ret; > } > EXPORT_SYMBOL_GPL(blk_mq_register_disk); > > @@ -443,8 +455,14 @@ void blk_mq_sysfs_unregister(struct request_queue *q) > struct blk_mq_hw_ctx *hctx; > int i; > > + mutex_lock(&q->mq_sysfs_lock); > + if (!q->mq_sysfs_init_done) > + goto out; > + > queue_for_each_hw_ctx(q, hctx, i) > blk_mq_unregister_hctx(hctx); > +out: > + mutex_unlock(&q->mq_sysfs_lock); > } > > int blk_mq_sysfs_register(struct request_queue *q) > @@ -452,11 +470,17 @@ int blk_mq_sysfs_register(struct request_queue *q) > struct blk_mq_hw_ctx *hctx; > int i, ret = 0; > > + mutex_lock(&q->mq_sysfs_lock); > + if (!q->mq_sysfs_init_done) > + goto out; The above check needn't. > + > queue_for_each_hw_ctx(q, hctx, i) { > ret = blk_mq_register_hctx(hctx); > if (ret) > break; > } > +out: > + mutex_unlock(&q->mq_sysfs_lock); > > return ret; > } > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index d4068c1..c56f5a6 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -462,6 +462,9 @@ struct request_queue { > > struct blk_mq_tag_set *tag_set; > struct list_head tag_set_list; > + > + struct mutex mq_sysfs_lock; > + bool mq_sysfs_init_done; Same with above. > }; > > #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ > -- > 1.9.1 > -- Ming Lei -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/