commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
introduce leak forbfq_group and blkcg_gq objects because of get/put
imbalance. See trace balow:
-> blkg_alloc
-> bfq_pq_alloc
-> bfqg_get (+1)
->bfq_activate_bfqq
->bfq_activate_requeue_entity
-> __bfq_activate_entity
->bfq_get_entity
->bfqg_and_blkg_get (+1) <==== : Note1
->bfq_del_bfqq_busy
->bfq_deactivate_entity+0x53/0xc0 [bfq]
->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
-> bfq_forget_entity(is_in_service = true)
entity->on_st_or_in_serv = false <=== :Note2
if (is_in_service)
return; ==> do not touch reference
-> blkcg_css_offline
-> blkcg_destroy_blkgs
-> blkg_destroy
-> bfq_pd_offline
-> __bfq_deactivate_entity
if (!entity->on_st_or_in_serv) /* true, because (Note2)
return false;
-> bfq_pd_free
-> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
So bfq_group and blkcg_gq will leak forever, see test-case below.
If fact bfq_group objects reference counting are quite different
from bfq_queue. bfq_groups object are referenced by blkcg_gq via
blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
required here.
This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
and add corresponding comment.
##TESTCASE_BEGIN:
#!/bin/bash
max_iters=${1:-100}
#prep cgroup mounts
mount -t tmpfs cgroup_root /sys/fs/cgroup
mkdir /sys/fs/cgroup/blkio
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
# Prepare blkdev
grep blkio /proc/cgroups
truncate -s 1M img
losetup /dev/loop0 img
echo bfq > /sys/block/loop0/queue/scheduler
grep blkio /proc/cgroups
for ((i=0;i<max_iters;i++))
do
mkdir -p /sys/fs/cgroup/blkio/a
echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
rmdir /sys/fs/cgroup/blkio/a
grep blkio /proc/cgroups
done
##TESTCASE_END:
Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/bfq-cgroup.c | 2 +-
block/bfq-iosched.h | 1 -
block/bfq-wf2q.c | 15 +++++----------
3 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 68882b9..b791e20 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
kfree(bfqg);
}
-void bfqg_and_blkg_get(struct bfq_group *bfqg)
+static void bfqg_and_blkg_get(struct bfq_group *bfqg)
{
/* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
bfqg_get(bfqg);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index cd224aa..7038952 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
-void bfqg_and_blkg_get(struct bfq_group *bfqg);
void bfqg_and_blkg_put(struct bfq_group *bfqg);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 34ad095..6a363bb 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
{
struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
+ /* Grab reference only for bfq_queue's objects, bfq_group ones
+ * are owned by blkcg_gq
+ */
if (bfqq) {
bfqq->ref++;
bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
bfqq, bfqq->ref);
- } else
- bfqg_and_blkg_get(container_of(entity, struct bfq_group,
- entity));
+ }
}
/**
@@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
entity->on_st_or_in_serv = false;
st->wsum -= entity->weight;
- if (is_in_service)
- return;
-
- if (bfqq)
+ if (bfqq && !is_in_service)
bfq_put_queue(bfqq);
- else
- bfqg_and_blkg_put(container_of(entity, struct bfq_group,
- entity));
}
/**
--
2.7.4
Dmitry Monakhov <[email protected]> writes:
Ping. Do you have any objections against this patch?
> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
> introduce leak forbfq_group and blkcg_gq objects because of get/put
> imbalance. See trace balow:
> -> blkg_alloc
> -> bfq_pq_alloc
> -> bfqg_get (+1)
> ->bfq_activate_bfqq
> ->bfq_activate_requeue_entity
> -> __bfq_activate_entity
> ->bfq_get_entity
> ->bfqg_and_blkg_get (+1) <==== : Note1
> ->bfq_del_bfqq_busy
> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
> -> bfq_forget_entity(is_in_service = true)
> entity->on_st_or_in_serv = false <=== :Note2
> if (is_in_service)
> return; ==> do not touch reference
> -> blkcg_css_offline
> -> blkcg_destroy_blkgs
> -> blkg_destroy
> -> bfq_pd_offline
> -> __bfq_deactivate_entity
> if (!entity->on_st_or_in_serv) /* true, because (Note2)
> return false;
> -> bfq_pd_free
> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
> So bfq_group and blkcg_gq will leak forever, see test-case below.
> If fact bfq_group objects reference counting are quite different
> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
> required here.
>
>
> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
> and add corresponding comment.
>
> ##TESTCASE_BEGIN:
> #!/bin/bash
>
> max_iters=${1:-100}
> #prep cgroup mounts
> mount -t tmpfs cgroup_root /sys/fs/cgroup
> mkdir /sys/fs/cgroup/blkio
> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>
> # Prepare blkdev
> grep blkio /proc/cgroups
> truncate -s 1M img
> losetup /dev/loop0 img
> echo bfq > /sys/block/loop0/queue/scheduler
>
> grep blkio /proc/cgroups
> for ((i=0;i<max_iters;i++))
> do
> mkdir -p /sys/fs/cgroup/blkio/a
> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
> rmdir /sys/fs/cgroup/blkio/a
> grep blkio /proc/cgroups
> done
> ##TESTCASE_END:
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> block/bfq-cgroup.c | 2 +-
> block/bfq-iosched.h | 1 -
> block/bfq-wf2q.c | 15 +++++----------
> 3 files changed, 6 insertions(+), 12 deletions(-)
>
> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
> index 68882b9..b791e20 100644
> --- a/block/bfq-cgroup.c
> +++ b/block/bfq-cgroup.c
> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
> kfree(bfqg);
> }
>
> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
> {
> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
> bfqg_get(bfqg);
> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
> index cd224aa..7038952 100644
> --- a/block/bfq-iosched.h
> +++ b/block/bfq-iosched.h
> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>
> #ifdef CONFIG_BFQ_GROUP_IOSCHED
> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
> index 34ad095..6a363bb 100644
> --- a/block/bfq-wf2q.c
> +++ b/block/bfq-wf2q.c
> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
> {
> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>
> + /* Grab reference only for bfq_queue's objects, bfq_group ones
> + * are owned by blkcg_gq
> + */
> if (bfqq) {
> bfqq->ref++;
> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
> bfqq, bfqq->ref);
> - } else
> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
> - entity));
> + }
> }
>
> /**
> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>
> entity->on_st_or_in_serv = false;
> st->wsum -= entity->weight;
> - if (is_in_service)
> - return;
> -
> - if (bfqq)
> + if (bfqq && !is_in_service)
> bfq_put_queue(bfqq);
> - else
> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
> - entity));
> }
>
> /**
> --
> 2.7.4
Hi,
sorry for the delay. The commit you propose to drop fix the issues
reported in [1].
Such a commit does introduce the leak that you report (thank you for
spotting it). Yet, according to the threads mentioned in [1],
dropping that commit would take us back to those issues.
Maybe the solution is to fix the unbalance that you spotted?
I'll check it ASAP, unless you do it before me.
Thanks,
Paolo
[1] https://lkml.org/lkml/2020/1/31/94
> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>
> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
> introduce leak forbfq_group and blkcg_gq objects because of get/put
> imbalance. See trace balow:
> -> blkg_alloc
> -> bfq_pq_alloc
> -> bfqg_get (+1)
> ->bfq_activate_bfqq
> ->bfq_activate_requeue_entity
> -> __bfq_activate_entity
> ->bfq_get_entity
> ->bfqg_and_blkg_get (+1) <==== : Note1
> ->bfq_del_bfqq_busy
> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
> -> bfq_forget_entity(is_in_service = true)
> entity->on_st_or_in_serv = false <=== :Note2
> if (is_in_service)
> return; ==> do not touch reference
> -> blkcg_css_offline
> -> blkcg_destroy_blkgs
> -> blkg_destroy
> -> bfq_pd_offline
> -> __bfq_deactivate_entity
> if (!entity->on_st_or_in_serv) /* true, because (Note2)
> return false;
> -> bfq_pd_free
> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
> So bfq_group and blkcg_gq will leak forever, see test-case below.
> If fact bfq_group objects reference counting are quite different
> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
> required here.
>
>
> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
> and add corresponding comment.
>
> ##TESTCASE_BEGIN:
> #!/bin/bash
>
> max_iters=${1:-100}
> #prep cgroup mounts
> mount -t tmpfs cgroup_root /sys/fs/cgroup
> mkdir /sys/fs/cgroup/blkio
> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>
> # Prepare blkdev
> grep blkio /proc/cgroups
> truncate -s 1M img
> losetup /dev/loop0 img
> echo bfq > /sys/block/loop0/queue/scheduler
>
> grep blkio /proc/cgroups
> for ((i=0;i<max_iters;i++))
> do
> mkdir -p /sys/fs/cgroup/blkio/a
> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
> rmdir /sys/fs/cgroup/blkio/a
> grep blkio /proc/cgroups
> done
> ##TESTCASE_END:
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> block/bfq-cgroup.c | 2 +-
> block/bfq-iosched.h | 1 -
> block/bfq-wf2q.c | 15 +++++----------
> 3 files changed, 6 insertions(+), 12 deletions(-)
>
> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
> index 68882b9..b791e20 100644
> --- a/block/bfq-cgroup.c
> +++ b/block/bfq-cgroup.c
> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
> kfree(bfqg);
> }
>
> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
> {
> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
> bfqg_get(bfqg);
> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
> index cd224aa..7038952 100644
> --- a/block/bfq-iosched.h
> +++ b/block/bfq-iosched.h
> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>
> #ifdef CONFIG_BFQ_GROUP_IOSCHED
> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
> index 34ad095..6a363bb 100644
> --- a/block/bfq-wf2q.c
> +++ b/block/bfq-wf2q.c
> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
> {
> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>
> + /* Grab reference only for bfq_queue's objects, bfq_group ones
> + * are owned by blkcg_gq
> + */
> if (bfqq) {
> bfqq->ref++;
> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
> bfqq, bfqq->ref);
> - } else
> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
> - entity));
> + }
> }
>
> /**
> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>
> entity->on_st_or_in_serv = false;
> st->wsum -= entity->weight;
> - if (is_in_service)
> - return;
> -
> - if (bfqq)
> + if (bfqq && !is_in_service)
> bfq_put_queue(bfqq);
> - else
> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
> - entity));
> }
>
> /**
> --
> 2.7.4
>
Paolo Valente <[email protected]> writes:
> Hi,
> sorry for the delay. The commit you propose to drop fix the issues
> reported in [1].
>
> Such a commit does introduce the leak that you report (thank you for
> spotting it). Yet, according to the threads mentioned in [1],
> dropping that commit would take us back to those issues.
>
> Maybe the solution is to fix the unbalance that you spotted?
I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
AFAIU both bugs mentioned in original patchset was fixed by:
478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
So I review commit db37a34c563b as independent one.
It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
but do we actually need it here?
#IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
bfqd->root_group is holded by bfqd from bfq_init_queue()
other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
So bfq_queue can not disappear under us.
#IF CONFIG_BFQ_GROUP_IOSCHED is disabled:
we have only one bfqd->root_group object which allocated from bfq_create_group_hierarch()
and bfqg_and_blkg_get() bfqg_and_blkg_put() are noop
Resume: in both cases extra reference is not required, so I continue to
insist that we should revert commit db37a34c563b because it tries to
solve a non existing issue, but introduce the real one.
Please correct me if I'm wrong.
>
> I'll check it ASAP, unless you do it before me.
>
> Thanks,
> Paolo
>
> [1] https://lkml.org/lkml/2020/1/31/94
>
>> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>>
>> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>> introduce leak forbfq_group and blkcg_gq objects because of get/put
>> imbalance. See trace balow:
>> -> blkg_alloc
>> -> bfq_pq_alloc
>> -> bfqg_get (+1)
>> ->bfq_activate_bfqq
>> ->bfq_activate_requeue_entity
>> -> __bfq_activate_entity
>> ->bfq_get_entity
->> ->bfqg_and_blkg_get (+1) <==== : Note1
>> ->bfq_del_bfqq_busy
>> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
>> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
>> -> bfq_forget_entity(is_in_service = true)
>> entity->on_st_or_in_serv = false <=== :Note2
>> if (is_in_service)
>> return; ==> do not touch reference
>> -> blkcg_css_offline
>> -> blkcg_destroy_blkgs
>> -> blkg_destroy
>> -> bfq_pd_offline
>> -> __bfq_deactivate_entity
>> if (!entity->on_st_or_in_serv) /* true, because (Note2)
>> return false;
>> -> bfq_pd_free
>> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
>> So bfq_group and blkcg_gq will leak forever, see test-case below.
>> If fact bfq_group objects reference counting are quite different
>> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
>> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
>> required here.
>>
>>
>> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>> and add corresponding comment.
>>
>> ##TESTCASE_BEGIN:
>> #!/bin/bash
>>
>> max_iters=${1:-100}
>> #prep cgroup mounts
>> mount -t tmpfs cgroup_root /sys/fs/cgroup
>> mkdir /sys/fs/cgroup/blkio
>> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>>
>> # Prepare blkdev
>> grep blkio /proc/cgroups
>> truncate -s 1M img
>> losetup /dev/loop0 img
>> echo bfq > /sys/block/loop0/queue/scheduler
>>
>> grep blkio /proc/cgroups
>> for ((i=0;i<max_iters;i++))
>> do
>> mkdir -p /sys/fs/cgroup/blkio/a
>> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
>> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
>> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
>> rmdir /sys/fs/cgroup/blkio/a
>> grep blkio /proc/cgroups
>> done
>> ##TESTCASE_END:
>>
>> Signed-off-by: Dmitry Monakhov <[email protected]>
>> ---
>> block/bfq-cgroup.c | 2 +-
>> block/bfq-iosched.h | 1 -
>> block/bfq-wf2q.c | 15 +++++----------
>> 3 files changed, 6 insertions(+), 12 deletions(-)
>>
>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
>> index 68882b9..b791e20 100644
>> --- a/block/bfq-cgroup.c
>> +++ b/block/bfq-cgroup.c
>> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
>> kfree(bfqg);
>> }
>>
>> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
>> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
>> {
>> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
>> bfqg_get(bfqg);
>> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
>> index cd224aa..7038952 100644
>> --- a/block/bfq-iosched.h
>> +++ b/block/bfq-iosched.h
>> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
>> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
>> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
>> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
>> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
>> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>>
>> #ifdef CONFIG_BFQ_GROUP_IOSCHED
>> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
>> index 34ad095..6a363bb 100644
>> --- a/block/bfq-wf2q.c
>> +++ b/block/bfq-wf2q.c
>> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
>> {
>> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>>
>> + /* Grab reference only for bfq_queue's objects, bfq_group ones
>> + * are owned by blkcg_gq
>> + */
>> if (bfqq) {
>> bfqq->ref++;
>> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
>> bfqq, bfqq->ref);
>> - } else
>> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
>> - entity));
>> + }
>> }
>>
>> /**
>> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>>
>> entity->on_st_or_in_serv = false;
>> st->wsum -= entity->weight;
>> - if (is_in_service)
>> - return;
>> -
>> - if (bfqq)
>> + if (bfqq && !is_in_service)
>> bfq_put_queue(bfqq);
>> - else
>> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
>> - entity));
>> }
>>
>> /**
>> --
>> 2.7.4
>>
> Il giorno 8 lug 2020, alle ore 19:48, Dmitry Monakhov <[email protected]> ha scritto:
>
> Paolo Valente <[email protected]> writes:
>
>> Hi,
>> sorry for the delay. The commit you propose to drop fix the issues
>> reported in [1].
>>
>> Such a commit does introduce the leak that you report (thank you for
>> spotting it). Yet, according to the threads mentioned in [1],
>> dropping that commit would take us back to those issues.
>>
>> Maybe the solution is to fix the unbalance that you spotted?
> I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
> AFAIU both bugs mentioned in original patchset was fixed by:
> 478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
> f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
>
> So I review commit db37a34c563b as independent one.
> It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
> but do we actually need it here?
>
> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
> bfqd->root_group is holded by bfqd from bfq_init_queue()
> other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
> So bfq_queue can not disappear under us.
>
You are right, but incomplete. No extra ref is needed for an entity
that represents a bfq_queue. And this consideration mistook me before
I realized that that commit was needed. The problem is that an entity
may also represent a group of entities. In that case no reference is
taken through any bfq_queue. The commit you want to remove takes this
missing reference.
Paolo
> #IF CONFIG_BFQ_GROUP_IOSCHED is disabled:
> we have only one bfqd->root_group object which allocated from bfq_create_group_hierarch()
> and bfqg_and_blkg_get() bfqg_and_blkg_put() are noop
>
> Resume: in both cases extra reference is not required, so I continue to
> insist that we should revert commit db37a34c563b because it tries to
> solve a non existing issue, but introduce the real one.
>
> Please correct me if I'm wrong.
>>
>> I'll check it ASAP, unless you do it before me.
>>
>> Thanks,
>> Paolo
>>
>> [1] https://lkml.org/lkml/2020/1/31/94
>>
>>> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>>>
>>> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>> introduce leak forbfq_group and blkcg_gq objects because of get/put
>>> imbalance. See trace balow:
>>> -> blkg_alloc
>>> -> bfq_pq_alloc
>>> -> bfqg_get (+1)
>>> ->bfq_activate_bfqq
>>> ->bfq_activate_requeue_entity
>>> -> __bfq_activate_entity
>>> ->bfq_get_entity
> ->> ->bfqg_and_blkg_get (+1) <==== : Note1
>>> ->bfq_del_bfqq_busy
>>> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
>>> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
>>> -> bfq_forget_entity(is_in_service = true)
>>> entity->on_st_or_in_serv = false <=== :Note2
>>> if (is_in_service)
>>> return; ==> do not touch reference
>>> -> blkcg_css_offline
>>> -> blkcg_destroy_blkgs
>>> -> blkg_destroy
>>> -> bfq_pd_offline
>>> -> __bfq_deactivate_entity
>>> if (!entity->on_st_or_in_serv) /* true, because (Note2)
>>> return false;
>>> -> bfq_pd_free
>>> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
>>> So bfq_group and blkcg_gq will leak forever, see test-case below.
>>> If fact bfq_group objects reference counting are quite different
>>> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
>>> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
>>> required here.
>>>
>>>
>>> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>> and add corresponding comment.
>>>
>>> ##TESTCASE_BEGIN:
>>> #!/bin/bash
>>>
>>> max_iters=${1:-100}
>>> #prep cgroup mounts
>>> mount -t tmpfs cgroup_root /sys/fs/cgroup
>>> mkdir /sys/fs/cgroup/blkio
>>> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>>>
>>> # Prepare blkdev
>>> grep blkio /proc/cgroups
>>> truncate -s 1M img
>>> losetup /dev/loop0 img
>>> echo bfq > /sys/block/loop0/queue/scheduler
>>>
>>> grep blkio /proc/cgroups
>>> for ((i=0;i<max_iters;i++))
>>> do
>>> mkdir -p /sys/fs/cgroup/blkio/a
>>> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
>>> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
>>> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
>>> rmdir /sys/fs/cgroup/blkio/a
>>> grep blkio /proc/cgroups
>>> done
>>> ##TESTCASE_END:
>>>
>>> Signed-off-by: Dmitry Monakhov <[email protected]>
>>> ---
>>> block/bfq-cgroup.c | 2 +-
>>> block/bfq-iosched.h | 1 -
>>> block/bfq-wf2q.c | 15 +++++----------
>>> 3 files changed, 6 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
>>> index 68882b9..b791e20 100644
>>> --- a/block/bfq-cgroup.c
>>> +++ b/block/bfq-cgroup.c
>>> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
>>> kfree(bfqg);
>>> }
>>>
>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>> {
>>> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
>>> bfqg_get(bfqg);
>>> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
>>> index cd224aa..7038952 100644
>>> --- a/block/bfq-iosched.h
>>> +++ b/block/bfq-iosched.h
>>> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
>>> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
>>> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
>>> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
>>> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>>>
>>> #ifdef CONFIG_BFQ_GROUP_IOSCHED
>>> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
>>> index 34ad095..6a363bb 100644
>>> --- a/block/bfq-wf2q.c
>>> +++ b/block/bfq-wf2q.c
>>> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
>>> {
>>> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>>>
>>> + /* Grab reference only for bfq_queue's objects, bfq_group ones
>>> + * are owned by blkcg_gq
>>> + */
>>> if (bfqq) {
>>> bfqq->ref++;
>>> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
>>> bfqq, bfqq->ref);
>>> - } else
>>> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
>>> - entity));
>>> + }
>>> }
>>>
>>> /**
>>> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>>>
>>> entity->on_st_or_in_serv = false;
>>> st->wsum -= entity->weight;
>>> - if (is_in_service)
>>> - return;
>>> -
>>> - if (bfqq)
>>> + if (bfqq && !is_in_service)
>>> bfq_put_queue(bfqq);
>>> - else
>>> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
>>> - entity));
>>> }
>>>
>>> /**
>>> --
>>> 2.7.4
>>>
Paolo Valente <[email protected]> writes:
>> Il giorno 8 lug 2020, alle ore 19:48, Dmitry Monakhov <[email protected]> ha scritto:
>>
>> Paolo Valente <[email protected]> writes:
>>
>>> Hi,
>>> sorry for the delay. The commit you propose to drop fix the issues
>>> reported in [1].
>>>
>>> Such a commit does introduce the leak that you report (thank you for
>>> spotting it). Yet, according to the threads mentioned in [1],
>>> dropping that commit would take us back to those issues.
>>>
>>> Maybe the solution is to fix the unbalance that you spotted?
>> I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
>> AFAIU both bugs mentioned in original patchset was fixed by:
>> 478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
>> f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
>>
>> So I review commit db37a34c563b as independent one.
>> It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
>> but do we actually need it here?
>>
>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>> other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
>> So bfq_queue can not disappear under us.
>>
>
> You are right, but incomplete. No extra ref is needed for an entity
> that represents a bfq_queue. And this consideration mistook me before
> I realized that that commit was needed. The problem is that an entity
> may also represent a group of entities. In that case no reference is
> taken through any bfq_queue. The commit you want to remove takes this
> missing reference.
Sorry, It looks like I've mistyped sentance above, I ment to say bfq_group.
So here is my statement corrected:
#IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
bfqd->root_group is holded by bfqd from bfq_init_queue()
other *bfq_group* objects are owned by corresponding blkcg, reference get from bfq_pd_alloc()
So *bfq_group* can not disappear under us.
So no extra reference is required for entity represents bfq_group. Commit is not required.
>
> Paolo
>
>> #IF CONFIG_BFQ_GROUP_IOSCHED is disabled:
>> we have only one bfqd->root_group object which allocated from bfq_create_group_hierarch()
>> and bfqg_and_blkg_get() bfqg_and_blkg_put() are noop
>>
>> Resume: in both cases extra reference is not required, so I continue to
>> insist that we should revert commit db37a34c563b because it tries to
>> solve a non existing issue, but introduce the real one.
>>
>> Please correct me if I'm wrong.
>>>
>>> I'll check it ASAP, unless you do it before me.
>>>
>>> Thanks,
>>> Paolo
>>>
>>> [1] https://lkml.org/lkml/2020/1/31/94
>>>
>>>> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>>>>
>>>> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>> introduce leak forbfq_group and blkcg_gq objects because of get/put
>>>> imbalance. See trace balow:
>>>> -> blkg_alloc
>>>> -> bfq_pq_alloc
>>>> -> bfqg_get (+1)
>>>> ->bfq_activate_bfqq
>>>> ->bfq_activate_requeue_entity
>>>> -> __bfq_activate_entity
>>>> ->bfq_get_entity
>> ->> ->bfqg_and_blkg_get (+1) <==== : Note1
>>>> ->bfq_del_bfqq_busy
>>>> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
>>>> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
>>>> -> bfq_forget_entity(is_in_service = true)
>>>> entity->on_st_or_in_serv = false <=== :Note2
>>>> if (is_in_service)
>>>> return; ==> do not touch reference
>>>> -> blkcg_css_offline
>>>> -> blkcg_destroy_blkgs
>>>> -> blkg_destroy
>>>> -> bfq_pd_offline
>>>> -> __bfq_deactivate_entity
>>>> if (!entity->on_st_or_in_serv) /* true, because (Note2)
>>>> return false;
>>>> -> bfq_pd_free
>>>> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
>>>> So bfq_group and blkcg_gq will leak forever, see test-case below.
>>>> If fact bfq_group objects reference counting are quite different
>>>> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
>>>> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
>>>> required here.
>>>>
>>>>
>>>> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>> and add corresponding comment.
>>>>
>>>> ##TESTCASE_BEGIN:
>>>> #!/bin/bash
>>>>
>>>> max_iters=${1:-100}
>>>> #prep cgroup mounts
>>>> mount -t tmpfs cgroup_root /sys/fs/cgroup
>>>> mkdir /sys/fs/cgroup/blkio
>>>> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>>>>
>>>> # Prepare blkdev
>>>> grep blkio /proc/cgroups
>>>> truncate -s 1M img
>>>> losetup /dev/loop0 img
>>>> echo bfq > /sys/block/loop0/queue/scheduler
>>>>
>>>> grep blkio /proc/cgroups
>>>> for ((i=0;i<max_iters;i++))
>>>> do
>>>> mkdir -p /sys/fs/cgroup/blkio/a
>>>> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
>>>> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
>>>> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
>>>> rmdir /sys/fs/cgroup/blkio/a
>>>> grep blkio /proc/cgroups
>>>> done
>>>> ##TESTCASE_END:
>>>>
>>>> Signed-off-by: Dmitry Monakhov <[email protected]>
>>>> ---
>>>> block/bfq-cgroup.c | 2 +-
>>>> block/bfq-iosched.h | 1 -
>>>> block/bfq-wf2q.c | 15 +++++----------
>>>> 3 files changed, 6 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
>>>> index 68882b9..b791e20 100644
>>>> --- a/block/bfq-cgroup.c
>>>> +++ b/block/bfq-cgroup.c
>>>> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
>>>> kfree(bfqg);
>>>> }
>>>>
>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>> {
>>>> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
>>>> bfqg_get(bfqg);
>>>> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
>>>> index cd224aa..7038952 100644
>>>> --- a/block/bfq-iosched.h
>>>> +++ b/block/bfq-iosched.h
>>>> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
>>>> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
>>>> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
>>>> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
>>>> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>>>>
>>>> #ifdef CONFIG_BFQ_GROUP_IOSCHED
>>>> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
>>>> index 34ad095..6a363bb 100644
>>>> --- a/block/bfq-wf2q.c
>>>> +++ b/block/bfq-wf2q.c
>>>> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
>>>> {
>>>> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>>>>
>>>> + /* Grab reference only for bfq_queue's objects, bfq_group ones
>>>> + * are owned by blkcg_gq
>>>> + */
>>>> if (bfqq) {
>>>> bfqq->ref++;
>>>> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
>>>> bfqq, bfqq->ref);
>>>> - } else
>>>> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
>>>> - entity));
>>>> + }
>>>> }
>>>>
>>>> /**
>>>> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>>>>
>>>> entity->on_st_or_in_serv = false;
>>>> st->wsum -= entity->weight;
>>>> - if (is_in_service)
>>>> - return;
>>>> -
>>>> - if (bfqq)
>>>> + if (bfqq && !is_in_service)
>>>> bfq_put_queue(bfqq);
>>>> - else
>>>> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
>>>> - entity));
>>>> }
>>>>
>>>> /**
>>>> --
>>>> 2.7.4
>>>>
> Il giorno 9 lug 2020, alle ore 10:19, Dmitry Monakhov <[email protected]> ha scritto:
>
> Paolo Valente <[email protected]> writes:
>
>>> Il giorno 8 lug 2020, alle ore 19:48, Dmitry Monakhov <[email protected]> ha scritto:
>>>
>>> Paolo Valente <[email protected]> writes:
>>>
>>>> Hi,
>>>> sorry for the delay. The commit you propose to drop fix the issues
>>>> reported in [1].
>>>>
>>>> Such a commit does introduce the leak that you report (thank you for
>>>> spotting it). Yet, according to the threads mentioned in [1],
>>>> dropping that commit would take us back to those issues.
>>>>
>>>> Maybe the solution is to fix the unbalance that you spotted?
>>> I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
>>> AFAIU both bugs mentioned in original patchset was fixed by:
>>> 478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
>>> f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
>>>
>>> So I review commit db37a34c563b as independent one.
>>> It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
>>> but do we actually need it here?
>>>
>>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>>> other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
>>> So bfq_queue can not disappear under us.
>>>
>>
>> You are right, but incomplete. No extra ref is needed for an entity
>> that represents a bfq_queue. And this consideration mistook me before
>> I realized that that commit was needed. The problem is that an entity
>> may also represent a group of entities. In that case no reference is
>> taken through any bfq_queue. The commit you want to remove takes this
>> missing reference.
> Sorry, It looks like I've mistyped sentance above, I ment to say bfq_group.
> So here is my statement corrected:
> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
> bfqd->root_group is holded by bfqd from bfq_init_queue()
> other *bfq_group* objects are owned by corresponding blkcg, reference get from bfq_pd_alloc()
> So *bfq_group* can not disappear under us.
>
> So no extra reference is required for entity represents bfq_group. Commit is not required.
No, the entity may remain alive and on some tree after bfq_pd_offline has been invoked.
Paolo
>>
>> Paolo
>>
>>> #IF CONFIG_BFQ_GROUP_IOSCHED is disabled:
>>> we have only one bfqd->root_group object which allocated from bfq_create_group_hierarch()
>>> and bfqg_and_blkg_get() bfqg_and_blkg_put() are noop
>>>
>>> Resume: in both cases extra reference is not required, so I continue to
>>> insist that we should revert commit db37a34c563b because it tries to
>>> solve a non existing issue, but introduce the real one.
>>>
>>> Please correct me if I'm wrong.
>>>>
>>>> I'll check it ASAP, unless you do it before me.
>>>>
>>>> Thanks,
>>>> Paolo
>>>>
>>>> [1] https://lkml.org/lkml/2020/1/31/94
>>>>
>>>>> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>>>>>
>>>>> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>>> introduce leak forbfq_group and blkcg_gq objects because of get/put
>>>>> imbalance. See trace balow:
>>>>> -> blkg_alloc
>>>>> -> bfq_pq_alloc
>>>>> -> bfqg_get (+1)
>>>>> ->bfq_activate_bfqq
>>>>> ->bfq_activate_requeue_entity
>>>>> -> __bfq_activate_entity
>>>>> ->bfq_get_entity
>>> ->> ->bfqg_and_blkg_get (+1) <==== : Note1
>>>>> ->bfq_del_bfqq_busy
>>>>> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
>>>>> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
>>>>> -> bfq_forget_entity(is_in_service = true)
>>>>> entity->on_st_or_in_serv = false <=== :Note2
>>>>> if (is_in_service)
>>>>> return; ==> do not touch reference
>>>>> -> blkcg_css_offline
>>>>> -> blkcg_destroy_blkgs
>>>>> -> blkg_destroy
>>>>> -> bfq_pd_offline
>>>>> -> __bfq_deactivate_entity
>>>>> if (!entity->on_st_or_in_serv) /* true, because (Note2)
>>>>> return false;
>>>>> -> bfq_pd_free
>>>>> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
>>>>> So bfq_group and blkcg_gq will leak forever, see test-case below.
>>>>> If fact bfq_group objects reference counting are quite different
>>>>> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
>>>>> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
>>>>> required here.
>>>>>
>>>>>
>>>>> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>>> and add corresponding comment.
>>>>>
>>>>> ##TESTCASE_BEGIN:
>>>>> #!/bin/bash
>>>>>
>>>>> max_iters=${1:-100}
>>>>> #prep cgroup mounts
>>>>> mount -t tmpfs cgroup_root /sys/fs/cgroup
>>>>> mkdir /sys/fs/cgroup/blkio
>>>>> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>>>>>
>>>>> # Prepare blkdev
>>>>> grep blkio /proc/cgroups
>>>>> truncate -s 1M img
>>>>> losetup /dev/loop0 img
>>>>> echo bfq > /sys/block/loop0/queue/scheduler
>>>>>
>>>>> grep blkio /proc/cgroups
>>>>> for ((i=0;i<max_iters;i++))
>>>>> do
>>>>> mkdir -p /sys/fs/cgroup/blkio/a
>>>>> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
>>>>> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
>>>>> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
>>>>> rmdir /sys/fs/cgroup/blkio/a
>>>>> grep blkio /proc/cgroups
>>>>> done
>>>>> ##TESTCASE_END:
>>>>>
>>>>> Signed-off-by: Dmitry Monakhov <[email protected]>
>>>>> ---
>>>>> block/bfq-cgroup.c | 2 +-
>>>>> block/bfq-iosched.h | 1 -
>>>>> block/bfq-wf2q.c | 15 +++++----------
>>>>> 3 files changed, 6 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
>>>>> index 68882b9..b791e20 100644
>>>>> --- a/block/bfq-cgroup.c
>>>>> +++ b/block/bfq-cgroup.c
>>>>> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
>>>>> kfree(bfqg);
>>>>> }
>>>>>
>>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>>> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>>> {
>>>>> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
>>>>> bfqg_get(bfqg);
>>>>> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
>>>>> index cd224aa..7038952 100644
>>>>> --- a/block/bfq-iosched.h
>>>>> +++ b/block/bfq-iosched.h
>>>>> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
>>>>> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
>>>>> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
>>>>> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
>>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
>>>>> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>>>>>
>>>>> #ifdef CONFIG_BFQ_GROUP_IOSCHED
>>>>> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
>>>>> index 34ad095..6a363bb 100644
>>>>> --- a/block/bfq-wf2q.c
>>>>> +++ b/block/bfq-wf2q.c
>>>>> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
>>>>> {
>>>>> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>>>>>
>>>>> + /* Grab reference only for bfq_queue's objects, bfq_group ones
>>>>> + * are owned by blkcg_gq
>>>>> + */
>>>>> if (bfqq) {
>>>>> bfqq->ref++;
>>>>> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
>>>>> bfqq, bfqq->ref);
>>>>> - } else
>>>>> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
>>>>> - entity));
>>>>> + }
>>>>> }
>>>>>
>>>>> /**
>>>>> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>>>>>
>>>>> entity->on_st_or_in_serv = false;
>>>>> st->wsum -= entity->weight;
>>>>> - if (is_in_service)
>>>>> - return;
>>>>> -
>>>>> - if (bfqq)
>>>>> + if (bfqq && !is_in_service)
>>>>> bfq_put_queue(bfqq);
>>>>> - else
>>>>> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
>>>>> - entity));
>>>>> }
>>>>>
>>>>> /**
>>>>> --
>>>>> 2.7.4
Paolo Valente <[email protected]> writes:
>> Il giorno 9 lug 2020, alle ore 10:19, Dmitry Monakhov <[email protected]> ha scritto:
>>
>> Paolo Valente <[email protected]> writes:
>>
>>>> Il giorno 8 lug 2020, alle ore 19:48, Dmitry Monakhov <[email protected]> ha scritto:
>>>>
>>>> Paolo Valente <[email protected]> writes:
>>>>
>>>>> Hi,
>>>>> sorry for the delay. The commit you propose to drop fix the issues
>>>>> reported in [1].
>>>>>
>>>>> Such a commit does introduce the leak that you report (thank you for
>>>>> spotting it). Yet, according to the threads mentioned in [1],
>>>>> dropping that commit would take us back to those issues.
>>>>>
>>>>> Maybe the solution is to fix the unbalance that you spotted?
>>>> I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
>>>> AFAIU both bugs mentioned in original patchset was fixed by:
>>>> 478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
>>>> f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
>>>>
>>>> So I review commit db37a34c563b as independent one.
>>>> It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
>>>> but do we actually need it here?
>>>>
>>>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>>>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>>>> other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
>>>> So bfq_queue can not disappear under us.
>>>>
>>>
>>> You are right, but incomplete. No extra ref is needed for an entity
>>> that represents a bfq_queue. And this consideration mistook me before
>>> I realized that that commit was needed. The problem is that an entity
>>> may also represent a group of entities. In that case no reference is
>>> taken through any bfq_queue. The commit you want to remove takes this
>>> missing reference.
>> Sorry, It looks like I've mistyped sentance above, I ment to say bfq_group.
>> So here is my statement corrected:
>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>> other *bfq_group* objects are owned by corresponding blkcg, reference get from bfq_pd_alloc()
>> So *bfq_group* can not disappear under us.
>>
>> So no extra reference is required for entity represents bfq_group. Commit is not required.
>
> No, the entity may remain alive and on some tree after bfq_pd_offline has been invoked.
Ok you right, we should drop the group reference inside __bfq_bfqd_reset_in_service()
as we do for queue's entities. Please see updated patch version.
On 7/20/20 6:19 AM, Dmitry Monakhov wrote:
> Ok you right, we should drop the group reference inside
> __bfq_bfqd_reset_in_service() as we do for queue's entities. Please
> see updated patch version.
Can you please send that out as a proper v2?
--
Jens Axboe
commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
introduce leak forbfq_group and blkcg_gq objects because of get/put
imbalance. See trace balow:
-> blkg_alloc
-> bfq_pq_alloc
-> bfqg_get (+1)
->bfq_activate_bfqq
->bfq_activate_requeue_entity
-> __bfq_activate_entity
->bfq_get_entity
->bfqg_and_blkg_get (+1) <==== : Note1
->bfq_del_bfqq_busy
->bfq_deactivate_entity+0x53/0xc0 [bfq]
->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
-> bfq_forget_entity(is_in_service = true)
entity->on_st_or_in_serv = false <=== :Note2
if (is_in_service)
return; ==> do not touch reference
-> blkcg_css_offline
-> blkcg_destroy_blkgs
-> blkg_destroy
-> bfq_pd_offline
-> __bfq_deactivate_entity
if (!entity->on_st_or_in_serv) /* true, because (Note2)
return false;
-> bfq_pd_free
-> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
So bfq_group and blkcg_gq will leak forever, see test-case below.
We should drop group reference once it finaly removed from service
inside __bfq_bfqd_reset_in_service, as we do with queue entities.
##TESTCASE_BEGIN:
#!/bin/bash
max_iters=${1:-100}
#prep cgroup mounts
mount -t tmpfs cgroup_root /sys/fs/cgroup
mkdir /sys/fs/cgroup/blkio
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
# Prepare blkdev
grep blkio /proc/cgroups
truncate -s 1M img
losetup /dev/loop0 img
echo bfq > /sys/block/loop0/queue/scheduler
grep blkio /proc/cgroups
for ((i=0;i<max_iters;i++))
do
mkdir -p /sys/fs/cgroup/blkio/a
echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
rmdir /sys/fs/cgroup/blkio/a
grep blkio /proc/cgroups
done
##TESTCASE_END:
Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/bfq-wf2q.c | 23 +++++++++++++----------
1 file changed, 13 insertions(+), 10 deletions(-)
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 8113138..93b236c 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -635,14 +635,10 @@ static void bfq_idle_insert(struct bfq_service_tree *st,
* @entity: the entity being removed.
* @is_in_service: true if entity is currently the in-service entity.
*
- * Forget everything about @entity. In addition, if entity represents
- * a queue, and the latter is not in service, then release the service
- * reference to the queue (the one taken through bfq_get_entity). In
- * fact, in this case, there is really no more service reference to
- * the queue, as the latter is also outside any service tree. If,
- * instead, the queue is in service, then __bfq_bfqd_reset_in_service
- * will take care of putting the reference when the queue finally
- * stops being served.
+ * Forget everything about @entity. If entity is not in service, then release
+ * the service reference to the entity (the one taken through bfq_get_entity).
+ * If the entity is in service, then __bfq_bfqd_reset_in_service will take care
+ * of putting the reference when the entity finally stops being served.
*/
static void bfq_forget_entity(struct bfq_service_tree *st,
struct bfq_entity *entity,
@@ -1626,9 +1622,16 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
* execute the final step: reset in_service_entity along the
* path from entity to the root.
*/
- for_each_entity(entity)
+ for_each_entity(entity) {
entity->sched_data->in_service_entity = NULL;
-
+ /*
+ * Release bfq_groups reference if it was not released in
+ * bfq_forget_entity, which was taken in bfq_get_entity.
+ */
+ if (!bfq_entity_to_bfqq(entity) && !entity->on_st_or_in_serv)
+ bfqg_and_blkg_put(container_of(entity, struct bfq_group,
+ entity));
+ }
/*
* in_serv_entity is no longer in service, so, if it is in no
* service tree either, then release the service reference to
--
2.7.4
Hello.
On 20.07.2020 19:04, Dmitry Monakhov wrote:
> commit db37a34c563b ("block, bfq: get a ref to a group when adding it
> to a service tree")
> introduce leak forbfq_group and blkcg_gq objects because of get/put
> imbalance. See trace balow:
> -> blkg_alloc
> -> bfq_pq_alloc
> -> bfqg_get (+1)
> ->bfq_activate_bfqq
> ->bfq_activate_requeue_entity
> -> __bfq_activate_entity
> ->bfq_get_entity
> ->bfqg_and_blkg_get (+1) <==== : Note1
> ->bfq_del_bfqq_busy
> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
> -> bfq_forget_entity(is_in_service = true)
> entity->on_st_or_in_serv = false <=== :Note2
> if (is_in_service)
> return; ==> do not touch reference
> -> blkcg_css_offline
> -> blkcg_destroy_blkgs
> -> blkg_destroy
> -> bfq_pd_offline
> -> __bfq_deactivate_entity
> if (!entity->on_st_or_in_serv) /* true, because (Note2)
> return false;
> -> bfq_pd_free
> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
> So bfq_group and blkcg_gq will leak forever, see test-case below.
>
> We should drop group reference once it finaly removed from service
> inside __bfq_bfqd_reset_in_service, as we do with queue entities.
>
> ##TESTCASE_BEGIN:
> #!/bin/bash
>
> max_iters=${1:-100}
> #prep cgroup mounts
> mount -t tmpfs cgroup_root /sys/fs/cgroup
> mkdir /sys/fs/cgroup/blkio
> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>
> # Prepare blkdev
> grep blkio /proc/cgroups
> truncate -s 1M img
> losetup /dev/loop0 img
> echo bfq > /sys/block/loop0/queue/scheduler
>
> grep blkio /proc/cgroups
> for ((i=0;i<max_iters;i++))
> do
> mkdir -p /sys/fs/cgroup/blkio/a
> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2>
> /dev/null
> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
> rmdir /sys/fs/cgroup/blkio/a
> grep blkio /proc/cgroups
> done
> ##TESTCASE_END:
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> block/bfq-wf2q.c | 23 +++++++++++++----------
> 1 file changed, 13 insertions(+), 10 deletions(-)
>
> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
> index 8113138..93b236c 100644
> --- a/block/bfq-wf2q.c
> +++ b/block/bfq-wf2q.c
> @@ -635,14 +635,10 @@ static void bfq_idle_insert(struct
> bfq_service_tree *st,
> * @entity: the entity being removed.
> * @is_in_service: true if entity is currently the in-service entity.
> *
> - * Forget everything about @entity. In addition, if entity represents
> - * a queue, and the latter is not in service, then release the service
> - * reference to the queue (the one taken through bfq_get_entity). In
> - * fact, in this case, there is really no more service reference to
> - * the queue, as the latter is also outside any service tree. If,
> - * instead, the queue is in service, then __bfq_bfqd_reset_in_service
> - * will take care of putting the reference when the queue finally
> - * stops being served.
> + * Forget everything about @entity. If entity is not in service, then
> release
> + * the service reference to the entity (the one taken through
> bfq_get_entity).
> + * If the entity is in service, then __bfq_bfqd_reset_in_service will
> take care
> + * of putting the reference when the entity finally stops being
> served.
> */
> static void bfq_forget_entity(struct bfq_service_tree *st,
> struct bfq_entity *entity,
> @@ -1626,9 +1622,16 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data
> *bfqd)
> * execute the final step: reset in_service_entity along the
> * path from entity to the root.
> */
> - for_each_entity(entity)
> + for_each_entity(entity) {
> entity->sched_data->in_service_entity = NULL;
> -
> + /*
> + * Release bfq_groups reference if it was not released in
> + * bfq_forget_entity, which was taken in bfq_get_entity.
> + */
> + if (!bfq_entity_to_bfqq(entity) && !entity->on_st_or_in_serv)
> + bfqg_and_blkg_put(container_of(entity, struct bfq_group,
> + entity));
> + }
> /*
> * in_serv_entity is no longer in service, so, if it is in no
> * service tree either, then release the service reference to
As reported by one of my customers, this patch causes the following
crash: [1]
[1]
http://pix.academ.info/images/img/2020/07/26/52d097c02b6061657443bba92de75e8a.jpg
--
Oleksandr Natalenko (post-factum)
commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
introduce leak forbfq_group and blkcg_gq objects because of get/put
imbalance. See trace balow:
-> blkg_alloc
-> bfq_pq_alloc
-> bfqg_get (+1)
->bfq_activate_bfqq
->bfq_activate_requeue_entity
-> __bfq_activate_entity
->bfq_get_entity
->bfqg_and_blkg_get (+1) <==== : Note1
->bfq_del_bfqq_busy
->bfq_deactivate_entity+0x53/0xc0 [bfq]
->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
-> bfq_forget_entity(is_in_service = true)
entity->on_st_or_in_serv = false <=== :Note2
if (is_in_service)
return; ==> do not touch reference
-> blkcg_css_offline
-> blkcg_destroy_blkgs
-> blkg_destroy
-> bfq_pd_offline
-> __bfq_deactivate_entity
if (!entity->on_st_or_in_serv) /* true, because (Note2)
return false;
-> bfq_pd_free
-> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
So bfq_group and blkcg_gq will leak forever, see test-case below.
We should drop group reference once it finaly removed from service
inside __bfq_bfqd_reset_in_service, as we do with queue entities.
##TESTCASE_BEGIN:
#!/bin/bash
max_iters=${1:-100}
#prep cgroup mounts
mount -t tmpfs cgroup_root /sys/fs/cgroup
mkdir /sys/fs/cgroup/blkio
mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
# Prepare blkdev
grep blkio /proc/cgroups
truncate -s 1M img
losetup /dev/loop0 img
echo bfq > /sys/block/loop0/queue/scheduler
grep blkio /proc/cgroups
for ((i=0;i<max_iters;i++))
do
mkdir -p /sys/fs/cgroup/blkio/a
echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
rmdir /sys/fs/cgroup/blkio/a
grep blkio /proc/cgroups
done
##TESTCASE_END:
changes since v2:
- use safe iteration macro to prevent freed object dereference.
Signed-off-by: Dmitry Monakhov <[email protected]>
---
block/bfq-wf2q.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 8113138..13b417a 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -635,14 +635,10 @@ static void bfq_idle_insert(struct bfq_service_tree *st,
* @entity: the entity being removed.
* @is_in_service: true if entity is currently the in-service entity.
*
- * Forget everything about @entity. In addition, if entity represents
- * a queue, and the latter is not in service, then release the service
- * reference to the queue (the one taken through bfq_get_entity). In
- * fact, in this case, there is really no more service reference to
- * the queue, as the latter is also outside any service tree. If,
- * instead, the queue is in service, then __bfq_bfqd_reset_in_service
- * will take care of putting the reference when the queue finally
- * stops being served.
+ * Forget everything about @entity. If entity is not in service, then release
+ * the service reference to the entity (the one taken through bfq_get_entity).
+ * If the entity is in service, then __bfq_bfqd_reset_in_service will take care
+ * of putting the reference when the entity finally stops being served.
*/
static void bfq_forget_entity(struct bfq_service_tree *st,
struct bfq_entity *entity,
@@ -1615,6 +1611,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
struct bfq_entity *entity = in_serv_entity;
+ struct bfq_entity *parent = NULL;
bfq_clear_bfqq_wait_request(in_serv_bfqq);
hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
@@ -1626,9 +1623,16 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
* execute the final step: reset in_service_entity along the
* path from entity to the root.
*/
- for_each_entity(entity)
+ for_each_entity_safe(entity, parent) {
entity->sched_data->in_service_entity = NULL;
-
+ /*
+ * Release bfq_groups reference if it was not released in
+ * bfq_forget_entity, which was taken in bfq_get_entity.
+ */
+ if (!bfq_entity_to_bfqq(entity) && !entity->on_st_or_in_serv)
+ bfqg_and_blkg_put(container_of(entity, struct bfq_group,
+ entity));
+ }
/*
* in_serv_entity is no longer in service, so, if it is in no
* service tree either, then release the service reference to
--
2.7.4
On 27.07.2020 10:01, Dmitry Monakhov wrote:
> commit db37a34c563b ("block, bfq: get a ref to a group when adding it
> to a service tree")
> introduce leak forbfq_group and blkcg_gq objects because of get/put
> imbalance. See trace balow:
> -> blkg_alloc
> -> bfq_pq_alloc
> -> bfqg_get (+1)
> ->bfq_activate_bfqq
> ->bfq_activate_requeue_entity
> -> __bfq_activate_entity
> ->bfq_get_entity
> ->bfqg_and_blkg_get (+1) <==== : Note1
> ->bfq_del_bfqq_busy
> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
> -> bfq_forget_entity(is_in_service = true)
> entity->on_st_or_in_serv = false <=== :Note2
> if (is_in_service)
> return; ==> do not touch reference
> -> blkcg_css_offline
> -> blkcg_destroy_blkgs
> -> blkg_destroy
> -> bfq_pd_offline
> -> __bfq_deactivate_entity
> if (!entity->on_st_or_in_serv) /* true, because (Note2)
> return false;
> -> bfq_pd_free
> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
> So bfq_group and blkcg_gq will leak forever, see test-case below.
>
> We should drop group reference once it finaly removed from service
> inside __bfq_bfqd_reset_in_service, as we do with queue entities.
>
> ##TESTCASE_BEGIN:
> #!/bin/bash
>
> max_iters=${1:-100}
> #prep cgroup mounts
> mount -t tmpfs cgroup_root /sys/fs/cgroup
> mkdir /sys/fs/cgroup/blkio
> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>
> # Prepare blkdev
> grep blkio /proc/cgroups
> truncate -s 1M img
> losetup /dev/loop0 img
> echo bfq > /sys/block/loop0/queue/scheduler
>
> grep blkio /proc/cgroups
> for ((i=0;i<max_iters;i++))
> do
> mkdir -p /sys/fs/cgroup/blkio/a
> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2>
> /dev/null
> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
> rmdir /sys/fs/cgroup/blkio/a
> grep blkio /proc/cgroups
> done
> ##TESTCASE_END:
>
> changes since v2:
> - use safe iteration macro to prevent freed object dereference.
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> block/bfq-wf2q.c | 24 ++++++++++++++----------
> 1 file changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
> index 8113138..13b417a 100644
> --- a/block/bfq-wf2q.c
> +++ b/block/bfq-wf2q.c
> @@ -635,14 +635,10 @@ static void bfq_idle_insert(struct
> bfq_service_tree *st,
> * @entity: the entity being removed.
> * @is_in_service: true if entity is currently the in-service entity.
> *
> - * Forget everything about @entity. In addition, if entity represents
> - * a queue, and the latter is not in service, then release the service
> - * reference to the queue (the one taken through bfq_get_entity). In
> - * fact, in this case, there is really no more service reference to
> - * the queue, as the latter is also outside any service tree. If,
> - * instead, the queue is in service, then __bfq_bfqd_reset_in_service
> - * will take care of putting the reference when the queue finally
> - * stops being served.
> + * Forget everything about @entity. If entity is not in service, then
> release
> + * the service reference to the entity (the one taken through
> bfq_get_entity).
> + * If the entity is in service, then __bfq_bfqd_reset_in_service will
> take care
> + * of putting the reference when the entity finally stops being
> served.
> */
> static void bfq_forget_entity(struct bfq_service_tree *st,
> struct bfq_entity *entity,
> @@ -1615,6 +1611,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data
> *bfqd)
> struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
> struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
> struct bfq_entity *entity = in_serv_entity;
> + struct bfq_entity *parent = NULL;
>
> bfq_clear_bfqq_wait_request(in_serv_bfqq);
> hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
> @@ -1626,9 +1623,16 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data
> *bfqd)
> * execute the final step: reset in_service_entity along the
> * path from entity to the root.
> */
> - for_each_entity(entity)
> + for_each_entity_safe(entity, parent) {
> entity->sched_data->in_service_entity = NULL;
> -
> + /*
> + * Release bfq_groups reference if it was not released in
> + * bfq_forget_entity, which was taken in bfq_get_entity.
> + */
> + if (!bfq_entity_to_bfqq(entity) && !entity->on_st_or_in_serv)
> + bfqg_and_blkg_put(container_of(entity, struct bfq_group,
> + entity));
> + }
> /*
> * in_serv_entity is no longer in service, so, if it is in no
> * service tree either, then release the service reference to
Reportedly, this one crashes too [1], and this happens even earlier than
with v2.
[1]
http://pix.academ.info/images/img/2020/07/27/91f656514707728730b0b67f8c9f4a04.jpg
--
Oleksandr Natalenko (post-factum)
Paolo Valente <[email protected]> writes:
>> Il giorno 9 lug 2020, alle ore 10:19, Dmitry Monakhov <[email protected]> ha scritto:
>>
>> Paolo Valente <[email protected]> writes:
>>
>>>> Il giorno 8 lug 2020, alle ore 19:48, Dmitry Monakhov <[email protected]> ha scritto:
>>>>
>>>> Paolo Valente <[email protected]> writes:
>>>>
>>>>> Hi,
>>>>> sorry for the delay. The commit you propose to drop fix the issues
>>>>> reported in [1].
>>>>>
>>>>> Such a commit does introduce the leak that you report (thank you for
>>>>> spotting it). Yet, according to the threads mentioned in [1],
>>>>> dropping that commit would take us back to those issues.
>>>>>
>>>>> Maybe the solution is to fix the unbalance that you spotted?
>>>> I'm not quite shure that do I understand which bug was addressed for commit db37a34c563b.
>>>> AFAIU both bugs mentioned in original patchset was fixed by:
>>>> 478de3380 ("block, bfq: deschedule empty bfq_queues not referred by any proces")
>>>> f718b0932 ( block, bfq: do not plug I/O for bfq_queues with no proc refs)"
>>>>
>>>> So I review commit db37a34c563b as independent one.
>>>> It introduces extra reference for bfq_groups via bfqg_and_blkg_get(),
>>>> but do we actually need it here?
>>>>
>>>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>>>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>>>> other bfq_queue objects are owned by corresponding blkcg from bfq_pd_alloc()
>>>> So bfq_queue can not disappear under us.
>>>>
>>>
>>> You are right, but incomplete. No extra ref is needed for an entity
>>> that represents a bfq_queue. And this consideration mistook me before
>>> I realized that that commit was needed. The problem is that an entity
>>> may also represent a group of entities. In that case no reference is
>>> taken through any bfq_queue. The commit you want to remove takes this
>>> missing reference.
>> Sorry, It looks like I've mistyped sentance above, I ment to say bfq_group.
>> So here is my statement corrected:
>> #IF CONFIG_BFQ_GROUP_IOSCHED is enabled:
>> bfqd->root_group is holded by bfqd from bfq_init_queue()
>> other *bfq_group* objects are owned by corresponding blkcg, reference get from bfq_pd_alloc()
>> So *bfq_group* can not disappear under us.
>>
>> So no extra reference is required for entity represents bfq_group. Commit is not required.
>
> No, the entity may remain alive and on some tree after bfq_pd_offline has been invoked.
But bfq_group's entity stil holded by child's entity from here,
-> bfq_init_entity()
->bfqg_and_blkg_get(bfqg);
->entity->parent = bfqg->my_entity
-> bfq_put_queue(bfqq)
FINAL_PUT
->bfqg_and_blkg_put(bfqq_group(bfqq))
->kmem_cache_free(bfq_pool, bfqq);
so group can not just disappear us while tree is in service. Please corect me if I'm wrong.
BTW, I've send new version with updated description here [1]
Footnotes:
[1] https://lore.kernel.org/linux-block/[email protected]
>
> Paolo
>
>>>
>>> Paolo
>>>
>>>> #IF CONFIG_BFQ_GROUP_IOSCHED is disabled:
>>>> we have only one bfqd->root_group object which allocated from bfq_create_group_hierarch()
>>>> and bfqg_and_blkg_get() bfqg_and_blkg_put() are noop
>>>>
>>>> Resume: in both cases extra reference is not required, so I continue to
>>>> insist that we should revert commit db37a34c563b because it tries to
>>>> solve a non existing issue, but introduce the real one.
>>>>
>>>> Please correct me if I'm wrong.
>>>>>
>>>>> I'll check it ASAP, unless you do it before me.
>>>>>
>>>>> Thanks,
>>>>> Paolo
>>>>>
>>>>> [1] https://lkml.org/lkml/2020/1/31/94
>>>>>
>>>>>> Il giorno 2 lug 2020, alle ore 12:57, Dmitry Monakhov <[email protected]> ha scritto:
>>>>>>
>>>>>> commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>>>> introduce leak forbfq_group and blkcg_gq objects because of get/put
>>>>>> imbalance. See trace balow:
>>>>>> -> blkg_alloc
>>>>>> -> bfq_pq_alloc
>>>>>> -> bfqg_get (+1)
>>>>>> ->bfq_activate_bfqq
>>>>>> ->bfq_activate_requeue_entity
>>>>>> -> __bfq_activate_entity
>>>>>> ->bfq_get_entity
>>>> ->> ->bfqg_and_blkg_get (+1) <==== : Note1
>>>>>> ->bfq_del_bfqq_busy
>>>>>> ->bfq_deactivate_entity+0x53/0xc0 [bfq]
>>>>>> ->__bfq_deactivate_entity+0x1b8/0x210 [bfq]
>>>>>> -> bfq_forget_entity(is_in_service = true)
>>>>>> entity->on_st_or_in_serv = false <=== :Note2
>>>>>> if (is_in_service)
>>>>>> return; ==> do not touch reference
>>>>>> -> blkcg_css_offline
>>>>>> -> blkcg_destroy_blkgs
>>>>>> -> blkg_destroy
>>>>>> -> bfq_pd_offline
>>>>>> -> __bfq_deactivate_entity
>>>>>> if (!entity->on_st_or_in_serv) /* true, because (Note2)
>>>>>> return false;
>>>>>> -> bfq_pd_free
>>>>>> -> bfqg_put() (-1, byt bfqg->ref == 2) because of (Note2)
>>>>>> So bfq_group and blkcg_gq will leak forever, see test-case below.
>>>>>> If fact bfq_group objects reference counting are quite different
>>>>>> from bfq_queue. bfq_groups object are referenced by blkcg_gq via
>>>>>> blkg_policy_data pointer, so neither nor blkg_get() neither bfqg_get
>>>>>> required here.
>>>>>>
>>>>>>
>>>>>> This patch drop commit db37a34c563b ("block, bfq: get a ref to a group when adding it to a service tree")
>>>>>> and add corresponding comment.
>>>>>>
>>>>>> ##TESTCASE_BEGIN:
>>>>>> #!/bin/bash
>>>>>>
>>>>>> max_iters=${1:-100}
>>>>>> #prep cgroup mounts
>>>>>> mount -t tmpfs cgroup_root /sys/fs/cgroup
>>>>>> mkdir /sys/fs/cgroup/blkio
>>>>>> mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
>>>>>>
>>>>>> # Prepare blkdev
>>>>>> grep blkio /proc/cgroups
>>>>>> truncate -s 1M img
>>>>>> losetup /dev/loop0 img
>>>>>> echo bfq > /sys/block/loop0/queue/scheduler
>>>>>>
>>>>>> grep blkio /proc/cgroups
>>>>>> for ((i=0;i<max_iters;i++))
>>>>>> do
>>>>>> mkdir -p /sys/fs/cgroup/blkio/a
>>>>>> echo 0 > /sys/fs/cgroup/blkio/a/cgroup.procs
>>>>>> dd if=/dev/loop0 bs=4k count=1 of=/dev/null iflag=direct 2> /dev/null
>>>>>> echo 0 > /sys/fs/cgroup/blkio/cgroup.procs
>>>>>> rmdir /sys/fs/cgroup/blkio/a
>>>>>> grep blkio /proc/cgroups
>>>>>> done
>>>>>> ##TESTCASE_END:
>>>>>>
>>>>>> Signed-off-by: Dmitry Monakhov <[email protected]>
>>>>>> ---
>>>>>> block/bfq-cgroup.c | 2 +-
>>>>>> block/bfq-iosched.h | 1 -
>>>>>> block/bfq-wf2q.c | 15 +++++----------
>>>>>> 3 files changed, 6 insertions(+), 12 deletions(-)
>>>>>>
>>>>>> diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
>>>>>> index 68882b9..b791e20 100644
>>>>>> --- a/block/bfq-cgroup.c
>>>>>> +++ b/block/bfq-cgroup.c
>>>>>> @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg)
>>>>>> kfree(bfqg);
>>>>>> }
>>>>>>
>>>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>>>> +static void bfqg_and_blkg_get(struct bfq_group *bfqg)
>>>>>> {
>>>>>> /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */
>>>>>> bfqg_get(bfqg);
>>>>>> diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
>>>>>> index cd224aa..7038952 100644
>>>>>> --- a/block/bfq-iosched.h
>>>>>> +++ b/block/bfq-iosched.h
>>>>>> @@ -986,7 +986,6 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd,
>>>>>> struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg);
>>>>>> struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
>>>>>> struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node);
>>>>>> -void bfqg_and_blkg_get(struct bfq_group *bfqg);
>>>>>> void bfqg_and_blkg_put(struct bfq_group *bfqg);
>>>>>>
>>>>>> #ifdef CONFIG_BFQ_GROUP_IOSCHED
>>>>>> diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
>>>>>> index 34ad095..6a363bb 100644
>>>>>> --- a/block/bfq-wf2q.c
>>>>>> +++ b/block/bfq-wf2q.c
>>>>>> @@ -529,13 +529,14 @@ static void bfq_get_entity(struct bfq_entity *entity)
>>>>>> {
>>>>>> struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity);
>>>>>>
>>>>>> + /* Grab reference only for bfq_queue's objects, bfq_group ones
>>>>>> + * are owned by blkcg_gq
>>>>>> + */
>>>>>> if (bfqq) {
>>>>>> bfqq->ref++;
>>>>>> bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d",
>>>>>> bfqq, bfqq->ref);
>>>>>> - } else
>>>>>> - bfqg_and_blkg_get(container_of(entity, struct bfq_group,
>>>>>> - entity));
>>>>>> + }
>>>>>> }
>>>>>>
>>>>>> /**
>>>>>> @@ -649,14 +650,8 @@ static void bfq_forget_entity(struct bfq_service_tree *st,
>>>>>>
>>>>>> entity->on_st_or_in_serv = false;
>>>>>> st->wsum -= entity->weight;
>>>>>> - if (is_in_service)
>>>>>> - return;
>>>>>> -
>>>>>> - if (bfqq)
>>>>>> + if (bfqq && !is_in_service)
>>>>>> bfq_put_queue(bfqq);
>>>>>> - else
>>>>>> - bfqg_and_blkg_put(container_of(entity, struct bfq_group,
>>>>>> - entity));
>>>>>> }
>>>>>>
>>>>>> /**
>>>>>> --
>>>>>> 2.7.4