2024-03-01 09:57:59

by Wenchao Hao

[permalink] [raw]
Subject: [PATCH] RDMA/restrack: Fix potential invalid address access

struct rdma_restrack_entry's kern_name is set to KBUILD_MODNAME
in ib_create_cq(). If the module exits without deleting this
rdma_restrack_entry, rdma_restrack_clean() accesses an invalid
address when it prints the owner of this rdma_restrack_entry.

Fix this issue by using kstrdup() to set rdma_restrack_entry's
kern_name.

Signed-off-by: Wenchao Hao <[email protected]>
---
drivers/infiniband/core/restrack.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 01a499a8b88d..6605011c4edc 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -177,7 +177,8 @@ static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
{
if (caller) {
- res->kern_name = caller;
+ kfree(res->kern_name);
+ res->kern_name = kstrdup(caller, GFP_KERNEL);
return;
}

@@ -195,7 +196,7 @@ void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
const struct rdma_restrack_entry *parent)
{
if (rdma_is_kernel_res(parent))
- dst->kern_name = parent->kern_name;
+ dst->kern_name = kstrdup(parent->kern_name, GFP_KERNEL);
else
rdma_restrack_attach_task(dst, parent->task);
}
@@ -306,6 +307,7 @@ static void restrack_release(struct kref *kref)
put_task_struct(res->task);
res->task = NULL;
}
+ kfree(res->kern_name);
complete(&res->comp);
}

--
2.32.0



2024-03-03 12:57:52

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
> struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
> in ib_create_cq(), while if the module exited but forgot del this
> rdma_restrack_entry, it would cause a invalid address access in
> rdma_restrack_clean() when print the owner of this rdma_restrack_entry.

How is it possible to exit owner module without cleaning the resources?

Thanks

>
> Fix this issue by using kstrdup() to set rdma_restrack_entry's
> kern_name.
>
> Signed-off-by: Wenchao Hao <[email protected]>
> ---
> drivers/infiniband/core/restrack.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 01a499a8b88d..6605011c4edc 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -177,7 +177,8 @@ static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
> void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
> {
> if (caller) {
> - res->kern_name = caller;
> + kfree(res->kern_name);
> + res->kern_name = kstrdup(caller, GFP_KERNEL);
> return;
> }
>
> @@ -195,7 +196,7 @@ void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
> const struct rdma_restrack_entry *parent)
> {
> if (rdma_is_kernel_res(parent))
> - dst->kern_name = parent->kern_name;
> + dst->kern_name = kstrdup(parent->kern_name, GFP_KERNEL);
> else
> rdma_restrack_attach_task(dst, parent->task);
> }
> @@ -306,6 +307,7 @@ static void restrack_release(struct kref *kref)
> put_task_struct(res->task);
> res->task = NULL;
> }
> + kfree(res->kern_name);
> complete(&res->comp);
> }
>
> --
> 2.32.0
>

2024-03-04 03:21:51

by Wenchao Hao

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On 2024/3/3 20:57, Leon Romanovsky wrote:
> On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
>> struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
>> in ib_create_cq(), while if the module exited but forgot del this
>> rdma_restrack_entry, it would cause a invalid address access in
>> rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
>
> How is it possible to exit owner module without cleaning the resources?
>

I hit this issue with one of our products, whose team develops its own kernel
modules based on ib_core; the exit code of those modules has terrible logic
that causes a resource leak.

Of course it's a bug in the module, which did not clean up its resources on
exit, but I think ib_core should avoid accessing the memory of other modules
directly, to provide better stability.

What's more, judging from the message printed in rdma_restrack_clean(),
"restrack: %s %s object allocated by %s is not freed ...", the code seems
designed for exactly this scenario, where a buggy client leaks resources and
needs to be alerted, so we should not panic while printing this log.

> Thanks
>
>>
>> Fix this issue by using kstrdup() to set rdma_restrack_entry's
>> kern_name.
>>
>> Signed-off-by: Wenchao Hao <[email protected]>
>> ---
>> drivers/infiniband/core/restrack.c | 6 ++++--
>> 1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
>> index 01a499a8b88d..6605011c4edc 100644
>> --- a/drivers/infiniband/core/restrack.c
>> +++ b/drivers/infiniband/core/restrack.c
>> @@ -177,7 +177,8 @@ static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
>> void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
>> {
>> if (caller) {
>> - res->kern_name = caller;
>> + kfree(res->kern_name);
>> + res->kern_name = kstrdup(caller, GFP_KERNEL);
>> return;
>> }
>>
>> @@ -195,7 +196,7 @@ void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
>> const struct rdma_restrack_entry *parent)
>> {
>> if (rdma_is_kernel_res(parent))
>> - dst->kern_name = parent->kern_name;
>> + dst->kern_name = kstrdup(parent->kern_name, GFP_KERNEL);
>> else
>> rdma_restrack_attach_task(dst, parent->task);
>> }
>> @@ -306,6 +307,7 @@ static void restrack_release(struct kref *kref)
>> put_task_struct(res->task);
>> res->task = NULL;
>> }
>> + kfree(res->kern_name);
>> complete(&res->comp);
>> }
>>
>> --
>> 2.32.0
>>


2024-03-04 07:36:03

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On Mon, Mar 04, 2024 at 11:21:19AM +0800, Wenchao Hao wrote:
> On 2024/3/3 20:57, Leon Romanovsky wrote:
> > On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
> > > struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
> > > in ib_create_cq(), while if the module exited but forgot del this
> > > rdma_restrack_entry, it would cause a invalid address access in
> > > rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
> >
> > How is it possible to exit owner module without cleaning the resources?
> >
>
> I meet this issue with one of our product who develop their owner kernel
> modules based on ib_core, and there are terrible logic with the exit
> code which cause resource leak.
>
> Of curse it's bug of module who did not clear resource when exit, but
> I think ib_core should avoid accessing memory of other modules directly
> to provides better stability.
>
> What's more, from the context of rdma_restrack_clean() when print
> "restack: %s %s object allocated by %s is not freed ...", it seems
> designed for the above scene where client has bug to alerts there
> are resource leak, so we should not panic on this log print.

Can you please share the kernel panic?

Thanks

2024-03-04 13:41:38

by Wenchao Hao

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On 2024/3/4 15:35, Leon Romanovsky wrote:
> On Mon, Mar 04, 2024 at 11:21:19AM +0800, Wenchao Hao wrote:
>> On 2024/3/3 20:57, Leon Romanovsky wrote:
>>> On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
>>>> struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
>>>> in ib_create_cq(), while if the module exited but forgot del this
>>>> rdma_restrack_entry, it would cause a invalid address access in
>>>> rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
>>>
>>> How is it possible to exit owner module without cleaning the resources?
>>>
>>
>> I meet this issue with one of our product who develop their owner kernel
>> modules based on ib_core, and there are terrible logic with the exit
>> code which cause resource leak.
>>
>> Of curse it's bug of module who did not clear resource when exit, but
>> I think ib_core should avoid accessing memory of other modules directly
>> to provides better stability.
>>
>> What's more, from the context of rdma_restrack_clean() when print
>> "restack: %s %s object allocated by %s is not freed ...", it seems
>> designed for the above scene where client has bug to alerts there
>> are resource leak, so we should not panic on this log print.
>
> Can you please share the kernel panic?
>

Sorry, no stack trace or panic info was recorded. This is because of
another issue, with "printk": it seems printk causes a deadlock
when it accesses an invalid address on our kernel.

Actually, I found this issue with ftrace/kprobe rather than printk, which is
why it took me a long time to track it down.

BTW, I am not an RDMA developer. After finding the issue, I thought it would be
better to harden the code, so I sent this patch; the patch has been tested in
the original scenario.

> Thanks


2024-03-07 09:13:44

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
> struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
> in ib_create_cq(), while if the module exited but forgot del this
> rdma_restrack_entry, it would cause a invalid address access in
> rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
>
> Fix this issue by using kstrdup() to set rdma_restrack_entry's
> kern_name.

I don't like kstrdup() and would like to avoid it. This rdma_restrack_clean()
is purely for debugging, and for a long time all upstream ULPs have been "clean"
of these not-released bugs.

So my suggestion is to delete that part of code and it will be good enough.

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 01a499a8b88d..27727138f188 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -60,47 +60,14 @@ static const char *type2str(enum rdma_restrack_type type)
void rdma_restrack_clean(struct ib_device *dev)
{
struct rdma_restrack_root *rt = dev->res;
- struct rdma_restrack_entry *e;
- char buf[TASK_COMM_LEN];
- bool found = false;
- const char *owner;
int i;

for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
struct xarray *xa = &dev->res[i].xa;

- if (!xa_empty(xa)) {
- unsigned long index;
-
- if (!found) {
- pr_err("restrack: %s", CUT_HERE);
- dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
- }
- xa_for_each(xa, index, e) {
- if (rdma_is_kernel_res(e)) {
- owner = e->kern_name;
- } else {
- /*
- * There is no need to call get_task_struct here,
- * because we can be here only if there are more
- * get_task_struct() call than put_task_struct().
- */
- get_task_comm(buf, e->task);
- owner = buf;
- }
-
- pr_err("restrack: %s %s object allocated by %s is not freed\n",
- rdma_is_kernel_res(e) ? "Kernel" :
- "User",
- type2str(e->type), owner);
- }
- found = true;
- }
+ WARN_ON(!xa_empty(xa));
xa_destroy(xa);
}
- if (found)
- pr_err("restrack: %s", CUT_HERE);
-
kfree(rt);
}


>
> Signed-off-by: Wenchao Hao <[email protected]>
> ---
> drivers/infiniband/core/restrack.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 01a499a8b88d..6605011c4edc 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -177,7 +177,8 @@ static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
> void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
> {
> if (caller) {
> - res->kern_name = caller;
> + kfree(res->kern_name);
> + res->kern_name = kstrdup(caller, GFP_KERNEL);
> return;
> }
>
> @@ -195,7 +196,7 @@ void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
> const struct rdma_restrack_entry *parent)
> {
> if (rdma_is_kernel_res(parent))
> - dst->kern_name = parent->kern_name;
> + dst->kern_name = kstrdup(parent->kern_name, GFP_KERNEL);
> else
> rdma_restrack_attach_task(dst, parent->task);
> }
> @@ -306,6 +307,7 @@ static void restrack_release(struct kref *kref)
> put_task_struct(res->task);
> res->task = NULL;
> }
> + kfree(res->kern_name);
> complete(&res->comp);
> }
>
> --
> 2.32.0
>

2024-03-07 14:18:28

by Wenchao Hao

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On 2024/3/7 17:13, Leon Romanovsky wrote:
> On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
>> struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
>> in ib_create_cq(), while if the module exited but forgot del this
>> rdma_restrack_entry, it would cause a invalid address access in
>> rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
>>
>> Fix this issue by using kstrdup() to set rdma_restrack_entry's
>> kern_name.
>
> I don't like kstrdup() and would like to avoid it, this rdma_restrack_clean()
> is purely for debugging and for a long time all upstream ULPs are "clean"
> from these not-released bugs.
>
> So my suggestion is to delete that part of code and it will be good enough.
>

It's OK for me. When I found this issue, my first plan was to remove the code, but
I did not know why this code was added, so I decided to use kstrdup() to work around
it.

Then what should we do next? Do I need to post another patch, or will you fix it yourself?

> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
> index 01a499a8b88d..27727138f188 100644
> --- a/drivers/infiniband/core/restrack.c
> +++ b/drivers/infiniband/core/restrack.c
> @@ -60,47 +60,14 @@ static const char *type2str(enum rdma_restrack_type type)
> void rdma_restrack_clean(struct ib_device *dev)
> {
> struct rdma_restrack_root *rt = dev->res;
> - struct rdma_restrack_entry *e;
> - char buf[TASK_COMM_LEN];
> - bool found = false;
> - const char *owner;
> int i;
>
> for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
> struct xarray *xa = &dev->res[i].xa;
>
> - if (!xa_empty(xa)) {
> - unsigned long index;
> -
> - if (!found) {
> - pr_err("restrack: %s", CUT_HERE);
> - dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
> - }
> - xa_for_each(xa, index, e) {
> - if (rdma_is_kernel_res(e)) {
> - owner = e->kern_name;
> - } else {
> - /*
> - * There is no need to call get_task_struct here,
> - * because we can be here only if there are more
> - * get_task_struct() call than put_task_struct().
> - */
> - get_task_comm(buf, e->task);
> - owner = buf;
> - }
> -
> - pr_err("restrack: %s %s object allocated by %s is not freed\n",
> - rdma_is_kernel_res(e) ? "Kernel" :
> - "User",
> - type2str(e->type), owner);
> - }
> - found = true;
> - }
> + WARN_ON(!xa_empty(xa));
> xa_destroy(xa);
> }
> - if (found)
> - pr_err("restrack: %s", CUT_HERE);
> -
> kfree(rt);
> }
>
>
>>
>> Signed-off-by: Wenchao Hao <[email protected]>
>> ---
>> drivers/infiniband/core/restrack.c | 6 ++++--
>> 1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
>> index 01a499a8b88d..6605011c4edc 100644
>> --- a/drivers/infiniband/core/restrack.c
>> +++ b/drivers/infiniband/core/restrack.c
>> @@ -177,7 +177,8 @@ static void rdma_restrack_attach_task(struct rdma_restrack_entry *res,
>> void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller)
>> {
>> if (caller) {
>> - res->kern_name = caller;
>> + kfree(res->kern_name);
>> + res->kern_name = kstrdup(caller, GFP_KERNEL);
>> return;
>> }
>>
>> @@ -195,7 +196,7 @@ void rdma_restrack_parent_name(struct rdma_restrack_entry *dst,
>> const struct rdma_restrack_entry *parent)
>> {
>> if (rdma_is_kernel_res(parent))
>> - dst->kern_name = parent->kern_name;
>> + dst->kern_name = kstrdup(parent->kern_name, GFP_KERNEL);
>> else
>> rdma_restrack_attach_task(dst, parent->task);
>> }
>> @@ -306,6 +307,7 @@ static void restrack_release(struct kref *kref)
>> put_task_struct(res->task);
>> res->task = NULL;
>> }
>> + kfree(res->kern_name);
>> complete(&res->comp);
>> }
>>
>> --
>> 2.32.0
>>


2024-03-10 09:25:07

by Leon Romanovsky

[permalink] [raw]
Subject: Re: [PATCH] RDMA/restrack: Fix potential invalid address access

On Thu, Mar 07, 2024 at 10:17:59PM +0800, Wenchao Hao wrote:
> On 2024/3/7 17:13, Leon Romanovsky wrote:
> > On Fri, Mar 01, 2024 at 05:55:15PM +0800, Wenchao Hao wrote:
> > > struct rdma_restrack_entry's kern_name was set to KBUILD_MODNAME
> > > in ib_create_cq(), while if the module exited but forgot del this
> > > rdma_restrack_entry, it would cause a invalid address access in
> > > rdma_restrack_clean() when print the owner of this rdma_restrack_entry.
> > >
> > > Fix this issue by using kstrdup() to set rdma_restrack_entry's
> > > kern_name.
> >
> > I don't like kstrdup() and would like to avoid it, this rdma_restrack_clean()
> > is purely for debugging and for a long time all upstream ULPs are "clean"
> > from these not-released bugs.
> >
> > So my suggestion is to delete that part of code and it will be good enough.
> >
>
> It's OK for me. When found this issue, my first plan is to remove the code, but
> I do not know why these code is added, so decide to using kstrdup() to work around
> it.

This code helped us to find one forgotten PD release in one of the ULPs. But it is not needed anymore.

>
> Then what to do next? Do I need to post another patch or you would fix it by yourself?

Please send new patch, thanks.