2010-12-20 17:12:41

by Felipe Contreras

Subject: [PATCH] staging: tidspbridge: protect dmm_map properly

We need to protect not only the dmm_map list, but the individual
map_obj's, otherwise, we might be building the scatter-gather list with
garbage. So, use the existing proc_lock for that.

I observed race conditions which caused kernel panics while running
stress tests. This patch fixes those.

Signed-off-by: Felipe Contreras <[email protected]>
---
drivers/staging/tidspbridge/rmgr/proc.c | 18 ++++++++++++++----
1 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
index b47d7aa..21052e3 100644
--- a/drivers/staging/tidspbridge/rmgr/proc.c
+++ b/drivers/staging/tidspbridge/rmgr/proc.c
@@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
                                                        (u32)pmpu_addr,
                                                        ul_size, dir);

+       mutex_lock(&proc_lock);
+
        /* find requested memory are in cached mapping information */
        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
        if (!map_obj) {
                pr_err("%s: find_containing_mapping failed\n", __func__);
                status = -EFAULT;
-               goto err_out;
+               goto no_map;
        }

        if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -795,6 +797,8 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
                status = -EFAULT;
        }

+no_map:
+       mutex_unlock(&proc_lock);
 err_out:

        return status;
@@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
                                                        (u32)pmpu_addr,
                                                        ul_size, dir);

+       mutex_lock(&proc_lock);
+
        /* find requested memory are in cached mapping information */
        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
        if (!map_obj) {
                pr_err("%s: find_containing_mapping failed\n", __func__);
                status = -EFAULT;
-               goto err_out;
+               goto no_map;
        }

        if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
                goto err_out;
        }

+no_map:
+       mutex_unlock(&proc_lock);
 err_out:
        return status;
 }
@@ -1726,9 +1734,8 @@ int proc_un_map(void *hprocessor, void *map_addr,
                    (p_proc_object->hbridge_context, va_align, size_align);
        }

-       mutex_unlock(&proc_lock);
        if (status)
-               goto func_end;
+               goto unmap_failed;

        /*
         * A successful unmap should be followed by removal of map_obj
@@ -1737,6 +1744,9 @@ int proc_un_map(void *hprocessor, void *map_addr,
         */
        remove_mapping_information(pr_ctxt, (u32) map_addr, size_align);

+unmap_failed:
+       mutex_unlock(&proc_lock);
+
 func_end:
        dev_dbg(bridge, "%s: hprocessor: 0x%p map_addr: 0x%p status: 0x%x\n",
                __func__, hprocessor, map_addr, status);
--
1.7.3.3
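
For readers who just want the net effect of the hunks, the locked fast path
ends up shaped roughly like the sketch below. This is a simplified
reconstruction, not the literal driver code: the debug print is dropped, and
how pr_ctxt is obtained from hprocessor is assumed rather than copied from
the source.

/* Sketch: after this patch, proc_lock covers both the dmm_map list lookup
 * and the subsequent use of the returned map_obj, so a concurrent
 * proc_un_map() cannot free the object while the scatter-gather list is
 * being built from it.
 */
int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
                   enum dma_data_direction dir)
{
        int status = 0;
        struct process_context *pr_ctxt = hprocessor;   /* assumption: hprocessor is the process context */
        struct dmm_map_object *map_obj;

        mutex_lock(&proc_lock);

        /* lookup and use of map_obj both happen under proc_lock */
        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
        if (!map_obj) {
                pr_err("%s: find_containing_mapping failed\n", __func__);
                status = -EFAULT;
                goto no_map;
        }

        if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir))
                status = -EFAULT;

no_map:
        mutex_unlock(&proc_lock);
        return status;
}

proc_end_dma() follows the same pattern with memory_regain_ownership(), and
in proc_un_map() the change is the mirror image: the unlock moves after
remove_mapping_information(), so the map_obj is taken off the list and freed
only while the lock is held.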


2010-12-20 18:30:29

by Kanigeri, Hari

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

Felipe,

On Mon, Dec 20, 2010 at 11:12 AM, Felipe Contreras
<[email protected]> wrote:
> We need to protect not only the dmm_map list, but the individual
> map_obj's, otherwise, we might be building the scatter-gather list with
> garbage. So, use the existing proc_lock for that.
>
> I observed race conditions which caused kernel panics while running
> stress tests. This patch fixes those.
>
> Signed-off-by: Felipe Contreras <[email protected]>
> ---
>  drivers/staging/tidspbridge/rmgr/proc.c |   18 ++++++++++++++----
>  1 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
> index b47d7aa..21052e3 100644
> --- a/drivers/staging/tidspbridge/rmgr/proc.c
> +++ b/drivers/staging/tidspbridge/rmgr/proc.c
> @@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>                                                        (u32)pmpu_addr,
>                                                        ul_size, dir);
>
> +       mutex_lock(&proc_lock);

Maybe you should use mutex_lock_interruptible instead of mutex_lock.

> +
>        /* find requested memory are in cached mapping information */
>        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
>        if (!map_obj) {
>                pr_err("%s: find_containing_mapping failed\n", __func__);
>                status = -EFAULT;
> -               goto err_out;
> +               goto no_map;
>        }
>
>        if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
> @@ -795,6 +797,8 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>                status = -EFAULT;
>        }
>
> +no_map:
> +       mutex_unlock(&proc_lock);
>  err_out:
>
>        return status;
> @@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>                                                        (u32)pmpu_addr,
>                                                        ul_size, dir);
>
> +       mutex_lock(&proc_lock);
> +
>        /* find requested memory are in cached mapping information */
>        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
>        if (!map_obj) {
>                pr_err("%s: find_containing_mapping failed\n", __func__);
>                status = -EFAULT;
> -               goto err_out;
> +               goto no_map;
>        }
>
>        if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
> @@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>                goto err_out;

The mutex is not released in this case, since it is only released at no_map.


>        }
>
> +no_map:
> +       mutex_unlock(&proc_lock);
>  err_out:
>        return status;
>  }
> @@ -1726,9 +1734,8 @@ int proc_un_map(void *hprocessor, void *map_addr,
>                    (p_proc_object->hbridge_context, va_align, size_align);
>        }
>
> -       mutex_unlock(&proc_lock);
>        if (status)
> -               goto func_end;
> +               goto unmap_failed;
>
>        /*
>         * A successful unmap should be followed by removal of map_obj
> @@ -1737,6 +1744,9 @@ int proc_un_map(void *hprocessor, void *map_addr,
>         */
>        remove_mapping_information(pr_ctxt, (u32) map_addr, size_align);
>
> +unmap_failed:
> +       mutex_unlock(&proc_lock);
> +
>  func_end:
>        dev_dbg(bridge, "%s: hprocessor: 0x%p map_addr: 0x%p status: 0x%x\n",
>                __func__, hprocessor, map_addr, status);
> --
> 1.7.3.3
>



--
Thank you,
Best regards,
Hari Kanigeri

2010-12-20 18:43:53

by Felipe Contreras

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

Hi,

On Mon, Dec 20, 2010 at 8:30 PM, Kanigeri, Hari <[email protected]> wrote:
> On Mon, Dec 20, 2010 at 11:12 AM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>>
>> Signed-off-by: Felipe Contreras <[email protected]>
>> ---
>>  drivers/staging/tidspbridge/rmgr/proc.c |   18 ++++++++++++++----
>>  1 files changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
>> index b47d7aa..21052e3 100644
>> --- a/drivers/staging/tidspbridge/rmgr/proc.c
>> +++ b/drivers/staging/tidspbridge/rmgr/proc.c
>> @@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>>                                                        (u32)pmpu_addr,
>>                                                        ul_size, dir);
>>
>> +       mutex_lock(&proc_lock);
>
> Maybe you should use mutex_lock_interruptible instead of mutex_lock.

Right, but I think that should be a separate patch since
mutex_lock(&proc_lock) is already being used.
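
For reference, the interruptible variant would look something like the
sketch below; whether -EINTR is an acceptable return value for these
callers is an assumption that would need checking, which is another
reason to do it as a separate patch:

        /* Sketch: take proc_lock but let a signal abort the wait.
         * mutex_lock_interruptible() returns 0 on success and -EINTR
         * if the sleep was interrupted by a signal.
         */
        status = mutex_lock_interruptible(&proc_lock);
        if (status)
                goto err_out;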

>> @@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>>                                                        (u32)pmpu_addr,
>>                                                        ul_size, dir);
>>
>> +       mutex_lock(&proc_lock);
>> +
>>        /* find requested memory are in cached mapping information */
>>        map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
>>        if (!map_obj) {
>>                pr_err("%s: find_containing_mapping failed\n", __func__);
>>                status = -EFAULT;
>> -               goto err_out;
>> +               goto no_map;
>>        }
>>
>>        if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
>> @@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>>                goto err_out;
>
> Mutex is not released in this case as it is released only at no_map.

Oops! I didn't test proc_end_dma(); I added those locks quickly after
I noticed it needed the same protection. I'll resend with the fix.
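
Concretely, the resend just needs the memory_regain_ownership() failure
path in proc_end_dma() to reach the unlock as well. A sketch (error print
trimmed) of one way to do it:

        if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
                status = -EFAULT;
                goto no_map;    /* v1 had "goto err_out" here, skipping the unlock */
        }

no_map:
        mutex_unlock(&proc_lock);
err_out:
        return status;

Alternatively the goto can simply be dropped, since no_map follows
immediately after that block anyway.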

--
Felipe Contreras

2011-03-07 18:02:48

by Felipe Contreras

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
<[email protected]> wrote:
> We need to protect not only the dmm_map list, but the individual
> map_obj's, otherwise, we might be building the scatter-gather list with
> garbage. So, use the existing proc_lock for that.
>
> I observed race conditions which caused kernel panics while running
> stress tests. This patch fixes those.

I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
Overo. I propose we apply this patch on the stable tree ASAP, and if
there's no better proposals, also on .38.

--
Felipe Contreras

2011-03-07 18:14:44

by Ohad Ben Cohen

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

On Mon, Mar 7, 2011 at 8:02 PM, Felipe Contreras
<[email protected]> wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.

I agree that a fix should be merged ASAP, and I don't mind which one,
especially since things have changed lately and the chances that we
will use this code for OMAP4 too are really small now.

2011-03-07 18:50:05

by Greg KH

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

On Mon, Mar 07, 2011 at 08:02:44PM +0200, Felipe Contreras wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
> > We need to protect not only the dmm_map list, but the individual
> > map_obj's, otherwise, we might be building the scatter-gather list with
> > garbage. So, use the existing proc_lock for that.
> >
> > I observed race conditions which caused kernel panics while running
> > stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.

Um, I don't think you realize _how_ stable trees work; please go read
Documentation/stable_kernel_rules.txt to see why it is not possible to
take something into the stable releases before it goes to Linus's tree.

Sorry,

greg k-h

2011-03-07 19:29:06

by Ramirez Luna, Omar

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

Hi Felipe,

On Mon, Mar 7, 2011 at 12:02 PM, Felipe Contreras
<[email protected]> wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.

Can you or Tuomas share the bug report data (panic log, and maybe a
test case)? I would like to rule out timing-related issues that could
merely be hidden by this patch.

I agree that for the time being this needs to be sent upstream, even
if on paper Ohad's patch solves the issue without the locking side
effects.

Thanks,

Omar

2011-03-07 21:48:37

by Felipe Contreras

Subject: Re: [PATCH] staging: tidspbridge: protect dmm_map properly

On Mon, Mar 7, 2011 at 9:29 PM, Ramirez Luna, Omar <[email protected]> wrote:
> On Mon, Mar 7, 2011 at 12:02 PM, Felipe Contreras
> <[email protected]> wrote:
>> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
>> <[email protected]> wrote:
>>> We need to protect not only the dmm_map list, but the individual
>>> map_obj's, otherwise, we might be building the scatter-gather list with
>>> garbage. So, use the existing proc_lock for that.
>>>
>>> I observed race conditions which caused kernel panics while running
>>> stress tests. This patch fixes those.
>>
>> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
>> Overo. I propose we apply this patch on the stable tree ASAP, and if
>> there's no better proposals, also on .38.
>
> Can you or Tuomas share the bug report data (panic log, test case
> maybe)? I would like to discard issues affected by timing that could
> be hidden with this patch.

I got this from Tuomas:
http://pastie.org/1643677

It seems it's very easy to reproduce on Gumstix Overo with a
gst-launch command. The way I reproduced it was very tedious:
running a full-blown Maemo test suite with GBs of clips.

I think the issue is very clear: if you build an sg list from garbage
memory, problems are expected =/
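
Roughly, the window looks like this (assuming, as I read the code, that
remove_mapping_information() ends up freeing the map_obj):

/*
 *   CPU0: proc_begin_dma()                 CPU1: proc_un_map()
 *   ----------------------                 -------------------
 *   map_obj = find_containing_mapping()
 *                                          mutex_lock(&proc_lock)
 *                                          brd_mem_un_map()
 *                                          remove_mapping_information()
 *                                            -> frees map_obj
 *                                          mutex_unlock(&proc_lock)
 *   memory_give_ownership(map_obj, ...)
 *     -> builds the sg list from the freed
 *        map_obj's page array: garbage,
 *        then the panic
 *
 * With proc_lock held across the whole lookup-and-use sequence, the unmap
 * has to wait, so map_obj stays valid while the sg list is built.
 */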

> I agree that for the time being this needs to be sent upstream, even
> if on paper Ohad's patch solves the issue without the locking side
> effects.

Perhaps. I don't remember if I ack'ed Ohad's patch, but even if it's
ok, I think it can be applied on top of my patch.

Cheers.

--
Felipe Contreras