We need to protect not only the dmm_map list, but the individual
map_obj's, otherwise, we might be building the scatter-gather list with
garbage. So, use the existing proc_lock for that.
I observed race conditions which caused kernel panics while running
stress tests. This patch fixes those.
Signed-off-by: Felipe Contreras <[email protected]>
---
drivers/staging/tidspbridge/rmgr/proc.c | 18 ++++++++++++++----
1 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
index b47d7aa..21052e3 100644
--- a/drivers/staging/tidspbridge/rmgr/proc.c
+++ b/drivers/staging/tidspbridge/rmgr/proc.c
@@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
(u32)pmpu_addr,
ul_size, dir);
+ mutex_lock(&proc_lock);
+
/* find requested memory are in cached mapping information */
map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
if (!map_obj) {
pr_err("%s: find_containing_mapping failed\n", __func__);
status = -EFAULT;
- goto err_out;
+ goto no_map;
}
if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -795,6 +797,8 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
status = -EFAULT;
}
+no_map:
+ mutex_unlock(&proc_lock);
err_out:
return status;
@@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
(u32)pmpu_addr,
ul_size, dir);
+ mutex_lock(&proc_lock);
+
/* find requested memory are in cached mapping information */
map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
if (!map_obj) {
pr_err("%s: find_containing_mapping failed\n", __func__);
status = -EFAULT;
- goto err_out;
+ goto no_map;
}
if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
goto err_out;
}
+no_map:
+ mutex_unlock(&proc_lock);
err_out:
return status;
}
@@ -1726,9 +1734,8 @@ int proc_un_map(void *hprocessor, void *map_addr,
(p_proc_object->hbridge_context, va_align, size_align);
}
- mutex_unlock(&proc_lock);
if (status)
- goto func_end;
+ goto unmap_failed;
/*
* A successful unmap should be followed by removal of map_obj
@@ -1737,6 +1744,9 @@ int proc_un_map(void *hprocessor, void *map_addr,
*/
remove_mapping_information(pr_ctxt, (u32) map_addr, size_align);
+unmap_failed:
+ mutex_unlock(&proc_lock);
+
func_end:
dev_dbg(bridge, "%s: hprocessor: 0x%p map_addr: 0x%p status: 0x%x\n",
__func__, hprocessor, map_addr, status);
--
1.7.3.3
Felipe,
On Mon, Dec 20, 2010 at 11:12 AM, Felipe Contreras
<[email protected]> wrote:
> We need to protect not only the dmm_map list, but the individual
> map_obj's, otherwise, we might be building the scatter-gather list with
> garbage. So, use the existing proc_lock for that.
>
> I observed race conditions which caused kernel panics while running
> stress tests. This patch fixes those.
>
> Signed-off-by: Felipe Contreras <[email protected]>
> ---
> ?drivers/staging/tidspbridge/rmgr/proc.c | ? 18 ++++++++++++++----
> ?1 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
> index b47d7aa..21052e3 100644
> --- a/drivers/staging/tidspbridge/rmgr/proc.c
> +++ b/drivers/staging/tidspbridge/rmgr/proc.c
> @@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(u32)pmpu_addr,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ul_size, dir);
>
> + ? ? ? mutex_lock(&proc_lock);
May be you should use mutex_lock_interruptable instead of mutex_lock.
> +
> ? ? ? ?/* find requested memory are in cached mapping information */
> ? ? ? ?map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
> ? ? ? ?if (!map_obj) {
> ? ? ? ? ? ? ? ?pr_err("%s: find_containing_mapping failed\n", __func__);
> ? ? ? ? ? ? ? ?status = -EFAULT;
> - ? ? ? ? ? ? ? goto err_out;
> + ? ? ? ? ? ? ? goto no_map;
> ? ? ? ?}
>
> ? ? ? ?if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
> @@ -795,6 +797,8 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
> ? ? ? ? ? ? ? ?status = -EFAULT;
> ? ? ? ?}
>
> +no_map:
> + ? ? ? mutex_unlock(&proc_lock);
> ?err_out:
>
> ? ? ? ?return status;
> @@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(u32)pmpu_addr,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?ul_size, dir);
>
> + ? ? ? mutex_lock(&proc_lock);
> +
> ? ? ? ?/* find requested memory are in cached mapping information */
> ? ? ? ?map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
> ? ? ? ?if (!map_obj) {
> ? ? ? ? ? ? ? ?pr_err("%s: find_containing_mapping failed\n", __func__);
> ? ? ? ? ? ? ? ?status = -EFAULT;
> - ? ? ? ? ? ? ? goto err_out;
> + ? ? ? ? ? ? ? goto no_map;
> ? ? ? ?}
>
> ? ? ? ?if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
> @@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
> ? ? ? ? ? ? ? ?goto err_out;
Mutex is not released in this case as it is released only at no_map.
> ? ? ? ?}
>
> +no_map:
> + ? ? ? mutex_unlock(&proc_lock);
> ?err_out:
> ? ? ? ?return status;
> ?}
> @@ -1726,9 +1734,8 @@ int proc_un_map(void *hprocessor, void *map_addr,
> ? ? ? ? ? ? ? ? ? ?(p_proc_object->hbridge_context, va_align, size_align);
> ? ? ? ?}
>
> - ? ? ? mutex_unlock(&proc_lock);
> ? ? ? ?if (status)
> - ? ? ? ? ? ? ? goto func_end;
> + ? ? ? ? ? ? ? goto unmap_failed;
>
> ? ? ? ?/*
> ? ? ? ? * A successful unmap should be followed by removal of map_obj
> @@ -1737,6 +1744,9 @@ int proc_un_map(void *hprocessor, void *map_addr,
> ? ? ? ? */
> ? ? ? ?remove_mapping_information(pr_ctxt, (u32) map_addr, size_align);
>
> +unmap_failed:
> + ? ? ? mutex_unlock(&proc_lock);
> +
> ?func_end:
> ? ? ? ?dev_dbg(bridge, "%s: hprocessor: 0x%p map_addr: 0x%p status: 0x%x\n",
> ? ? ? ? ? ? ? ?__func__, hprocessor, map_addr, status);
> --
> 1.7.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to [email protected]
> More majordomo info at ?http://vger.kernel.org/majordomo-info.html
>
--
Thank you,
Best regards,
Hari Kanigeri
Hi,
On Mon, Dec 20, 2010 at 8:30 PM, Kanigeri, Hari <[email protected]> wrote:
> On Mon, Dec 20, 2010 at 11:12 AM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>>
>> Signed-off-by: Felipe Contreras <[email protected]>
>> ---
>> drivers/staging/tidspbridge/rmgr/proc.c | 18 ++++++++++++++----
>> 1 files changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
>> index b47d7aa..21052e3 100644
>> --- a/drivers/staging/tidspbridge/rmgr/proc.c
>> +++ b/drivers/staging/tidspbridge/rmgr/proc.c
>> @@ -781,12 +781,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>> (u32)pmpu_addr,
>> ul_size, dir);
>>
>> + mutex_lock(&proc_lock);
>
> May be you should use mutex_lock_interruptable instead of mutex_lock.
Right, but I think that should be a separate patch since
mutex_lock(&proc_lock) is already being used.
>> @@ -819,12 +823,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>> (u32)pmpu_addr,
>> ul_size, dir);
>>
>> + mutex_lock(&proc_lock);
>> +
>> /* find requested memory are in cached mapping information */
>> map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
>> if (!map_obj) {
>> pr_err("%s: find_containing_mapping failed\n", __func__);
>> status = -EFAULT;
>> - goto err_out;
>> + goto no_map;
>> }
>>
>> if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
>> @@ -834,6 +840,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
>> goto err_out;
>
> Mutex is not released in this case as it is released only at no_map.
Oops! I didn't test proc_end_dma() and quickly added those locks after
I noticed it. I'll resend with the fix.
--
Felipe Contreras
On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
<[email protected]> wrote:
> We need to protect not only the dmm_map list, but the individual
> map_obj's, otherwise, we might be building the scatter-gather list with
> garbage. So, use the existing proc_lock for that.
>
> I observed race conditions which caused kernel panics while running
> stress tests. This patch fixes those.
I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
Overo. I propose we apply this patch on the stable tree ASAP, and if
there's no better proposals, also on .38.
--
Felipe Contreras
On Mon, Mar 7, 2011 at 8:02 PM, Felipe Contreras
<[email protected]> wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.
Agree that a fix should be merged asap, and I don't mind which.
Especially since things have changed lately and chances that we will
use this code for OMAP4 too are really small now.
On Mon, Mar 07, 2011 at 08:02:44PM +0200, Felipe Contreras wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
> > We need to protect not only the dmm_map list, but the individual
> > map_obj's, otherwise, we might be building the scatter-gather list with
> > garbage. So, use the existing proc_lock for that.
> >
> > I observed race conditions which caused kernel panics while running
> > stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.
Um, I don't think you realize _how_ stable trees work, please go read
Documentation/stable_kernel_rules.txt to see how it is not possible to
take something into the stable releases before it goes to Linus's tree.
Sorry,
greg k-h
Hi Felipe,
On Mon, Mar 7, 2011 at 12:02 PM, Felipe Contreras
<[email protected]> wrote:
> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
> <[email protected]> wrote:
>> We need to protect not only the dmm_map list, but the individual
>> map_obj's, otherwise, we might be building the scatter-gather list with
>> garbage. So, use the existing proc_lock for that.
>>
>> I observed race conditions which caused kernel panics while running
>> stress tests. This patch fixes those.
>
> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
> Overo. I propose we apply this patch on the stable tree ASAP, and if
> there's no better proposals, also on .38.
Can you or Tuomas share the bug report data (panic log, test case
maybe)? I would like to discard issues affected by timing that could
be hidden with this patch.
I agree that for the time being this needs to be sent upstream, even
if in paper Ohad's patch solves the issue without side effects of
locking.
Thanks,
Omar
On Mon, Mar 7, 2011 at 9:29 PM, Ramirez Luna, Omar <[email protected]> wrote:
> On Mon, Mar 7, 2011 at 12:02 PM, Felipe Contreras
> <[email protected]> wrote:
>> On Mon, Dec 20, 2010 at 7:12 PM, Felipe Contreras
>> <[email protected]> wrote:
>>> We need to protect not only the dmm_map list, but the individual
>>> map_obj's, otherwise, we might be building the scatter-gather list with
>>> garbage. So, use the existing proc_lock for that.
>>>
>>> I observed race conditions which caused kernel panics while running
>>> stress tests. This patch fixes those.
>>
>> I just heard that Tuomas Kulve is getting a lot of panics on Gumstix
>> Overo. I propose we apply this patch on the stable tree ASAP, and if
>> there's no better proposals, also on .38.
>
> Can you or Tuomas share the bug report data (panic log, test case
> maybe)? I would like to discard issues affected by timing that could
> be hidden with this patch.
I got this from Tuomas:
http://pastie.org/1643677
It seems it's very easy to reproduce on Gumstix Overo with a
gst-launch command. The way I reproduced it was very tedious; running
a full blown Maemo test suite with GBs of clips.
I think the issue is very clear; if you build a sg list with garbage
memory, problems are expected =/
> I agree that for the time being this needs to be sent upstream, even
> if in paper Ohad's patch solves the issue without side effects of
> locking.
Perhaps. I don't remember if I ack'ed Ohad's patch, but even if it's
ok, I think it can be applied on top of my patch.
Cheers.
--
Felipe Contreras