Hi,
I found a race condition that triggers a kernel panic. It's explained in the
following patches; in short, the map_obj that holds the user pages can be
destroyed while a DMA operation that still requires it is in progress.

My solution is to convert the spinlock to a semaphore and extend the
protected area to cover code that might sleep.
I have not tested these specific patches, since they have been
forward-ported, but the same changes on a similar branch do solve the issue.
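
Roughly, the race looks like this (condensed sketch; the function names
are from rmgr/proc.c):

  CPU0: proc_begin_dma()                 CPU1: proc_un_map()
  ----------------------                 -------------------
  map_obj = find_containing_mapping()
  /* dmm_map_lock already dropped */
                                         remove_mapping_information()
                                           kfree(map_obj)
  memory_give_ownership(map_obj)
    -> use-after-free, kernel panic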
Felipe Contreras (2):
staging: tidspbridge: convert dmm_map_lock to sema
staging: tidspbridge: extend dmm_map semaphore
.../staging/tidspbridge/include/dspbridge/drv.h | 2 +-
drivers/staging/tidspbridge/rmgr/drv_interface.c | 2 +-
drivers/staging/tidspbridge/rmgr/proc.c | 23 +++++++++++++-------
3 files changed, 17 insertions(+), 10 deletions(-)
--
1.7.3.3
This is needed because the lock must be extended to cover the mapping-info
access used to construct a scatter-gather list; in the process we might
sleep, which is not allowed while holding a spinlock.
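
For illustration only (not part of the patch): with the spinlock held, the
sleeping path would trigger "BUG: sleeping function called from invalid
context". The allocation below is a hypothetical stand-in for whatever the
scatter-gather construction actually does:

  spin_lock(&pr_ctxt->dmm_map_lock);      /* preemption disabled */
  map_obj = find_containing_mapping(pr_ctxt, mpu_addr, size);
  /* building the SG list can allocate memory and therefore sleep */
  sg = kmalloc(nents * sizeof(*sg), GFP_KERNEL);  /* may sleep: bug */
  spin_unlock(&pr_ctxt->dmm_map_lock);

A semaphore taken with down() and released with up() allows its holder to
sleep, so the whole section can be protected safely.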
Signed-off-by: Felipe Contreras <[email protected]>
---
.../staging/tidspbridge/include/dspbridge/drv.h | 2 +-
drivers/staging/tidspbridge/rmgr/drv_interface.c | 2 +-
drivers/staging/tidspbridge/rmgr/proc.c | 12 ++++++------
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/staging/tidspbridge/include/dspbridge/drv.h b/drivers/staging/tidspbridge/include/dspbridge/drv.h
index c1f363e..217c918 100644
--- a/drivers/staging/tidspbridge/include/dspbridge/drv.h
+++ b/drivers/staging/tidspbridge/include/dspbridge/drv.h
@@ -163,7 +163,7 @@ struct process_context {
/* DMM mapped memory resources */
struct list_head dmm_map_list;
- spinlock_t dmm_map_lock;
+ struct semaphore dmm_map_sema;
/* DMM reserved memory resources */
struct list_head dmm_rsv_list;
diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c
index 324fcdf..82c25c6 100644
--- a/drivers/staging/tidspbridge/rmgr/drv_interface.c
+++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c
@@ -507,7 +507,7 @@ static int bridge_open(struct inode *ip, struct file *filp)
pr_ctxt = kzalloc(sizeof(struct process_context), GFP_KERNEL);
if (pr_ctxt) {
pr_ctxt->res_state = PROC_RES_ALLOCATED;
- spin_lock_init(&pr_ctxt->dmm_map_lock);
+ sema_init(&pr_ctxt->dmm_map_sema, 1);
INIT_LIST_HEAD(&pr_ctxt->dmm_map_list);
spin_lock_init(&pr_ctxt->dmm_rsv_lock);
INIT_LIST_HEAD(&pr_ctxt->dmm_rsv_list);
diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
index b47d7aa..77ab5f5 100644
--- a/drivers/staging/tidspbridge/rmgr/proc.c
+++ b/drivers/staging/tidspbridge/rmgr/proc.c
@@ -144,9 +144,9 @@ static struct dmm_map_object *add_mapping_info(struct process_context *pr_ctxt,
map_obj->size = size;
map_obj->num_usr_pgs = num_usr_pgs;
- spin_lock(&pr_ctxt->dmm_map_lock);
+ down(&pr_ctxt->dmm_map_sema);
list_add(&map_obj->link, &pr_ctxt->dmm_map_list);
- spin_unlock(&pr_ctxt->dmm_map_lock);
+ up(&pr_ctxt->dmm_map_sema);
return map_obj;
}
@@ -170,7 +170,7 @@ static void remove_mapping_information(struct process_context *pr_ctxt,
pr_debug("%s: looking for virt 0x%x size 0x%x\n", __func__,
dsp_addr, size);
- spin_lock(&pr_ctxt->dmm_map_lock);
+ down(&pr_ctxt->dmm_map_sema);
list_for_each_entry(map_obj, &pr_ctxt->dmm_map_list, link) {
pr_debug("%s: candidate: mpu_addr 0x%x virt 0x%x size 0x%x\n",
__func__,
@@ -191,7 +191,7 @@ static void remove_mapping_information(struct process_context *pr_ctxt,
pr_err("%s: failed to find given map info\n", __func__);
out:
- spin_unlock(&pr_ctxt->dmm_map_lock);
+ up(&pr_ctxt->dmm_map_sema);
}
static int match_containing_map_obj(struct dmm_map_object *map_obj,
@@ -211,7 +211,7 @@ static struct dmm_map_object *find_containing_mapping(
pr_debug("%s: looking for mpu_addr 0x%x size 0x%x\n", __func__,
mpu_addr, size);
- spin_lock(&pr_ctxt->dmm_map_lock);
+ down(&pr_ctxt->dmm_map_sema);
list_for_each_entry(map_obj, &pr_ctxt->dmm_map_list, link) {
pr_debug("%s: candidate: mpu_addr 0x%x virt 0x%x size 0x%x\n",
__func__,
@@ -228,7 +228,7 @@ static struct dmm_map_object *find_containing_mapping(
map_obj = NULL;
out:
- spin_unlock(&pr_ctxt->dmm_map_lock);
+ up(&pr_ctxt->dmm_map_sema);
return map_obj;
}
--
1.7.3.3
We need to protect not only the list, but also the individual map_obj
instances. Otherwise, we might be building the scatter-gather list from a
map_obj while it is being destroyed.

I observed race conditions that caused kernel panics while running stress
tests; this patch fixes them.
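
The resulting pattern in proc_begin_dma() and proc_end_dma() is, condensed
from the diff below:

  down(&pr_ctxt->dmm_map_sema);
  map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
  if (!map_obj) {
          status = -EFAULT;
          goto no_map;
  }
  /* map_obj can't be freed under us while we hold the semaphore */
  memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir);
no_map:
  up(&pr_ctxt->dmm_map_sema);

Since find_containing_mapping() no longer takes the semaphore itself, it
must now be called with the semaphore held, hence the comment added above
it.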
Signed-off-by: Felipe Contreras <[email protected]>
---
drivers/staging/tidspbridge/rmgr/proc.c | 15 +++++++++++----
1 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/tidspbridge/rmgr/proc.c b/drivers/staging/tidspbridge/rmgr/proc.c
index 77ab5f5..ea2d105 100644
--- a/drivers/staging/tidspbridge/rmgr/proc.c
+++ b/drivers/staging/tidspbridge/rmgr/proc.c
@@ -203,6 +203,7 @@ static int match_containing_map_obj(struct dmm_map_object *map_obj,
mpu_addr + size <= map_obj_end;
}
+/* must be called while holding dmm_map semaphore */
static struct dmm_map_object *find_containing_mapping(
struct process_context *pr_ctxt,
u32 mpu_addr, u32 size)
@@ -211,7 +212,6 @@ static struct dmm_map_object *find_containing_mapping(
pr_debug("%s: looking for mpu_addr 0x%x size 0x%x\n", __func__,
mpu_addr, size);
- down(&pr_ctxt->dmm_map_sema);
list_for_each_entry(map_obj, &pr_ctxt->dmm_map_list, link) {
pr_debug("%s: candidate: mpu_addr 0x%x virt 0x%x size 0x%x\n",
__func__,
@@ -228,7 +228,6 @@ static struct dmm_map_object *find_containing_mapping(
map_obj = NULL;
out:
- up(&pr_ctxt->dmm_map_sema);
return map_obj;
}
@@ -781,12 +780,14 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
(u32)pmpu_addr,
ul_size, dir);
+ down(&pr_ctxt->dmm_map_sema);
+
/* find requested memory area in cached mapping information */
map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
if (!map_obj) {
pr_err("%s: find_containing_mapping failed\n", __func__);
status = -EFAULT;
- goto err_out;
+ goto no_map;
}
if (memory_give_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -795,6 +796,8 @@ int proc_begin_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
status = -EFAULT;
}
+no_map:
+ up(&pr_ctxt->dmm_map_sema);
err_out:
return status;
@@ -819,12 +822,14 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
(u32)pmpu_addr,
ul_size, dir);
+ down(&pr_ctxt->dmm_map_sema);
+
/* find requested memory area in cached mapping information */
map_obj = find_containing_mapping(pr_ctxt, (u32) pmpu_addr, ul_size);
if (!map_obj) {
pr_err("%s: find_containing_mapping failed\n", __func__);
status = -EFAULT;
- goto err_out;
+ goto no_map;
}
if (memory_regain_ownership(map_obj, (u32) pmpu_addr, ul_size, dir)) {
@@ -834,6 +839,8 @@ int proc_end_dma(void *hprocessor, void *pmpu_addr, u32 ul_size,
goto err_out;
}
+no_map:
+ up(&pr_ctxt->dmm_map_sema);
err_out:
return status;
}
--
1.7.3.3
On Mon, Dec 20, 2010 at 4:25 PM, Felipe Contreras
<[email protected]> wrote:
> I found a race condition that triggers a kernel panic. It's explained in the
> following patches; in short, the map_obj that holds the user pages can be
> destroyed while a DMA operation that still requires it is in progress.
>
> My solution is to convert the spinlock to a semaphore and extend the
> protected area to cover code that might sleep.
>
> I have not tested these specific patches, since they have been
> forward-ported, but the same changes on a similar branch do solve the issue.
Please disregard this patch series; I'll send a simpler single patch
that does the trick.
--
Felipe Contreras