This patch extends the following upstream commit to the IB->mlx4 and
IB->mlx5 drivers, fixing the race condition between get_task_mm() and
core dumping in those drivers:
commit 04f5866e41fb ("coredump: fix race condition between
mmget_not_zero()/get_task_mm() and core dumping")
Thanks to Jason for pointing this out.
Signed-off-by: Ajay Kaher <[email protected]>
---
drivers/infiniband/hw/mlx4/main.c | 4 +++-
drivers/infiniband/hw/mlx5/main.c | 3 +++
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index e2beb18..0299c06 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1197,6 +1197,8 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
* mlx4_ib_vma_close().
*/
down_write(&owning_mm->mmap_sem);
+ if (!mmget_still_valid(owning_mm))
+ goto skip_mm;
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
@@ -1215,7 +1217,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
-
+skip_mm:
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 13a9206..3fbe396 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1646,6 +1646,8 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
* mlx5_ib_vma_close.
*/
down_write(&owning_mm->mmap_sem);
+ if (!mmget_still_valid(owning_mm))
+ goto skip_mm;
mutex_lock(&context->vma_private_list_mutex);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
@@ -1662,6 +1664,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
kfree(vma_private);
}
mutex_unlock(&context->vma_private_list_mutex);
+skip_mm:
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
--
2.7.4
On Tue 11-06-19 02:22:17, Ajay Kaher wrote:
> This patch is the extension of following upstream commit to fix
> the race condition between get_task_mm() and core dumping
> for IB->mlx4 and IB->mlx5 drivers:
>
> commit 04f5866e41fb ("coredump: fix race condition between
> mmget_not_zero()/get_task_mm() and core dumping")'
>
> Thanks to Jason for pointing this.
>
> Signed-off-by: Ajay Kaher <[email protected]>
> ---
> drivers/infiniband/hw/mlx4/main.c | 4 +++-
> drivers/infiniband/hw/mlx5/main.c | 3 +++
> 2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
> index e2beb18..0299c06 100644
> --- a/drivers/infiniband/hw/mlx4/main.c
> +++ b/drivers/infiniband/hw/mlx4/main.c
> @@ -1197,6 +1197,8 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> * mlx4_ib_vma_close().
> */
> down_write(&owning_mm->mmap_sem);
> + if (!mmget_still_valid(owning_mm))
> + goto skip_mm;
> for (i = 0; i < HW_BAR_COUNT; i++) {
> vma = context->hw_bar_info[i].vma;
> if (!vma)
I missed this part in my 4.4 stable backport. Thanks for catching it.
I have updated my backport.
> @@ -1215,7 +1217,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> /* context going to be destroyed, should not access ops any more */
> context->hw_bar_info[i].vma->vm_ops = NULL;
> }
> -
> +skip_mm:
> up_write(&owning_mm->mmap_sem);
> mmput(owning_mm);
> put_task_struct(owning_process);
--
Michal Hocko
SUSE Labs
On Tue, Jun 11, 2019 at 02:22:17AM +0530, Ajay Kaher wrote:
> This patch is the extension of following upstream commit to fix
> the race condition between get_task_mm() and core dumping
> for IB->mlx4 and IB->mlx5 drivers:
>
> commit 04f5866e41fb ("coredump: fix race condition between
> mmget_not_zero()/get_task_mm() and core dumping")'
>
> Thanks to Jason for pointing this.
>
> Signed-off-by: Ajay Kaher <[email protected]>
> ---
> drivers/infiniband/hw/mlx4/main.c | 4 +++-
> drivers/infiniband/hw/mlx5/main.c | 3 +++
> 2 files changed, 6 insertions(+), 1 deletion(-)
Acked-by: Jason Gunthorpe <[email protected]>
Jason
On 10/06/19, 6:22 PM, "Ajay Kaher" <[email protected]> wrote:
> This patch is the extension of following upstream commit to fix
> the race condition between get_task_mm() and core dumping
> for IB->mlx4 and IB->mlx5 drivers:
>
> commit 04f5866e41fb ("coredump: fix race condition between
> mmget_not_zero()/get_task_mm() and core dumping")'
>
> Thanks to Jason for pointing this.
>
> Signed-off-by: Ajay Kaher <[email protected]>
> Acked-by: Jason Gunthorpe <[email protected]>
Greg, I hope you would like to review and proceed further with this patch.
> ---
> drivers/infiniband/hw/mlx4/main.c | 4 +++-
> drivers/infiniband/hw/mlx5/main.c | 3 +++
> 2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
> index e2beb18..0299c06 100644
> --- a/drivers/infiniband/hw/mlx4/main.c
> +++ b/drivers/infiniband/hw/mlx4/main.c
> @@ -1197,6 +1197,8 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> * mlx4_ib_vma_close().
> */
> down_write(&owning_mm->mmap_sem);
> + if (!mmget_still_valid(owning_mm))
> + goto skip_mm;
> for (i = 0; i < HW_BAR_COUNT; i++) {
> vma = context->hw_bar_info[i].vma;
> if (!vma)
> @@ -1215,7 +1217,7 @@ static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> /* context going to be destroyed, should not access ops any more */
> context->hw_bar_info[i].vma->vm_ops = NULL;
> }
> -
> +skip_mm:
> up_write(&owning_mm->mmap_sem);
> mmput(owning_mm);
> put_task_struct(owning_process);
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index 13a9206..3fbe396 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -1646,6 +1646,8 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> * mlx5_ib_vma_close.
> */
> down_write(&owning_mm->mmap_sem);
> + if (!mmget_still_valid(owning_mm))
> + goto skip_mm;
> mutex_lock(&context->vma_private_list_mutex);
> list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
> list) {
> @@ -1662,6 +1664,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
> kfree(vma_private);
> }
> mutex_unlock(&context->vma_private_list_mutex);
> +skip_mm:
> up_write(&owning_mm->mmap_sem);
> mmput(owning_mm);
> put_task_struct(owning_process);
> --
> 2.7.4
On Fri, Jun 14, 2019 at 02:41:39PM +0000, Ajay Kaher wrote:
>
> On 10/06/19, 6:22 PM, "Ajay Kaher" <[email protected]> wrote:
>
> > This patch is the extension of following upstream commit to fix
> > the race condition between get_task_mm() and core dumping
> > for IB->mlx4 and IB->mlx5 drivers:
> >
> > commit 04f5866e41fb ("coredump: fix race condition between
> > mmget_not_zero()/get_task_mm() and core dumping")'
> >
> > Thanks to Jason for pointing this.
> >
> > Signed-off-by: Ajay Kaher <[email protected]>
> > Acked-by: Jason Gunthorpe <[email protected]>
>
> Greg, I hope you would like to review and proceed further with this patch.
If this has all calmed down now, I'll look at it next week, thanks.
greg k-h