2005-01-21 13:48:39

by Oleg Nesterov

[permalink] [raw]
Subject: [PATCH] fix put_user under mmap_sem in sys_get_mempolicy()

Hello.

sys_get_mempolicy() accesses user memory with mmap_sem held.
If I understand correctly, this can cause deadlock:

sys_get_mempolicy: Another thread, same mm:

down_read(mmap_sem);
down_write(mmap_sem);
put_user();
do_page_fault:
down_read(mmap_sem);

Compile tested only, I have no NUMA machine.

Signed-off-by: Oleg Nesterov <[email protected]>

--- 2.6.11-rc1/mm/mempolicy.c~ Wed Jan 12 11:44:55 2005
+++ 2.6.11-rc1/mm/mempolicy.c Fri Jan 21 17:41:47 2005
@@ -482,26 +482,38 @@ asmlinkage long sys_get_mempolicy(int __
unsigned long maxnode,
unsigned long addr, unsigned long flags)
{
- int err, pval;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma = NULL;
+ int err, pval = 0; /* make compiler happy */
struct mempolicy *pol = current->mempolicy;

if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
if (nmask != NULL && maxnode < MAX_NUMNODES)
return -EINVAL;
+
if (flags & MPOL_F_ADDR) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ err = 0;
down_read(&mm->mmap_sem);
vma = find_vma_intersection(mm, addr, addr+1);
- if (!vma) {
- up_read(&mm->mmap_sem);
- return -EFAULT;
+ if (!vma)
+ err = -EFAULT;
+ else {
+ if (vma->vm_ops && vma->vm_ops->get_policy)
+ pol = vma->vm_ops->get_policy(vma, addr);
+ else
+ pol = vma->vm_policy;
+
+ if (flags & MPOL_F_NODE) {
+ pval = lookup_node(mm, addr);
+ if (pval < 0)
+ err = pval;
+ }
}
- if (vma->vm_ops && vma->vm_ops->get_policy)
- pol = vma->vm_ops->get_policy(vma, addr);
- else
- pol = vma->vm_policy;
+ up_read(&mm->mmap_sem);
+ if (err)
+ goto out;
} else if (addr)
return -EINVAL;

@@ -509,17 +521,14 @@ asmlinkage long sys_get_mempolicy(int __
pol = &default_policy;

if (flags & MPOL_F_NODE) {
- if (flags & MPOL_F_ADDR) {
- err = lookup_node(mm, addr);
- if (err < 0)
+ if (!(flags & MPOL_F_ADDR)) {
+ if (pol == current->mempolicy &&
+ pol->policy == MPOL_INTERLEAVE) {
+ pval = current->il_next;
+ } else {
+ err = -EINVAL;
goto out;
- pval = err;
- } else if (pol == current->mempolicy &&
- pol->policy == MPOL_INTERLEAVE) {
- pval = current->il_next;
- } else {
- err = -EINVAL;
- goto out;
+ }
}
} else
pval = pol->policy;
@@ -534,10 +543,7 @@ asmlinkage long sys_get_mempolicy(int __
get_zonemask(pol, nodes);
err = copy_nodes_to_user(nmask, maxnode, nodes, sizeof(nodes));
}
-
- out:
- if (vma)
- up_read(&current->mm->mmap_sem);
+out:
return err;
}


2005-01-21 14:29:16

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] fix put_user under mmap_sem in sys_get_mempolicy()

On Fri, Jan 21, 2005 at 05:51:07PM +0300, Oleg Nesterov wrote:
> Hello.
>
> sys_get_mempolicy() accesses user memory with mmap_sem held.
> If I understand correctly, this can cause deadlock:
>
> sys_get_mempolicy: Another thread, same mm:
>
> down_read(mmap_sem);
> down_write(mmap_sem);
> put_user();
> do_page_fault:
> down_read(mmap_sem);

Hrm. Normal Linux policy seems to ignore this potential
and rare deadlock and using nesting safely (e.g. it's been
known for a long time for the tasklist rw spinlock, but
nobody really cares and it doesn't seem to be hit by users).
Do you really think it is likely to happen?
>
> Compile tested only, I have no NUMA machine.

It's hard to figure out what your patch actually does because
of all the gratious white space changes.

I suppose this simpler patch has the same effect (also untested).


diff -u linux-2.6.11-rc1-bk4/mm/mempolicy.c-o linux-2.6.11-rc1-bk4/mm/mempolicy.c
--- linux-2.6.11-rc1-bk4/mm/mempolicy.c-o 2005-01-14 10:12:27.000000000 +0100
+++ linux-2.6.11-rc1-bk4/mm/mempolicy.c 2005-01-21 15:26:12.000000000 +0100
@@ -485,7 +485,7 @@
int err, pval;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
- struct mempolicy *pol = current->mempolicy;
+ struct mempolicy *pol = current->mempolicy, *pol2 = NULL;

if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
@@ -502,6 +502,10 @@
pol = vma->vm_ops->get_policy(vma, addr);
else
pol = vma->vm_policy;
+ pol2 = mpol_copy(pol);
+ up_read(&mm->mmap_sem);
+ if (IS_ERR(pol2))
+ return PTR_ERR(pol2);
} else if (addr)
return -EINVAL;

@@ -536,8 +540,7 @@
}

out:
- if (vma)
- up_read(&current->mm->mmap_sem);
+ mpol_free(pol2);
return err;
}


2005-01-21 14:58:27

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH] fix put_user under mmap_sem in sys_get_mempolicy()

Andi Kleen wrote:
>
> I suppose this simpler patch has the same effect (also untested).
>
> if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
> return -EINVAL;
>@@ -502,6 +502,10 @@
> pol = vma->vm_ops->get_policy(vma, addr);
> else
> pol = vma->vm_policy;
>+ pol2 = mpol_copy(pol);
>+ up_read(&mm->mmap_sem);
>+ if (IS_ERR(pol2))
>+ return PTR_ERR(pol2);
>

I don't think so. With MPOL_F_ADDR|MPOL_F_NODE sys_get_mempolicy
calls lookup_node()->get_user_pages() few lines below, so we can't
up_read(&mm->mmap_sem) here.

> It's hard to figure out what your patch actually does because
> of all the gratious white space changes.

For your convenience here is the code with the patch applied.

if (flags & MPOL_F_ADDR) {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;

err = 0;
down_read(&mm->mmap_sem);
vma = find_vma_intersection(mm, addr, addr+1);
if (!vma)
err = -EFAULT;
else {
if (vma->vm_ops && vma->vm_ops->get_policy)
pol = vma->vm_ops->get_policy(vma, addr);
else
pol = vma->vm_policy;

if (flags & MPOL_F_NODE) {
pval = lookup_node(mm, addr);
if (pval < 0)
err = pval;
}
}
up_read(&mm->mmap_sem);
if (err)
goto out;
} else if (addr)
return -EINVAL;

if (!pol)
pol = &default_policy;

if (flags & MPOL_F_NODE) {
if (!(flags & MPOL_F_ADDR)) {
if (pol == current->mempolicy &&
pol->policy == MPOL_INTERLEAVE) {
pval = current->il_next;
} else {
err = -EINVAL;
goto out;
}
}
} else
pval = pol->policy;

2005-01-21 15:12:25

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH] fix put_user under mmap_sem in sys_get_mempolicy()

On Fri, Jan 21, 2005 at 07:01:55PM +0300, Oleg Nesterov wrote:
> Andi Kleen wrote:
> >
> > I suppose this simpler patch has the same effect (also untested).
> >
> > if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
> > return -EINVAL;
> >@@ -502,6 +502,10 @@
> > pol = vma->vm_ops->get_policy(vma, addr);
> > else
> > pol = vma->vm_policy;
> >+ pol2 = mpol_copy(pol);
> >+ up_read(&mm->mmap_sem);
> >+ if (IS_ERR(pol2))
> >+ return PTR_ERR(pol2);
> >
>
> I don't think so. With MPOL_F_ADDR|MPOL_F_NODE sys_get_mempolicy
> calls lookup_node()->get_user_pages() few lines below, so we can't
> up_read(&mm->mmap_sem) here.

True.

>
> > It's hard to figure out what your patch actually does because
> > of all the gratious white space changes.
>
> For your convenience here is the code with the patch applied.

Looks reasonable.

-Andi

P.S.: Again if you really care about these class of deadlocks take a look at
tasklist_lock.