LinuxLists.cc - Re: [PATCH RESEND] Fix race in process_vm_rw

2012-02-01 05:53:43

Subject: Re: [PATCH RESEND] Fix race in process_vm_rw_core

On Mon, 30 Jan 2012 16:09:22 +0100
Oleg Nesterov <[email protected]> wrote:
> On 01/30, Christopher Yeoh wrote:
> >
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -198,7 +198,7 @@ static int proc_root_link(struct dentry
> > *dentry, struct path *path) return result;
> > }
> >
> > -static struct mm_struct *mm_access(struct task_struct *task,
> > unsigned int mode) +struct mm_struct *mm_access(struct task_struct
> > *task, unsigned int mode)
>
> This is not enough, we should move it outside of fs/proc/, otherwise
> the kernel can't be compiled without CONFIG_PROC.
>

Updated patch below. Have moved mm_access to kernel/fork.c

This patch fixes the race in process_vm_rw_core found by Oleg
(http://article.gmane.org/gmane.linux.kernel/1235667/) which can occur
between __ptrace_may_access on the target task and if that task
is executing through execve at the same time.

Chris
--
[email protected]
fs/proc/base.c | 20 --------------------
include/linux/sched.h | 6 ++++++
kernel/fork.c | 20 ++++++++++++++++++++
mm/process_vm_access.c | 20 ++++++--------------
4 files changed, 32 insertions(+), 34 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9cde9edf..2773412 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}

-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
- struct mm_struct *mm;
- int err;
-
- err = mutex_lock_killable(&task->signal->cred_guard_mutex);
- if (err)
- return ERR_PTR(err);
-
- mm = get_task_mm(task);
- if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
- mmput(mm);
- mm = ERR_PTR(-EACCES);
- }
- mutex_unlock(&task->signal->cred_guard_mutex);
-
- return mm;
-}
-
struct mm_struct *mm_for_maps(struct task_struct *task)
{
return mm_access(task, PTRACE_MODE_READ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2234985..7d379a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm)
extern void mmput(struct mm_struct *);
/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
/* Allocate a new mm structure and copy contents from tsk->mm */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090..1b2ef3c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task)
}
EXPORT_SYMBOL_GPL(get_task_mm);

+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+ struct mm_struct *mm;
+ int err;
+
+ err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ if (err)
+ return ERR_PTR(err);
+
+ mm = get_task_mm(task);
+ if (mm && mm != current->mm &&
+ !ptrace_may_access(task, mode)) {
+ mmput(mm);
+ mm = ERR_PTR(-EACCES);
+ }
+ mutex_unlock(&task->signal->cred_guard_mutex);
+
+ return mm;
+}
+
/* Please note the differences between mmput and mm_release.
* mmput is called whenever we stop holding onto a mm_struct,
* error success whatever.
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e920aa3..82b6824 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -298,23 +298,15 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
goto free_proc_pages;
}

- task_lock(task);
- if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
- task_unlock(task);
- rc = -EPERM;
- goto put_task_struct;
- }
- mm = task->mm;
-
- if (!mm || (task->flags & PF_KTHREAD)) {
- task_unlock(task);
- rc = -EINVAL;
+ mm = mm_access(task, PTRACE_MODE_ATTACH);
+ if (!mm || IS_ERR(mm)) {
+ if (!mm)
+ rc = -EINVAL;
+ else
+ rc = -EPERM;
goto put_task_struct;
}

- atomic_inc(&mm->mm_users);
- task_unlock(task);
-
for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
rc = process_vm_rw_single_vec(
(unsigned long)rvec[i].iov_base, rvec[i].iov_len,

2012-02-01 06:10:04

by Cong Wang

[permalink] [raw]

Subject: Re: [PATCH RESEND] Fix race in process_vm_rw_core

On 02/01/2012 01:53 PM, Christopher Yeoh wrote:
> + mm = mm_access(task, PTRACE_MODE_ATTACH);
> + if (!mm || IS_ERR(mm)) {
> + if (!mm)
> + rc = -EINVAL;
> + else
> + rc = -EPERM;
> goto put_task_struct;

If IS_ERR(mm), you need to return PTR_ERR(mm), rather than -EPERM.

And, is -EINVAL proper for !mm case? For me, -ENOENT is better.

2012-02-01 06:10:35

by Linus Torvalds

[permalink] [raw]

Subject: Re: [PATCH RESEND] Fix race in process_vm_rw_core

On Tue, Jan 31, 2012 at 9:53 PM, Christopher Yeoh <[email protected]> wrote:
> + ? ? ? mm = mm_access(task, PTRACE_MODE_ATTACH);
> + ? ? ? if (!mm || IS_ERR(mm)) {
> + ? ? ? ? ? ? ? if (!mm)
> + ? ? ? ? ? ? ? ? ? ? ? rc = -EINVAL;
> + ? ? ? ? ? ? ? else
> + ? ? ? ? ? ? ? ? ? ? ? rc = -EPERM;
> ? ? ? ? ? ? ? ?goto put_task_struct;

Btw, do you really want to throw away the error code?

IOW, maybe it should be

rc = IS_ERR(mm) ? PTR_ERR(mm) : -EINVAL;

or something? Instead of forcing the EPERM? And the -EINVAL might be
better off as an ESRCH? I dunno.

Right now mm_access() returns EACCES for a permission problem, not
EPERM. EACCES is the normal filesystem access "Permission denied",
while EPERM is "Operation not permitted". I do agree that EPERM tends
to go with non-filesystem system calls (like ptrace() or sending
signals to a process that you aren't allowed to), so I do agree that
EPERM makes perfect sense within the context of process_vm_rw().

HOWEVER.

mm_access() can actually also return EINTR. Now, admittedly it only
returns that for killable signals, so user applications should never
*see* that, but it's a special-case example of other errors at least
being possible. What if we ever have some situation where we end up
needing a temporary memory allocation and could return ENOMEM?

Right now you turn all errors into EPERM, whether they were really
about permission problems or not. And that just makes be a bit
nervous. I wonder if we wouldn't be better off just returning EACCES
(and any possible future problem) than try so hard to always return
EPERM?

I dunno. I don't have any really *strong* opinion and I see why you do
it, but my gut feel is still that the error number change really does
seem a bit arbitrary.

Linus

2012-02-01 08:08:44

by Christopher Yeoh

[permalink] [raw]

Subject: Re: [PATCH RESEND] Fix race in process_vm_rw_core

On Tue, 31 Jan 2012 22:10:13 -0800
Linus Torvalds <[email protected]> wrote:

> On Tue, Jan 31, 2012 at 9:53 PM, Christopher Yeoh <[email protected]>
> wrote:
> > + mm = mm_access(task, PTRACE_MODE_ATTACH);
> > + if (!mm || IS_ERR(mm)) {
> > + if (!mm)
> > + rc = -EINVAL;
> > + else
> > + rc = -EPERM;
> > goto put_task_struct;
>
> Btw, do you really want to throw away the error code?
>
> IOW, maybe it should be
>
> rc = IS_ERR(mm) ? PTR_ERR(mm) : -EINVAL;
>
> or something? Instead of forcing the EPERM? And the -EINVAL might be
> better off as an ESRCH? I dunno.

Yea, that probably makes more sense.

> Right now you turn all errors into EPERM, whether they were really
> about permission problems or not. And that just makes be a bit
> nervous. I wonder if we wouldn't be better off just returning EACCES
> (and any possible future problem) than try so hard to always return
> EPERM?
>
> I dunno. I don't have any really *strong* opinion and I see why you do
> it, but my gut feel is still that the error number change really does
> seem a bit arbitrary.

I'm not super attached to returning EPERM instead of EACCES though I do
think it would look at bit odd from a user of the syscall view. Is it
too ugly to do:

rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
if (rc == -EACCES)
rc = -EPERM;

That way we avoid the problem of overwriting EINTR and if there are
changes in the future which return different error codes we won't
override those.

If you think it is too ugly then I'll give in and just return EACESS.
Just should to get it settled before too many people start using the
syscalls.

Chris
--
[email protected]

2012-02-02 01:04:51

by Christopher Yeoh

[permalink] [raw]

Subject: Re: [PATCH RESEND] Fix race in process_vm_rw_core

On Wed, 1 Feb 2012 18:38:36 +1030
[email protected] wrote:

> On Tue, 31 Jan 2012 22:10:13 -0800
> Linus Torvalds <[email protected]> wrote:
>
> > On Tue, Jan 31, 2012 at 9:53 PM, Christopher Yeoh
> > <[email protected]> wrote:
> > > + mm = mm_access(task, PTRACE_MODE_ATTACH);
> > > + if (!mm || IS_ERR(mm)) {
> > > + if (!mm)
> > > + rc = -EINVAL;
> > > + else
> > > + rc = -EPERM;
> > > goto put_task_struct;
> >
> > Btw, do you really want to throw away the error code?
> >
> > IOW, maybe it should be
> >
> > rc = IS_ERR(mm) ? PTR_ERR(mm) : -EINVAL;
> >
> > or something? Instead of forcing the EPERM? And the -EINVAL might be
> > better off as an ESRCH? I dunno.
>
> Yea, that probably makes more sense.
>
> > Right now you turn all errors into EPERM, whether they were really
> > about permission problems or not. And that just makes be a bit
> > nervous. I wonder if we wouldn't be better off just returning EACCES
> > (and any possible future problem) than try so hard to always return
> > EPERM?
> >
> > I dunno. I don't have any really *strong* opinion and I see why you
> > do it, but my gut feel is still that the error number change really
> > does seem a bit arbitrary.
>
> I'm not super attached to returning EPERM instead of EACCES though I
> do think it would look at bit odd from a user of the syscall view. Is
> it too ugly to do:
>
> rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
> if (rc == -EACCES)
> rc = -EPERM;
>
> That way we avoid the problem of overwriting EINTR and if there are
> changes in the future which return different error codes we won't
> override those.
>

Here's an updated patch doing the above of mapping EACCES but only
EACCES to EPERM.

--
[email protected]
Signed-off-by: Chris Yeoh <[email protected]>
fs/proc/base.c | 20 --------------------
include/linux/sched.h | 6 ++++++
kernel/fork.c | 20 ++++++++++++++++++++
mm/process_vm_access.c | 23 +++++++++--------------
4 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9cde9edf..2773412 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}

-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
- struct mm_struct *mm;
- int err;
-
- err = mutex_lock_killable(&task->signal->cred_guard_mutex);
- if (err)
- return ERR_PTR(err);
-
- mm = get_task_mm(task);
- if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
- mmput(mm);
- mm = ERR_PTR(-EACCES);
- }
- mutex_unlock(&task->signal->cred_guard_mutex);
-
- return mm;
-}
-
struct mm_struct *mm_for_maps(struct task_struct *task)
{
return mm_access(task, PTRACE_MODE_READ);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2234985..7d379a6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2259,6 +2259,12 @@ static inline void mmdrop(struct mm_struct * mm)
extern void mmput(struct mm_struct *);
/* Grab a reference to a task's mm, if it is not already going away */
extern struct mm_struct *get_task_mm(struct task_struct *task);
+/*
+ * Grab a reference to a task's mm, if it is not already going away
+ * and ptrace_may_access with the mode parameter passed to it
+ * succeeds.
+ */
+extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode);
/* Remove the current tasks stale references to the old mm_struct */
extern void mm_release(struct task_struct *, struct mm_struct *);
/* Allocate a new mm structure and copy contents from tsk->mm */
diff --git a/kernel/fork.c b/kernel/fork.c
index 051f090..1b2ef3c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,26 @@ struct mm_struct *get_task_mm(struct task_struct *task)
}
EXPORT_SYMBOL_GPL(get_task_mm);

+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+ struct mm_struct *mm;
+ int err;
+
+ err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+ if (err)
+ return ERR_PTR(err);
+
+ mm = get_task_mm(task);
+ if (mm && mm != current->mm &&
+ !ptrace_may_access(task, mode)) {
+ mmput(mm);
+ mm = ERR_PTR(-EACCES);
+ }
+ mutex_unlock(&task->signal->cred_guard_mutex);
+
+ return mm;
+}
+
/* Please note the differences between mmput and mm_release.
* mmput is called whenever we stop holding onto a mm_struct,
* error success whatever.
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index e920aa3..c20ff48 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -298,23 +298,18 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
goto free_proc_pages;
}

- task_lock(task);
- if (__ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
- task_unlock(task);
- rc = -EPERM;
- goto put_task_struct;
- }
- mm = task->mm;
-
- if (!mm || (task->flags & PF_KTHREAD)) {
- task_unlock(task);
- rc = -EINVAL;
+ mm = mm_access(task, PTRACE_MODE_ATTACH);
+ if (!mm || IS_ERR(mm)) {
+ rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
+ /*
+ * Explicitly map EACCES to EPERM as EPERM is a more a
+ * appropriate error code for process_vw_readv/writev
+ */
+ if (rc == -EACCES)
+ rc = -EPERM;
goto put_task_struct;
}

- atomic_inc(&mm->mm_users);
- task_unlock(task);
-
for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
rc = process_vm_rw_single_vec(
(unsigned long)rvec[i].iov_base, rvec[i].iov_len,