Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752770Ab1AAEqO (ORCPT ); Fri, 31 Dec 2010 23:46:14 -0500 Received: from 184-106-158-135.static.cloud-ips.com ([184.106.158.135]:51588 "EHLO mail" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751105Ab1AAEqL (ORCPT ); Fri, 31 Dec 2010 23:46:11 -0500 Date: Sat, 1 Jan 2011 04:47:06 +0000 From: "Serge E. Hallyn" To: "Eric W. Biederman" Cc: "Serge E. Hallyn" , LSM , James Morris , Kees Cook , containers@lists.linux-foundation.org, kernel list , Alexey Dobriyan , Michael Kerrisk Subject: Re: [RFC 5/5] user namespaces: Allow ptrace from non-init user namespaces Message-ID: <20110101044706.GB26476@mail.hallyn.com> References: <20101217152246.GA8221@mail.hallyn.com> <20101217152458.GA11162@mail.hallyn.com> <20101217152547.GB11162@mail.hallyn.com> <20101217152625.GC11162@mail.hallyn.com> <20101217152659.GD11162@mail.hallyn.com> <20101217152737.GE11162@mail.hallyn.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.5.20 (2009-06-14) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8510 Lines: 259 Quoting Eric W. Biederman (ebiederm@xmission.com): > "Serge E. Hallyn" writes: > > > ptrace is allowed to tasks in the same user namespace according to > > the usual rules (i.e. the same rules as for two tasks in the init > > user namespace). ptrace is also allowed to a user namespace to > > which the current task the has CAP_SYS_PTRACE capability. > > The uid equality check below is broken. Thanks for the review, Eric. Updated version appended. Assuming there are no big problems with this version, I hope to do setuid/setgid and start the simplest vfs access controls next. Subject: [PATCH 5/5] Allow ptrace from non-init user namespaces ptrace is allowed to tasks in the same user namespace according to the usual rules (i.e. the same rules as for two tasks in the init user namespace). ptrace is also allowed to a user namespace to which the current task the has CAP_SYS_PTRACE capability. Changelog: Dec 31: Address feedback by Eric: . Correct ptrace uid check . Rename may_ptrace_ns to ptrace_capable . Also fix the cap_ptrace checks. Signed-off-by: Serge E. Hallyn --- include/linux/capability.h | 2 + include/linux/user_namespace.h | 9 +++++++ kernel/ptrace.c | 40 +++++++++++++++++++++++---------- kernel/user_namespace.c | 16 +++++++++++++ security/commoncap.c | 48 +++++++++++++++++++++++++++++++++------ 5 files changed, 95 insertions(+), 20 deletions(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index cc3e976..777a166 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -543,6 +543,8 @@ extern const kernel_cap_t __cap_init_eff_set; */ #define has_capability(t, cap) (security_real_capable((t), &init_user_ns, (cap)) == 0) +#define has_ns_capability(t, ns, cap) (security_real_capable((t), (ns), (cap)) == 0) + /** * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) * @t: The task in question diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 8178156..91c4f10 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -39,6 +39,9 @@ static inline void put_user_ns(struct user_namespace *ns) uid_t user_ns_map_uid(struct user_namespace *to, const struct cred *cred, uid_t uid); gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t gid); +int same_or_ancestor_user_ns(struct task_struct *task, + struct task_struct *victim); + #else static inline struct user_namespace *get_user_ns(struct user_namespace *ns) @@ -66,6 +69,12 @@ static inline gid_t user_ns_map_gid(struct user_namespace *to, return gid; } +static inline int same_or_ancestor_user_ns(struct task_struct *task, + struct task_struct *victim) +{ + return 1; +} + #endif #endif /* _LINUX_USER_H */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3..88e3fb3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -116,6 +116,19 @@ int ptrace_check_attach(struct task_struct *child, int kill) return ret; } +static inline int ptrace_capable(struct task_struct *t) +{ + struct user_namespace *ns; + int ret; + + rcu_read_lock(); + ns = task_cred_xxx(t, user)->user_ns; + ret = ns_capable(ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + + return ret; +} + int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; @@ -134,21 +147,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) return 0; rcu_read_lock(); tcred = __task_cred(task); - if ((cred->uid != tcred->euid || - cred->uid != tcred->suid || - cred->uid != tcred->uid || - cred->gid != tcred->egid || - cred->gid != tcred->sgid || - cred->gid != tcred->gid) && - !capable(CAP_SYS_PTRACE)) { - rcu_read_unlock(); - return -EPERM; - } + if (cred->user->user_ns == tcred->user->user_ns && + (cred->uid == tcred->euid && + cred->uid == tcred->suid && + cred->uid == tcred->uid && + cred->gid == tcred->egid && + cred->gid == tcred->sgid && + cred->gid == tcred->gid)) + goto ok; + if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) + goto ok; + rcu_read_unlock(); + return -EPERM; +ok: rcu_read_unlock(); smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); - if (!dumpable && !capable(CAP_SYS_PTRACE)) + if (!dumpable && !ptrace_capable(task)) return -EPERM; return security_ptrace_access_check(task, mode); @@ -198,7 +214,7 @@ int ptrace_attach(struct task_struct *task) goto unlock_tasklist; task->ptrace = PT_PTRACED; - if (capable(CAP_SYS_PTRACE)) + if (ptrace_capable(task)) task->ptrace |= PT_PTRACE_CAP; __ptrace_link(task, current); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 2591583..4b70999 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -126,3 +126,19 @@ gid_t user_ns_map_gid(struct user_namespace *to, const struct cred *cred, gid_t /* No useful relationship so no mapping */ return overflowgid; } + +int same_or_ancestor_user_ns(struct task_struct *task, + struct task_struct *victim) +{ + struct user_namespace *u1 = task_cred_xxx(task, user)->user_ns; + struct user_namespace *u2 = task_cred_xxx(victim, user)->user_ns; + for (;;) { + if (u1 == u2) + return 1; + if (u1 == &init_user_ns) + return 0; + u1 = u1->creator->user_ns; + } + /* We never get here */ + return 0; +} diff --git a/security/commoncap.c b/security/commoncap.c index 9d910e6..8fa4285 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -130,18 +130,34 @@ int cap_settime(struct timespec *ts, struct timezone *tz) * @child: The process to be accessed * @mode: The mode of attachment. * + * If we are in the same or an ancestor user_ns and have all the target + * task's capabilities, then ptrace access is allowed. + * If we have the ptrace capability to the target user_ns, then ptrace + * access is allowed. + * Else denied. + * * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. */ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) { int ret = 0; + struct cred *cred, *tcred; rcu_read_lock(); - if (!cap_issubset(__task_cred(child)->cap_permitted, - current_cred()->cap_permitted) && - !capable(CAP_SYS_PTRACE)) - ret = -EPERM; + cred = current_cred(); + tcred = __task_cred(child); + /* + * The ancestor user_ns check may be gratuitous, as I think + * we've already guaranteed that in kernel/ptrace.c. + */ + if (same_or_ancestor_user_ns(current, child) && + cap_issubset(tcred->cap_permitted, cred->cap_permitted)) + goto out; + if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) + goto out; + ret = -EPERM; +out: rcu_read_unlock(); return ret; } @@ -150,18 +166,34 @@ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) * cap_ptrace_traceme - Determine whether another process may trace the current * @parent: The task proposed to be the tracer * + * If parent is in the same or an ancestor user_ns and has all current's + * capabilities, then ptrace access is allowed. + * If parent has the ptrace capability to current's user_ns, then ptrace + * access is allowed. + * Else denied. + * * Determine whether the nominated task is permitted to trace the current * process, returning 0 if permission is granted, -ve if denied. */ int cap_ptrace_traceme(struct task_struct *parent) { int ret = 0; + struct cred *cred, *tcred; rcu_read_lock(); - if (!cap_issubset(current_cred()->cap_permitted, - __task_cred(parent)->cap_permitted) && - !has_capability(parent, CAP_SYS_PTRACE)) - ret = -EPERM; + cred = __task_cred(parent); + tcred = current_cred(); + /* + * The ancestor user_ns check may be gratuitous, as I think + * we've already guaranteed that in kernel/ptrace.c. + */ + if (same_or_ancestor_user_ns(parent, current) && + cap_issubset(tcred->cap_permitted, cred->cap_permitted)) + goto out; + if (has_ns_capability(parent, tcred->user->user_ns, CAP_SYS_PTRACE)) + goto out; + ret = -EPERM; +out: rcu_read_unlock(); return ret; } -- 1.7.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/