Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751520AbbEYRoh (ORCPT ); Mon, 25 May 2015 13:44:37 -0400 Received: from relay.parallels.com ([195.214.232.42]:43952 "EHLO relay.parallels.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750732AbbEYRof (ORCPT ); Mon, 25 May 2015 13:44:35 -0400 Message-ID: <1432575869.6866.35.camel@odin.com> Subject: [PATCH RFC 03/13] pid_ns: Implement rwlock_t pid_ns::cr_lock for locking child_reaper From: Kirill Tkhai To: CC: Oleg Nesterov , Andrew Morton , Ingo Molnar , "Peter Zijlstra" , Michal Hocko , "Rik van Riel" , Ionut Alexa , Peter Hurley , Kirill Tkhai Date: Mon, 25 May 2015 20:44:29 +0300 In-Reply-To: <20150525162722.5171.15901.stgit@pro> References: <20150525162722.5171.15901.stgit@pro> Content-Type: text/plain; charset="UTF-8" X-Mailer: Evolution 3.12.9-1+b1 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Originating-IP: [10.30.16.109] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4839 Lines: 156 Protects child_reaper modifications. 
Signed-off-by: Kirill Tkhai --- include/linux/pid_namespace.h | 1 + kernel/exit.c | 15 ++++++++++++--- kernel/fork.c | 1 + kernel/pid.c | 10 +++++++++- kernel/pid_namespace.c | 5 +++-- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 918b117..3e59d2a 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -28,6 +28,7 @@ struct pid_namespace { int last_pid; unsigned int nr_hashed; struct task_struct *child_reaper; + rwlock_t cr_lock; struct kmem_cache *pid_cachep; unsigned int level; struct pid_namespace *parent; diff --git a/kernel/exit.c b/kernel/exit.c index a29c35d..a1b2bf7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -455,16 +455,23 @@ static void check_pid_ns_reaper_exit(struct task_struct *father) __acquires(&tasklist_lock) { struct pid_namespace *pid_ns = task_active_pid_ns(father); - struct task_struct *reaper = pid_ns->child_reaper; + struct task_struct *reaper; + + read_lock(&pid_ns->cr_lock); + reaper = pid_ns->child_reaper; + read_unlock(&pid_ns->cr_lock); if (likely(reaper != father)) return; + write_lock(&pid_ns->cr_lock); reaper = find_alive_thread(father); - if (reaper) { + if (reaper) pid_ns->child_reaper = reaper; + write_unlock(&pid_ns->cr_lock); + + if (reaper) return; - } write_unlock_irq(&tasklist_lock); if (unlikely(pid_ns == &init_pid_ns)) { @@ -560,6 +567,7 @@ static void forget_original_parent(struct task_struct *father, if (list_empty(&father->children)) return; + read_lock(&task_active_pid_ns(father)->cr_lock); reaper = find_new_reaper(father); list_for_each_entry(p, &father->children, sibling) { for_each_thread(p, t) { @@ -579,6 +587,7 @@ static void forget_original_parent(struct task_struct *father, reparent_leader(father, p, dead); } list_splice_tail_init(&father->children, &reaper->children); + read_unlock(&task_active_pid_ns(father)->cr_lock); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 0bb88b5..66e31eb 100644 
--- a/kernel/fork.c +++ b/kernel/fork.c @@ -1563,6 +1563,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, init_task_pid(p, PIDTYPE_SID, task_session(current)); if (is_child_reaper(pid)) { + /* Lockless, as we're the only process in ns */ ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } diff --git a/kernel/pid.c b/kernel/pid.c index 4fd07d5..39a8b0a 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -78,6 +78,7 @@ struct pid_namespace init_pid_ns = { .nr_hashed = PIDNS_HASH_ADDING, .level = 0, .child_reaper = &init_task, + .cr_lock = __RW_LOCK_UNLOCKED(&init_pid_ns.cr_lock), .user_ns = &init_user_ns, .ns.inum = PROC_PID_INIT_INO, #ifdef CONFIG_PID_NS @@ -259,6 +260,7 @@ static void delayed_put_pid(struct rcu_head *rhp) void free_pid(struct pid *pid) { /* We can be called with write_lock_irq(&tasklist_lock) held */ + struct task_struct *child_reaper = NULL; int i; unsigned long flags; @@ -274,7 +276,8 @@ void free_pid(struct pid *pid) * is the reaper wake up the reaper. The reaper * may be sleeping in zap_pid_ns_processes(). 
*/ - wake_up_process(ns->child_reaper); + child_reaper = ns->child_reaper; + get_task_struct(child_reaper); break; case PIDNS_HASH_ADDING: /* Handle a fork failure of the first process */ @@ -288,6 +291,11 @@ void free_pid(struct pid *pid) } spin_unlock_irqrestore(&pidmap_lock, flags); + if (child_reaper) { + wake_up_process(child_reaper); + put_task_struct(child_reaper); + } + for (i = 0; i <= pid->level; i++) free_pidmap(pid->numbers + i); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index a65ba13..bbaa072 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); ns->nr_hashed = PIDNS_HASH_ADDING; + rwlock_init(&ns->cr_lock); INIT_WORK(&ns->proc_work, proc_cleanup_work); set_bit(0, ns->pidmap[0].page); @@ -324,9 +325,9 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) return -EINVAL; } - read_lock(&tasklist_lock); + read_lock(&pid_ns->cr_lock); force_sig(SIGKILL, pid_ns->child_reaper); - read_unlock(&tasklist_lock); + read_unlock(&pid_ns->cr_lock); do_exit(0); -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/