Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751747AbbEYRq1 (ORCPT ); Mon, 25 May 2015 13:46:27 -0400 Received: from relay.parallels.com ([195.214.232.42]:44493 "EHLO relay.parallels.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751723AbbEYRqX (ORCPT ); Mon, 25 May 2015 13:46:23 -0400 Message-ID: <1432575977.6866.46.camel@odin.com> Subject: [PATCH RFC 13/13] core: Nest tasklist_lock into task_struct::kin_lock From: Kirill Tkhai To: CC: Oleg Nesterov , Andrew Morton , Ingo Molnar , "Peter Zijlstra" , Michal Hocko , "Rik van Riel" , Ionut Alexa , Peter Hurley , Kirill Tkhai Date: Mon, 25 May 2015 20:46:17 +0300 In-Reply-To: <20150525162722.5171.15901.stgit@pro> References: <20150525162722.5171.15901.stgit@pro> Content-Type: text/plain; charset="UTF-8" X-Mailer: Evolution 3.12.9-1+b1 MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Originating-IP: [10.30.16.109] Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6547 Lines: 213 tasklist_lock is global lock, and it should be locked as small time as possible. So that, lets nest it under kin_lock to minimize this time. Signed-off-by: Kirill Tkhai --- fs/exec.c | 20 ++++++++++---------- kernel/exit.c | 13 +++++++------ kernel/fork.c | 15 +++++++-------- kernel/sys.c | 17 ++++++++--------- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 4de7770..0b44752 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -931,8 +931,7 @@ static int de_thread(struct task_struct *tsk) for (;;) { threadgroup_change_begin(tsk); - write_lock_irq(&tasklist_lock); - write_real_parent_lock(tsk); + write_real_parent_lock_irq(tsk); /* * Do this under real_parent's kin_lock to ensure * that exit_notify() can't miss ->group_exit_task. @@ -941,8 +940,7 @@ static int de_thread(struct task_struct *tsk) if (likely(leader->exit_state)) break; __set_current_state(TASK_KILLABLE); - write_real_parent_unlock(tsk); - write_unlock_irq(&tasklist_lock); + write_real_parent_unlock_irq(tsk); threadgroup_change_end(tsk); schedule(); if (unlikely(__fatal_signal_pending(tsk))) @@ -964,6 +962,8 @@ static int de_thread(struct task_struct *tsk) BUG_ON(!same_thread_group(leader, tsk)); BUG_ON(has_group_leader_pid(tsk)); + + write_lock(&tasklist_lock); /* * An exec() starts a new thread group with the * TGID of the previous thread group. Rehash the @@ -982,6 +982,7 @@ static int de_thread(struct task_struct *tsk) transfer_pid(leader, tsk, PIDTYPE_SID); list_replace_rcu(&leader->tasks, &tsk->tasks); + write_unlock(&tasklist_lock); list_replace_init(&leader->sibling, &tsk->sibling); tsk->group_leader = tsk; @@ -1003,8 +1004,7 @@ static int de_thread(struct task_struct *tsk) */ if (unlikely(leader->ptrace)) __wake_up_parent(leader, leader->parent); - write_real_parent_unlock(tsk); - write_unlock_irq(&tasklist_lock); + write_real_parent_unlock_irq(tsk); threadgroup_change_end(tsk); release_task(leader); @@ -1034,13 +1034,13 @@ static int de_thread(struct task_struct *tsk) memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); - write_lock_irq(&tasklist_lock); - write_parent_and_real_parent_lock(tsk); + write_parent_and_real_parent_lock_irq(tsk); + write_lock(&tasklist_lock); spin_lock(&oldsighand->siglock); rcu_assign_pointer(tsk->sighand, newsighand); spin_unlock(&oldsighand->siglock); - write_parent_and_real_parent_unlock(tsk); - write_unlock_irq(&tasklist_lock); + write_unlock(&tasklist_lock); + write_parent_and_real_parent_unlock_irq(tsk); __cleanup_sighand(oldsighand); } diff --git a/kernel/exit.c b/kernel/exit.c index 99d7aa3..44641aa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -179,9 +179,9 @@ void release_task(struct task_struct *p) proc_flush_task(p); - write_lock_irq(&tasklist_lock); - write_parent_and_real_parent_lock(p); + write_parent_and_real_parent_lock_irq(p); ptrace_release_task(p); + write_lock(&tasklist_lock); __exit_signal(p); write_unlock(&tasklist_lock); @@ -642,11 +642,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead) forget_original_parent(tsk, &dead); - read_lock_irq(&tasklist_lock); - write_parent_and_real_parent_lock(tsk); - if (group_dead) + write_parent_and_real_parent_lock_irq(tsk); + if (group_dead) { + read_lock(&tasklist_lock); kill_orphaned_pgrp(tsk->group_leader, NULL); - read_unlock(&tasklist_lock); + read_unlock(&tasklist_lock); + } if (unlikely(tsk->ptrace)) { int sig = thread_group_leader(tsk) && diff --git a/kernel/fork.c b/kernel/fork.c index 63e225b..b7cb176 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1520,19 +1520,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling! */ - write_lock_irq(&tasklist_lock); - /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { - write_real_parent_lock(current); + write_real_parent_lock_irq(current); p->real_parent = current->real_parent; p->parent_exec_id = current->parent_exec_id; } else { - write_lock(¤t->kin_lock); + write_lock_irq(¤t->kin_lock); p->real_parent = current; p->parent_exec_id = current->self_exec_id; } + write_lock(&tasklist_lock); spin_lock(¤t->sighand->siglock); /* @@ -1552,8 +1551,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, recalc_sigpending(); if (signal_pending(current)) { spin_unlock(¤t->sighand->siglock); - write_real_parent_unlock(p); - write_unlock_irq(&tasklist_lock); + write_unlock(&tasklist_lock); + write_real_parent_unlock_irq(p); retval = -ERESTARTNOINTR; goto bad_fork_free_pid; } @@ -1594,9 +1593,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); - write_real_parent_unlock(p); + write_unlock(&tasklist_lock); + write_real_parent_unlock_irq(p); syscall_tracepoint_update(p); - write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); diff --git a/kernel/sys.c b/kernel/sys.c index 783aec4..013be3e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -931,17 +931,16 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) return -EINVAL; rcu_read_lock(); - /* From this point forward we keep holding onto the tasklist lock - * so that our parent does not change from under us. -DaveM - */ - write_lock_irq(&tasklist_lock); - err = -ESRCH; p = find_task_by_vpid(pid); if (!p) goto out2; - write_real_parent_lock(p); + write_real_parent_lock_irq(p); + if (p->flags & PF_EXITPIDONE) + goto out3; + + write_lock(&tasklist_lock); err = -EINVAL; if (!thread_group_leader(p)) @@ -983,10 +982,10 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) err = 0; out: - /* All (almost) paths lead to here, thus we are safe. -DaveM */ - write_real_parent_unlock(p); + write_unlock(&tasklist_lock); +out3: + write_real_parent_unlock_irq(p); out2: - write_unlock_irq(&tasklist_lock); rcu_read_unlock(); return err; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/