Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752032Ab0FDFyH (ORCPT ); Fri, 4 Jun 2010 01:54:07 -0400 Received: from fgwmail6.fujitsu.co.jp ([192.51.44.36]:37869 "EHLO fgwmail6.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751809Ab0FDFyF (ORCPT ); Fri, 4 Jun 2010 01:54:05 -0400 X-SecurityPolicyCheck-FJ: OK by FujitsuOutboundMailChecker v1.3.1 Date: Fri, 4 Jun 2010 14:49:43 +0900 From: KAMEZAWA Hiroyuki To: KAMEZAWA Hiroyuki Cc: "linux-mm@kvack.org" , "akpm@linux-foundation.org" , "linux-kernel@vger.kernel.org" , "balbir@linux.vnet.ibm.com" , "nishimura@mxp.nes.nec.co.jp" Subject: [PATCH 2/2] memcg clean up waiting move acct v2 Message-Id: <20100604144943.760312ea.kamezawa.hiroyu@jp.fujitsu.com> In-Reply-To: <20100604144553.54b52c9e.kamezawa.hiroyu@jp.fujitsu.com> References: <20100603114837.6e6d4d0f.kamezawa.hiroyu@jp.fujitsu.com> <20100604144553.54b52c9e.kamezawa.hiroyu@jp.fujitsu.com> Organization: FUJITSU Co. LTD. X-Mailer: Sylpheed 3.0.2 (GTK+ 2.10.14; i686-pc-mingw32) Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4322 Lines: 146 No functional changes but rebased onto mmotm-2010-0603 and tested again. == From: KAMEZAWA Hiroyuki Now, for checking a memcg is under task-account-moving, we do css_tryget() against mc.to and mc.from. But this is just complicating things. This patch makes the check easier. This patch adds a spinlock to move_charge_struct and guard modification of mc.to and mc.from. By this, we don't have to think about complicated races arount this not-critical path. Changelog: 2010-06-04 - rebased onto mmotm-2010-06-03 Changelog: - removed disable/enable irq. - removed unnecessary css_put() Signed-off-by: KAMEZAWA Hiroyuki --- mm/memcontrol.c | 49 ++++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) Index: mmotm-2.6.34-Jun6/mm/memcontrol.c =================================================================== --- mmotm-2.6.34-Jun6.orig/mm/memcontrol.c +++ mmotm-2.6.34-Jun6/mm/memcontrol.c @@ -268,6 +268,7 @@ enum move_type { /* "mc" and its members are protected by cgroup_mutex */ static struct move_charge_struct { + spinlock_t lock; /* for from, to, moving_task */ struct mem_cgroup *from; struct mem_cgroup *to; unsigned long precharge; @@ -276,6 +277,7 @@ static struct move_charge_struct { struct task_struct *moving_task; /* a task moving charges */ wait_queue_head_t waitq; /* a waitq for other context */ } mc = { + .lock = __SPIN_LOCK_UNLOCKED(mc.lock), .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq), }; @@ -1076,26 +1078,24 @@ static unsigned int get_swappiness(struc static bool mem_cgroup_under_move(struct mem_cgroup *mem) { - struct mem_cgroup *from = mc.from; - struct mem_cgroup *to = mc.to; + struct mem_cgroup *from; + struct mem_cgroup *to; bool ret = false; - - if (from == mem || to == mem) - return true; - - if (!from || !to || !mem->use_hierarchy) - return false; - - rcu_read_lock(); - if (css_tryget(&from->css)) { - ret = css_is_ancestor(&from->css, &mem->css); - css_put(&from->css); - } - if (!ret && css_tryget(&to->css)) { - ret = css_is_ancestor(&to->css, &mem->css); - css_put(&to->css); - } - rcu_read_unlock(); + /* + * Unlike task_move routines, we access mc.to, mc.from not under + * mutual exclusion by cgroup_mutex. Here, we take spinlock instead. + */ + spin_lock(&mc.lock); + from = mc.from; + to = mc.to; + if (!from) + goto unlock; + if (from == mem || to == mem + || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css)) + || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css))) + ret = true; +unlock: + spin_unlock(&mc.lock); return ret; } @@ -4448,11 +4448,13 @@ static int mem_cgroup_precharge_mc(struc static void mem_cgroup_clear_mc(void) { + struct mem_cgroup *from = mc.from; + struct mem_cgroup *to = mc.to; + /* we must uncharge all the leftover precharges from mc.to */ if (mc.precharge) { __mem_cgroup_cancel_charge(mc.to, mc.precharge); mc.precharge = 0; - memcg_oom_recover(mc.to); } /* * we didn't uncharge from mc.from at mem_cgroup_move_account(), so @@ -4461,7 +4463,6 @@ static void mem_cgroup_clear_mc(void) if (mc.moved_charge) { __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); mc.moved_charge = 0; - memcg_oom_recover(mc.from); } /* we must fixup refcnts and charges */ if (mc.moved_swap) { @@ -4486,9 +4487,13 @@ static void mem_cgroup_clear_mc(void) mc.moved_swap = 0; } + spin_lock(&mc.lock); mc.from = NULL; mc.to = NULL; mc.moving_task = NULL; + spin_unlock(&mc.lock); + memcg_oom_recover(from); + memcg_oom_recover(to); wake_up_all(&mc.waitq); } @@ -4517,12 +4522,14 @@ static int mem_cgroup_can_attach(struct VM_BUG_ON(mc.moved_charge); VM_BUG_ON(mc.moved_swap); VM_BUG_ON(mc.moving_task); + spin_lock(&mc.lock); mc.from = from; mc.to = mem; mc.precharge = 0; mc.moved_charge = 0; mc.moved_swap = 0; mc.moving_task = current; + spin_unlock(&mc.lock); ret = mem_cgroup_precharge_mc(mm); if (ret) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/