From: Ingo Molnar
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Peter Zijlstra, Paul Turner, Lee Schermerhorn, Christoph Lameter,
    Rik van Riel, Mel Gorman, Andrew Morton, Andrea Arcangeli,
    Linus Torvalds, Thomas Gleixner, Johannes Weiner, Hugh Dickins
Subject: [PATCH 27/27] sched: Use the best-buddy 'ideal cpu' in balancing decisions
Date: Mon, 19 Nov 2012 03:14:44 +0100
Message-Id: <1353291284-2998-28-git-send-email-mingo@kernel.org>
X-Mailer: git-send-email 1.7.11.7
In-Reply-To: <1353291284-2998-1-git-send-email-mingo@kernel.org>
References: <1353291284-2998-1-git-send-email-mingo@kernel.org>

Now that we have a notion of (one of the) best CPUs we interrelate
with in terms of memory usage, use that information to improve
can_migrate_task() balancing decisions: allow the migration to occur
even if we are locally cache-hot, if we are on another node and want
to migrate towards our best buddy's node.

( Note that this is not hard affinity - if the imbalance persists
  long enough then the scheduler will eventually balance tasks
  anyway, to maximize CPU utilization. )

Cc: Peter Zijlstra
Cc: Linus Torvalds
Cc: Andrew Morton
Cc: Andrea Arcangeli
Cc: Rik van Riel
Cc: Mel Gorman
Cc: Hugh Dickins
Signed-off-by: Ingo Molnar
---
 kernel/sched/fair.c     | 35 ++++++++++++++++++++++++++++++++---
 kernel/sched/features.h |  2 ++
 2 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 67f7fd2..24a5588 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -840,6 +840,14 @@ static void task_numa_migrate(struct task_struct *p, int next_cpu)
 	p->numa_migrate_seq = 0;
 }
 
+static int task_ideal_cpu(struct task_struct *p)
+{
+	if (!sched_feat(IDEAL_CPU))
+		return -1;
+
+	return p->ideal_cpu;
+}
+
 /*
  * Called for every full scan - here we consider switching to a new
  * shared buddy, if the one we found during this scan is good enough:
@@ -1028,7 +1036,7 @@ out_hit:
 	 * but don't stop the discovery of process level sharing
 	 * either:
 	 */
-	if (this_task->mm == last_task->mm)
+	if (sched_feat(IDEAL_CPU_THREAD_BIAS) && this_task->mm == last_task->mm)
 		pages *= 2;
 
 	this_task->shared_buddy_faults_curr += pages;
@@ -1189,6 +1197,7 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
 }
 #else /* !CONFIG_NUMA_BALANCING: */
 #ifdef CONFIG_SMP
+static inline int task_ideal_cpu(struct task_struct *p) { return -1; }
 static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p) { }
 #endif
 static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p) { }
@@ -4064,6 +4073,7 @@ struct lb_env {
 static void move_task(struct task_struct *p, struct lb_env *env)
 {
 	deactivate_task(env->src_rq, p, 0);
+	set_task_cpu(p, env->dst_cpu);
 	activate_task(env->dst_rq, p, 0);
 	check_preempt_curr(env->dst_rq, p, 0);
 
@@ -4242,15 +4252,17 @@ static bool can_migrate_numa_task(struct task_struct *p, struct lb_env *env)
 	 *
 	 * LBF_NUMA_RUN    -- numa only, only allow improvement
 	 * LBF_NUMA_SHARED -- shared only
+	 * LBF_NUMA_IDEAL  -- ideal only
 	 *
 	 * LBF_KEEP_SHARED -- do not touch shared tasks
	 */
 
 	/* a numa run can only move numa tasks about to improve things */
 	if (env->flags & LBF_NUMA_RUN) {
-		if (task_numa_shared(p) < 0)
+		if (task_numa_shared(p) < 0 && task_ideal_cpu(p) < 0)
 			return false;
-		/* can only pull shared tasks */
+
+		/* If we are only allowed to pull shared tasks: */
 		if ((env->flags & LBF_NUMA_SHARED) && !task_numa_shared(p))
 			return false;
 	} else {
@@ -4307,6 +4319,23 @@ static int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!can_migrate_running_task(p, env))
 		return false;
 
+#ifdef CONFIG_NUMA_BALANCING
+	/* If we are only allowed to pull ideal tasks: */
+	if ((task_ideal_cpu(p) >= 0) && (p->shared_buddy_faults > 1000)) {
+		int ideal_node;
+		int dst_node;
+
+		BUG_ON(env->dst_cpu < 0);
+
+		ideal_node = cpu_to_node(p->ideal_cpu);
+		dst_node = cpu_to_node(env->dst_cpu);
+
+		if (ideal_node == dst_node)
+			return true;
+		return false;
+	}
+#endif
+
 	if (env->sd->flags & SD_NUMA)
 		return can_migrate_numa_task(p, env);
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index b75a10d..737d2c8 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -66,6 +66,8 @@ SCHED_FEAT(TTWU_QUEUE, true)
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
+SCHED_FEAT(IDEAL_CPU, true)
+SCHED_FEAT(IDEAL_CPU_THREAD_BIAS, false)
 
 #ifdef CONFIG_NUMA_BALANCING
 /* Do the working set probing faults: */
-- 
1.7.11.7
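
For reference, the override added to can_migrate_task() boils down to a small
three-way decision. Below is a minimal stand-alone sketch of that logic - it is
illustrative user-space C, not kernel code: struct task_model, the cpu_to_node()
stub and the 2-node topology are made up for the example; only the node
comparison and the '> 1000' shared_buddy_faults threshold come from the patch
above.

/*
 * Stand-alone model of the ideal-CPU check in can_migrate_task() above.
 * Illustrative only: cpu_to_node() is stubbed with a hypothetical
 * 2-node / 8-CPU topology, struct task_model is made up.
 */
#include <stdio.h>

#define SHARED_FAULTS_THRESHOLD	1000	/* the '> 1000' check in the patch */

struct task_model {
	int	ideal_cpu;		/* -1: no best buddy known */
	long	shared_buddy_faults;
};

/* Hypothetical stub: CPUs 0-3 on node 0, CPUs 4-7 on node 1. */
static int cpu_to_node(int cpu)
{
	return cpu / 4;
}

/*
 * -1: no opinion, fall through to the normal (cache-hotness etc.) checks
 *  0: reject the pull, the destination is not the buddy's node
 *  1: allow the pull even if the task is locally cache-hot
 */
static int ideal_cpu_decision(const struct task_model *p, int dst_cpu)
{
	if (p->ideal_cpu < 0 || p->shared_buddy_faults <= SHARED_FAULTS_THRESHOLD)
		return -1;

	return cpu_to_node(p->ideal_cpu) == cpu_to_node(dst_cpu) ? 1 : 0;
}

int main(void)
{
	struct task_model hot  = { .ideal_cpu = 5, .shared_buddy_faults = 2000 };
	struct task_model cold = { .ideal_cpu = 5, .shared_buddy_faults = 10 };

	printf("pull towards CPU 6 (buddy's node): %d\n", ideal_cpu_decision(&hot, 6));
	printf("pull towards CPU 1 (other node):   %d\n", ideal_cpu_decision(&hot, 1));
	printf("too few shared faults yet:         %d\n", ideal_cpu_decision(&cold, 6));
	return 0;
}

As with the other SCHED_FEAT() entries, the two new flags should be switchable
at runtime via /sys/kernel/debug/sched_features (IDEAL_CPU / NO_IDEAL_CPU) on
CONFIG_SCHED_DEBUG kernels, which makes it easy to compare balancing behavior
with and without the ideal-CPU bias.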