Hi,
I've rediffed it against the latest bk tree.
This patch favors the local node over the other nodes to prevent unnecessary task migrations, by making a better estimate of the post-migration load (see the inline C comment in the diff).
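For clarity, here is a minimal standalone sketch (hypothetical name, not part of the patch) of the criterion the new comment in the diff describes: a task is only worth moving if the destination node, after gaining the task, would still be less loaded than the source node after losing it.

static inline int worth_migrating(int src_load, int dst_load)
{
	/*
	 * Migrate only if load(dst) + 1 < load(src) - 1, i.e. the
	 * destination stays lighter even after the task has moved.
	 * This is why the patch adds 2, via ((i != pnode) << 1),
	 * to every node other than the task's current node.
	 */
	return dst_load + 1 < src_load - 1;
}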
Frank.
sched.c | 16 ++++++++++------
1 files changed, 10 insertions(+), 6 deletions(-)
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c Thu Aug 21 16:10:41 2003
+++ b/kernel/sched.c Thu Aug 21 16:10:41 2003
@@ -793,29 +793,33 @@
*/
static int sched_best_cpu(struct task_struct *p)
{
- int i, minload, load, best_cpu, node = 0;
+ int i, minload, load, best_cpu, node, pnode;
cpumask_t cpumask;
best_cpu = task_cpu(p);
if (cpu_rq(best_cpu)->nr_running <= 2)
return best_cpu;
- minload = 10000000;
+ minload = INT_MAX;
+ node = pnode = cpu_to_node(best_cpu);
for_each_node_with_cpus(i) {
/*
* Node load is always divided by nr_cpus_node to normalise
* load values in case cpu count differs from node to node.
- * We first multiply node_nr_running by 10 to get a little
- * better resolution.
+ * If node != our node we add the load of the migrating task;
+ * we only want to migrate if:
+ * load(other node) + 1 < load(our node) - 1
+ * The '+ 1' and '- 1' denote the migration.
+ * We multiply by NR_CPUS for best resolution.
*/
- load = 10 * atomic_read(&node_nr_running[i]) / nr_cpus_node(i);
+ load = NR_CPUS * (atomic_read(&node_nr_running[i]) + ((i != pnode) << 1)) / nr_cpus_node(i);
if (load < minload) {
minload = load;
node = i;
}
}
- minload = 10000000;
+ minload = INT_MAX;
cpumask = node_to_cpumask(node);
for (i = 0; i < NR_CPUS; ++i) {
if (!cpu_isset(i, cpumask))