2019-06-28 08:52:55

by Wanpeng Li

[permalink] [raw]
Subject: [PATCH v4 2/2] sched/nohz: Optimize get_nohz_timer_target()

From: Wanpeng Li <[email protected]>

On a machine where CPU 0 is used for housekeeping and the other 39 CPUs in
the same socket are in nohz_full mode, ftrace shows a huge amount of time
burned in the loop searching for the nearest busy housekeeper CPU:

2) | get_nohz_timer_target() {
2) 0.240 us | housekeeping_test_cpu();
2) 0.458 us | housekeeping_test_cpu();

...

2) 0.292 us | housekeeping_test_cpu();
2) 0.240 us | housekeeping_test_cpu();
2) 0.227 us | housekeeping_any_cpu();
2) + 43.460 us | }

This patch optimizes the search logic by looking for the nearest busy
housekeeper CPU only within the housekeeping cpumask, which reduces the
worst-case search time from ~44us to < 10us in my testing. In addition, the
last iterated busy housekeeper can become a random candidate, while the
current CPU is a better fallback if it is a housekeeper.

Reviewed-by: Frederic Weisbecker <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
v1 -> v2:
* current CPU is a better fallback if it is a housekeeper

kernel/sched/core.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 102dfcf..04a0f6a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -539,27 +539,32 @@ void resched_cpu(int cpu)
*/
int get_nohz_timer_target(void)
{
- int i, cpu = smp_processor_id();
+ int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;

- if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
- return cpu;
+ if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
+ if (!idle_cpu(cpu))
+ return cpu;
+ default_cpu = cpu;
+ }

rcu_read_lock();
for_each_domain(cpu, sd) {
- for_each_cpu(i, sched_domain_span(sd)) {
+ for_each_cpu_and(i, sched_domain_span(sd),
+ housekeeping_cpumask(HK_FLAG_TIMER)) {
if (cpu == i)
continue;

- if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) {
+ if (!idle_cpu(i)) {
cpu = i;
goto unlock;
}
}
}

- if (!housekeeping_cpu(cpu, HK_FLAG_TIMER))
- cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+ if (default_cpu == -1)
+ default_cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
+ cpu = default_cpu;
unlock:
rcu_read_unlock();
return cpu;
--
1.8.3.1


2019-06-28 08:53:28

by Wanpeng Li

[permalink] [raw]
Subject: [PATCH v4 1/2] sched/isolation: Prefer housekeeping cpu in local node

From: Wanpeng Li <[email protected]>

In a real product setup, there will be housekeeping CPUs in each node. It
is preferable to do housekeeping from the local node, falling back to the
global online cpumask if no housekeeping CPU is found in the local node.

Reviewed-by: Frederic Weisbecker <[email protected]>
Reviewed-by: Srikar Dronamraju <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
v3 -> v4:
* have a static function for sched_numa_find_closest
* cleanup sched_numa_find_closest comments
v2 -> v3:
* add sched_numa_find_closest comments
v1 -> v2:
* introduce sched_numa_find_closest

kernel/sched/isolation.c | 12 ++++++++++--
kernel/sched/sched.h | 8 +++++---
kernel/sched/topology.c | 20 ++++++++++++++++++++
3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 7b9e1e0..191f751 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -16,9 +16,17 @@ static unsigned int housekeeping_flags;

int housekeeping_any_cpu(enum hk_flags flags)
{
- if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
+ int cpu;
+
+ if (static_branch_unlikely(&housekeeping_overridden)) {
+ if (housekeeping_flags & flags) {
+ cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
+ if (cpu < nr_cpu_ids)
+ return cpu;
+
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+ }
+ }
return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3..ec65d90 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1261,16 +1261,18 @@ enum numa_topology_type {
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
extern void sched_init_numa(void);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline void sched_init_numa(void) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+ return nr_cpu_ids;
+}
#endif

#ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f751ce0..4eea2c9 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1724,6 +1724,26 @@ void sched_domains_numa_masks_clear(unsigned int cpu)
}
}

+/*
+ * sched_numa_find_closest() - given the NUMA topology, find the cpu
+ * closest to @cpu from @cpumask.
+ * cpumask: cpumask to find a cpu from
+ * cpu: cpu to be close to
+ *
+ * returns: cpu, or nr_cpu_ids when nothing found.
+ */
+int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+ int i, j = cpu_to_node(cpu);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ }
+ return nr_cpu_ids;
+}
+
#endif /* CONFIG_NUMA */

static int __sdt_alloc(const struct cpumask *cpu_map)
--
2.7.4

2019-07-08 07:33:42

by Wanpeng Li

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] sched/isolation: Prefer housekeeping cpu in local node

Kindly ping for these two patches, :)
On Fri, 28 Jun 2019 at 16:51, Wanpeng Li <[email protected]> wrote:
>
> From: Wanpeng Li <[email protected]>
>
> In a real product setup, there will be housekeeping CPUs in each node. It
> is preferable to do housekeeping from the local node, falling back to the
> global online cpumask if no housekeeping CPU is found in the local node.
>
> Reviewed-by: Frederic Weisbecker <[email protected]>
> Reviewed-by: Srikar Dronamraju <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Srikar Dronamraju <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> v3 -> v4:
> * have a static function for sched_numa_find_closest
> * cleanup sched_numa_find_closest comments
> v2 -> v3:
> * add sched_numa_find_closest comments
> v1 -> v2:
> * introduce sched_numa_find_closest
>
> kernel/sched/isolation.c | 12 ++++++++++--
> kernel/sched/sched.h | 8 +++++---
> kernel/sched/topology.c | 20 ++++++++++++++++++++
> 3 files changed, 35 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
> index 7b9e1e0..191f751 100644
> --- a/kernel/sched/isolation.c
> +++ b/kernel/sched/isolation.c
> @@ -16,9 +16,17 @@ static unsigned int housekeeping_flags;
>
> int housekeeping_any_cpu(enum hk_flags flags)
> {
> - if (static_branch_unlikely(&housekeeping_overridden))
> - if (housekeeping_flags & flags)
> + int cpu;
> +
> + if (static_branch_unlikely(&housekeeping_overridden)) {
> + if (housekeeping_flags & flags) {
> + cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
> + if (cpu < nr_cpu_ids)
> + return cpu;
> +
> return cpumask_any_and(housekeeping_mask, cpu_online_mask);
> + }
> + }
> return smp_processor_id();
> }
> EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 802b1f3..ec65d90 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1261,16 +1261,18 @@ enum numa_topology_type {
> extern enum numa_topology_type sched_numa_topology_type;
> extern int sched_max_numa_distance;
> extern bool find_numa_distance(int distance);
> -#endif
> -
> -#ifdef CONFIG_NUMA
> extern void sched_init_numa(void);
> extern void sched_domains_numa_masks_set(unsigned int cpu);
> extern void sched_domains_numa_masks_clear(unsigned int cpu);
> +extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
> #else
> static inline void sched_init_numa(void) { }
> static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
> static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
> +static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
> +{
> + return nr_cpu_ids;
> +}
> #endif
>
> #ifdef CONFIG_NUMA_BALANCING
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index f751ce0..4eea2c9 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -1724,6 +1724,26 @@ void sched_domains_numa_masks_clear(unsigned int cpu)
> }
> }
>
> +/*
> + * sched_numa_find_closest() - given the NUMA topology, find the cpu
> + * closest to @cpu from @cpumask.
> + * cpumask: cpumask to find a cpu from
> + * cpu: cpu to be close to
> + *
> + * returns: cpu, or nr_cpu_ids when nothing found.
> + */
> +int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
> +{
> + int i, j = cpu_to_node(cpu);
> +
> + for (i = 0; i < sched_domains_numa_levels; i++) {
> + cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
> + if (cpu < nr_cpu_ids)
> + return cpu;
> + }
> + return nr_cpu_ids;
> +}
> +
> #endif /* CONFIG_NUMA */
>
> static int __sdt_alloc(const struct cpumask *cpu_map)
> --
> 2.7.4
>

Subject: [tip:sched/core] sched/isolation: Prefer housekeeping CPU in local node

Commit-ID: e0e8d4911ed2695b12c3a01c15634000ede9bc73
Gitweb: https://git.kernel.org/tip/e0e8d4911ed2695b12c3a01c15634000ede9bc73
Author: Wanpeng Li <[email protected]>
AuthorDate: Fri, 28 Jun 2019 16:51:41 +0800
Committer: Ingo Molnar <[email protected]>
CommitDate: Thu, 25 Jul 2019 15:51:55 +0200

sched/isolation: Prefer housekeeping CPU in local node

In a real product setup, there will be housekeeping CPUs in each node. It
is preferable to do housekeeping from the local node, falling back to the
global online cpumask if no housekeeping CPU is found in the local node.

Signed-off-by: Wanpeng Li <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Frederic Weisbecker <[email protected]>
Reviewed-by: Srikar Dronamraju <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
kernel/sched/isolation.c | 12 ++++++++++--
kernel/sched/sched.h | 8 +++++---
kernel/sched/topology.c | 20 ++++++++++++++++++++
3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index ccb28085b114..9fcb2a695a41 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -22,9 +22,17 @@ EXPORT_SYMBOL_GPL(housekeeping_enabled);

int housekeeping_any_cpu(enum hk_flags flags)
{
- if (static_branch_unlikely(&housekeeping_overridden))
- if (housekeeping_flags & flags)
+ int cpu;
+
+ if (static_branch_unlikely(&housekeeping_overridden)) {
+ if (housekeeping_flags & flags) {
+ cpu = sched_numa_find_closest(housekeeping_mask, smp_processor_id());
+ if (cpu < nr_cpu_ids)
+ return cpu;
+
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
+ }
+ }
return smp_processor_id();
}
EXPORT_SYMBOL_GPL(housekeeping_any_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index aaca0e743776..16126efd14ed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1262,16 +1262,18 @@ enum numa_topology_type {
extern enum numa_topology_type sched_numa_topology_type;
extern int sched_max_numa_distance;
extern bool find_numa_distance(int distance);
-#endif
-
-#ifdef CONFIG_NUMA
extern void sched_init_numa(void);
extern void sched_domains_numa_masks_set(unsigned int cpu);
extern void sched_domains_numa_masks_clear(unsigned int cpu);
+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
#else
static inline void sched_init_numa(void) { }
static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+ return nr_cpu_ids;
+}
#endif

#ifdef CONFIG_NUMA_BALANCING
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f751ce0b783e..4eea2c9bc732 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1724,6 +1724,26 @@ void sched_domains_numa_masks_clear(unsigned int cpu)
}
}

+/*
+ * sched_numa_find_closest() - given the NUMA topology, find the cpu
+ * closest to @cpu from @cpumask.
+ * cpumask: cpumask to find a cpu from
+ * cpu: cpu to be close to
+ *
+ * returns: cpu, or nr_cpu_ids when nothing found.
+ */
+int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
+{
+ int i, j = cpu_to_node(cpu);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
+ if (cpu < nr_cpu_ids)
+ return cpu;
+ }
+ return nr_cpu_ids;
+}
+
#endif /* CONFIG_NUMA */

static int __sdt_alloc(const struct cpumask *cpu_map)