2019-07-01 12:43:21

by Wanpeng Li

[permalink] [raw]
Subject: [PATCH] sched/core: Cache timer busy housekeeping target

From: Wanpeng Li <[email protected]>

Cache the busy housekeeping target for timer instead of researching each time.
This patch reduces the total time of get_nohz_timer_target() for busy housekeeping
CPU from 2u~3us to less than 1us which can be observed by ftrace.

Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
include/linux/hrtimer.h | 1 +
include/linux/sched/nohz.h | 2 +-
kernel/sched/core.c | 6 +++++-
kernel/time/hrtimer.c | 6 ++++--
kernel/time/timer.c | 7 +++++--
5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 2e8957e..0d8b271 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -198,6 +198,7 @@ enum hrtimer_base_type {
struct hrtimer_cpu_base {
raw_spinlock_t lock;
unsigned int cpu;
+ unsigned int last_target_cpu;
unsigned int active_bases;
unsigned int clock_was_set_seq;
unsigned int hres_active : 1,
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91f..0afb094 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -8,7 +8,7 @@

#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_balance_enter_idle(int cpu);
-extern int get_nohz_timer_target(void);
+extern int get_nohz_timer_target(unsigned int cpu);
#else
static inline void nohz_balance_enter_idle(int cpu) { }
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7968e0f..f4ba63e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -549,11 +549,15 @@ void resched_cpu(int cpu)
* selecting an idle CPU will add more delays to the timers than intended
* (as that CPU's timer base may not be uptodate wrt jiffies etc).
*/
-int get_nohz_timer_target(void)
+int get_nohz_timer_target(unsigned int last_target_cpu)
{
int i, cpu = smp_processor_id(), default_cpu = -1;
struct sched_domain *sd;

+ if (!idle_cpu(last_target_cpu) &&
+ housekeeping_cpu(last_target_cpu, HK_FLAG_TIMER))
+ return last_target_cpu;
+
if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
if (!idle_cpu(cpu))
return cpu;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff2..0d49bef 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
int pinned)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
- if (static_branch_likely(&timers_migration_enabled) && !pinned)
- return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+ if (static_branch_likely(&timers_migration_enabled) && !pinned) {
+ base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
+ return &per_cpu(hrtimer_bases, base->last_target_cpu);
+ }
#endif
return base;
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba..6ae045a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -199,6 +199,7 @@ struct timer_base {
unsigned long clk;
unsigned long next_expiry;
unsigned int cpu;
+ unsigned int last_target_cpu;
bool is_idle;
bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
@@ -865,8 +866,10 @@ get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
if (static_branch_likely(&timers_migration_enabled) &&
- !(tflags & TIMER_PINNED))
- return get_timer_cpu_base(tflags, get_nohz_timer_target());
+ !(tflags & TIMER_PINNED)) {
+ base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
+ return get_timer_cpu_base(tflags, base->last_target_cpu);
+ }
#endif
return get_timer_this_cpu_base(tflags);
}
--
2.7.4


2019-07-08 02:26:30

by Wanpeng Li

[permalink] [raw]
Subject: Re: [PATCH] sched/core: Cache timer busy housekeeping target

Ping Frederic, Peterz, any comments?
On Mon, 1 Jul 2019 at 20:24, Wanpeng Li <[email protected]> wrote:
>
> From: Wanpeng Li <[email protected]>
>
> Cache the busy housekeeping target for timer instead of researching each time.
> This patch reduces the total time of get_nohz_timer_target() for busy housekeeping
> CPU from 2u~3us to less than 1us which can be observed by ftrace.
>
> Cc: Ingo Molnar <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Frederic Weisbecker <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> include/linux/hrtimer.h | 1 +
> include/linux/sched/nohz.h | 2 +-
> kernel/sched/core.c | 6 +++++-
> kernel/time/hrtimer.c | 6 ++++--
> kernel/time/timer.c | 7 +++++--
> 5 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
> index 2e8957e..0d8b271 100644
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -198,6 +198,7 @@ enum hrtimer_base_type {
> struct hrtimer_cpu_base {
> raw_spinlock_t lock;
> unsigned int cpu;
> + unsigned int last_target_cpu;
> unsigned int active_bases;
> unsigned int clock_was_set_seq;
> unsigned int hres_active : 1,
> diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
> index 1abe91f..0afb094 100644
> --- a/include/linux/sched/nohz.h
> +++ b/include/linux/sched/nohz.h
> @@ -8,7 +8,7 @@
>
> #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> extern void nohz_balance_enter_idle(int cpu);
> -extern int get_nohz_timer_target(void);
> +extern int get_nohz_timer_target(unsigned int cpu);
> #else
> static inline void nohz_balance_enter_idle(int cpu) { }
> #endif
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7968e0f..f4ba63e 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -549,11 +549,15 @@ void resched_cpu(int cpu)
> * selecting an idle CPU will add more delays to the timers than intended
> * (as that CPU's timer base may not be uptodate wrt jiffies etc).
> */
> -int get_nohz_timer_target(void)
> +int get_nohz_timer_target(unsigned int last_target_cpu)
> {
> int i, cpu = smp_processor_id(), default_cpu = -1;
> struct sched_domain *sd;
>
> + if (!idle_cpu(last_target_cpu) &&
> + housekeeping_cpu(last_target_cpu, HK_FLAG_TIMER))
> + return last_target_cpu;
> +
> if (housekeeping_cpu(cpu, HK_FLAG_TIMER)) {
> if (!idle_cpu(cpu))
> return cpu;
> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> index 41dfff2..0d49bef 100644
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
> int pinned)
> {
> #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> - if (static_branch_likely(&timers_migration_enabled) && !pinned)
> - return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> + if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> + base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> + return &per_cpu(hrtimer_bases, base->last_target_cpu);
> + }
> #endif
> return base;
> }
> diff --git a/kernel/time/timer.c b/kernel/time/timer.c
> index 343c7ba..6ae045a 100644
> --- a/kernel/time/timer.c
> +++ b/kernel/time/timer.c
> @@ -199,6 +199,7 @@ struct timer_base {
> unsigned long clk;
> unsigned long next_expiry;
> unsigned int cpu;
> + unsigned int last_target_cpu;
> bool is_idle;
> bool must_forward_clk;
> DECLARE_BITMAP(pending_map, WHEEL_SIZE);
> @@ -865,8 +866,10 @@ get_target_base(struct timer_base *base, unsigned tflags)
> {
> #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> if (static_branch_likely(&timers_migration_enabled) &&
> - !(tflags & TIMER_PINNED))
> - return get_timer_cpu_base(tflags, get_nohz_timer_target());
> + !(tflags & TIMER_PINNED)) {
> + base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> + return get_timer_cpu_base(tflags, base->last_target_cpu);
> + }
> #endif
> return get_timer_this_cpu_base(tflags);
> }
> --
> 2.7.4
>

2019-07-09 01:41:35

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH] sched/core: Cache timer busy housekeeping target

On Mon, Jul 01, 2019 at 08:24:37PM +0800, Wanpeng Li wrote:
> diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> index 41dfff2..0d49bef 100644
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
> int pinned)
> {
> #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> - if (static_branch_likely(&timers_migration_enabled) && !pinned)
> - return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> + if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> + base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> + return &per_cpu(hrtimer_bases, base->last_target_cpu);


I'm not sure this is exactly what we intend to cache here.

This doesn't return the last CPU for a given timer
(that would be timer->flags & TIMER_CPUMASK) but the last CPU that
was returned when "base" was passed.

First of all, it's always initialized to CPU 0, which is perhaps
not exactly what we want.

Also the result can be very stale and awkward. If for some reason we have:

base(CPU 5)->last_target_cpu = 255

then later a timer is enqueued to CPU 5, the next time we re-enqueue that
timer will be to CPU 255, then the second re-enqueue will be to whatever
value we have in base(CPU 255)->last_target_cpu, etc...

For example imagine that:

base(CPU 255)->last_target_cpu = 5

the timer will bounce between those two very distant CPU 5 and 255. So I think
you rather want "timer->flags & TIMER_CPUMASK". But note that those flags
can also be initialized to zero and therefore CPU 0, while we actually want
the CPU of the timer enqueuer for a first use. And I can't think of a
simple solution to solve that :-( Perhaps keeping the enqueuer CPU as the
first choice (as we do upstream) is still the best thing we have.

Thanks.

2019-07-09 02:18:29

by Wanpeng Li

[permalink] [raw]
Subject: Re: [PATCH] sched/core: Cache timer busy housekeeping target

On Tue, 9 Jul 2019 at 09:39, Frederic Weisbecker <[email protected]> wrote:
>
> On Mon, Jul 01, 2019 at 08:24:37PM +0800, Wanpeng Li wrote:
> > diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
> > index 41dfff2..0d49bef 100644
> > --- a/kernel/time/hrtimer.c
> > +++ b/kernel/time/hrtimer.c
> > @@ -195,8 +195,10 @@ struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
> > int pinned)
> > {
> > #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
> > - if (static_branch_likely(&timers_migration_enabled) && !pinned)
> > - return &per_cpu(hrtimer_bases, get_nohz_timer_target());
> > + if (static_branch_likely(&timers_migration_enabled) && !pinned) {
> > + base->last_target_cpu = get_nohz_timer_target(base->last_target_cpu);
> > + return &per_cpu(hrtimer_bases, base->last_target_cpu);
>
>
> I'm not sure this is exactly what we intend to cache here.
>
> This doesn't return the last CPU for a given timer
> (that would be timer->flags & TIMER_CPUMASK) but the last CPU that
> was returned when "base" was passed.
>
> First of all, it's always initialized to CPU 0, which is perhaps
> not exactly what we want.
>
> Also the result can be very stale and awkward. If for some reason we have:
>
> base(CPU 5)->last_target_cpu = 255
>
> then later a timer is enqueued to CPU 5, the next time we re-enqueue that
> timer will be to CPU 255, then the second re-enqueue will be to whatever
> value we have in base(CPU 255)->last_target_cpu, etc...
>
> For example imagine that:
>
> base(CPU 255)->last_target_cpu = 5
>
> the timer will bounce between those two very distant CPU 5 and 255. So I think
> you rather want "timer->flags & TIMER_CPUMASK". But note that those flags
> can also be initialized to zero and therefore CPU 0, while we actually want
> the CPU of the timer enqueuer for a first use. And I can't think of a
> simple solution to solve that :-( Perhaps keeping the enqueuer CPU as the
> first choice (as we do upstream) is still the best thing we have.

Got it, thanks for pointing out this.

Wanpeng