2024-02-25 22:55:30

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 00/16 v3] timers/nohz cleanups and hotplug reorganization

Hi,

Here are some cleanups here and there and also some more rational tick
related CPU hotplug code reorganization.

Changes since v3:

* Rebase against latest tip:timers/core (after tmigr introduction)
* New patch "timers: Assert no next dyntick timer look-up while CPU"

git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks.git
timers/hotplug-v3

HEAD: a0d432ebab0b7f75e03049d5d2baabff7f39ee1d

Frederic Weisbecker (14):
tick: Remove useless oneshot ifdeffery
tick: Use IS_ENABLED() whenever possible
tick: s/tick_nohz_stop_sched_tick/tick_nohz_full_stop_tick
tick: No need to clear ts->next_tick again
tick: Start centralizing tick related CPU hotplug operations
tick: Move tick cancellation up to CPUHP_AP_TICK_DYING
tick: Move broadcast cancellation up to CPUHP_AP_TICK_DYING
tick: Assume the tick can't be stopped in NOHZ_MODE_INACTIVE mode
tick: Move got_idle_tick away from common flags
tick: Move individual bit features to debuggable mask accesses
tick: Split nohz and highres features from nohz_mode
tick: Shut down low-res tick from dying CPU
tick: Assume timekeeping is correctly handed over upon last offline
idle call
timers: Assert no next dyntick timer look-up while CPU is offline

Peng Liu (2):
tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and
tick_setup_sched_timer()
tick/nohz: Remove duplicate between lowres and highres handlers

include/linux/cpuhotplug.h | 1 +
include/linux/tick.h | 16 +-
kernel/cpu.c | 11 +-
kernel/sched/idle.c | 1 -
kernel/time/hrtimer.c | 4 +-
kernel/time/tick-common.c | 31 ++--
kernel/time/tick-internal.h | 2 +
kernel/time/tick-sched.c | 297 +++++++++++++++++-------------------
kernel/time/tick-sched.h | 40 ++---
kernel/time/timer.c | 6 +-
kernel/time/timer_list.c | 10 +-
11 files changed, 215 insertions(+), 204 deletions(-)

--
2.43.0



2024-02-25 22:55:44

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 02/16] tick/nohz: Remove duplicate between lowres and highres handlers

From: Peng Liu <[email protected]>

tick_nohz_lowres_handler() does the same work as
tick_nohz_highres_handler() plus the clockevent device reprogramming, so
make the former reuse the latter and rename it accordingly.

Signed-off-by: Peng Liu <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 96 +++++++++++++++-------------------------
1 file changed, 36 insertions(+), 60 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d4901654148d..88c992f48126 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -255,6 +255,40 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
+
+/*
+ * We rearm the timer until we get disabled by the idle code.
+ * Called with interrupts disabled.
+ */
+static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
+{
+ struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer);
+ struct pt_regs *regs = get_irq_regs();
+ ktime_t now = ktime_get();
+
+ tick_sched_do_timer(ts, now);
+
+ /*
+ * Do not call when we are not in IRQ context and have
+ * no valid 'regs' pointer
+ */
+ if (regs)
+ tick_sched_handle(ts, regs);
+ else
+ ts->next_tick = 0;
+
+ /*
+ * In dynticks mode, tick reprogram is deferred:
+ * - to the idle task if in dynticks-idle
+ * - to IRQ exit if in full-dynticks.
+ */
+ if (unlikely(ts->tick_stopped))
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward(timer, now, TICK_NSEC);
+
+ return HRTIMER_RESTART;
+}
#endif

#ifdef CONFIG_NO_HZ_FULL
@@ -1429,31 +1463,15 @@ void tick_nohz_idle_exit(void)
* at the clockevent level. hrtimer can't be used instead, because its
* infrastructure actually relies on the tick itself as a backend in
* low-resolution mode (see hrtimer_run_queues()).
- *
- * This low-resolution handler still makes use of some hrtimer APIs meanwhile
- * for convenience with expiration calculation and forwarding.
*/
static void tick_nohz_lowres_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();

dev->next_event = KTIME_MAX;

- tick_sched_do_timer(ts, now);
- tick_sched_handle(ts, regs);
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (likely(!ts->tick_stopped)) {
- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+ if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART))
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- }
-
}

static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
@@ -1522,48 +1540,6 @@ void tick_irq_enter(void)
tick_nohz_irq_enter();
}

-/*
- * High resolution timer specific code
- */
-#ifdef CONFIG_HIGH_RES_TIMERS
-/*
- * We rearm the timer until we get disabled by the idle code.
- * Called with interrupts disabled.
- */
-static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer)
-{
- struct tick_sched *ts =
- container_of(timer, struct tick_sched, sched_timer);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();
-
- tick_sched_do_timer(ts, now);
-
- /*
- * Do not call when we are not in IRQ context and have
- * no valid 'regs' pointer
- */
- if (regs)
- tick_sched_handle(ts, regs);
- else
- ts->next_tick = 0;
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (unlikely(ts->tick_stopped))
- return HRTIMER_NORESTART;
-
- hrtimer_forward(timer, now, TICK_NSEC);
-
- return HRTIMER_RESTART;
-}
-#else
-#define tick_nohz_highres_handler NULL
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

@@ -1587,7 +1563,7 @@ void tick_setup_sched_timer(int mode)
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
- ts->sched_timer.function = tick_nohz_highres_handler;
+ ts->sched_timer.function = tick_nohz_handler;

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
--
2.43.0


2024-02-25 22:55:45

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 01/16] tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and tick_setup_sched_timer()

From: Peng Liu <[email protected]>

The ts->sched_timer initialization work of tick_nohz_switch_to_nohz()
is almost the same as that of tick_setup_sched_timer(), so adjust the
latter to get it reused by tick_nohz_switch_to_nohz().

This also makes low-res mode sched_timer benefit from the tick skew
boot option.

Signed-off-by: Peng Liu <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/hrtimer.c | 2 +-
kernel/time/tick-sched.c | 39 ++++++++++++++++++---------------------
kernel/time/tick-sched.h | 2 +-
3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 1fd106af747d..95f1f351dcd9 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void)
base->hres_active = 1;
hrtimer_resolution = HIGH_RES_NSEC;

- tick_setup_sched_timer();
+ tick_setup_sched_timer(NOHZ_MODE_HIGHRES);
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 417bb7f880ca..d4901654148d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1471,9 +1471,6 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
*/
static void tick_nohz_switch_to_nohz(void)
{
- struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t next;
-
if (!tick_nohz_enabled)
return;

@@ -1482,16 +1479,9 @@ static void tick_nohz_switch_to_nohz(void)

/*
* Recycle the hrtimer in 'ts', so we can share the
- * hrtimer_forward_now() function with the highres code.
+ * highres code.
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- /* Get the next period */
- next = tick_init_jiffy_update();
-
- hrtimer_set_expires(&ts->sched_timer, next);
- hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
- tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
+ tick_setup_sched_timer(NOHZ_MODE_LOWRES);
}

static inline void tick_nohz_irq_enter(void)
@@ -1570,7 +1560,11 @@ static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer)

return HRTIMER_RESTART;
}
+#else
+#define tick_nohz_highres_handler NULL
+#endif /* CONFIG_HIGH_RES_TIMERS */

+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

static int __init skew_tick(char *str)
@@ -1583,15 +1577,17 @@ early_param("skew_tick", skew_tick);

/**
* tick_setup_sched_timer - setup the tick emulation timer
+ * @mode: tick_nohz_mode to setup for
*/
-void tick_setup_sched_timer(void)
+void tick_setup_sched_timer(int mode)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t now = ktime_get();

/* Emulate tick processing via per-CPU hrtimers: */
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- ts->sched_timer.function = tick_nohz_highres_handler;
+
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ ts->sched_timer.function = tick_nohz_highres_handler;

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -1604,13 +1600,14 @@ void tick_setup_sched_timer(void)
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}

- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
- tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
+ hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
+ else
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+ tick_nohz_activate(ts, mode);
}
-#endif /* HIGH_RES_TIMERS */

-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@@ -1632,7 +1629,7 @@ void tick_cancel_sched_timer(int cpu)
ts->idle_calls = idle_calls;
ts->idle_sleeps = idle_sleeps;
}
-#endif
+#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */

/*
* Async notification about clocksource changes
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 5ed5a9d41d5a..35808bbb8a47 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -102,7 +102,7 @@ struct tick_sched {

extern struct tick_sched *tick_get_tick_sched(int cpu);

-extern void tick_setup_sched_timer(void);
+extern void tick_setup_sched_timer(int mode);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
extern void tick_cancel_sched_timer(int cpu);
#else
--
2.43.0


2024-02-25 22:55:59

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 03/16] tick: Remove useless oneshot ifdeffery

tick-sched.c is only built when CONFIG_TICK_ONESHOT=y, which is selected
only if CONFIG_NO_HZ_COMMON=y or CONFIG_HIGH_RES_TIMERS=y. Therefore
the related ifdeferry in this file is needless and can be removed.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 88c992f48126..27aaecb2e50c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -43,7 +43,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
return &per_cpu(tick_cpu_sched, cpu);
}

-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
* The time when the last jiffy update happened. Write access must hold
* jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
@@ -289,7 +288,6 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)

return HRTIMER_RESTART;
}
-#endif

#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
@@ -635,7 +633,7 @@ void __init tick_nohz_init(void)
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
}
-#endif
+#endif /* #ifdef CONFIG_NO_HZ_FULL */

/*
* NOHZ - aka dynamic tick functionality
@@ -1540,7 +1538,6 @@ void tick_irq_enter(void)
tick_nohz_irq_enter();
}

-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

static int __init skew_tick(char *str)
@@ -1605,7 +1602,6 @@ void tick_cancel_sched_timer(int cpu)
ts->idle_calls = idle_calls;
ts->idle_sleeps = idle_sleeps;
}
-#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */

/*
* Async notification about clocksource changes
--
2.43.0


2024-02-25 22:56:10

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 04/16] tick: Use IS_ENABLED() whenever possible

Avoid ifdeferry if it can be converted to IS_ENABLED() whenever possible

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-common.c | 4 +---
kernel/time/tick-sched.c | 14 +++++---------
2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e9138cd7a0f5..0084e1ae2583 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -111,15 +111,13 @@ void tick_handle_periodic(struct clock_event_device *dev)

tick_periodic(cpu);

-#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
/*
* The cpu might have transitioned to HIGHRES or NOHZ mode via
* update_process_times() -> run_local_timers() ->
* hrtimer_run_queues().
*/
- if (dev->event_handler != tick_handle_periodic)
+ if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic)
return;
-#endif

if (!clockevent_state_oneshot(dev))
return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 27aaecb2e50c..4e34967edc0d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -186,7 +186,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();

-#ifdef CONFIG_NO_HZ_COMMON
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
@@ -197,13 +196,13 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
tick_do_timer_cpu = cpu;
}
-#endif

/* Check if jiffies need an update */
if (tick_do_timer_cpu == cpu)
@@ -230,7 +229,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)

static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
-#ifdef CONFIG_NO_HZ_COMMON
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
@@ -239,7 +237,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too many ticks.
*/
- if (ts->tick_stopped) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
@@ -250,7 +248,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
*/
ts->next_tick = 0;
}
-#endif
+
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
@@ -1587,10 +1585,8 @@ void tick_cancel_sched_timer(int cpu)
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

-# ifdef CONFIG_HIGH_RES_TIMERS
- if (ts->sched_timer.base)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
-# endif

idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
--
2.43.0


2024-02-25 22:56:22

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 05/16] tick: s/tick_nohz_stop_sched_tick/tick_nohz_full_stop_tick

tick_nohz_stop_sched_tick() is only about nohz_full and not about
dynticks-idle. Reflect that in the function name to avoid confusion.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4e34967edc0d..9f75f5621965 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1040,7 +1040,7 @@ static void tick_nohz_retain_tick(struct tick_sched *ts)
}

#ifdef CONFIG_NO_HZ_FULL
-static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
@@ -1075,7 +1075,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts,
int cpu = smp_processor_id();

if (can_stop_full_tick(cpu, ts))
- tick_nohz_stop_sched_tick(ts, cpu);
+ tick_nohz_full_stop_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, now);
#endif
--
2.43.0


2024-02-25 22:56:34

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 06/16] tick: No need to clear ts->next_tick again

The tick sched structure is already cleared from
tick_cancel_sched_timer(), so there is no need to clear that field
again.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 5 -----
1 file changed, 5 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9f75f5621965..b17895de26b9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1143,11 +1143,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- /*
- * Make sure the CPU doesn't get fooled by obsolete tick
- * deadline if it comes back online later.
- */
- ts->next_tick = 0;
return false;
}

--
2.43.0


2024-02-25 22:56:48

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 07/16] tick: Start centralizing tick related CPU hotplug operations

During the CPU offlining process, the various timer tick features are
shut down from scattered places, sometimes from teardown callbacks on
stop machine, sometimes through explicit calls, sometimes from the
control CPU after the CPU died. The reason why these shutdown operations
are spread around is not always clear and it makes the tick lifecycle
hard to follow.

The tick should be shut down in order from highest to lowest level:

On stop machine from the dying CPU (high-level):

1) Hand-over the timekeeping duty (tick_handover_do_timer())
2) Cancel the tick implementation called by the clockevent callback
(tick_cancel_sched_timer())
3) Shutdown broadcasting (tick_offline_cpu() / tick_broadcast_offline())

On stop machine from the dying CPU (low-level):

4) Shutdown clockevents drivers (CPUHP_AP_*_TIMER_STARTING states)

From the control CPU after the CPU died (low-level):

5) Shutdown/unregister/cleanup clockevents for the dead CPU
(tick_cleanup_dead_cpu())

Instead the current order is 2, 4 (both from CPU hotplug states), then
1 and 3 through direct calls. This layout and order don't make much
sense. The operations 1, 2, 3 should be gathered together and in order.

Sort this situation with creating a new TICK shut-down CPU hotplug state
and start with introducing the timekeeping duty hand-over there. The
state must precede hrtimers migration because the tick hrtimer will be
stopped from it in a further patch.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/cpuhotplug.h | 1 +
include/linux/tick.h | 8 ++++++--
kernel/cpu.c | 8 +++++---
kernel/time/tick-common.c | 17 +++++++++++------
4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7651904c6db5..35e78ddb2b37 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -184,6 +184,7 @@ enum cpuhp_state {
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_HRTIMERS_DYING,
+ CPUHP_AP_TICK_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 716d17f31c45..afff4c207bd8 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -19,16 +19,20 @@ extern void __init tick_init(void);
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
-extern void tick_handover_do_timer(void);
extern void tick_cleanup_dead_cpu(int cpu);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
-static inline void tick_handover_do_timer(void) { }
static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */

+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
+extern int tick_cpu_dying(unsigned int cpu);
+#else
+#define tick_cpu_dying NULL
+#endif
+
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
extern void tick_freeze(void);
extern void tick_unfreeze(void);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e6ec3ba4950b..263508073da8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
*/
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

- /* Give up timekeeping duties */
- tick_handover_do_timer();
/* Remove CPU from timer broadcasting */
tick_offline_cpu(cpu);
/* Park the stopper thread */
@@ -2205,7 +2203,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = hrtimers_cpu_dying,
},
-
+ [CPUHP_AP_TICK_DYING] = {
+ .name = "tick:dying",
+ .startup.single = NULL,
+ .teardown.single = tick_cpu_dying,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0084e1ae2583..a89ef450fda7 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -397,15 +397,20 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
/*
- * Transfer the do_timer job away from a dying cpu.
- *
- * Called with interrupts disabled. No locking required. If
- * tick_do_timer_cpu is owned by this cpu, nothing can change it.
+ * Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
-void tick_handover_do_timer(void)
+int tick_cpu_dying(unsigned int dying_cpu)
{
- if (tick_do_timer_cpu == smp_processor_id())
+ /*
+ * If the current CPU is the timekeeper, it's the only one that
+ * can safely hand over its duty. Also all online CPUs are in
+ * stop machine, guaranteed not to be idle, therefore it's safe
+ * to pick any online successor.
+ */
+ if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
+
+ return 0;
}

/*
--
2.43.0


2024-02-25 22:56:58

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 08/16] tick: Move tick cancellation up to CPUHP_AP_TICK_DYING

The tick hrtimer is cancelled right before hrtimers are migrated. This
is done from the hrtimer subsystem even though it shouldn't know about
its actual users.

Move instead the tick hrtimer cancellation to the relevant CPU hotplug
state that aims at centralizing high level tick shutdown operations so
that the related flow is easy to follow.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/hrtimer.c | 2 --
kernel/time/tick-common.c | 2 ++
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 95f1f351dcd9..3e95474199ac 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2229,8 +2229,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
struct hrtimer_cpu_base *old_base, *new_base;

- tick_cancel_sched_timer(dying_cpu);
-
old_base = this_cpu_ptr(&hrtimer_bases);
new_base = &per_cpu(hrtimer_bases, ncpu);

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a89ef450fda7..b4af8c743b73 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,6 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);

+ tick_cancel_sched_timer(dying_cpu);
+
return 0;
}

--
2.43.0


2024-02-25 22:57:59

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 13/16] tick: Split nohz and highres features from nohz_mode

The nohz mode field tells about low resolution nohz mode or high
resolution nohz mode but it doesn't tell about high resolution non-nohz
mode.

In order to retrieve the latter state, tick_cancel_sched_timer() must
fiddle with struct hrtimer's internals to guess if the tick has been
initialized in high resolution.

Move instead the nohz mode field information into the tick flags and
provide two new bits: one to know if the tick is in nohz mode and
another one to know if the tick is in high resolution. The combination
of those two flags provides all the needed informations to determine
which of the three tick modes is running.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/hrtimer.c | 2 +-
kernel/time/tick-sched.c | 32 +++++++++++++++++---------------
kernel/time/tick-sched.h | 13 +++++--------
kernel/time/timer_list.c | 5 +++--
4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3e95474199ac..70625dff62ce 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void)
base->hres_active = 1;
hrtimer_resolution = HIGH_RES_NSEC;

- tick_setup_sched_timer(NOHZ_MODE_HIGHRES);
+ tick_setup_sched_timer(true);
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4aa7ce04a72c..dcb9f0394182 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -831,7 +831,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);

- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -1040,14 +1040,14 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
hrtimer_cancel(&ts->sched_timer);
else
tick_program_event(KTIME_MAX, 1);
return;
}

- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -1108,7 +1108,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!tick_nohz_full_cpu(smp_processor_id()))
return;

- if (ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return;

__tick_nohz_full_update_tick(ts, ktime_get());
@@ -1168,7 +1168,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}

- if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+ if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;

if (need_resched())
@@ -1487,11 +1487,11 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev)
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}

-static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
+static inline void tick_nohz_activate(struct tick_sched *ts)
{
if (!tick_nohz_enabled)
return;
- ts->nohz_mode = mode;
+ tick_sched_flag_set(ts, TS_FLAG_NOHZ);
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
timers_update_nohz();
@@ -1512,7 +1512,7 @@ static void tick_nohz_switch_to_nohz(void)
* Recycle the hrtimer in 'ts', so we can share the
* highres code.
*/
- tick_setup_sched_timer(NOHZ_MODE_LOWRES);
+ tick_setup_sched_timer(false);
}

static inline void tick_nohz_irq_enter(void)
@@ -1540,7 +1540,7 @@ static inline void tick_nohz_irq_enter(void)

static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
-static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
+static inline void tick_nohz_activate(struct tick_sched *ts) { }

#endif /* CONFIG_NO_HZ_COMMON */

@@ -1567,15 +1567,17 @@ early_param("skew_tick", skew_tick);
* tick_setup_sched_timer - setup the tick emulation timer
* @mode: tick_nohz_mode to setup for
*/
-void tick_setup_sched_timer(int mode)
+void tick_setup_sched_timer(bool hrtimer)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

/* Emulate tick processing via per-CPU hrtimers: */
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) {
+ tick_sched_flag_set(ts, TS_FLAG_HIGHRES);
ts->sched_timer.function = tick_nohz_handler;
+ }

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -1589,11 +1591,11 @@ void tick_setup_sched_timer(int mode)
}

hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer)
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
else
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- tick_nohz_activate(ts, mode);
+ tick_nohz_activate(ts);
}

void tick_cancel_sched_timer(int cpu)
@@ -1602,7 +1604,7 @@ void tick_cancel_sched_timer(int cpu)
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base)
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
hrtimer_cancel(&ts->sched_timer);

idle_sleeptime = ts->idle_sleeptime;
@@ -1652,7 +1654,7 @@ int tick_check_oneshot_change(int allow_nohz)
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;

- if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return 0;

if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 07a4c0144c47..bbe72a078985 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -14,12 +14,6 @@ struct tick_device {
enum tick_device_mode mode;
};

-enum tick_nohz_mode {
- NOHZ_MODE_INACTIVE,
- NOHZ_MODE_LOWRES,
- NOHZ_MODE_HIGHRES,
-};
-
/* The CPU is in the tick idle mode */
#define TS_FLAG_INIDLE BIT(0)
/* The idle tick has been stopped */
@@ -31,6 +25,10 @@ enum tick_nohz_mode {
#define TS_FLAG_IDLE_ACTIVE BIT(2)
/* CPU was the last one doing do_timer before going idle */
#define TS_FLAG_DO_TIMER_LAST BIT(3)
+/* NO_HZ is enabled */
+#define TS_FLAG_NOHZ BIT(4)
+/* High resolution tick mode */
+#define TS_FLAG_HIGHRES BIT(5)

/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
@@ -84,7 +82,6 @@ struct tick_sched {
ktime_t idle_entrytime;

/* Tick stop */
- enum tick_nohz_mode nohz_mode;
unsigned long last_jiffies;
u64 timer_expires_base;
u64 timer_expires;
@@ -107,7 +104,7 @@ struct tick_sched {

extern struct tick_sched *tick_get_tick_sched(int cpu);

-extern void tick_setup_sched_timer(int mode);
+extern void tick_setup_sched_timer(bool hrtimer);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
extern void tick_cancel_sched_timer(int cpu);
#else
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 38f81d836fc5..1c311c46da50 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -152,7 +152,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)

{
struct tick_sched *ts = tick_get_tick_sched(cpu);
- P(nohz_mode);
+ P_flag(nohz, TS_FLAG_NOHZ);
+ P_flag(highres, TS_FLAG_HIGHRES);
P_ns(last_tick);
P_flag(tick_stopped, TS_FLAG_STOPPED);
P(idle_jiffies);
@@ -259,7 +260,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)

static inline void timer_list_header(struct seq_file *m, u64 now)
{
- SEQ_printf(m, "Timer List Version: v0.9\n");
+ SEQ_printf(m, "Timer List Version: v0.10\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");
--
2.43.0


2024-02-25 22:58:13

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 14/16] tick: Shut down low-res tick from dying CPU

The timekeeping duty is handed over from the outgoing CPU within stop
machine. This works well if CONFIG_NO_HZ_COMMON=n or the tick is in
high-res mode. However in low-res dynticks mode, the tick isn't
cancelled until the clockevent is shut down, which can happen later. The
tick may therefore fire again once IRQs are re-enabled on stop machine
and until IRQs are disabled for good upon the last call to idle.

That's so many opportunities for a timekeeper to go idle and the
outgoing CPU to take over that duty. This is why
tick_nohz_idle_stop_tick() is called one last time on idle if the CPU
is seen offline: so that the timekeeping duty is handed over again in
case the CPU has re-taken the duty.

This means there are two timekeeping handovers on CPU down hotplug with
different undocumented constraints and purposes:

1) A handover on stop machine for !dynticks || highres. All online CPUs
are guaranteed to be non-idle and the timekeeping duty can be safely
handed-over. The hrtimer tick is cancelled so it is guaranteed that in
dynticks mode the outgoing CPU won't take again the duty.

2) A handover on last idle call for dynticks && lowres. Setting the
duty to TICK_DO_TIMER_NONE makes sure that a CPU will take over the
timekeeping.

Prepare for consolidating the handover to a single place (the first one)
with shutting down the low-res tick as well from
tick_cancel_sched_timer() as well. This will simplify the handover and
unify the tick cancellation between high-res and low-res.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-common.c | 3 ++-
kernel/time/tick-sched.c | 32 +++++++++++++++++++++++++-------
kernel/time/tick-sched.h | 4 ++--
3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 522414089c0d..9cd09eea06d6 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,7 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);

- tick_cancel_sched_timer(dying_cpu);
+ /* Make sure the CPU won't try to retake the timekeeping duty */
+ tick_sched_timer_dying(dying_cpu);

/* Remove CPU from timer broadcasting */
tick_offline_cpu(dying_cpu);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index dcb9f0394182..89d16b8ea2c4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -308,6 +308,14 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
return HRTIMER_RESTART;
}

+static void tick_sched_timer_cancel(struct tick_sched *ts)
+{
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+ hrtimer_cancel(&ts->sched_timer);
+ else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ tick_program_event(KTIME_MAX, 1);
+}
+
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
@@ -1040,10 +1048,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
- hrtimer_cancel(&ts->sched_timer);
- else
- tick_program_event(KTIME_MAX, 1);
+ tick_sched_timer_cancel(ts);
return;
}

@@ -1598,14 +1603,27 @@ void tick_setup_sched_timer(bool hrtimer)
tick_nohz_activate(ts);
}

-void tick_cancel_sched_timer(int cpu)
+/*
+ * Shut down the tick and make sure the CPU won't try to retake the timekeeping
+ * duty before disabling IRQs in idle for the last time.
+ */
+void tick_sched_timer_dying(int cpu)
{
+ struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ struct clock_event_device *dev = td->evtdev;
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

- if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
- hrtimer_cancel(&ts->sched_timer);
+ /* This must happen before hrtimers are migrated! */
+ tick_sched_timer_cancel(ts);
+
+ /*
+ * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
+ * make sure not to call low-res tick handler.
+ */
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ dev->event_handler = clockevents_handle_noop;

idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bbe72a078985..58d8d1c49dd3 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -106,9 +106,9 @@ extern struct tick_sched *tick_get_tick_sched(int cpu);

extern void tick_setup_sched_timer(bool hrtimer);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
+extern void tick_sched_timer_dying(int cpu);
#else
-static inline void tick_cancel_sched_timer(int cpu) { }
+static inline void tick_sched_timer_dying(int cpu) { }
#endif

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
--
2.43.0


2024-02-25 22:58:39

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 16/16] timers: Assert no next dyntick timer look-up while CPU is offline

The next timer (re-)evaluation, with the purpose of entering/updating
the dyntick mode, can happen from 3 sites and none of them are relevant
while the CPU is offline:

1) The idle loop:
a) From the quick check helping the cpuidle governor to heuristically
predict the best C-state.
b) While stopping the tick.

But if the CPU is offline, the tick has been cancelled and there is
consequently no need to further stop the tick.

2) Remote expiry: when a CPU remotely expires global timers on behalf of
another CPU, the latter target's next timer is re-evaluated
afterwards. However remote expîry doesn't happen on offline CPUs.

3) IRQ exit: on nohz_full mode, the tick is (re-)evaluated on IRQ exit.
But full dynticks is disabled on offline CPUs.

Therefore it is safe to assume that no next dyntick timer lookup can
be performed on offline CPUs.

Assert this expectation to report any surprise.

Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/timer.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4f4930da6448..e69e75d3858c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2233,10 +2233,10 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
bool idle_is_possible;

/*
- * Pretend that there is no timer pending if the cpu is offline.
- * Possible pending timers will be migrated later to an active cpu.
+ * When the CPU is offline, the tick is cancelled and nothing is supposed
+ * to try to stop it.
*/
- if (cpu_is_offline(smp_processor_id())) {
+ if (WARN_ON_ONCE(cpu_is_offline(smp_processor_id()))) {
if (idle)
*idle = true;
return tevt.local;
--
2.43.0


2024-02-25 23:06:36

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 09/16] tick: Move broadcast cancellation up to CPUHP_AP_TICK_DYING

The broadcast shutdown code is executed through a random explicit call
within stop machine from the outgoing CPU.

However the tick broadcast is a midware between the tick callback and
the clocksource, therefore it makes more sense to shut it down after the
tick callback and before the clocksource drivers.

Move it instead to the common tick shutdown CPU hotplug state where
related operations can be ordered from highest to lowest level.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/tick.h | 6 ------
kernel/cpu.c | 2 --
kernel/time/tick-common.c | 3 +++
kernel/time/tick-internal.h | 2 ++
4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index afff4c207bd8..c7840ae8ebaf 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -73,12 +73,6 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode);
static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
#endif /* BROADCAST */

-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
-#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
-#endif
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
#else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 263508073da8..5a8ad4f5ccf3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
*/
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

- /* Remove CPU from timer broadcasting */
- tick_offline_cpu(cpu);
/* Park the stopper thread */
stop_machine_park(cpu);
return 0;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b4af8c743b73..522414089c0d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -412,6 +412,9 @@ int tick_cpu_dying(unsigned int dying_cpu)

tick_cancel_sched_timer(dying_cpu);

+ /* Remove CPU from timer broadcasting */
+ tick_offline_cpu(dying_cpu);
+
return 0;
}

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index a3243c4ac45f..5f2105e637bd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -142,8 +142,10 @@ static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_
#endif /* !(BROADCAST && ONESHOT) */

#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_offline_cpu(unsigned int cpu);
extern void tick_broadcast_offline(unsigned int cpu);
#else
+static inline void tick_offline_cpu(unsigned int cpu) { }
static inline void tick_broadcast_offline(unsigned int cpu) { }
#endif

--
2.43.0


2024-02-25 23:06:45

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 10/16] tick: Assume the tick can't be stopped in NOHZ_MODE_INACTIVE mode

The full-nohz update function checks if the nohz mode is active before
proceeding. It considers one exception though: if the tick is already
stopped even though the nohz mode is inactive, it still moves on in
order to update/restart the tick if needed.

However in order for the tick to be stopped, the nohz_mode has to be
either NOHZ_MODE_LOWRES or NOHZ_MODE_HIGHRES. Therefore it doesn't make
sense to test if the tick is stopped before verifying NOHZ_MODE_INACTIVE
mode.

Remove the needless related condition.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b17895de26b9..b79f5403433b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1086,7 +1086,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!tick_nohz_full_cpu(smp_processor_id()))
return;

- if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ if (ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;

__tick_nohz_full_update_tick(ts, ktime_get());
--
2.43.0


2024-02-25 23:16:37

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 12/16] tick: Move individual bit features to debuggable mask accesses

The individual bitfields of struct tick_sched must be modified from
IRQs disabled places, otherwise local modifications can race due to them
sharing the same memory storage.

The recent move of the "got_idle_tick" bitfield to its own storage shows
that the use of these bitfields, as pretty as they look, can be as much
error prone.

In order to avoid future issues of the like and make sure that those
bitfields are safely accessed, move those flags to an explicit mask
along with a mutator function performing the basic IRQs disabled sanity
check.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.c | 88 +++++++++++++++++++++++++---------------
kernel/time/tick-sched.h | 23 +++++++----
kernel/time/timer_list.c | 5 ++-
3 files changed, 73 insertions(+), 43 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b79f5403433b..4aa7ce04a72c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -180,6 +180,26 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}

+static inline int tick_sched_flag_test(struct tick_sched *ts,
+ unsigned long flag)
+{
+ return !!(ts->flags & flag);
+}
+
+static inline void tick_sched_flag_set(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags |= flag;
+}
+
+static inline void tick_sched_flag_clear(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags &= ~flag;
+}
+
#define MAX_STALLED_JIFFIES 5

static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
@@ -223,7 +243,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
}
}

- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
ts->got_idle_tick = 1;
}

@@ -237,7 +257,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too many ticks.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
@@ -279,7 +300,7 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
* - to the idle task if in dynticks-idle
* - to IRQ exit if in full-dynticks.
*/
- if (unlikely(ts->tick_stopped))
+ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED)))
return HRTIMER_NORESTART;

hrtimer_forward(timer, now, TICK_NSEC);
@@ -559,7 +580,7 @@ void __tick_nohz_task_switch(void)

ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
if (atomic_read(&current->tick_dep_mask) ||
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
@@ -656,14 +677,14 @@ bool tick_nohz_tick_stopped(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}

bool tick_nohz_tick_stopped_cpu(int cpu)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}

/**
@@ -693,7 +714,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
ktime_t delta;

- if (WARN_ON_ONCE(!ts->idle_active))
+ if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)))
return;

delta = ktime_sub(now, ts->idle_entrytime);
@@ -705,7 +726,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);

ts->idle_entrytime = now;
- ts->idle_active = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);

sched_clock_idle_wakeup_event();
@@ -715,7 +736,7 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
{
write_seqcount_begin(&ts->idle_sleeptime_seq);
ts->idle_entrytime = ktime_get();
- ts->idle_active = 1;
+ tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);

sched_clock_idle_sleep_event();
@@ -737,7 +758,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
do {
seq = read_seqcount_begin(&ts->idle_sleeptime_seq);

- if (ts->idle_active && compute_delta) {
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);

idle = ktime_add(*sleeptime, delta);
@@ -905,7 +926,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->timer_expires = 0;
goto out;
}
@@ -918,7 +939,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
*/
delta = timekeeping_max_deferment();
if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+ (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
+ !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;

/* Calculate the next expiry time */
@@ -938,7 +960,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
- bool timer_idle = ts->tick_stopped;
+ bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
u64 expires;

/* Make sure we won't be trying to stop it twice in a row. */
@@ -978,13 +1000,13 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- ts->do_timer_last = 1;
+ tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
- ts->do_timer_last = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}

/* Skip reprogram of event if it's not changed */
- if (ts->tick_stopped && (expires == ts->next_tick)) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
return;
@@ -1002,12 +1024,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* call we save the current tick time, so we can restart the
* scheduler tick in tick_nohz_restart_sched_tick().
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
calc_load_nohz_start();
quiet_vmstat();

ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
- ts->tick_stopped = 1;
+ tick_sched_flag_set(ts, TS_FLAG_STOPPED);
trace_tick_stop(1, TICK_DEP_MASK_NONE);
}

@@ -1064,7 +1086,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
touch_softlockup_watchdog_sched();

/* Cancel the scheduled timer and restore the tick: */
- ts->tick_stopped = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_STOPPED);
tick_nohz_restart(ts, now);
}

@@ -1076,7 +1098,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts,

if (can_stop_full_tick(cpu, ts))
tick_nohz_full_stop_tick(ts, cpu);
- else if (ts->tick_stopped)
+ else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_restart_sched_tick(ts, now);
#endif
}
@@ -1196,14 +1218,14 @@ void tick_nohz_idle_stop_tick(void)
ts->idle_calls++;

if (expires > 0LL) {
- int was_stopped = ts->tick_stopped;
+ int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);

tick_nohz_stop_tick(ts, cpu);

ts->idle_sleeps++;
ts->idle_expires = expires;

- if (!was_stopped && ts->tick_stopped) {
+ if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
@@ -1234,7 +1256,7 @@ void tick_nohz_idle_enter(void)

WARN_ON_ONCE(ts->timer_expires_base);

- ts->inidle = 1;
+ tick_sched_flag_set(ts, TS_FLAG_INIDLE);
tick_nohz_start_idle(ts);

local_irq_enable();
@@ -1263,7 +1285,7 @@ void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
tick_nohz_start_idle(ts);
else
tick_nohz_full_update_tick(ts);
@@ -1317,7 +1339,7 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
ktime_t now = ts->idle_entrytime;
ktime_t next_event;

- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));

*delta_next = ktime_sub(dev->next_event, now);

@@ -1389,7 +1411,7 @@ void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ktime_t now = ktime_get();
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
@@ -1430,12 +1452,12 @@ void tick_nohz_idle_exit(void)

local_irq_disable();

- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));
WARN_ON_ONCE(ts->timer_expires_base);

- ts->inidle = 0;
- idle_active = ts->idle_active;
- tick_stopped = ts->tick_stopped;
+ tick_sched_flag_clear(ts, TS_FLAG_INIDLE);
+ idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE);
+ tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);

if (idle_active || tick_stopped)
now = ktime_get();
@@ -1498,10 +1520,10 @@ static inline void tick_nohz_irq_enter(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;

- if (!ts->idle_active && !ts->tick_stopped)
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE))
return;
now = ktime_get();
- if (ts->idle_active)
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))
tick_nohz_stop_idle(ts, now);
/*
* If all CPUs are idle we may need to update a stale jiffies value.
@@ -1510,7 +1532,7 @@ static inline void tick_nohz_irq_enter(void)
* rare case (typically stop machine). So we must make sure we have a
* last resort.
*/
- if (ts->tick_stopped)
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_update_jiffies(now);
}

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 3b555e0fa937..07a4c0144c47 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -20,14 +20,22 @@ enum tick_nohz_mode {
NOHZ_MODE_HIGHRES,
};

+/* The CPU is in the tick idle mode */
+#define TS_FLAG_INIDLE BIT(0)
+/* The idle tick has been stopped */
+#define TS_FLAG_STOPPED BIT(1)
+/*
+ * Indicator that the CPU is actively in the tick idle mode;
+ * it is reset during irq handling phases.
+ */
+#define TS_FLAG_IDLE_ACTIVE BIT(2)
+/* CPU was the last one doing do_timer before going idle */
+#define TS_FLAG_DO_TIMER_LAST BIT(3)
+
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
*
- * @inidle: Indicator that the CPU is in the tick idle mode
- * @tick_stopped: Indicator that the idle tick has been stopped
- * @idle_active: Indicator that the CPU is actively in the tick idle mode;
- * it is reset during irq handling phases.
- * @do_timer_last: CPU was the last one doing do_timer before going idle
+ * @flags: State flags gathering the TS_FLAG_* features
* @got_idle_tick: Tick timer function has run with @inidle set
* @stalled_jiffies: Number of stalled jiffies detected across ticks
* @last_tick_jiffies: Value of jiffies seen on last tick
@@ -57,10 +65,7 @@ enum tick_nohz_mode {
*/
struct tick_sched {
/* Common flags */
- unsigned int inidle : 1;
- unsigned int tick_stopped : 1;
- unsigned int idle_active : 1;
- unsigned int do_timer_last : 1;
+ unsigned long flags;

/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ed7d6ad694fb..38f81d836fc5 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -147,11 +147,14 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(ts->x)))
+# define P_flag(x, f) \
+ SEQ_printf(m, " .%-15s: %d\n", #x, !!(ts->flags & (f)))
+
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(last_tick);
- P(tick_stopped);
+ P_flag(tick_stopped, TS_FLAG_STOPPED);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);
--
2.43.0


2024-02-25 23:16:44

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 11/16] tick: Move got_idle_tick away from common flags

tick_nohz_idle_got_tick() is called by cpuidle_reflect() within the idle
loop with interrupts enabled. This function modifies the struct
tick_sched's bitfield "got_idle_tick". However this bitfield is stored
within the same mask as other bitfields that can be modified from
interrupts.

Fortunately so far it looks like the only race that can happen is while
writing ->got_idle_tick to 0, an interrupt fires and writes the
->idle_active field to 0. It's then possible that the interrupted write
to ->got_idle_tick writes back the old value of ->idle_active back to 1.

However if that happens, the worst possible outcome is that the time
spent between that interrupt and the upcoming call to
tick_nohz_idle_exit() is accounted as idle, which is negligible quantity.

Still all the bitfield writes within this struct tick_sched's shadow
mask should be IRQ-safe. Therefore move this bitfield out to its own
storage to avoid further suprises.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/time/tick-sched.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 35808bbb8a47..3b555e0fa937 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -61,7 +61,6 @@ struct tick_sched {
unsigned int tick_stopped : 1;
unsigned int idle_active : 1;
unsigned int do_timer_last : 1;
- unsigned int got_idle_tick : 1;

/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
@@ -73,6 +72,7 @@ struct tick_sched {
ktime_t next_tick;
unsigned long idle_jiffies;
ktime_t idle_waketime;
+ unsigned int got_idle_tick;

/* Idle entry */
seqcount_t idle_sleeptime_seq;
--
2.43.0


2024-02-25 23:16:49

by Frederic Weisbecker

[permalink] [raw]
Subject: [PATCH 15/16] tick: Assume timekeeping is correctly handed over upon last offline idle call

The timekeeping duty is handed over from the outgoing CPU on stop
machine, then the oneshot tick is stopped right after. Therefore it's
guaranteed that the current CPU isn't the timekeeper upon its last call
to idle.

Besides, calling tick_nohz_idle_stop_tick() while the dying CPU goes
into idle suggests that the tick is going to be stopped while it is
actually stopped already from the appropriate CPU hotplug state.

Remove the confusing call and the obsolete case handling and convert it
to a sanity check that verifies the above assumption.

Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
include/linux/tick.h | 2 ++
kernel/cpu.c | 1 +
kernel/sched/idle.c | 1 -
kernel/time/tick-common.c | 4 ++++
kernel/time/tick-sched.c | 13 +------------
5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index c7840ae8ebaf..44fddfa93e18 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -29,8 +29,10 @@ static inline void tick_cleanup_dead_cpu(int cpu) { }

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
#else
#define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
#endif

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5a8ad4f5ccf3..7e84a7b0675e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1399,6 +1399,7 @@ void cpuhp_report_idle_dead(void)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ tick_assert_timekeeping_handover();
rcutree_report_cpu_dead();
st->state = CPUHP_AP_IDLE_DEAD;
/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 31231925f1ec..b15d40cad7ea 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -291,7 +291,6 @@ static void do_idle(void)
local_irq_disable();

if (cpu_is_offline(cpu)) {
- tick_nohz_idle_stop_tick();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 9cd09eea06d6..fb0fdec8719a 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -396,6 +396,10 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
+void tick_assert_timekeeping_handover(void)
+{
+ WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
+}
/*
* Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89d16b8ea2c4..269e21590df5 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1160,18 +1160,7 @@ static bool report_idle_softirq(void)

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
- /*
- * If this CPU is offline and it is the one which updates
- * jiffies, then give up the assignment and let it be taken by
- * the CPU which runs the tick timer next. If we don't drop
- * this here, the jiffies might be stale and do_timer() never
- * gets invoked.
- */
- if (unlikely(!cpu_online(cpu))) {
- if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- return false;
- }
+ WARN_ON_ONCE(cpu_is_offline(cpu));

if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;
--
2.43.0


2024-02-26 08:15:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 01/16] tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and tick_setup_sched_timer()

On Sun, Feb 25 2024 at 23:54, Frederic Weisbecker wrote:

> From: Peng Liu <[email protected]>
>
> The ts->sched_timer initialization work of tick_nohz_switch_to_nohz()
> is almost the same as that of tick_setup_sched_timer(), so adjust the
> latter to get it reused by tick_nohz_switch_to_nohz().
>
> This also makes low-res mode sched_timer benefit from the tick skew
> boot option.
>
> Signed-off-by: Peng Liu <[email protected]>
> Signed-off-by: Frederic Weisbecker <[email protected]>

Reviewed-by: Thomas Gleixner <[email protected]>

2024-02-26 08:21:55

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 02/16] tick/nohz: Remove duplicate between lowres and highres handlers

On Sun, Feb 25 2024 at 23:54, Frederic Weisbecker wrote:

> From: Peng Liu <[email protected]>
>
> tick_nohz_lowres_handler() does the same work as
> tick_nohz_highres_handler() plus the clockevent device reprogramming, so
> make the former reuse the latter and rename it accordingly.
>
> Signed-off-by: Peng Liu <[email protected]>
> Signed-off-by: Frederic Weisbecker <[email protected]>

Reviewed-by: Thomas Gleixner <[email protected]>

2024-02-26 22:24:38

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] timers: Assert no next dyntick timer look-up while CPU is offline

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 19b344a91ff79f65be377825635bcf5f5bb8df67
Gitweb: https://git.kernel.org/tip/19b344a91ff79f65be377825635bcf5f5bb8df67
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:08 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

timers: Assert no next dyntick timer look-up while CPU is offline

The next timer (re-)evaluation, with the purpose of entering/updating
the dyntick mode, can happen from 3 sites and none of them are relevant
while the CPU is offline:

1) The idle loop:
a) From the quick check helping the cpuidle governor to heuristically
predict the best C-state.
b) While stopping the tick.

But if the CPU is offline, the tick has been cancelled and there is
consequently no need to further stop the tick.

2) Remote expiry: when a CPU remotely expires global timers on behalf of
another CPU, the latter target's next timer is re-evaluated
afterwards. However remote expîry doesn't happen on offline CPUs.

3) IRQ exit: on nohz_full mode, the tick is (re-)evaluated on IRQ exit.
But full dynticks is disabled on offline CPUs.

Therefore it is safe to assume that no next dyntick timer lookup can
be performed on offline CPUs.

Assert this expectation to report any surprise.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/timer.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4f4930d..e69e75d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2233,10 +2233,10 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
bool idle_is_possible;

/*
- * Pretend that there is no timer pending if the cpu is offline.
- * Possible pending timers will be migrated later to an active cpu.
+ * When the CPU is offline, the tick is cancelled and nothing is supposed
+ * to try to stop it.
*/
- if (cpu_is_offline(smp_processor_id())) {
+ if (WARN_ON_ONCE(cpu_is_offline(smp_processor_id()))) {
if (idle)
*idle = true;
return tevt.local;

2024-02-26 22:24:47

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Assume timekeeping is correctly handed over upon last offline idle call

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 500f8f9bced86f0c0f2482773bd64a1b7ec9c4e1
Gitweb: https://git.kernel.org/tip/500f8f9bced86f0c0f2482773bd64a1b7ec9c4e1
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:07 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Assume timekeeping is correctly handed over upon last offline idle call

The timekeeping duty is handed over from the outgoing CPU on stop
machine, then the oneshot tick is stopped right after. Therefore it's
guaranteed that the current CPU isn't the timekeeper upon its last call
to idle.

Besides, calling tick_nohz_idle_stop_tick() while the dying CPU goes
into idle suggests that the tick is going to be stopped while it is
actually stopped already from the appropriate CPU hotplug state.

Remove the confusing call and the obsolete case handling and convert it
to a sanity check that verifies the above assumption.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
include/linux/tick.h | 2 ++
kernel/cpu.c | 1 +
kernel/sched/idle.c | 1 -
kernel/time/tick-common.c | 4 ++++
kernel/time/tick-sched.c | 13 +------------
5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index c7840ae..44fddfa 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -29,8 +29,10 @@ static inline void tick_cleanup_dead_cpu(int cpu) { }

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
extern int tick_cpu_dying(unsigned int cpu);
+extern void tick_assert_timekeeping_handover(void);
#else
#define tick_cpu_dying NULL
+static inline void tick_assert_timekeeping_handover(void) { }
#endif

#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5a8ad4f..7e84a7b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1399,6 +1399,7 @@ void cpuhp_report_idle_dead(void)
struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

BUG_ON(st->state != CPUHP_AP_OFFLINE);
+ tick_assert_timekeeping_handover();
rcutree_report_cpu_dead();
st->state = CPUHP_AP_IDLE_DEAD;
/*
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 3123192..b15d40c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -291,7 +291,6 @@ static void do_idle(void)
local_irq_disable();

if (cpu_is_offline(cpu)) {
- tick_nohz_idle_stop_tick();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 9cd09ee..fb0fdec 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -396,6 +396,10 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
+void tick_assert_timekeeping_handover(void)
+{
+ WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id());
+}
/*
* Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89d16b8..269e215 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1160,18 +1160,7 @@ static bool report_idle_softirq(void)

static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
{
- /*
- * If this CPU is offline and it is the one which updates
- * jiffies, then give up the assignment and let it be taken by
- * the CPU which runs the tick timer next. If we don't drop
- * this here, the jiffies might be stale and do_timer() never
- * gets invoked.
- */
- if (unlikely(!cpu_online(cpu))) {
- if (cpu == tick_do_timer_cpu)
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- return false;
- }
+ WARN_ON_ONCE(cpu_is_offline(cpu));

if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;

2024-02-26 22:25:04

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Shut down low-res tick from dying CPU

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 3f69d04e146c6e14ccdd4e7b37d93f789229202a
Gitweb: https://git.kernel.org/tip/3f69d04e146c6e14ccdd4e7b37d93f789229202a
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:06 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Shut down low-res tick from dying CPU

The timekeeping duty is handed over from the outgoing CPU within stop
machine. This works well if CONFIG_NO_HZ_COMMON=n or the tick is in
high-res mode. However in low-res dynticks mode, the tick isn't
cancelled until the clockevent is shut down, which can happen later. The
tick may therefore fire again once IRQs are re-enabled on stop machine
and until IRQs are disabled for good upon the last call to idle.

That's so many opportunities for a timekeeper to go idle and the
outgoing CPU to take over that duty. This is why
tick_nohz_idle_stop_tick() is called one last time on idle if the CPU
is seen offline: so that the timekeeping duty is handed over again in
case the CPU has re-taken the duty.

This means there are two timekeeping handovers on CPU down hotplug with
different undocumented constraints and purposes:

1) A handover on stop machine for !dynticks || highres. All online CPUs
are guaranteed to be non-idle and the timekeeping duty can be safely
handed-over. The hrtimer tick is cancelled so it is guaranteed that in
dynticks mode the outgoing CPU won't take again the duty.

2) A handover on last idle call for dynticks && lowres. Setting the
duty to TICK_DO_TIMER_NONE makes sure that a CPU will take over the
timekeeping.

Prepare for consolidating the handover to a single place (the first one)
with shutting down the low-res tick as well from
tick_cancel_sched_timer() as well. This will simplify the handover and
unify the tick cancellation between high-res and low-res.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-common.c | 3 ++-
kernel/time/tick-sched.c | 32 +++++++++++++++++++++++++-------
kernel/time/tick-sched.h | 4 ++--
3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 5224140..9cd09ee 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,7 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);

- tick_cancel_sched_timer(dying_cpu);
+ /* Make sure the CPU won't try to retake the timekeeping duty */
+ tick_sched_timer_dying(dying_cpu);

/* Remove CPU from timer broadcasting */
tick_offline_cpu(dying_cpu);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index dcb9f03..89d16b8 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -308,6 +308,14 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
return HRTIMER_RESTART;
}

+static void tick_sched_timer_cancel(struct tick_sched *ts)
+{
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
+ hrtimer_cancel(&ts->sched_timer);
+ else if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ tick_program_event(KTIME_MAX, 1);
+}
+
#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
@@ -1040,10 +1048,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
- hrtimer_cancel(&ts->sched_timer);
- else
- tick_program_event(KTIME_MAX, 1);
+ tick_sched_timer_cancel(ts);
return;
}

@@ -1598,14 +1603,27 @@ void tick_setup_sched_timer(bool hrtimer)
tick_nohz_activate(ts);
}

-void tick_cancel_sched_timer(int cpu)
+/*
+ * Shut down the tick and make sure the CPU won't try to retake the timekeeping
+ * duty before disabling IRQs in idle for the last time.
+ */
+void tick_sched_timer_dying(int cpu)
{
+ struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ struct clock_event_device *dev = td->evtdev;
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

- if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
- hrtimer_cancel(&ts->sched_timer);
+ /* This must happen before hrtimers are migrated! */
+ tick_sched_timer_cancel(ts);
+
+ /*
+ * If the clockevents doesn't support CLOCK_EVT_STATE_ONESHOT_STOPPED,
+ * make sure not to call low-res tick handler.
+ */
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
+ dev->event_handler = clockevents_handle_noop;

idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index bbe72a0..58d8d1c 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -106,9 +106,9 @@ extern struct tick_sched *tick_get_tick_sched(int cpu);

extern void tick_setup_sched_timer(bool hrtimer);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
+extern void tick_sched_timer_dying(int cpu);
#else
-static inline void tick_cancel_sched_timer(int cpu) { }
+static inline void tick_sched_timer_dying(int cpu) { }
#endif

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST

2024-02-26 22:25:15

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Split nohz and highres features from nohz_mode

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 7988e5ae2be70b110db9d4b8ec163bd42e67d3be
Gitweb: https://git.kernel.org/tip/7988e5ae2be70b110db9d4b8ec163bd42e67d3be
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:05 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Split nohz and highres features from nohz_mode

The nohz mode field tells about low resolution nohz mode or high
resolution nohz mode but it doesn't tell about high resolution non-nohz
mode.

In order to retrieve the latter state, tick_cancel_sched_timer() must
fiddle with struct hrtimer's internals to guess if the tick has been
initialized in high resolution.

Move instead the nohz mode field information into the tick flags and
provide two new bits: one to know if the tick is in nohz mode and
another one to know if the tick is in high resolution. The combination
of those two flags provides all the needed informations to determine
which of the three tick modes is running.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/hrtimer.c | 2 +-
kernel/time/tick-sched.c | 32 +++++++++++++++++---------------
kernel/time/tick-sched.h | 13 +++++--------
kernel/time/timer_list.c | 5 +++--
4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 3e95474..70625df 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void)
base->hres_active = 1;
hrtimer_resolution = HIGH_RES_NSEC;

- tick_setup_sched_timer(NOHZ_MODE_HIGHRES);
+ tick_setup_sched_timer(true);
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4aa7ce0..dcb9f03 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -831,7 +831,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);

- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -1040,14 +1040,14 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* the tick timer.
*/
if (unlikely(expires == KTIME_MAX)) {
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
hrtimer_cancel(&ts->sched_timer);
else
tick_program_event(KTIME_MAX, 1);
return;
}

- if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES)) {
hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS_PINNED_HARD);
} else {
@@ -1108,7 +1108,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!tick_nohz_full_cpu(smp_processor_id()))
return;

- if (ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ if (!tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return;

__tick_nohz_full_update_tick(ts, ktime_get());
@@ -1168,7 +1168,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}

- if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
+ if (unlikely(!tick_sched_flag_test(ts, TS_FLAG_NOHZ)))
return false;

if (need_resched())
@@ -1487,11 +1487,11 @@ static void tick_nohz_lowres_handler(struct clock_event_device *dev)
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
}

-static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
+static inline void tick_nohz_activate(struct tick_sched *ts)
{
if (!tick_nohz_enabled)
return;
- ts->nohz_mode = mode;
+ tick_sched_flag_set(ts, TS_FLAG_NOHZ);
/* One update is enough */
if (!test_and_set_bit(0, &tick_nohz_active))
timers_update_nohz();
@@ -1512,7 +1512,7 @@ static void tick_nohz_switch_to_nohz(void)
* Recycle the hrtimer in 'ts', so we can share the
* highres code.
*/
- tick_setup_sched_timer(NOHZ_MODE_LOWRES);
+ tick_setup_sched_timer(false);
}

static inline void tick_nohz_irq_enter(void)
@@ -1540,7 +1540,7 @@ static inline void tick_nohz_irq_enter(void)

static inline void tick_nohz_switch_to_nohz(void) { }
static inline void tick_nohz_irq_enter(void) { }
-static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
+static inline void tick_nohz_activate(struct tick_sched *ts) { }

#endif /* CONFIG_NO_HZ_COMMON */

@@ -1567,15 +1567,17 @@ early_param("skew_tick", skew_tick);
* tick_setup_sched_timer - setup the tick emulation timer
* @mode: tick_nohz_mode to setup for
*/
-void tick_setup_sched_timer(int mode)
+void tick_setup_sched_timer(bool hrtimer)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

/* Emulate tick processing via per-CPU hrtimers: */
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer) {
+ tick_sched_flag_set(ts, TS_FLAG_HIGHRES);
ts->sched_timer.function = tick_nohz_handler;
+ }

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -1589,11 +1591,11 @@ void tick_setup_sched_timer(int mode)
}

hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && hrtimer)
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
else
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- tick_nohz_activate(ts, mode);
+ tick_nohz_activate(ts);
}

void tick_cancel_sched_timer(int cpu)
@@ -1602,7 +1604,7 @@ void tick_cancel_sched_timer(int cpu)
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

- if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base)
+ if (tick_sched_flag_test(ts, TS_FLAG_HIGHRES))
hrtimer_cancel(&ts->sched_timer);

idle_sleeptime = ts->idle_sleeptime;
@@ -1652,7 +1654,7 @@ int tick_check_oneshot_change(int allow_nohz)
if (!test_and_clear_bit(0, &ts->check_clocks))
return 0;

- if (ts->nohz_mode != NOHZ_MODE_INACTIVE)
+ if (tick_sched_flag_test(ts, TS_FLAG_NOHZ))
return 0;

if (!timekeeping_valid_for_hres() || !tick_is_oneshot_available())
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 07a4c01..bbe72a0 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -14,12 +14,6 @@ struct tick_device {
enum tick_device_mode mode;
};

-enum tick_nohz_mode {
- NOHZ_MODE_INACTIVE,
- NOHZ_MODE_LOWRES,
- NOHZ_MODE_HIGHRES,
-};
-
/* The CPU is in the tick idle mode */
#define TS_FLAG_INIDLE BIT(0)
/* The idle tick has been stopped */
@@ -31,6 +25,10 @@ enum tick_nohz_mode {
#define TS_FLAG_IDLE_ACTIVE BIT(2)
/* CPU was the last one doing do_timer before going idle */
#define TS_FLAG_DO_TIMER_LAST BIT(3)
+/* NO_HZ is enabled */
+#define TS_FLAG_NOHZ BIT(4)
+/* High resolution tick mode */
+#define TS_FLAG_HIGHRES BIT(5)

/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
@@ -84,7 +82,6 @@ struct tick_sched {
ktime_t idle_entrytime;

/* Tick stop */
- enum tick_nohz_mode nohz_mode;
unsigned long last_jiffies;
u64 timer_expires_base;
u64 timer_expires;
@@ -107,7 +104,7 @@ struct tick_sched {

extern struct tick_sched *tick_get_tick_sched(int cpu);

-extern void tick_setup_sched_timer(int mode);
+extern void tick_setup_sched_timer(bool hrtimer);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
extern void tick_cancel_sched_timer(int cpu);
#else
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 38f81d8..1c311c4 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -152,7 +152,8 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)

{
struct tick_sched *ts = tick_get_tick_sched(cpu);
- P(nohz_mode);
+ P_flag(nohz, TS_FLAG_NOHZ);
+ P_flag(highres, TS_FLAG_HIGHRES);
P_ns(last_tick);
P_flag(tick_stopped, TS_FLAG_STOPPED);
P(idle_jiffies);
@@ -259,7 +260,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)

static inline void timer_list_header(struct seq_file *m, u64 now)
{
- SEQ_printf(m, "Timer List Version: v0.9\n");
+ SEQ_printf(m, "Timer List Version: v0.10\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
SEQ_printf(m, "\n");

2024-02-26 22:25:25

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Move individual bit features to debuggable mask accesses

The following commit has been merged into the timers/core branch of tip:

Commit-ID: a478ffb2ae234ee1ece2b84719762c54d304e2c7
Gitweb: https://git.kernel.org/tip/a478ffb2ae234ee1ece2b84719762c54d304e2c7
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:04 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Move individual bit features to debuggable mask accesses

The individual bitfields of struct tick_sched must be modified from
IRQs disabled places, otherwise local modifications can race due to them
sharing the same memory storage.

The recent move of the "got_idle_tick" bitfield to its own storage shows
that the use of these bitfields, as pretty as they look, can be as much
error prone.

In order to avoid future issues of the like and make sure that those
bitfields are safely accessed, move those flags to an explicit mask
along with a mutator function performing the basic IRQs disabled sanity
check.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 88 ++++++++++++++++++++++++---------------
kernel/time/tick-sched.h | 23 ++++++----
kernel/time/timer_list.c | 5 +-
3 files changed, 73 insertions(+), 43 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b79f540..4aa7ce0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -180,6 +180,26 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}

+static inline int tick_sched_flag_test(struct tick_sched *ts,
+ unsigned long flag)
+{
+ return !!(ts->flags & flag);
+}
+
+static inline void tick_sched_flag_set(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags |= flag;
+}
+
+static inline void tick_sched_flag_clear(struct tick_sched *ts,
+ unsigned long flag)
+{
+ lockdep_assert_irqs_disabled();
+ ts->flags &= ~flag;
+}
+
#define MAX_STALLED_JIFFIES 5

static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
@@ -223,7 +243,7 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
}
}

- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
ts->got_idle_tick = 1;
}

@@ -237,7 +257,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too many ticks.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
@@ -279,7 +300,7 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
* - to the idle task if in dynticks-idle
* - to IRQ exit if in full-dynticks.
*/
- if (unlikely(ts->tick_stopped))
+ if (unlikely(tick_sched_flag_test(ts, TS_FLAG_STOPPED)))
return HRTIMER_NORESTART;

hrtimer_forward(timer, now, TICK_NSEC);
@@ -559,7 +580,7 @@ void __tick_nohz_task_switch(void)

ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
if (atomic_read(&current->tick_dep_mask) ||
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
@@ -656,14 +677,14 @@ bool tick_nohz_tick_stopped(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}

bool tick_nohz_tick_stopped_cpu(int cpu)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);

- return ts->tick_stopped;
+ return tick_sched_flag_test(ts, TS_FLAG_STOPPED);
}

/**
@@ -693,7 +714,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
ktime_t delta;

- if (WARN_ON_ONCE(!ts->idle_active))
+ if (WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE)))
return;

delta = ktime_sub(now, ts->idle_entrytime);
@@ -705,7 +726,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);

ts->idle_entrytime = now;
- ts->idle_active = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);

sched_clock_idle_wakeup_event();
@@ -715,7 +736,7 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
{
write_seqcount_begin(&ts->idle_sleeptime_seq);
ts->idle_entrytime = ktime_get();
- ts->idle_active = 1;
+ tick_sched_flag_set(ts, TS_FLAG_IDLE_ACTIVE);
write_seqcount_end(&ts->idle_sleeptime_seq);

sched_clock_idle_sleep_event();
@@ -737,7 +758,7 @@ static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
do {
seq = read_seqcount_begin(&ts->idle_sleeptime_seq);

- if (ts->idle_active && compute_delta) {
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE) && compute_delta) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);

idle = ktime_add(*sleeptime, delta);
@@ -905,7 +926,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->timer_expires = 0;
goto out;
}
@@ -918,7 +939,8 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
*/
delta = timekeeping_max_deferment();
if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+ (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
+ !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;

/* Calculate the next expiry time */
@@ -938,7 +960,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
- bool timer_idle = ts->tick_stopped;
+ bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
u64 expires;

/* Make sure we won't be trying to stop it twice in a row. */
@@ -978,13 +1000,13 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- ts->do_timer_last = 1;
+ tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
- ts->do_timer_last = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}

/* Skip reprogram of event if it's not changed */
- if (ts->tick_stopped && (expires == ts->next_tick)) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED) && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (expires == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
return;
@@ -1002,12 +1024,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* call we save the current tick time, so we can restart the
* scheduler tick in tick_nohz_restart_sched_tick().
*/
- if (!ts->tick_stopped) {
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
calc_load_nohz_start();
quiet_vmstat();

ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
- ts->tick_stopped = 1;
+ tick_sched_flag_set(ts, TS_FLAG_STOPPED);
trace_tick_stop(1, TICK_DEP_MASK_NONE);
}

@@ -1064,7 +1086,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
touch_softlockup_watchdog_sched();

/* Cancel the scheduled timer and restore the tick: */
- ts->tick_stopped = 0;
+ tick_sched_flag_clear(ts, TS_FLAG_STOPPED);
tick_nohz_restart(ts, now);
}

@@ -1076,7 +1098,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts,

if (can_stop_full_tick(cpu, ts))
tick_nohz_full_stop_tick(ts, cpu);
- else if (ts->tick_stopped)
+ else if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_restart_sched_tick(ts, now);
#endif
}
@@ -1196,14 +1218,14 @@ void tick_nohz_idle_stop_tick(void)
ts->idle_calls++;

if (expires > 0LL) {
- int was_stopped = ts->tick_stopped;
+ int was_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);

tick_nohz_stop_tick(ts, cpu);

ts->idle_sleeps++;
ts->idle_expires = expires;

- if (!was_stopped && ts->tick_stopped) {
+ if (!was_stopped && tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
@@ -1234,7 +1256,7 @@ void tick_nohz_idle_enter(void)

WARN_ON_ONCE(ts->timer_expires_base);

- ts->inidle = 1;
+ tick_sched_flag_set(ts, TS_FLAG_INIDLE);
tick_nohz_start_idle(ts);

local_irq_enable();
@@ -1263,7 +1285,7 @@ void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->inidle)
+ if (tick_sched_flag_test(ts, TS_FLAG_INIDLE))
tick_nohz_start_idle(ts);
else
tick_nohz_full_update_tick(ts);
@@ -1317,7 +1339,7 @@ ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
ktime_t now = ts->idle_entrytime;
ktime_t next_event;

- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));

*delta_next = ktime_sub(dev->next_event, now);

@@ -1389,7 +1411,7 @@ void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);

- if (ts->tick_stopped) {
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED)) {
ktime_t now = ktime_get();
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
@@ -1430,12 +1452,12 @@ void tick_nohz_idle_exit(void)

local_irq_disable();

- WARN_ON_ONCE(!ts->inidle);
+ WARN_ON_ONCE(!tick_sched_flag_test(ts, TS_FLAG_INIDLE));
WARN_ON_ONCE(ts->timer_expires_base);

- ts->inidle = 0;
- idle_active = ts->idle_active;
- tick_stopped = ts->tick_stopped;
+ tick_sched_flag_clear(ts, TS_FLAG_INIDLE);
+ idle_active = tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE);
+ tick_stopped = tick_sched_flag_test(ts, TS_FLAG_STOPPED);

if (idle_active || tick_stopped)
now = ktime_get();
@@ -1498,10 +1520,10 @@ static inline void tick_nohz_irq_enter(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
ktime_t now;

- if (!ts->idle_active && !ts->tick_stopped)
+ if (!tick_sched_flag_test(ts, TS_FLAG_STOPPED | TS_FLAG_IDLE_ACTIVE))
return;
now = ktime_get();
- if (ts->idle_active)
+ if (tick_sched_flag_test(ts, TS_FLAG_IDLE_ACTIVE))
tick_nohz_stop_idle(ts, now);
/*
* If all CPUs are idle we may need to update a stale jiffies value.
@@ -1510,7 +1532,7 @@ static inline void tick_nohz_irq_enter(void)
* rare case (typically stop machine). So we must make sure we have a
* last resort.
*/
- if (ts->tick_stopped)
+ if (tick_sched_flag_test(ts, TS_FLAG_STOPPED))
tick_nohz_update_jiffies(now);
}

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 3b555e0..07a4c01 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -20,14 +20,22 @@ enum tick_nohz_mode {
NOHZ_MODE_HIGHRES,
};

+/* The CPU is in the tick idle mode */
+#define TS_FLAG_INIDLE BIT(0)
+/* The idle tick has been stopped */
+#define TS_FLAG_STOPPED BIT(1)
+/*
+ * Indicator that the CPU is actively in the tick idle mode;
+ * it is reset during irq handling phases.
+ */
+#define TS_FLAG_IDLE_ACTIVE BIT(2)
+/* CPU was the last one doing do_timer before going idle */
+#define TS_FLAG_DO_TIMER_LAST BIT(3)
+
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
*
- * @inidle: Indicator that the CPU is in the tick idle mode
- * @tick_stopped: Indicator that the idle tick has been stopped
- * @idle_active: Indicator that the CPU is actively in the tick idle mode;
- * it is reset during irq handling phases.
- * @do_timer_last: CPU was the last one doing do_timer before going idle
+ * @flags: State flags gathering the TS_FLAG_* features
* @got_idle_tick: Tick timer function has run with @inidle set
* @stalled_jiffies: Number of stalled jiffies detected across ticks
* @last_tick_jiffies: Value of jiffies seen on last tick
@@ -57,10 +65,7 @@ enum tick_nohz_mode {
*/
struct tick_sched {
/* Common flags */
- unsigned int inidle : 1;
- unsigned int tick_stopped : 1;
- unsigned int idle_active : 1;
- unsigned int do_timer_last : 1;
+ unsigned long flags;

/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ed7d6ad..38f81d8 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -147,11 +147,14 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
# define P_ns(x) \
SEQ_printf(m, " .%-15s: %Lu nsecs\n", #x, \
(unsigned long long)(ktime_to_ns(ts->x)))
+# define P_flag(x, f) \
+ SEQ_printf(m, " .%-15s: %d\n", #x, !!(ts->flags & (f)))
+
{
struct tick_sched *ts = tick_get_tick_sched(cpu);
P(nohz_mode);
P_ns(last_tick);
- P(tick_stopped);
+ P_flag(tick_stopped, TS_FLAG_STOPPED);
P(idle_jiffies);
P(idle_calls);
P(idle_sleeps);

2024-02-26 22:25:50

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Assume the tick can't be stopped in NOHZ_MODE_INACTIVE mode

The following commit has been merged into the timers/core branch of tip:

Commit-ID: d9b1865c86aec7c515db5718e4820106c2c12db3
Gitweb: https://git.kernel.org/tip/d9b1865c86aec7c515db5718e4820106c2c12db3
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:02 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Assume the tick can't be stopped in NOHZ_MODE_INACTIVE mode

The full-nohz update function checks if the nohz mode is active before
proceeding. It considers one exception though: if the tick is already
stopped even though the nohz mode is inactive, it still moves on in
order to update/restart the tick if needed.

However in order for the tick to be stopped, the nohz_mode has to be
either NOHZ_MODE_LOWRES or NOHZ_MODE_HIGHRES. Therefore it doesn't make
sense to test if the tick is stopped before verifying NOHZ_MODE_INACTIVE
mode.

Remove the needless related condition.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b17895d..b79f540 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1086,7 +1086,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!tick_nohz_full_cpu(smp_processor_id()))
return;

- if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
+ if (ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;

__tick_nohz_full_update_tick(ts, ktime_get());

2024-02-26 22:26:01

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Move tick cancellation up to CPUHP_AP_TICK_DYING

The following commit has been merged into the timers/core branch of tip:

Commit-ID: f04e51220ad5cf35540f67f3ca15c8617c1f0bef
Gitweb: https://git.kernel.org/tip/f04e51220ad5cf35540f67f3ca15c8617c1f0bef
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:00 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick: Move tick cancellation up to CPUHP_AP_TICK_DYING

The tick hrtimer is cancelled right before hrtimers are migrated. This
is done from the hrtimer subsystem even though it shouldn't know about
its actual users.

Move instead the tick hrtimer cancellation to the relevant CPU hotplug
state that aims at centralizing high level tick shutdown operations so
that the related flow is easy to follow.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/hrtimer.c | 2 --
kernel/time/tick-common.c | 2 ++
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 95f1f35..3e95474 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -2229,8 +2229,6 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
struct hrtimer_cpu_base *old_base, *new_base;

- tick_cancel_sched_timer(dying_cpu);
-
old_base = this_cpu_ptr(&hrtimer_bases);
new_base = &per_cpu(hrtimer_bases, ncpu);

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a89ef45..b4af8c7 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -410,6 +410,8 @@ int tick_cpu_dying(unsigned int dying_cpu)
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);

+ tick_cancel_sched_timer(dying_cpu);
+
return 0;
}


2024-02-26 22:26:03

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Move broadcast cancellation up to CPUHP_AP_TICK_DYING

The following commit has been merged into the timers/core branch of tip:

Commit-ID: ef8969bb552c1c75e997a42d3e2c576b6ed4025a
Gitweb: https://git.kernel.org/tip/ef8969bb552c1c75e997a42d3e2c576b6ed4025a
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:01 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Move broadcast cancellation up to CPUHP_AP_TICK_DYING

The broadcast shutdown code is executed through a random explicit call
within stop machine from the outgoing CPU.

However the tick broadcast is a midware between the tick callback and
the clocksource, therefore it makes more sense to shut it down after the
tick callback and before the clocksource drivers.

Move it instead to the common tick shutdown CPU hotplug state where
related operations can be ordered from highest to lowest level.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
include/linux/tick.h | 6 ------
kernel/cpu.c | 2 --
kernel/time/tick-common.c | 3 +++
kernel/time/tick-internal.h | 2 ++
4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index afff4c2..c7840ae 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -73,12 +73,6 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode);
static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
#endif /* BROADCAST */

-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
-extern void tick_offline_cpu(unsigned int cpu);
-#else
-static inline void tick_offline_cpu(unsigned int cpu) { }
-#endif
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
#else
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 2635080..5a8ad4f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
*/
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

- /* Remove CPU from timer broadcasting */
- tick_offline_cpu(cpu);
/* Park the stopper thread */
stop_machine_park(cpu);
return 0;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b4af8c7..5224140 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -412,6 +412,9 @@ int tick_cpu_dying(unsigned int dying_cpu)

tick_cancel_sched_timer(dying_cpu);

+ /* Remove CPU from timer broadcasting */
+ tick_offline_cpu(dying_cpu);
+
return 0;
}

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index a3243c4..5f2105e 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -142,8 +142,10 @@ static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_
#endif /* !(BROADCAST && ONESHOT) */

#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_offline_cpu(unsigned int cpu);
extern void tick_broadcast_offline(unsigned int cpu);
#else
+static inline void tick_offline_cpu(unsigned int cpu) { }
static inline void tick_broadcast_offline(unsigned int cpu) { }
#endif


2024-02-26 22:26:28

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Start centralizing tick related CPU hotplug operations

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 3ad6eb0683a1edbb4bb117b85d61f17a879155a1
Gitweb: https://git.kernel.org/tip/3ad6eb0683a1edbb4bb117b85d61f17a879155a1
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:59 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick: Start centralizing tick related CPU hotplug operations

During the CPU offlining process, the various timer tick features are
shut down from scattered places, sometimes from teardown callbacks on
stop machine, sometimes through explicit calls, sometimes from the
control CPU after the CPU died. The reason why these shutdown operations
are spread around is not always clear and it makes the tick lifecycle
hard to follow.

The tick should be shut down in order from highest to lowest level:

On stop machine from the dying CPU (high-level):

1) Hand-over the timekeeping duty (tick_handover_do_timer())
2) Cancel the tick implementation called by the clockevent callback
(tick_cancel_sched_timer())
3) Shutdown broadcasting (tick_offline_cpu() / tick_broadcast_offline())

On stop machine from the dying CPU (low-level):

4) Shutdown clockevents drivers (CPUHP_AP_*_TIMER_STARTING states)

>From the control CPU after the CPU died (low-level):

5) Shutdown/unregister/cleanup clockevents for the dead CPU
(tick_cleanup_dead_cpu())

Instead the current order is 2, 4 (both from CPU hotplug states), then
1 and 3 through direct calls. This layout and order don't make much
sense. The operations 1, 2, 3 should be gathered together and in order.

Sort this situation with creating a new TICK shut-down CPU hotplug state
and start with introducing the timekeeping duty hand-over there. The
state must precede hrtimers migration because the tick hrtimer will be
stopped from it in a further patch.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
include/linux/cpuhotplug.h | 1 +
include/linux/tick.h | 8 ++++++--
kernel/cpu.c | 8 +++++---
kernel/time/tick-common.c | 17 +++++++++++------
4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 7651904..35e78dd 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -184,6 +184,7 @@ enum cpuhp_state {
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_HRTIMERS_DYING,
+ CPUHP_AP_TICK_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
CPUHP_AP_ONLINE,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 716d17f..afff4c2 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -19,16 +19,20 @@ extern void __init tick_init(void);
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
-extern void tick_handover_do_timer(void);
extern void tick_cleanup_dead_cpu(int cpu);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
-static inline void tick_handover_do_timer(void) { }
static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */

+#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
+extern int tick_cpu_dying(unsigned int cpu);
+#else
+#define tick_cpu_dying NULL
+#endif
+
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
extern void tick_freeze(void);
extern void tick_unfreeze(void);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e6ec3ba..2635080 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1324,8 +1324,6 @@ static int take_cpu_down(void *_param)
*/
cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

- /* Give up timekeeping duties */
- tick_handover_do_timer();
/* Remove CPU from timer broadcasting */
tick_offline_cpu(cpu);
/* Park the stopper thread */
@@ -2205,7 +2203,11 @@ static struct cpuhp_step cpuhp_hp_states[] = {
.startup.single = NULL,
.teardown.single = hrtimers_cpu_dying,
},
-
+ [CPUHP_AP_TICK_DYING] = {
+ .name = "tick:dying",
+ .startup.single = NULL,
+ .teardown.single = tick_cpu_dying,
+ },
/* Entry state on starting. Interrupts enabled from here on. Transient
* state for synchronsization */
[CPUHP_AP_ONLINE] = {
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0084e1a..a89ef45 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -397,15 +397,20 @@ EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
/*
- * Transfer the do_timer job away from a dying cpu.
- *
- * Called with interrupts disabled. No locking required. If
- * tick_do_timer_cpu is owned by this cpu, nothing can change it.
+ * Stop the tick and transfer the timekeeping job away from a dying cpu.
*/
-void tick_handover_do_timer(void)
+int tick_cpu_dying(unsigned int dying_cpu)
{
- if (tick_do_timer_cpu == smp_processor_id())
+ /*
+ * If the current CPU is the timekeeper, it's the only one that
+ * can safely hand over its duty. Also all online CPUs are in
+ * stop machine, guaranteed not to be idle, therefore it's safe
+ * to pick any online successor.
+ */
+ if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
+
+ return 0;
}

/*

2024-02-26 22:26:34

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick/sched: Don't clear ts::next_tick again in can_stop_idle_tick()

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 60313c21c33abc08108bdd60390fa89563977e64
Gitweb: https://git.kernel.org/tip/60313c21c33abc08108bdd60390fa89563977e64
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:58 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick/sched: Don't clear ts::next_tick again in can_stop_idle_tick()

The tick sched structure is already cleared from tick_cancel_sched_timer(),
so there is no need to clear that field again.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 5 -----
1 file changed, 5 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9f75f56..b17895d 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1143,11 +1143,6 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
if (unlikely(!cpu_online(cpu))) {
if (cpu == tick_do_timer_cpu)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
- /*
- * Make sure the CPU doesn't get fooled by obsolete tick
- * deadline if it comes back online later.
- */
- ts->next_tick = 0;
return false;
}


2024-02-26 22:26:50

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick/sched: Rename tick_nohz_stop_sched_tick() to tick_nohz_full_stop_tick()

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 3650f49bfb954119a33613672abe4ca3bbbf6243
Gitweb: https://git.kernel.org/tip/3650f49bfb954119a33613672abe4ca3bbbf6243
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:57 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick/sched: Rename tick_nohz_stop_sched_tick() to tick_nohz_full_stop_tick()

tick_nohz_stop_sched_tick() is only about NOHZ_full and not about
dynticks-idle. Reflect that in the function name to avoid confusion.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4e34967..9f75f56 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1040,7 +1040,7 @@ static void tick_nohz_retain_tick(struct tick_sched *ts)
}

#ifdef CONFIG_NO_HZ_FULL
-static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+static void tick_nohz_full_stop_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
@@ -1075,7 +1075,7 @@ static void __tick_nohz_full_update_tick(struct tick_sched *ts,
int cpu = smp_processor_id();

if (can_stop_full_tick(cpu, ts))
- tick_nohz_stop_sched_tick(ts, cpu);
+ tick_nohz_full_stop_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, now);
#endif

2024-02-26 22:27:02

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Use IS_ENABLED() whenever possible

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 27dc08096ce498ec8b87fb12ce4b9932c8111898
Gitweb: https://git.kernel.org/tip/27dc08096ce498ec8b87fb12ce4b9932c8111898
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:56 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick: Use IS_ENABLED() whenever possible

Avoid ifdeferry if it can be converted to IS_ENABLED() whenever possible

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-common.c | 4 +---
kernel/time/tick-sched.c | 14 +++++---------
2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e9138cd..0084e1a 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -111,15 +111,13 @@ void tick_handle_periodic(struct clock_event_device *dev)

tick_periodic(cpu);

-#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
/*
* The cpu might have transitioned to HIGHRES or NOHZ mode via
* update_process_times() -> run_local_timers() ->
* hrtimer_run_queues().
*/
- if (dev->event_handler != tick_handle_periodic)
+ if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic)
return;
-#endif

if (!clockevent_state_oneshot(dev))
return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 27aaecb..4e34967 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -186,7 +186,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();

-#ifdef CONFIG_NO_HZ_COMMON
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
@@ -197,13 +196,13 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
+ unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
tick_do_timer_cpu = cpu;
}
-#endif

/* Check if jiffies need an update */
if (tick_do_timer_cpu == cpu)
@@ -230,7 +229,6 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)

static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
{
-#ifdef CONFIG_NO_HZ_COMMON
/*
* When we are idle and the tick is stopped, we have to touch
* the watchdog as we might not schedule for a really long
@@ -239,7 +237,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
* idle" jiffy stamp so the idle accounting adjustment we do
* when we go busy again does not account too many ticks.
*/
- if (ts->tick_stopped) {
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && ts->tick_stopped) {
touch_softlockup_watchdog_sched();
if (is_idle_task(current))
ts->idle_jiffies++;
@@ -250,7 +248,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
*/
ts->next_tick = 0;
}
-#endif
+
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
@@ -1587,10 +1585,8 @@ void tick_cancel_sched_timer(int cpu)
ktime_t idle_sleeptime, iowait_sleeptime;
unsigned long idle_calls, idle_sleeps;

-# ifdef CONFIG_HIGH_RES_TIMERS
- if (ts->sched_timer.base)
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && ts->sched_timer.base)
hrtimer_cancel(&ts->sched_timer);
-# endif

idle_sleeptime = ts->idle_sleeptime;
iowait_sleeptime = ts->iowait_sleeptime;

2024-02-26 22:27:08

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick/sched: Remove useless oneshot ifdeffery

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 3aedb7fcd88af25c215be098bf8ecb9ae8cb60ab
Gitweb: https://git.kernel.org/tip/3aedb7fcd88af25c215be098bf8ecb9ae8cb60ab
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:55 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick/sched: Remove useless oneshot ifdeffery

tick-sched.c is only built when CONFIG_TICK_ONESHOT=y, which is selected
only if CONFIG_NO_HZ_COMMON=y or CONFIG_HIGH_RES_TIMERS=y. Therefore
the related ifdeferry in this file is needless and can be removed.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 88c992f..27aaecb 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -43,7 +43,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
return &per_cpu(tick_cpu_sched, cpu);
}

-#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
/*
* The time when the last jiffy update happened. Write access must hold
* jiffies_lock and jiffies_seq. tick_nohz_next_event() needs to get a
@@ -289,7 +288,6 @@ static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)

return HRTIMER_RESTART;
}
-#endif

#ifdef CONFIG_NO_HZ_FULL
cpumask_var_t tick_nohz_full_mask;
@@ -635,7 +633,7 @@ void __init tick_nohz_init(void)
pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
cpumask_pr_args(tick_nohz_full_mask));
}
-#endif
+#endif /* #ifdef CONFIG_NO_HZ_FULL */

/*
* NOHZ - aka dynamic tick functionality
@@ -1540,7 +1538,6 @@ void tick_irq_enter(void)
tick_nohz_irq_enter();
}

-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

static int __init skew_tick(char *str)
@@ -1605,7 +1602,6 @@ void tick_cancel_sched_timer(int cpu)
ts->idle_calls = idle_calls;
ts->idle_sleeps = idle_sleeps;
}
-#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */

/*
* Async notification about clocksource changes

2024-02-26 22:27:34

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and tick_setup_sched_timer()

The following commit has been merged into the timers/core branch of tip:

Commit-ID: ffb7e01c4e654d5c8bf2ce2a4830b826fa1f149e
Gitweb: https://git.kernel.org/tip/ffb7e01c4e654d5c8bf2ce2a4830b826fa1f149e
Author: Peng Liu <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:53 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick/nohz: Remove duplicate between tick_nohz_switch_to_nohz() and tick_setup_sched_timer()

The ts->sched_timer initialization work of tick_nohz_switch_to_nohz()
is almost the same as that of tick_setup_sched_timer(), so adjust the
latter to get it reused by tick_nohz_switch_to_nohz().

This also makes the low resolution mode sched_timer benefit from the tick
skew boot option.

Signed-off-by: Peng Liu <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/hrtimer.c | 2 +-
kernel/time/tick-sched.c | 39 ++++++++++++++++++---------------------
kernel/time/tick-sched.h | 2 +-
3 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 1fd106a..95f1f35 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -747,7 +747,7 @@ static void hrtimer_switch_to_hres(void)
base->hres_active = 1;
hrtimer_resolution = HIGH_RES_NSEC;

- tick_setup_sched_timer();
+ tick_setup_sched_timer(NOHZ_MODE_HIGHRES);
/* "Retrigger" the interrupt to get things going */
retrigger_next_event(NULL);
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 417bb7f..d490165 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1471,9 +1471,6 @@ static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
*/
static void tick_nohz_switch_to_nohz(void)
{
- struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t next;
-
if (!tick_nohz_enabled)
return;

@@ -1482,16 +1479,9 @@ static void tick_nohz_switch_to_nohz(void)

/*
* Recycle the hrtimer in 'ts', so we can share the
- * hrtimer_forward_now() function with the highres code.
+ * highres code.
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- /* Get the next period */
- next = tick_init_jiffy_update();
-
- hrtimer_set_expires(&ts->sched_timer, next);
- hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
- tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
+ tick_setup_sched_timer(NOHZ_MODE_LOWRES);
}

static inline void tick_nohz_irq_enter(void)
@@ -1570,7 +1560,11 @@ static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer)

return HRTIMER_RESTART;
}
+#else
+#define tick_nohz_highres_handler NULL
+#endif /* CONFIG_HIGH_RES_TIMERS */

+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

static int __init skew_tick(char *str)
@@ -1583,15 +1577,17 @@ early_param("skew_tick", skew_tick);

/**
* tick_setup_sched_timer - setup the tick emulation timer
+ * @mode: tick_nohz_mode to setup for
*/
-void tick_setup_sched_timer(void)
+void tick_setup_sched_timer(int mode)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- ktime_t now = ktime_get();

/* Emulate tick processing via per-CPU hrtimers: */
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
- ts->sched_timer.function = tick_nohz_highres_handler;
+
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ ts->sched_timer.function = tick_nohz_highres_handler;

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
@@ -1604,13 +1600,14 @@ void tick_setup_sched_timer(void)
hrtimer_add_expires_ns(&ts->sched_timer, offset);
}

- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
- tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
+ hrtimer_forward_now(&ts->sched_timer, TICK_NSEC);
+ if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
+ else
+ tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+ tick_nohz_activate(ts, mode);
}
-#endif /* HIGH_RES_TIMERS */

-#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
void tick_cancel_sched_timer(int cpu)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
@@ -1632,7 +1629,7 @@ void tick_cancel_sched_timer(int cpu)
ts->idle_calls = idle_calls;
ts->idle_sleeps = idle_sleeps;
}
-#endif
+#endif /* CONFIG_NO_HZ_COMMON || CONFIG_HIGH_RES_TIMERS */

/*
* Async notification about clocksource changes
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 5ed5a9d..35808bb 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -102,7 +102,7 @@ struct tick_sched {

extern struct tick_sched *tick_get_tick_sched(int cpu);

-extern void tick_setup_sched_timer(void);
+extern void tick_setup_sched_timer(int mode);
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
extern void tick_cancel_sched_timer(int cpu);
#else

2024-02-26 22:29:37

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick: Move got_idle_tick away from common flags

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 3ce74f1a8566dbbc9774f85fb0ce781fe290fd32
Gitweb: https://git.kernel.org/tip/3ce74f1a8566dbbc9774f85fb0ce781fe290fd32
Author: Frederic Weisbecker <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:55:03 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:32 +01:00

tick: Move got_idle_tick away from common flags

tick_nohz_idle_got_tick() is called by cpuidle_reflect() within the idle
loop with interrupts enabled. This function modifies the struct
tick_sched's bitfield "got_idle_tick". However this bitfield is stored
within the same mask as other bitfields that can be modified from
interrupts.

Fortunately so far it looks like the only race that can happen is while
writing ->got_idle_tick to 0, an interrupt fires and writes the
->idle_active field to 0. It's then possible that the interrupted write
to ->got_idle_tick writes back the old value of ->idle_active back to 1.

However if that happens, the worst possible outcome is that the time
spent between that interrupt and the upcoming call to
tick_nohz_idle_exit() is accounted as idle, which is negligible quantity.

Still all the bitfield writes within this struct tick_sched's shadow
mask should be IRQ-safe. Therefore move this bitfield out to its own
storage to avoid further suprises.

Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 35808bb..3b555e0 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -61,7 +61,6 @@ struct tick_sched {
unsigned int tick_stopped : 1;
unsigned int idle_active : 1;
unsigned int do_timer_last : 1;
- unsigned int got_idle_tick : 1;

/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
@@ -73,6 +72,7 @@ struct tick_sched {
ktime_t next_tick;
unsigned long idle_jiffies;
ktime_t idle_waketime;
+ unsigned int got_idle_tick;

/* Idle entry */
seqcount_t idle_sleeptime_seq;

2024-02-26 22:31:37

by tip-bot2 for Tony Luck

[permalink] [raw]
Subject: [tip: timers/core] tick/nohz: Remove duplicate between lowres and highres handlers

The following commit has been merged into the timers/core branch of tip:

Commit-ID: 37263ba0c44b13b2025398ac81919df9f0a91368
Gitweb: https://git.kernel.org/tip/37263ba0c44b13b2025398ac81919df9f0a91368
Author: Peng Liu <[email protected]>
AuthorDate: Sun, 25 Feb 2024 23:54:54 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Feb 2024 11:37:31 +01:00

tick/nohz: Remove duplicate between lowres and highres handlers

tick_nohz_lowres_handler() does the same work as
tick_nohz_highres_handler() plus the clockevent device reprogramming, so
make the former reuse the latter and rename it accordingly.

Signed-off-by: Peng Liu <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]

---
kernel/time/tick-sched.c | 96 ++++++++++++++-------------------------
1 file changed, 36 insertions(+), 60 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d490165..88c992f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -255,6 +255,40 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
update_process_times(user_mode(regs));
profile_tick(CPU_PROFILING);
}
+
+/*
+ * We rearm the timer until we get disabled by the idle code.
+ * Called with interrupts disabled.
+ */
+static enum hrtimer_restart tick_nohz_handler(struct hrtimer *timer)
+{
+ struct tick_sched *ts = container_of(timer, struct tick_sched, sched_timer);
+ struct pt_regs *regs = get_irq_regs();
+ ktime_t now = ktime_get();
+
+ tick_sched_do_timer(ts, now);
+
+ /*
+ * Do not call when we are not in IRQ context and have
+ * no valid 'regs' pointer
+ */
+ if (regs)
+ tick_sched_handle(ts, regs);
+ else
+ ts->next_tick = 0;
+
+ /*
+ * In dynticks mode, tick reprogram is deferred:
+ * - to the idle task if in dynticks-idle
+ * - to IRQ exit if in full-dynticks.
+ */
+ if (unlikely(ts->tick_stopped))
+ return HRTIMER_NORESTART;
+
+ hrtimer_forward(timer, now, TICK_NSEC);
+
+ return HRTIMER_RESTART;
+}
#endif

#ifdef CONFIG_NO_HZ_FULL
@@ -1429,31 +1463,15 @@ void tick_nohz_idle_exit(void)
* at the clockevent level. hrtimer can't be used instead, because its
* infrastructure actually relies on the tick itself as a backend in
* low-resolution mode (see hrtimer_run_queues()).
- *
- * This low-resolution handler still makes use of some hrtimer APIs meanwhile
- * for convenience with expiration calculation and forwarding.
*/
static void tick_nohz_lowres_handler(struct clock_event_device *dev)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();

dev->next_event = KTIME_MAX;

- tick_sched_do_timer(ts, now);
- tick_sched_handle(ts, regs);
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (likely(!ts->tick_stopped)) {
- hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
+ if (likely(tick_nohz_handler(&ts->sched_timer) == HRTIMER_RESTART))
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
- }
-
}

static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
@@ -1522,48 +1540,6 @@ void tick_irq_enter(void)
tick_nohz_irq_enter();
}

-/*
- * High resolution timer specific code
- */
-#ifdef CONFIG_HIGH_RES_TIMERS
-/*
- * We rearm the timer until we get disabled by the idle code.
- * Called with interrupts disabled.
- */
-static enum hrtimer_restart tick_nohz_highres_handler(struct hrtimer *timer)
-{
- struct tick_sched *ts =
- container_of(timer, struct tick_sched, sched_timer);
- struct pt_regs *regs = get_irq_regs();
- ktime_t now = ktime_get();
-
- tick_sched_do_timer(ts, now);
-
- /*
- * Do not call when we are not in IRQ context and have
- * no valid 'regs' pointer
- */
- if (regs)
- tick_sched_handle(ts, regs);
- else
- ts->next_tick = 0;
-
- /*
- * In dynticks mode, tick reprogram is deferred:
- * - to the idle task if in dynticks-idle
- * - to IRQ exit if in full-dynticks.
- */
- if (unlikely(ts->tick_stopped))
- return HRTIMER_NORESTART;
-
- hrtimer_forward(timer, now, TICK_NSEC);
-
- return HRTIMER_RESTART;
-}
-#else
-#define tick_nohz_highres_handler NULL
-#endif /* CONFIG_HIGH_RES_TIMERS */
-
#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
static int sched_skew_tick;

@@ -1587,7 +1563,7 @@ void tick_setup_sched_timer(int mode)
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);

if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && mode == NOHZ_MODE_HIGHRES)
- ts->sched_timer.function = tick_nohz_highres_handler;
+ ts->sched_timer.function = tick_nohz_handler;

/* Get the next period (per-CPU) */
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());