From: Peter Zijlstra <[email protected]>
This code is mostly duplicated. The redudant store in the force reprogram
case does no harm and the in hrtimer interrupt condition cannot be true for
the force reprogram invocations.
Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
kernel/time/hrtimer.c | 72 ++++++++++++++++++++------------------------------
1 file changed, 29 insertions(+), 43 deletions(-)
Signed-off-by: Thomas Gleixner <[email protected]>
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,21 +652,24 @@ static inline int hrtimer_hres_active(vo
return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- */
static void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
+ struct hrtimer *next_timer, ktime_t expires_next)
{
- ktime_t expires_next;
+ /*
+ * If the hrtimer interrupt is running, then it will reevaluate the
+ * clock bases and reprogram the clock event device.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
- expires_next = hrtimer_update_next_event(cpu_base);
+ if (expires_next > cpu_base->expires_next)
+ return;
if (skip_equal && expires_next == cpu_base->expires_next)
return;
+ cpu_base->next_timer = next_timer;
cpu_base->expires_next = expires_next;
/*
@@ -689,7 +692,23 @@ hrtimer_force_reprogram(struct hrtimer_c
if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
return;
- tick_program_event(cpu_base->expires_next, 1);
+ tick_program_event(expires_next, 1);
+}
+
+/*
+ * Reprogram the event source with checking both queues for the
+ * next event
+ * Called with interrupts disabled and base->lock held
+ */
+static void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+{
+ ktime_t expires_next;
+
+ expires_next = hrtimer_update_next_event(cpu_base);
+
+ __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
+ expires_next);
}
/* High resolution timer related functions */
@@ -835,40 +854,7 @@ static void hrtimer_reprogram(struct hrt
if (base->cpu_base != cpu_base)
return;
- /*
- * If the hrtimer interrupt is running, then it will
- * reevaluate the clock bases and reprogram the clock event
- * device. The callbacks are always executed in hard interrupt
- * context so we don't need an extra check for a running
- * callback.
- */
- if (cpu_base->in_hrtirq)
- return;
-
- if (expires >= cpu_base->expires_next)
- return;
-
- /* Update the pointer to the next expiring timer */
- cpu_base->next_timer = timer;
- cpu_base->expires_next = expires;
-
- /*
- * If hres is not active, hardware does not have to be
- * programmed yet.
- *
- * If a hang was detected in the last timer interrupt then we
- * do not schedule a timer which is earlier than the expiry
- * which we enforced in the hang detection. We want the system
- * to make progress.
- */
- if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
- return;
-
- /*
- * Program the timer hardware. We enforce the expiry for
- * events which are already in the past.
- */
- tick_program_event(expires, 1);
+ __hrtimer_reprogram(cpu_base, true, timer, expires);
}
/*
The following commit has been merged into the timers/core branch of tip:
Commit-ID: b14bca97c9f5c3e3f133445b01c723e95490d843
Gitweb: https://git.kernel.org/tip/b14bca97c9f5c3e3f133445b01c723e95490d843
Author: Peter Zijlstra <[email protected]>
AuthorDate: Tue, 13 Jul 2021 15:39:47 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 10 Aug 2021 17:57:22 +02:00
hrtimer: Consolidate reprogramming code
This code is mostly duplicated. The redudant store in the force reprogram
case does no harm and the in hrtimer interrupt condition cannot be true for
the force reprogram invocations.
Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/time/hrtimer.c | 72 ++++++++++++++++--------------------------
1 file changed, 29 insertions(+), 43 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index ba2e0d0..5f7c465 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,21 +652,24 @@ static inline int hrtimer_hres_active(void)
return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- */
static void
-hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
+ struct hrtimer *next_timer, ktime_t expires_next)
{
- ktime_t expires_next;
+ /*
+ * If the hrtimer interrupt is running, then it will reevaluate the
+ * clock bases and reprogram the clock event device.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
- expires_next = hrtimer_update_next_event(cpu_base);
+ if (expires_next > cpu_base->expires_next)
+ return;
if (skip_equal && expires_next == cpu_base->expires_next)
return;
+ cpu_base->next_timer = next_timer;
cpu_base->expires_next = expires_next;
/*
@@ -689,7 +692,23 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
return;
- tick_program_event(cpu_base->expires_next, 1);
+ tick_program_event(expires_next, 1);
+}
+
+/*
+ * Reprogram the event source with checking both queues for the
+ * next event
+ * Called with interrupts disabled and base->lock held
+ */
+static void
+hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
+{
+ ktime_t expires_next;
+
+ expires_next = hrtimer_update_next_event(cpu_base);
+
+ __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
+ expires_next);
}
/* High resolution timer related functions */
@@ -835,40 +854,7 @@ static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
if (base->cpu_base != cpu_base)
return;
- /*
- * If the hrtimer interrupt is running, then it will
- * reevaluate the clock bases and reprogram the clock event
- * device. The callbacks are always executed in hard interrupt
- * context so we don't need an extra check for a running
- * callback.
- */
- if (cpu_base->in_hrtirq)
- return;
-
- if (expires >= cpu_base->expires_next)
- return;
-
- /* Update the pointer to the next expiring timer */
- cpu_base->next_timer = timer;
- cpu_base->expires_next = expires;
-
- /*
- * If hres is not active, hardware does not have to be
- * programmed yet.
- *
- * If a hang was detected in the last timer interrupt then we
- * do not schedule a timer which is earlier than the expiry
- * which we enforced in the hang detection. We want the system
- * to make progress.
- */
- if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
- return;
-
- /*
- * Program the timer hardware. We enforce the expiry for
- * events which are already in the past.
- */
- tick_program_event(expires, 1);
+ __hrtimer_reprogram(cpu_base, true, timer, expires);
}
/*
Greetings Peter, may your day get off to a better start than my box's
did :)
On Tue, 2021-08-10 at 16:02 +0000, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the timers/core branch of tip:
>
> Commit-ID: b14bca97c9f5c3e3f133445b01c723e95490d843
> Gitweb: https://git.kernel.org/tip/b14bca97c9f5c3e3f133445b01c723e95490d843
> Author: Peter Zijlstra <[email protected]>
> AuthorDate: Tue, 13 Jul 2021 15:39:47 +02:00
> Committer: Thomas Gleixner <[email protected]>
> CommitterDate: Tue, 10 Aug 2021 17:57:22 +02:00
>
> hrtimer: Consolidate reprogramming code
Per git-bisect, this is the tip.today commit that's bricking my box
early in boot. Another inspires the moan below, courtesy of VM, which
unlike desktop box does not brick immediately thereafter.
[ 0.556416] rtc_cmos 00:04: RTC can wake from S4
[ 0.557184] rtc_cmos 00:04: registered as rtc0
[ 0.557636] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
[ 0.558169] caller is debug_smp_processor_id+0x13/0x20
[ 0.558511] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.14.0.g1fd628c-tip #15
[ 0.558946] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
[ 0.559623] Call Trace:
[ 0.559796] dump_stack_lvl+0x62/0x78
[ 0.560041] dump_stack+0xc/0xd
[ 0.560263] check_preemption_disabled+0xd3/0xe0
[ 0.560576] debug_smp_processor_id+0x13/0x20
[ 0.560954] clock_was_set+0x1c/0x310
[ 0.561118] ? timekeeping_update+0x298/0x2b0
[ 0.561118] do_settimeofday64+0x31e/0x340
[ 0.561118] __devm_rtc_register_device+0x371/0x450
[ 0.561118] cmos_do_probe+0x4a2/0x6e0
[ 0.561118] ? cmos_interrupt+0x120/0x120
[ 0.561118] ? cmos_nvram_read+0x90/0x90
[ 0.561118] cmos_pnp_probe+0x91/0xe0
[ 0.561118] pnp_device_probe+0x15e/0x1d0
[ 0.561118] ? cmos_irq_enable+0x150/0x150
[ 0.561118] call_driver_probe+0x4a/0x130
[ 0.561118] really_probe+0x150/0x540
[ 0.561118] __driver_probe_device+0x160/0x200
[ 0.561118] driver_probe_device+0x3a/0x2b0
[ 0.561118] __driver_attach+0xb4/0x370
[ 0.561118] ? driver_attach+0x30/0x30
[ 0.561118] bus_for_each_dev+0xb0/0xe0
[ 0.561118] driver_attach+0x27/0x30
[ 0.561118] bus_add_driver+0x1ba/0x310
[ 0.561118] driver_register+0x104/0x200
[ 0.561118] pnp_register_driver+0x3e/0x50
[ 0.561118] ? rtc_dev_init+0x33/0x33
[ 0.561118] cmos_init+0x14/0xbc
[ 0.561118] ? rtc_dev_init+0x33/0x33
[ 0.561118] do_one_initcall+0xcf/0x2c0
[ 0.561118] ? strlen+0x18/0x30
[ 0.561118] ? parse_one+0x2b9/0x350
[ 0.561118] ? do_initcall_level+0x106/0x106
[ 0.561118] ? parse_args+0x133/0x280
[ 0.561118] ? parse_args+0x94/0x280
[ 0.561118] do_initcall_level+0x95/0x106
[ 0.561118] do_initcalls+0x61/0x8b
[ 0.561118] do_basic_setup+0x20/0x21
[ 0.561118] kernel_init_freeable+0x171/0x1de
[ 0.561118] ? rest_init+0xf0/0xf0
[ 0.561118] kernel_init+0x17/0x1e0
[ 0.561118] ? rest_init+0xf0/0xf0
[ 0.561118] ret_from_fork+0x1f/0x30
Hi,
On 13.07.2021 15:39, Thomas Gleixner wrote:
> From: Peter Zijlstra <[email protected]>
>
> This code is mostly duplicated. The redudant store in the force reprogram
> case does no harm and the in hrtimer interrupt condition cannot be true for
> the force reprogram invocations.
>
> Signed-off-by: Peter Zijlstra <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> kernel/time/hrtimer.c | 72 ++++++++++++++++++++------------------------------
> 1 file changed, 29 insertions(+), 43 deletions(-)
>
> Signed-off-by: Thomas Gleixner <[email protected]>
This patch landed in today's linux-next (next-20210812) as commit
b14bca97c9f5 ("hrtimer: Consolidate reprogramming code"). It breaks
booting of many of my test machines: ARM 32bit Exynos based boards, ARM
64bit QEmu virt machine or ARM64 Qualcomm DragonBoard410c board.
I've managed to catch the following log on QEmu's virt ARM64 machine:
rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
rcu: 0-...!: (0 ticks this GP) idle=330/0/0x0 softirq=42/42 fqs=0
(false positive?)
(detected by 1, t=6502 jiffies, g=-1091, q=115)
============================================
WARNING: possible recursive locking detected
5.14.0-rc5+ #10668 Not tainted
--------------------------------------------
swapper/1/0 is trying to acquire lock:
ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_dump_cpu_stacks+0x68/0x1c4
but task is already holding lock:
ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_sched_clock_irq+0x83c/0x1778
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(rcu_node_0);
lock(rcu_node_0);
*** DEADLOCK ***
May be due to missing lock nesting notation
1 lock held by swapper/1/0:
#0: ffffbb9c1e4ca1d8 (rcu_node_0){-.-.}-{2:2}, at:
rcu_sched_clock_irq+0x83c/0x1778
stack backtrace:
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.14.0-rc5+ #10668
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x0/0x1d0
show_stack+0x14/0x20
dump_stack_lvl+0x88/0xb0
dump_stack+0x14/0x2c
__lock_acquire+0x17a4/0x1840
lock_acquire+0x130/0x3e8
_raw_spin_lock_irqsave+0x78/0x148
rcu_dump_cpu_stacks+0x68/0x1c4
rcu_sched_clock_irq+0x11e8/0x1778
update_process_times+0x88/0xd0
tick_sched_handle.isra.19+0x30/0x50
tick_sched_timer+0x48/0x98
__hrtimer_run_queues+0x380/0x5b0
hrtimer_interrupt+0xe4/0x240
arch_timer_handler_virt+0x30/0x40
handle_percpu_devid_irq+0xc0/0x3d0
handle_domain_irq+0x58/0x88
gic_handle_irq+0xa8/0xc8
call_on_irq_stack+0x28/0x38
do_interrupt_handler+0x54/0x60
el1_interrupt+0x2c/0x108
el1h_64_irq_handler+0x14/0x20
el1h_64_irq+0x74/0x78
arch_cpu_idle+0x14/0x20
default_idle_call+0x88/0x390
do_idle+0x200/0x290
cpu_startup_entry+0x20/0x80
secondary_start_kernel+0x1c0/0x1f0
__secondary_switched+0x7c/0x80
I hope it helps fixing the issue.
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -652,21 +652,24 @@ static inline int hrtimer_hres_active(vo
> return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
> }
>
> -/*
> - * Reprogram the event source with checking both queues for the
> - * next event
> - * Called with interrupts disabled and base->lock held
> - */
> static void
> -hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
> +__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
> + struct hrtimer *next_timer, ktime_t expires_next)
> {
> - ktime_t expires_next;
> + /*
> + * If the hrtimer interrupt is running, then it will reevaluate the
> + * clock bases and reprogram the clock event device.
> + */
> + if (cpu_base->in_hrtirq)
> + return;
>
> - expires_next = hrtimer_update_next_event(cpu_base);
> + if (expires_next > cpu_base->expires_next)
> + return;
>
> if (skip_equal && expires_next == cpu_base->expires_next)
> return;
>
> + cpu_base->next_timer = next_timer;
> cpu_base->expires_next = expires_next;
>
> /*
> @@ -689,7 +692,23 @@ hrtimer_force_reprogram(struct hrtimer_c
> if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
> return;
>
> - tick_program_event(cpu_base->expires_next, 1);
> + tick_program_event(expires_next, 1);
> +}
> +
> +/*
> + * Reprogram the event source with checking both queues for the
> + * next event
> + * Called with interrupts disabled and base->lock held
> + */
> +static void
> +hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
> +{
> + ktime_t expires_next;
> +
> + expires_next = hrtimer_update_next_event(cpu_base);
> +
> + __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
> + expires_next);
> }
>
> /* High resolution timer related functions */
> @@ -835,40 +854,7 @@ static void hrtimer_reprogram(struct hrt
> if (base->cpu_base != cpu_base)
> return;
>
> - /*
> - * If the hrtimer interrupt is running, then it will
> - * reevaluate the clock bases and reprogram the clock event
> - * device. The callbacks are always executed in hard interrupt
> - * context so we don't need an extra check for a running
> - * callback.
> - */
> - if (cpu_base->in_hrtirq)
> - return;
> -
> - if (expires >= cpu_base->expires_next)
> - return;
> -
> - /* Update the pointer to the next expiring timer */
> - cpu_base->next_timer = timer;
> - cpu_base->expires_next = expires;
> -
> - /*
> - * If hres is not active, hardware does not have to be
> - * programmed yet.
> - *
> - * If a hang was detected in the last timer interrupt then we
> - * do not schedule a timer which is earlier than the expiry
> - * which we enforced in the hang detection. We want the system
> - * to make progress.
> - */
> - if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
> - return;
> -
> - /*
> - * Program the timer hardware. We enforce the expiry for
> - * events which are already in the past.
> - */
> - tick_program_event(expires, 1);
> + __hrtimer_reprogram(cpu_base, true, timer, expires);
> }
>
> /*
>
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Thu, Aug 12 2021 at 16:11, Thomas Gleixner wrote:
> On Thu, Aug 12 2021 at 09:19, Mike Galbraith wrote:
>> Greetings Peter, may your day get off to a better start than my box's
>> did :)
>>
>> On Tue, 2021-08-10 at 16:02 +0000, tip-bot2 for Peter Zijlstra wrote:
>>> The following commit has been merged into the timers/core branch of tip:
>>>
>>> Commit-ID: b14bca97c9f5c3e3f133445b01c723e95490d843
>>> Gitweb: https://git.kernel.org/tip/b14bca97c9f5c3e3f133445b01c723e95490d843
>>> Author: Peter Zijlstra <[email protected]>
>>> AuthorDate: Tue, 13 Jul 2021 15:39:47 +02:00
>>> Committer: Thomas Gleixner <[email protected]>
>>> CommitterDate: Tue, 10 Aug 2021 17:57:22 +02:00
>>>
>>> hrtimer: Consolidate reprogramming code
>>
>> Per git-bisect, this is the tip.today commit that's bricking my box
>> early in boot.
>
> Let me stare at that.
Can you please test whether the below fixes it for you?
I have yet to find a machine which reproduces it as I really want to
understand which particular part is causing this.
Thanks,
tglx
---
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,24 +652,10 @@ static inline int hrtimer_hres_active(vo
return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
-static void
-__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
- struct hrtimer *next_timer, ktime_t expires_next)
+static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
+ struct hrtimer *next_timer,
+ ktime_t expires_next)
{
- /*
- * If the hrtimer interrupt is running, then it will reevaluate the
- * clock bases and reprogram the clock event device.
- */
- if (cpu_base->in_hrtirq)
- return;
-
- if (expires_next > cpu_base->expires_next)
- return;
-
- if (skip_equal && expires_next == cpu_base->expires_next)
- return;
-
- cpu_base->next_timer = next_timer;
cpu_base->expires_next = expires_next;
/*
@@ -707,8 +693,10 @@ hrtimer_force_reprogram(struct hrtimer_c
expires_next = hrtimer_update_next_event(cpu_base);
- __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
- expires_next);
+ if (skip_equal && expires_next == cpu_base->expires_next)
+ return;
+
+ __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
}
/* High resolution timer related functions */
@@ -863,7 +851,19 @@ static void hrtimer_reprogram(struct hrt
if (base->cpu_base != cpu_base)
return;
- __hrtimer_reprogram(cpu_base, true, timer, expires);
+ if (expires >= cpu_base->expires_next)
+ return;
+
+ /*
+ * If the hrtimer interrupt is running, then it will reevaluate the
+ * clock bases and reprogram the clock event device.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
+
+ cpu_base->next_timer = timer;
+
+ __hrtimer_reprogram(cpu_base, timer, expires);
}
static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
On Thu, Aug 12 2021 at 09:19, Mike Galbraith wrote:
> Greetings Peter, may your day get off to a better start than my box's
> did :)
>
> On Tue, 2021-08-10 at 16:02 +0000, tip-bot2 for Peter Zijlstra wrote:
>> The following commit has been merged into the timers/core branch of tip:
>>
>> Commit-ID: b14bca97c9f5c3e3f133445b01c723e95490d843
>> Gitweb: https://git.kernel.org/tip/b14bca97c9f5c3e3f133445b01c723e95490d843
>> Author: Peter Zijlstra <[email protected]>
>> AuthorDate: Tue, 13 Jul 2021 15:39:47 +02:00
>> Committer: Thomas Gleixner <[email protected]>
>> CommitterDate: Tue, 10 Aug 2021 17:57:22 +02:00
>>
>> hrtimer: Consolidate reprogramming code
>
> Per git-bisect, this is the tip.today commit that's bricking my box
> early in boot.
Let me stare at that.
> Another inspires the moan below, courtesy of VM, which
> unlike desktop box does not brick immediately thereafter.
> [ 0.556416] rtc_cmos 00:04: RTC can wake from S4
> [ 0.557184] rtc_cmos 00:04: registered as rtc0
> [ 0.557636] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1
> [ 0.558169] caller is debug_smp_processor_id+0x13/0x20
> [ 0.558511] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 5.14.0.g1fd628c-tip #15
> [ 0.558946] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a-rebuilt.opensuse.org 04/01/2014
> [ 0.559623] Call Trace:
> [ 0.559796] dump_stack_lvl+0x62/0x78
> [ 0.560041] dump_stack+0xc/0xd
> [ 0.560263] check_preemption_disabled+0xd3/0xe0
> [ 0.560576] debug_smp_processor_id+0x13/0x20
> [ 0.560954] clock_was_set+0x1c/0x310
My stupidity. Fix for that below.
Thanks,
tglx
---
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -944,10 +944,11 @@ static bool update_needs_ipi(struct hrti
*/
void clock_was_set(unsigned int bases)
{
+ struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
cpumask_var_t mask;
int cpu;
- if (!hrtimer_hres_active() && !tick_nohz_active)
+ if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
goto out_timerfd;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -958,9 +959,9 @@ void clock_was_set(unsigned int bases)
/* Avoid interrupting CPUs if possible */
cpus_read_lock();
for_each_online_cpu(cpu) {
- struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
unsigned long flags;
+ cpu_base = &per_cpu(hrtimer_bases, cpu);
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (update_needs_ipi(cpu_base, bases))
On Thu, 2021-08-12 at 16:32 +0200, Thomas Gleixner wrote:
>
> Can you please test whether the below fixes it for you?
Yes, that boots.
> I have yet to find a machine which reproduces it as I really want to
> understand which particular part is causing this.
Config attached just in case.
> Thanks,
>
> tglx
> ---
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -652,24 +652,10 @@ static inline int hrtimer_hres_active(vo
> return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
> }
>
> -static void
> -__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
> - struct hrtimer *next_timer, ktime_t expires_next)
> +static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
> + struct hrtimer *next_timer,
> + ktime_t expires_next)
> {
> - /*
> - * If the hrtimer interrupt is running, then it will reevaluate
> the
> - * clock bases and reprogram the clock event device.
> - */
> - if (cpu_base->in_hrtirq)
> - return;
> -
> - if (expires_next > cpu_base->expires_next)
> - return;
> -
> - if (skip_equal && expires_next == cpu_base->expires_next)
> - return;
> -
> - cpu_base->next_timer = next_timer;
> cpu_base->expires_next = expires_next;
>
> /*
> @@ -707,8 +693,10 @@ hrtimer_force_reprogram(struct hrtimer_c
>
> expires_next = hrtimer_update_next_event(cpu_base);
>
> - __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
> - expires_next);
> + if (skip_equal && expires_next == cpu_base->expires_next)
> + return;
> +
> + __hrtimer_reprogram(cpu_base, cpu_base->next_timer,
> expires_next);
> }
>
> /* High resolution timer related functions */
> @@ -863,7 +851,19 @@ static void hrtimer_reprogram(struct hrt
> if (base->cpu_base != cpu_base)
> return;
>
> - __hrtimer_reprogram(cpu_base, true, timer, expires);
> + if (expires >= cpu_base->expires_next)
> + return;
> +
> + /*
> + * If the hrtimer interrupt is running, then it will reevaluate
> the
> + * clock bases and reprogram the clock event device.
> + */
> + if (cpu_base->in_hrtirq)
> + return;
> +
> + cpu_base->next_timer = timer;
> +
> + __hrtimer_reprogram(cpu_base, timer, expires);
> }
>
> static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
On Thu, Aug 12 2021 at 17:04, Mike Galbraith wrote:
> On Thu, 2021-08-12 at 16:32 +0200, Thomas Gleixner wrote:
>>
>> Can you please test whether the below fixes it for you?
>
> Yes, that boots.
Not that I'm surprised, but I still do not know why :)
>> I have yet to find a machine which reproduces it as I really want to
>> understand which particular part is causing this.
>
> Config attached just in case.
I rather assume it's a hardware dependency. What kind of machine are you
using?
Thanks,
tglx
On Thu, 2021-08-12 at 17:04 +0200, Mike Galbraith wrote:
> On Thu, 2021-08-12 at 16:32 +0200, Thomas Gleixner wrote:
> >
> > Can you please test whether the below fixes it for you?
>
> Yes, that boots.
>
> > I have yet to find a machine which reproduces it as I really want to
> > understand which particular part is causing this.
>
> Config attached just in case.
My HP lappy bricks as well, and with tip-rt. Fix verified there too.
-Mike
On Thu, 2021-08-12 at 17:22 +0200, Thomas Gleixner wrote:
> >
> > Config attached just in case.
>
> I rather assume it's a hardware dependency. What kind of machine are you
> using?
Desktop box is a garden variety MEDION i4790 box with an Nvidia GTX980.
Lappy is an HP Spectre 360 i5-6200U w. i915 graphics. Very mundane.
-Mike
On Thu, Aug 12 2021 at 17:31, Mike Galbraith wrote:
> On Thu, 2021-08-12 at 17:22 +0200, Thomas Gleixner wrote:
>> >
>> > Config attached just in case.
>>
>> I rather assume it's a hardware dependency. What kind of machine are you
>> using?
>
> Desktop box is a garden variety MEDION i4790 box with an Nvidia GTX980.
> Lappy is an HP Spectre 360 i5-6200U w. i915 graphics. Very mundane.
Hmm, spectre induced timer meltdown? :)
Anyway I found a box which exposes the problem. Investigating...
Thanks,
tglx
clock_was_set() can be invoked from preemptible context. Use raw_cpu_ptr()
to check whether high resolution mode is active or not. It does not matter
whether the task migrates after acquiring the pointer.
Fixes: e71a4153b7c2 ("hrtimer: Force clock_was_set() handling for the HIGHRES=n, NOHZ=y case")
Reported-by: Mike Galbraith <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
kernel/time/hrtimer.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -944,10 +944,11 @@ static bool update_needs_ipi(struct hrti
*/
void clock_was_set(unsigned int bases)
{
+ struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
cpumask_var_t mask;
int cpu;
- if (!hrtimer_hres_active() && !tick_nohz_active)
+ if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
goto out_timerfd;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -958,9 +959,9 @@ void clock_was_set(unsigned int bases)
/* Avoid interrupting CPUs if possible */
cpus_read_lock();
for_each_online_cpu(cpu) {
- struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
unsigned long flags;
+ cpu_base = &per_cpu(hrtimer_bases, cpu);
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (update_needs_ipi(cpu_base, bases))
Since the recent consoliation of reprogramming functions,
hrtimer_force_reprogram() is affected by a check whether the new expiry
time is past the current expiry time.
This breaks the NOHZ logic as that relies on the fact that the tick hrtimer
is moved into the future. That means cpu_base->expires_next becomes stale
and subsequent reprogramming attempts fail as well until the situation is
cleaned up by an hrtimer interrupts.
For some yet unknown reason this leads to a complete stall, so for now
partially revert the offending commit to a known working state. The root
cause for the stall is still investigated and will be fixed in a subsequent
commit.
Fixes: b14bca97c9f5 ("hrtimer: Consolidate reprogramming code")
Reported-by: Mike Galbraith <[email protected]>
Reported-by: Marek Szyprowski <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Mike Galbraith <[email protected]>
---
kernel/time/hrtimer.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -652,24 +652,10 @@ static inline int hrtimer_hres_active(vo
return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}
-static void
-__hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal,
- struct hrtimer *next_timer, ktime_t expires_next)
+static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
+ struct hrtimer *next_timer,
+ ktime_t expires_next)
{
- /*
- * If the hrtimer interrupt is running, then it will reevaluate the
- * clock bases and reprogram the clock event device.
- */
- if (cpu_base->in_hrtirq)
- return;
-
- if (expires_next > cpu_base->expires_next)
- return;
-
- if (skip_equal && expires_next == cpu_base->expires_next)
- return;
-
- cpu_base->next_timer = next_timer;
cpu_base->expires_next = expires_next;
/*
@@ -707,8 +693,10 @@ hrtimer_force_reprogram(struct hrtimer_c
expires_next = hrtimer_update_next_event(cpu_base);
- __hrtimer_reprogram(cpu_base, skip_equal, cpu_base->next_timer,
- expires_next);
+ if (skip_equal && expires_next == cpu_base->expires_next)
+ return;
+
+ __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
}
/* High resolution timer related functions */
@@ -863,7 +851,19 @@ static void hrtimer_reprogram(struct hrt
if (base->cpu_base != cpu_base)
return;
- __hrtimer_reprogram(cpu_base, true, timer, expires);
+ if (expires >= cpu_base->expires_next)
+ return;
+
+ /*
+ * If the hrtimer interrupt is running, then it will reevaluate the
+ * clock bases and reprogram the clock event device.
+ */
+ if (cpu_base->in_hrtirq)
+ return;
+
+ cpu_base->next_timer = timer;
+
+ __hrtimer_reprogram(cpu_base, timer, expires);
}
static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
On 12.08.2021 22:31, Thomas Gleixner wrote:
> clock_was_set() can be invoked from preemptible context. Use raw_cpu_ptr()
> to check whether high resolution mode is active or not. It does not matter
> whether the task migrates after acquiring the pointer.
>
> Fixes: e71a4153b7c2 ("hrtimer: Force clock_was_set() handling for the HIGHRES=n, NOHZ=y case")
> Reported-by: Mike Galbraith <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Marek Szyprowski <[email protected]>
This fixes the following issue:
BUG: using smp_processor_id() in preemptible [00000000] code: hwclock/227
> ---
> kernel/time/hrtimer.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -944,10 +944,11 @@ static bool update_needs_ipi(struct hrti
> */
> void clock_was_set(unsigned int bases)
> {
> + struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
> cpumask_var_t mask;
> int cpu;
>
> - if (!hrtimer_hres_active() && !tick_nohz_active)
> + if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
> goto out_timerfd;
>
> if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
> @@ -958,9 +959,9 @@ void clock_was_set(unsigned int bases)
> /* Avoid interrupting CPUs if possible */
> cpus_read_lock();
> for_each_online_cpu(cpu) {
> - struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
> unsigned long flags;
>
> + cpu_base = &per_cpu(hrtimer_bases, cpu);
> raw_spin_lock_irqsave(&cpu_base->lock, flags);
>
> if (update_needs_ipi(cpu_base, bases))
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Thu, Aug 12 2021 at 22:32, Thomas Gleixner wrote:
> Since the recent consoliation of reprogramming functions,
> hrtimer_force_reprogram() is affected by a check whether the new expiry
> time is past the current expiry time.
>
> This breaks the NOHZ logic as that relies on the fact that the tick hrtimer
> is moved into the future. That means cpu_base->expires_next becomes stale
> and subsequent reprogramming attempts fail as well until the situation is
> cleaned up by an hrtimer interrupts.
>
> For some yet unknown reason this leads to a complete stall, so for now
> partially revert the offending commit to a known working state. The root
> cause for the stall is still investigated and will be fixed in a subsequent
> commit.
So with brain more awake I actually managed to decode the problem. It's
definitely the
expires > cpu_base->expires_next
check. It not only prevents the NOHZ idle case from moving the next
timer interrupt into the future, it also causes the stall when switching
into high resolution / NOHZ mode. At that point the initial base value
can be smaller than the next event which prevents reprogramming and as
the base value stays stale it prevents any further reprogramming unless
there is a full update of the base which makes the problem go away.
TBH, that optimization logic to prevent reprogramming the timer hardware
for nothing is a bit fragile and non-obvious. I'll have a look to make
this more robust and less obscure.
Thanks,
tglx