Hi,
This is your friendly bug reporter. :-)
The environment is a vanilla Torvalds-tree kernel on Ubuntu 22.04 LTS, running on a custom-built Ryzen 7950X box.
[ 7145.500079] ==================================================================
[ 7145.500109] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
[ 7145.500139] write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
[ 7145.500155] tick_nohz_idle_stop_tick+0x3b1/0x4a0
[ 7145.500173] do_idle+0x1e3/0x250
[ 7145.500186] cpu_startup_entry+0x20/0x30
[ 7145.500199] start_secondary+0x129/0x160
[ 7145.500216] secondary_startup_64_no_verify+0x17e/0x18b
[ 7145.500245] read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
[ 7145.500261] tick_nohz_next_event+0xe7/0x1e0
[ 7145.500277] tick_nohz_get_sleep_length+0xa7/0xe0
[ 7145.500293] menu_select+0x82/0xb90
[ 7145.500309] cpuidle_select+0x44/0x60
[ 7145.500324] do_idle+0x1c2/0x250
[ 7145.500336] cpu_startup_entry+0x20/0x30
[ 7145.500350] start_secondary+0x129/0x160
[ 7145.500367] secondary_startup_64_no_verify+0x17e/0x18b
[ 7145.500392] value changed: 0x0000001a -> 0xffffffff
[ 7145.500411] Reported by Kernel Concurrency Sanitizer on:
[ 7145.500421] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G L 6.5.0-rc6-net-cfg-kcsan-00038-g16931859a650 #35
[ 7145.500439] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023
[ 7145.500449] ==================================================================
Please find the dmesg log (what was still available in the ring buffer) and the lshw output attached.
NOTE: There are no proprietary drivers loaded, and Canonical livepatch isn't working for this kernel;
the taint was probably set by previous ACPI KCSAN BUGs.
Thank you very much for your time evaluating this report.
Best regards,
Mirsad Todorovac
On 8/18/23 10:15, Mirsad Todorovac wrote:
>
>
> On 8/17/23 21:32, Mirsad Todorovac wrote:
>> Hi,
>>
>> This is your friendly bug reporter. :-)
>>
>> The environment is the vanilla torvalds kernel on Ubuntu 22.04 LTS and a Ryzen 7950X assembled box.
>>
>> [ 7145.500079] ==================================================================
>> [ 7145.500109] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
>>
>> [ 7145.500139] write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
>> [ 7145.500155] tick_nohz_idle_stop_tick+0x3b1/0x4a0
>> [ 7145.500173] do_idle+0x1e3/0x250
>> [ 7145.500186] cpu_startup_entry+0x20/0x30
>> [ 7145.500199] start_secondary+0x129/0x160
>> [ 7145.500216] secondary_startup_64_no_verify+0x17e/0x18b
>>
>> [ 7145.500245] read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
>> [ 7145.500261] tick_nohz_next_event+0xe7/0x1e0
>> [ 7145.500277] tick_nohz_get_sleep_length+0xa7/0xe0
>> [ 7145.500293] menu_select+0x82/0xb90
>> [ 7145.500309] cpuidle_select+0x44/0x60
>> [ 7145.500324] do_idle+0x1c2/0x250
>> [ 7145.500336] cpu_startup_entry+0x20/0x30
>> [ 7145.500350] start_secondary+0x129/0x160
>> [ 7145.500367] secondary_startup_64_no_verify+0x17e/0x18b
>>
>> [ 7145.500392] value changed: 0x0000001a -> 0xffffffff
>>
>> [ 7145.500411] Reported by Kernel Concurrency Sanitizer on:
>> [ 7145.500421] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G L 6.5.0-rc6-net-cfg-kcsan-00038-g16931859a650 #35
>> [ 7145.500439] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023
>> [ 7145.500449] ==================================================================
>>
>> Please find dmesg log (what was available in the ring buffer) and the lshw output attached.
>>
>> NOTE: There are no proprietary drivers loaded and Canonical livepatch isn't working for this kernel,
>> probably previous ACPI KCSAN BUGs turn this taint on.
>>
>> Thank you very much for your time evaluating this report.
>
> P.S.
>
> According to Mr. Heo, I will add the decoded stacktrace to this bug report, which ought to have been done
> in the first place :-/
>
> [ 7145.500079] ==================================================================
> [ 7145.500109] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
>
> [ 7145.500139] write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
> [ 7145.500155] tick_nohz_idle_stop_tick (kernel/time/tick-sched.c:904 kernel/time/tick-sched.c:1130)
> [ 7145.500173] do_idle (kernel/sched/idle.c:215 kernel/sched/idle.c:282)
> [ 7145.500186] cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1))
> [ 7145.500199] start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294)
> [ 7145.500216] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:441)
>
> [ 7145.500245] read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
> [ 7145.500261] tick_nohz_next_event (kernel/time/tick-sched.c:868)
> [ 7145.500277] tick_nohz_get_sleep_length (kernel/time/tick-sched.c:1250)
> [ 7145.500293] menu_select (drivers/cpuidle/governors/menu.c:283)
> [ 7145.500309] cpuidle_select (drivers/cpuidle/cpuidle.c:359)
> [ 7145.500324] do_idle (kernel/sched/idle.c:208 kernel/sched/idle.c:282)
> [ 7145.500336] cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1))
> [ 7145.500350] start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294)
> [ 7145.500367] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:441)
>
> [ 7145.500392] value changed: 0x0000001a -> 0xffffffff
>
> [ 7145.500411] Reported by Kernel Concurrency Sanitizer on:
> [ 7145.500421] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G L 6.5.0-rc6-net-cfg-kcsan-00038-g16931859a650 #35
> [ 7145.500439] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023
> [ 7145.500449] ==================================================================
>
> The code seems to be:
>
>
> /*
> * If this CPU is the one which updates jiffies, then give up
> * the assignment and let it be taken by the CPU which runs
> * the tick timer next, which might be this CPU as well. If we
> * don't drop this here the jiffies might be stale and
> * do_timer() never invoked. Keep track of the fact that it
> * was the one which had the do_timer() duty last.
> */
> if (cpu == tick_do_timer_cpu) {
> tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> → ts->do_timer_last = 1;
> } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> ts->do_timer_last = 0;
>
> }
>
> but I am not sure why this happens.
>
> Maybe a WRITE_ONCE(ts->do_timer_last, 1) is required?
>
> Or is it another KCSAN false positive ...
I have seen a few more KCSAN reports regarding this variable:
[ 153.285199] ==================================================================
[ 153.292529] BUG: KCSAN: data-race in tick_nohz_next_event+0x15c/0x29c
[ 153.299118]
[ 153.300685] race at unknown origin, with read to 0xffffffc082aa1048 of 4 bytes by task 0 on cpu 2:
[ 153.309760] tick_nohz_next_event+0x15c/0x29c
[ 153.314256] tick_nohz_get_sleep_length+0xd8/0x118
[ 153.319186] menu_select+0x1e0/0xc08
[ 153.322875] cpuidle_select+0x4c/0x60
[ 153.326686] do_idle+0x1e0/0x2b4
[ 153.330062] cpu_startup_entry+0x34/0x3c
[ 153.334133] secondary_start_kernel+0x1e8/0x21c
[ 153.338821] __secondary_switched+0xb8/0xbc
[ 153.343143]
[ 153.344706] no locks held by swapper/2/0.
[ 153.348802] irq event stamp: 129586
[ 153.352370] hardirqs last enabled at (129585): [<ffffffc081acaa2c>] el1_interrupt+0x40/0x50
[ 153.360964] hardirqs last disabled at (129586): [<ffffffc0800d8fec>] do_idle+0xc8/0x2b4
[ 153.369132] softirqs last enabled at (129560): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
[ 153.377786] softirqs last disabled at (129549): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
[ 153.386457]
[ 153.388019] value changed: 0xffffffff -> 0x00000001
[ 153.392993]
[ 153.394555] Reported by Kernel Concurrency Sanitizer on:
[ 153.399947] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 6.6.20+ #102
[ 153.406595] Hardware name: xlnx,zynqmp (DT)
[ 153.410866] ==================================================================
and:
[ 75.362055] ==================================================================
[ 75.369461] BUG: KCSAN: data-race in tick_sched_do_timer / tick_sched_do_timer
[ 75.376854]
[ 75.378426] write to 0xffffffc082aa1048 of 4 bytes by interrupt on cpu 1:
[ 75.385335] tick_sched_do_timer+0x124/0x168
[ 75.389744] tick_sched_timer+0x48/0xc8
[ 75.393720] __hrtimer_run_queues+0x200/0x5f0
[ 75.398198] hrtimer_interrupt+0x1b0/0x3f4
[ 75.402417] arch_timer_handler_phys+0x40/0x54
[ 75.406983] handle_percpu_devid_irq+0xf8/0x20c
[ 75.411679] generic_handle_domain_irq+0x48/0x64
[ 75.416435] gic_handle_irq+0xc4/0x100
[ 75.420298] call_on_irq_stack+0x24/0x4c
[ 75.424360] do_interrupt_handler+0xc0/0xc4
[ 75.428692] el1_interrupt+0x34/0x50
[ 75.432415] el1h_64_irq_handler+0x18/0x24
[ 75.436668] el1h_64_irq+0x64/0x68
[ 75.440184] percpu_counter_add_batch+0xd8/0x1b8
[ 75.444967] set_pte_range+0x14c/0x2c8
[ 75.448838] filemap_map_pages+0x2dc/0xa38
[ 75.453091] __handle_mm_fault+0x1304/0x17dc
[ 75.457492] handle_mm_fault+0xa0/0x374
[ 75.461450] do_page_fault+0x1f8/0x620
[ 75.465365] do_translation_fault+0x118/0x138
[ 75.469887] do_mem_abort+0x5c/0xe4
[ 75.473516] el0_da+0x5c/0xac
[ 75.476623] el0t_64_sync_handler+0xe4/0x158
[ 75.481050] el0t_64_sync+0x190/0x194
[ 75.484834]
[ 75.486405] 5 locks held by (d-logind)/293:
[ 75.490693] #0: ffffff88076753c0 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x15c/0x5c0
[ 75.500441] #1: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: __handle_mm_fault+0x1260/0x17dc
[ 75.509755] #2: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: filemap_map_pages+0x0/0xa38
[ 75.518748] #3: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: __pte_offset_map+0x0/0x2a0
[ 75.527662] #4: ffffff880762b018 (ptlock_ptr(ptdesc)){+.+.}-{2:2}, at: __pte_offset_map_lock+0xa4/0x264
[ 75.537488] irq event stamp: 1761
[ 75.540890] hardirqs last enabled at (1760): [<ffffffc080bc1f78>] percpu_counter_add_batch+0x160/0x1b8
[ 75.550465] hardirqs last disabled at (1761): [<ffffffc081aca990>] el1_interrupt+0x24/0x50
[ 75.558893] softirqs last enabled at (1534): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
[ 75.567382] softirqs last disabled at (1529): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
[ 75.575889]
[ 75.577460] read to 0xffffffc082aa1048 of 4 bytes by interrupt on cpu 2:
[ 75.584273] tick_sched_do_timer+0x48/0x168
[ 75.588596] tick_sched_timer+0x48/0xc8
[ 75.592572] __hrtimer_run_queues+0x200/0x5f0
[ 75.597050] hrtimer_interrupt+0x1b0/0x3f4
[ 75.601269] arch_timer_handler_phys+0x40/0x54
[ 75.605835] handle_percpu_devid_irq+0xf8/0x20c
[ 75.610531] generic_handle_domain_irq+0x48/0x64
[ 75.615287] gic_handle_irq+0xc4/0x100
[ 75.619150] call_on_irq_stack+0x24/0x4c
[ 75.623204] do_interrupt_handler+0xc0/0xc4
[ 75.627535] el1_interrupt+0x34/0x50
[ 75.631258] el1h_64_irq_handler+0x18/0x24
[ 75.635512] el1h_64_irq+0x64/0x68
[ 75.639027] lock_acquire+0x13c/0x2f0
[ 75.642820] count_memcg_event_mm.part.0+0x70/0x2a8
[ 75.647829] handle_mm_fault+0x1d8/0x374
[ 75.651874] do_page_fault+0x1f8/0x620
[ 75.655789] do_mem_abort+0x5c/0xe4
[ 75.659417] el0_da+0x5c/0xac
[ 75.662524] el0t_64_sync_handler+0xe4/0x158
[ 75.666951] el0t_64_sync+0x190/0x194
[ 75.670736]
[ 75.672307] 2 locks held by systemd/1:
[ 75.676152] #0: ffffff88042627b0 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x15c/0x5c0
[ 75.685900] #1: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: count_memcg_event_mm.part.0+0x0/0x2a8
[ 75.695726] irq event stamp: 576347
[ 75.699302] hardirqs last enabled at (576346): [<ffffffc08001af70>] local_daif_restore+0x38/0x54
[ 75.708338] hardirqs last disabled at (576347): [<ffffffc081aca990>] el1_interrupt+0x24/0x50
[ 75.716940] softirqs last enabled at (576161): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
[ 75.725602] softirqs last disabled at (576154): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
[ 75.734283]
[ 75.735854] value changed: 0xffffffff -> 0x00000000
[ 75.740836]
[ 75.742407] Reported by Kernel Concurrency Sanitizer on:
[ 75.747808] CPU: 2 PID: 1 Comm: systemd Not tainted 6.6.20+ #101
[ 75.754290] Hardware name: xlnx,zynqmp (DT)
[ 75.758563] ==================================================================
The justification seems to be in tick_sched_do_timer:
/*
* Check if the do_timer duty was dropped. We don't care about
* concurrency: This happens only when the CPU in charge went
* into a long sleep. If two CPUs happen to assign themselves to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
*
* If nohz_full is enabled, this should not happen because the
* tick_do_timer_cpu never relinquishes.
*/
with the other assignment being in tick_nohz_stop_tick. I'm not familiar
enough with this code to say whether we should be using READ/WRITE_ONCE
or maybe just data_race (as would be implied by the comment above).
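
For illustration only, here is a minimal, self-contained userspace sketch of the two annotation styles in
question. The macro definitions below are simplified stand-ins for the real kernel ones (READ_ONCE()/
WRITE_ONCE() live in include/asm-generic/rwonce.h, data_race() in include/linux/compiler.h), the variable
names merely mirror the kernel's, and it builds with gcc (typeof is a GNU extension):

#include <stdio.h>

/* Simplified, illustrative stand-ins for the kernel macros. */
#define READ_ONCE(x)            (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)      (*(volatile typeof(x) *)&(x) = (val))
/* In the kernel, data_race() tells KCSAN the racy access is intentional and
 * its exact value does not affect correctness; here it is just a no-op. */
#define data_race(expr)         (expr)

#define TICK_DO_TIMER_NONE      -1

static int tick_do_timer_cpu = 26;      /* 0x1a, the value seen in the report above */

int main(void)
{
        int cpu = 16;

        /* Annotated style: a single untorn load and store, with the
         * concurrent access documented as intentional. */
        int tick_cpu = READ_ONCE(tick_do_timer_cpu);

        if (tick_cpu == cpu)
                WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);

        /* data_race() style: the KCSAN report would also be silenced, but
         * nothing is guaranteed about load/store tearing of the access. */
        if (data_race(tick_do_timer_cpu) == cpu)
                ;       /* value only glanced at, not relied upon */

        printf("tick_do_timer_cpu = %d\n", tick_do_timer_cpu);
        return 0;
}

Either form would quiet the reports above; the usual rule of thumb is READ_ONCE()/WRITE_ONCE() when the
loaded value feeds real logic, and data_race() when the race is benign and the value is only advisory.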
--Sean
Hi Sean,
I am trimming the Cc: list; could you please attach the decode_stacktrace.sh output of your build?
Thanks,
Mirsad
On 3/7/24 20:43, Sean Anderson wrote:
> On 8/18/23 10:15, Mirsad Todorovac wrote:
>>
>>
>> On 8/17/23 21:32, Mirsad Todorovac wrote:
>>> Hi,
>>>
>>> This is your friendly bug reporter. :-)
>>>
>>> The environment is the vanilla torvalds kernel on Ubuntu 22.04 LTS and a Ryzen 7950X assembled box.
>>>
>>> [ 7145.500079] ==================================================================
>>> [ 7145.500109] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
>>>
>>> [ 7145.500139] write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
>>> [ 7145.500155] tick_nohz_idle_stop_tick+0x3b1/0x4a0
>>> [ 7145.500173] do_idle+0x1e3/0x250
>>> [ 7145.500186] cpu_startup_entry+0x20/0x30
>>> [ 7145.500199] start_secondary+0x129/0x160
>>> [ 7145.500216] secondary_startup_64_no_verify+0x17e/0x18b
>>>
>>> [ 7145.500245] read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
>>> [ 7145.500261] tick_nohz_next_event+0xe7/0x1e0
>>> [ 7145.500277] tick_nohz_get_sleep_length+0xa7/0xe0
>>> [ 7145.500293] menu_select+0x82/0xb90
>>> [ 7145.500309] cpuidle_select+0x44/0x60
>>> [ 7145.500324] do_idle+0x1c2/0x250
>>> [ 7145.500336] cpu_startup_entry+0x20/0x30
>>> [ 7145.500350] start_secondary+0x129/0x160
>>> [ 7145.500367] secondary_startup_64_no_verify+0x17e/0x18b
>>>
>>> [ 7145.500392] value changed: 0x0000001a -> 0xffffffff
>>>
>>> [ 7145.500411] Reported by Kernel Concurrency Sanitizer on:
>>> [ 7145.500421] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G L 6.5.0-rc6-net-cfg-kcsan-00038-g16931859a650 #35
>>> [ 7145.500439] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023
>>> [ 7145.500449] ==================================================================
>>>
>>> Please find dmesg log (what was available in the ring buffer) and the lshw output attached.
>>>
>>> NOTE: There are no proprietary drivers loaded and Canonical livepatch isn't working for this kernel,
>>> probably previous ACPI KCSAN BUGs turn this taint on.
>>>
>>> Thank you very much for your time evaluating this report.
>>
>> P.S.
>>
>> According to Mr. Heo, I will add the decoded stacktrace to this bug report, which ought to have been done
>> in the first place :-/
>>
>> [ 7145.500079] ==================================================================
>> [ 7145.500109] BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
>>
>> [ 7145.500139] write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
>> [ 7145.500155] tick_nohz_idle_stop_tick (kernel/time/tick-sched.c:904 kernel/time/tick-sched.c:1130)
>> [ 7145.500173] do_idle (kernel/sched/idle.c:215 kernel/sched/idle.c:282)
>> [ 7145.500186] cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1))
>> [ 7145.500199] start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294)
>> [ 7145.500216] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:441)
>>
>> [ 7145.500245] read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
>> [ 7145.500261] tick_nohz_next_event (kernel/time/tick-sched.c:868)
>> [ 7145.500277] tick_nohz_get_sleep_length (kernel/time/tick-sched.c:1250)
>> [ 7145.500293] menu_select (drivers/cpuidle/governors/menu.c:283)
>> [ 7145.500309] cpuidle_select (drivers/cpuidle/cpuidle.c:359)
>> [ 7145.500324] do_idle (kernel/sched/idle.c:208 kernel/sched/idle.c:282)
>> [ 7145.500336] cpu_startup_entry (kernel/sched/idle.c:378 (discriminator 1))
>> [ 7145.500350] start_secondary (arch/x86/kernel/smpboot.c:210 arch/x86/kernel/smpboot.c:294)
>> [ 7145.500367] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:441)
>>
>> [ 7145.500392] value changed: 0x0000001a -> 0xffffffff
>>
>> [ 7145.500411] Reported by Kernel Concurrency Sanitizer on:
>> [ 7145.500421] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G L 6.5.0-rc6-net-cfg-kcsan-00038-g16931859a650 #35
>> [ 7145.500439] Hardware name: ASRock X670E PG Lightning/X670E PG Lightning, BIOS 1.21 04/26/2023
>> [ 7145.500449] ==================================================================
>>
>> The code seems to be:
>>
>>
>> /*
>> * If this CPU is the one which updates jiffies, then give up
>> * the assignment and let it be taken by the CPU which runs
>> * the tick timer next, which might be this CPU as well. If we
>> * don't drop this here the jiffies might be stale and
>> * do_timer() never invoked. Keep track of the fact that it
>> * was the one which had the do_timer() duty last.
>> */
>> if (cpu == tick_do_timer_cpu) {
>> tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> → ts->do_timer_last = 1;
>> } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> ts->do_timer_last = 0;
>>
>> }
>>
>> but I am not sure why this happens.
>>
>> Maybe a WRITE_ONCE(ts->do_timer_last, 1) is required?
>>
>> Or is it another KCSAN false positive ...
>
> I have seen a few more KCSAN reports regarding this variable:
>
> [ 153.285199] ==================================================================
> [ 153.292529] BUG: KCSAN: data-race in tick_nohz_next_event+0x15c/0x29c
> [ 153.299118]
> [ 153.300685] race at unknown origin, with read to 0xffffffc082aa1048 of 4 bytes by task 0 on cpu 2:
> [ 153.309760] tick_nohz_next_event+0x15c/0x29c
> [ 153.314256] tick_nohz_get_sleep_length+0xd8/0x118
> [ 153.319186] menu_select+0x1e0/0xc08
> [ 153.322875] cpuidle_select+0x4c/0x60
> [ 153.326686] do_idle+0x1e0/0x2b4
> [ 153.330062] cpu_startup_entry+0x34/0x3c
> [ 153.334133] secondary_start_kernel+0x1e8/0x21c
> [ 153.338821] __secondary_switched+0xb8/0xbc
> [ 153.343143]
> [ 153.344706] no locks held by swapper/2/0.
> [ 153.348802] irq event stamp: 129586
> [ 153.352370] hardirqs last enabled at (129585): [<ffffffc081acaa2c>] el1_interrupt+0x40/0x50
> [ 153.360964] hardirqs last disabled at (129586): [<ffffffc0800d8fec>] do_idle+0xc8/0x2b4
> [ 153.369132] softirqs last enabled at (129560): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
> [ 153.377786] softirqs last disabled at (129549): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
> [ 153.386457]
> [ 153.388019] value changed: 0xffffffff -> 0x00000001
> [ 153.392993]
> [ 153.394555] Reported by Kernel Concurrency Sanitizer on:
> [ 153.399947] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 6.6.20+ #102
> [ 153.406595] Hardware name: xlnx,zynqmp (DT)
> [ 153.410866] ==================================================================
>
> and:
>
> [ 75.362055] ==================================================================
> [ 75.369461] BUG: KCSAN: data-race in tick_sched_do_timer / tick_sched_do_timer
> [ 75.376854]
> [ 75.378426] write to 0xffffffc082aa1048 of 4 bytes by interrupt on cpu 1:
> [ 75.385335] tick_sched_do_timer+0x124/0x168
> [ 75.389744] tick_sched_timer+0x48/0xc8
> [ 75.393720] __hrtimer_run_queues+0x200/0x5f0
> [ 75.398198] hrtimer_interrupt+0x1b0/0x3f4
> [ 75.402417] arch_timer_handler_phys+0x40/0x54
> [ 75.406983] handle_percpu_devid_irq+0xf8/0x20c
> [ 75.411679] generic_handle_domain_irq+0x48/0x64
> [ 75.416435] gic_handle_irq+0xc4/0x100
> [ 75.420298] call_on_irq_stack+0x24/0x4c
> [ 75.424360] do_interrupt_handler+0xc0/0xc4
> [ 75.428692] el1_interrupt+0x34/0x50
> [ 75.432415] el1h_64_irq_handler+0x18/0x24
> [ 75.436668] el1h_64_irq+0x64/0x68
> [ 75.440184] percpu_counter_add_batch+0xd8/0x1b8
> [ 75.444967] set_pte_range+0x14c/0x2c8
> [ 75.448838] filemap_map_pages+0x2dc/0xa38
> [ 75.453091] __handle_mm_fault+0x1304/0x17dc
> [ 75.457492] handle_mm_fault+0xa0/0x374
> [ 75.461450] do_page_fault+0x1f8/0x620
> [ 75.465365] do_translation_fault+0x118/0x138
> [ 75.469887] do_mem_abort+0x5c/0xe4
> [ 75.473516] el0_da+0x5c/0xac
> [ 75.476623] el0t_64_sync_handler+0xe4/0x158
> [ 75.481050] el0t_64_sync+0x190/0x194
> [ 75.484834]
> [ 75.486405] 5 locks held by (d-logind)/293:
> [ 75.490693] #0: ffffff88076753c0 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x15c/0x5c0
> [ 75.500441] #1: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: __handle_mm_fault+0x1260/0x17dc
> [ 75.509755] #2: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: filemap_map_pages+0x0/0xa38
> [ 75.518748] #3: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: __pte_offset_map+0x0/0x2a0
> [ 75.527662] #4: ffffff880762b018 (ptlock_ptr(ptdesc)){+.+.}-{2:2}, at: __pte_offset_map_lock+0xa4/0x264
> [ 75.537488] irq event stamp: 1761
> [ 75.540890] hardirqs last enabled at (1760): [<ffffffc080bc1f78>] percpu_counter_add_batch+0x160/0x1b8
> [ 75.550465] hardirqs last disabled at (1761): [<ffffffc081aca990>] el1_interrupt+0x24/0x50
> [ 75.558893] softirqs last enabled at (1534): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
> [ 75.567382] softirqs last disabled at (1529): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
> [ 75.575889]
> [ 75.577460] read to 0xffffffc082aa1048 of 4 bytes by interrupt on cpu 2:
> [ 75.584273] tick_sched_do_timer+0x48/0x168
> [ 75.588596] tick_sched_timer+0x48/0xc8
> [ 75.592572] __hrtimer_run_queues+0x200/0x5f0
> [ 75.597050] hrtimer_interrupt+0x1b0/0x3f4
> [ 75.601269] arch_timer_handler_phys+0x40/0x54
> [ 75.605835] handle_percpu_devid_irq+0xf8/0x20c
> [ 75.610531] generic_handle_domain_irq+0x48/0x64
> [ 75.615287] gic_handle_irq+0xc4/0x100
> [ 75.619150] call_on_irq_stack+0x24/0x4c
> [ 75.623204] do_interrupt_handler+0xc0/0xc4
> [ 75.627535] el1_interrupt+0x34/0x50
> [ 75.631258] el1h_64_irq_handler+0x18/0x24
> [ 75.635512] el1h_64_irq+0x64/0x68
> [ 75.639027] lock_acquire+0x13c/0x2f0
> [ 75.642820] count_memcg_event_mm.part.0+0x70/0x2a8
> [ 75.647829] handle_mm_fault+0x1d8/0x374
> [ 75.651874] do_page_fault+0x1f8/0x620
> [ 75.655789] do_mem_abort+0x5c/0xe4
> [ 75.659417] el0_da+0x5c/0xac
> [ 75.662524] el0t_64_sync_handler+0xe4/0x158
> [ 75.666951] el0t_64_sync+0x190/0x194
> [ 75.670736]
> [ 75.672307] 2 locks held by systemd/1:
> [ 75.676152] #0: ffffff88042627b0 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x15c/0x5c0
> [ 75.685900] #1: ffffffc082b042f0 (rcu_read_lock){....}-{1:2}, at: count_memcg_event_mm.part.0+0x0/0x2a8
> [ 75.695726] irq event stamp: 576347
> [ 75.699302] hardirqs last enabled at (576346): [<ffffffc08001af70>] local_daif_restore+0x38/0x54
> [ 75.708338] hardirqs last disabled at (576347): [<ffffffc081aca990>] el1_interrupt+0x24/0x50
> [ 75.716940] softirqs last enabled at (576161): [<ffffffc08001071c>] __do_softirq+0x47c/0x500
> [ 75.725602] softirqs last disabled at (576154): [<ffffffc080017134>] ____do_softirq+0x10/0x1c
> [ 75.734283]
> [ 75.735854] value changed: 0xffffffff -> 0x00000000
> [ 75.740836]
> [ 75.742407] Reported by Kernel Concurrency Sanitizer on:
> [ 75.747808] CPU: 2 PID: 1 Comm: systemd Not tainted 6.6.20+ #101
> [ 75.754290] Hardware name: xlnx,zynqmp (DT)
> [ 75.758563] ==================================================================
>
> The justification seems to be in tick_sched_do_timer:
>
> /*
> * Check if the do_timer duty was dropped. We don't care about
> * concurrency: This happens only when the CPU in charge went
> * into a long sleep. If two CPUs happen to assign themselves to
> * this duty, then the jiffies update is still serialized by
> * jiffies_lock.
> *
> * If nohz_full is enabled, this should not happen because the
> * tick_do_timer_cpu never relinquishes.
> */
>
> with the other assignment being in tick_nohz_stop_tick. I'm not familiar
> enough with this code to say whether we should be using READ/WRITE_ONCE
> or maybe just data_race (as would be implied by the comment above).
>
> --Sean
>
On Thu, Mar 07 2024 at 14:43, Sean Anderson wrote:
> On 8/18/23 10:15, Mirsad Todorovac wrote:
>
> The justification seems to be in tick_sched_do_timer:
>
> /*
> * Check if the do_timer duty was dropped. We don't care about
> * concurrency: This happens only when the CPU in charge went
> * into a long sleep. If two CPUs happen to assign themselves to
> * this duty, then the jiffies update is still serialized by
> * jiffies_lock.
> *
> * If nohz_full is enabled, this should not happen because the
> * tick_do_timer_cpu never relinquishes.
> */
>
> with the other assignment being in tick_nohz_stop_tick. I'm not familiar
> enough with this code to say whether we should be using READ/WRITE_ONCE
> or maybe just data_race (as would be implied by the comment above).
It wants to be READ/WRITE_ONCE(). Something like the untested below,
which applies to -next or the master branch of the tip tree.
Thanks,
tglx
---
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tic
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tic
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(vo
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct t
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -890,6 +893,7 @@ static ktime_t tick_nohz_next_event(stru
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -946,9 +950,9 @@ static ktime_t tick_nohz_next_event(stru
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -969,6 +973,7 @@ static void tick_nohz_stop_tick(struct t
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1006,10 +1011,11 @@ static void tick_nohz_stop_tick(struct t
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1172,15 +1178,17 @@ static bool can_stop_idle_tick(int cpu,
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
On 3/11/24 10:48, Thomas Gleixner wrote:
> On Thu, Mar 07 2024 at 14:43, Sean Anderson wrote:
>> On 8/18/23 10:15, Mirsad Todorovac wrote:
>>
>> The justification seems to be in tick_sched_do_timer:
>>
>> /*
>> * Check if the do_timer duty was dropped. We don't care about
>> * concurrency: This happens only when the CPU in charge went
>> * into a long sleep. If two CPUs happen to assign themselves to
>> * this duty, then the jiffies update is still serialized by
>> * jiffies_lock.
>> *
>> * If nohz_full is enabled, this should not happen because the
>> * tick_do_timer_cpu never relinquishes.
>> */
>>
>> with the other assignment being in tick_nohz_stop_tick. I'm not familiar
>> enough with this code to say whether we should be using READ/WRITE_ONCE
>> or maybe just data_race (as would be implied by the comment above).
>
> It wants to be READ/WRITE_ONCE(). Something like the untested below,
> which applies to -next or the master branch of the tip tree.
>
> Thanks,
>
> tglx
> ---
>
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -7,6 +7,7 @@
> * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
> * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
> */
> +#include <linux/compiler.h>
> #include <linux/cpu.h>
> #include <linux/err.h>
> #include <linux/hrtimer.h>
> @@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
> */
> static void tick_periodic(int cpu)
> {
> - if (tick_do_timer_cpu == cpu) {
> + if (READ_ONCE(tick_do_timer_cpu) == cpu) {
> raw_spin_lock(&jiffies_lock);
> write_seqcount_begin(&jiffies_seq);
>
> @@ -215,8 +216,8 @@ static void tick_setup_device(struct tic
> * If no cpu took the do_timer update, assign it to
> * this cpu:
> */
> - if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
> - tick_do_timer_cpu = cpu;
> + if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
> + WRITE_ONCE(tick_do_timer_cpu, cpu);
> tick_next_period = ktime_get();
> #ifdef CONFIG_NO_HZ_FULL
> /*
> @@ -232,7 +233,7 @@ static void tick_setup_device(struct tic
> !tick_nohz_full_cpu(cpu)) {
> tick_take_do_timer_from_boot();
> tick_do_timer_boot_cpu = -1;
> - WARN_ON(tick_do_timer_cpu != cpu);
> + WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
> #endif
> }
>
> @@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(vo
> int tick_cpu_dying(unsigned int dying_cpu)
> {
> /*
> - * If the current CPU is the timekeeper, it's the only one that
> - * can safely hand over its duty. Also all online CPUs are in
> - * stop machine, guaranteed not to be idle, therefore it's safe
> - * to pick any online successor.
> + * If the current CPU is the timekeeper, it's the only one that can
> + * safely hand over its duty. Also all online CPUs are in stop
> + * machine, guaranteed not to be idle, therefore there is no
> + * concurrency and it's safe to pick any online successor.
> */
> if (tick_do_timer_cpu == dying_cpu)
> tick_do_timer_cpu = cpumask_first(cpu_online_mask);
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -8,6 +8,7 @@
> *
> * Started by: Thomas Gleixner and Ingo Molnar
> */
> +#include <linux/compiler.h>
> #include <linux/cpu.h>
> #include <linux/err.h>
> #include <linux/hrtimer.h>
> @@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear
>
> static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
> {
> - int cpu = smp_processor_id();
> + int tick_cpu, cpu = smp_processor_id();
>
> /*
> * Check if the do_timer duty was dropped. We don't care about
> @@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct t
> * If nohz_full is enabled, this should not happen because the
> * 'tick_do_timer_cpu' CPU never relinquishes.
> */
> - if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
> - unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
> + tick_cpu = READ_ONCE(tick_do_timer_cpu);
> +
> + if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
> #ifdef CONFIG_NO_HZ_FULL
> WARN_ON_ONCE(tick_nohz_full_running);
> #endif
> - tick_do_timer_cpu = cpu;
> + WRITE_ONCE(tick_do_timer_cpu, cpu);
> + tick_cpu = cpu;
> }
>
> /* Check if jiffies need an update */
> - if (tick_do_timer_cpu == cpu)
> + if (tick_cpu == cpu)
> tick_do_update_jiffies64(now);
>
> /*
> @@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned
> * timers, workqueues, timekeeping, ...) on behalf of full dynticks
> * CPUs. It must remain online when nohz full is enabled.
> */
> - if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
> + if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
> return false;
> return true;
> }
> @@ -890,6 +893,7 @@ static ktime_t tick_nohz_next_event(stru
> {
> u64 basemono, next_tick, delta, expires;
> unsigned long basejiff;
> + int tick_cpu;
>
> basemono = get_jiffies_update(&basejiff);
> ts->last_jiffies = basejiff;
> @@ -946,9 +950,9 @@ static ktime_t tick_nohz_next_event(stru
> * Otherwise we can sleep as long as we want.
> */
> delta = timekeeping_max_deferment();
> - if (cpu != tick_do_timer_cpu &&
> - (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
> - !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
> + tick_cpu = READ_ONCE(tick_do_timer_cpu);
> + if (tick_cpu != cpu &&
> + (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
> delta = KTIME_MAX;
>
> /* Calculate the next expiry time */
> @@ -969,6 +973,7 @@ static void tick_nohz_stop_tick(struct t
> unsigned long basejiff = ts->last_jiffies;
> u64 basemono = ts->timer_expires_base;
> bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
> + int tick_cpu;
> u64 expires;
>
> /* Make sure we won't be trying to stop it twice in a row. */
> @@ -1006,10 +1011,11 @@ static void tick_nohz_stop_tick(struct t
> * do_timer() never gets invoked. Keep track of the fact that it
> * was the one which had the do_timer() duty last.
> */
> - if (cpu == tick_do_timer_cpu) {
> - tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> + tick_cpu = READ_ONCE(tick_do_timer_cpu);
> + if (tick_cpu == cpu) {
> + WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
> tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
> - } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> + } else if (tick_cpu != TICK_DO_TIMER_NONE) {
> tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
> }
>
> @@ -1172,15 +1178,17 @@ static bool can_stop_idle_tick(int cpu,
> return false;
>
> if (tick_nohz_full_enabled()) {
> + int tick_cpu = READ_ONCE(tick_do_timer_cpu);
> +
> /*
> * Keep the tick alive to guarantee timekeeping progression
> * if there are full dynticks CPUs around
> */
> - if (tick_do_timer_cpu == cpu)
> + if (tick_cpu == cpu)
> return false;
>
> /* Should not happen for nohz-full */
> - if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
> + if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
> return false;
> }
This fixes the issue for me (tested on 6.6; tweaked a bit for conflicts).
--Sean
tick_do_timer_cpu is used lockless to check which CPU needs to take care
of the per tick timekeeping duty. This is done to avoid a thundering
herd problem on jiffies_lock.
The read and writes are not annotated so KCSAN complains about data races:
BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
tick_nohz_idle_stop_tick+0x3b1/0x4a0
do_idle+0x1e3/0x250
read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
tick_nohz_next_event+0xe7/0x1e0
tick_nohz_get_sleep_length+0xa7/0xe0
menu_select+0x82/0xb90
cpuidle_select+0x44/0x60
do_idle+0x1c2/0x250
value changed: 0x0000001a -> 0xffffffff
Annotate them with READ/WRITE_ONCE() to document the intentional data race.
Reported-by: Mirsad Todorovac <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Sean Anderson <[email protected]>
---
kernel/time/tick-common.c | 17 +++++++++--------
kernel/time/tick-sched.c | 36 ++++++++++++++++++++++--------------
2 files changed, 31 insertions(+), 22 deletions(-)
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tic
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tic
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(vo
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct t
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(stru
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(stru
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct t
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct t
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu,
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: 0200ddd723d6056d0b1c1ebde4b11e75a0f2ed7e
Gitweb: https://git.kernel.org/tip/0200ddd723d6056d0b1c1ebde4b11e75a0f2ed7e
Author: Thomas Gleixner <[email protected]>
AuthorDate: Tue, 09 Apr 2024 12:29:12 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 09 Apr 2024 23:09:44 +02:00
timekeeping: Use READ/WRITE_ONCE() for tick_do_timer_cpu
tick_do_timer_cpu is used lockless to check which CPU needs to take care
of the per tick timekeeping duty. This is done to avoid a thundering
herd problem on jiffies_lock.
The read and writes are not annotated so KCSAN complains about data races:
BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
tick_nohz_idle_stop_tick+0x3b1/0x4a0
do_idle+0x1e3/0x250
read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
tick_nohz_next_event+0xe7/0x1e0
tick_nohz_get_sleep_length+0xa7/0xe0
menu_select+0x82/0xb90
cpuidle_select+0x44/0x60
do_idle+0x1c2/0x250
value changed: 0x0000001a -> 0xffffffff
Annotate them with READ/WRITE_ONCE() to document the intentional data race.
Reported-by: Mirsad Todorovac <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Sean Anderson <[email protected]>
Link: https://lore.kernel.org/r/87cyqy7rt3.ffs@tglx
---
kernel/time/tick-common.c | 17 +++++++++--------
kernel/time/tick-sched.c | 36 ++++++++++++++++++++++--------------
2 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec..a285f4d 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1331216..71a792c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
* tip-bot2 for Thomas Gleixner <[email protected]> wrote:
> The following commit has been merged into the timers/urgent branch of tip:
>
> Commit-ID: 0200ddd723d6056d0b1c1ebde4b11e75a0f2ed7e
> Gitweb: https://git.kernel.org/tip/0200ddd723d6056d0b1c1ebde4b11e75a0f2ed7e
> Author: Thomas Gleixner <[email protected]>
> AuthorDate: Tue, 09 Apr 2024 12:29:12 +02:00
> Committer: Thomas Gleixner <[email protected]>
> CommitterDate: Tue, 09 Apr 2024 23:09:44 +02:00
>
> timekeeping: Use READ/WRITE_ONCE() for tick_do_timer_cpu
> @@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
> !tick_nohz_full_cpu(cpu)) {
> tick_take_do_timer_from_boot();
> tick_do_timer_boot_cpu = -1;
> - WARN_ON(tick_do_timer_cpu != cpu);
> + WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
So there's no such thing as READ_ON_ONCE() - I've amended the commit with
the fix below.
Thanks,
Ingo
kernel/time/tick-common.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index a285f4deb96d..d88b13076b79 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -233,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: f87cbcb345d059f0377b4fa0ba1b766a17fc3710
Gitweb: https://git.kernel.org/tip/f87cbcb345d059f0377b4fa0ba1b766a17fc3710
Author: Thomas Gleixner <[email protected]>
AuthorDate: Tue, 09 Apr 2024 12:29:12 +02:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Wed, 10 Apr 2024 10:13:42 +02:00
timekeeping: Use READ/WRITE_ONCE() for tick_do_timer_cpu
tick_do_timer_cpu is used lockless to check which CPU needs to take care
of the per tick timekeeping duty. This is done to avoid a thundering
herd problem on jiffies_lock.
The read and writes are not annotated so KCSAN complains about data races:
BUG: KCSAN: data-race in tick_nohz_idle_stop_tick / tick_nohz_next_event
write to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 26:
tick_nohz_idle_stop_tick+0x3b1/0x4a0
do_idle+0x1e3/0x250
read to 0xffffffff8a2bda30 of 4 bytes by task 0 on cpu 16:
tick_nohz_next_event+0xe7/0x1e0
tick_nohz_get_sleep_length+0xa7/0xe0
menu_select+0x82/0xb90
cpuidle_select+0x44/0x60
do_idle+0x1c2/0x250
value changed: 0x0000001a -> 0xffffffff
Annotate them with READ/WRITE_ONCE() to document the intentional data race.
Reported-by: Mirsad Todorovac <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Tested-by: Sean Anderson <[email protected]>
Link: https://lore.kernel.org/r/87cyqy7rt3.ffs@tglx
---
kernel/time/tick-common.c | 17 +++++++++--------
kernel/time/tick-sched.c | 36 ++++++++++++++++++++++--------------
2 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec..d88b130 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -7,6 +7,7 @@
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
*/
static void tick_periodic(int cpu)
{
- if (tick_do_timer_cpu == cpu) {
+ if (READ_ONCE(tick_do_timer_cpu) == cpu) {
raw_spin_lock(&jiffies_lock);
write_seqcount_begin(&jiffies_seq);
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- tick_do_timer_cpu = cpu;
+ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
tick_next_period = ktime_get();
#ifdef CONFIG_NO_HZ_FULL
/*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
!tick_nohz_full_cpu(cpu)) {
tick_take_do_timer_from_boot();
tick_do_timer_boot_cpu = -1;
- WARN_ON(tick_do_timer_cpu != cpu);
+ WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
#endif
}
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
int tick_cpu_dying(unsigned int dying_cpu)
{
/*
- * If the current CPU is the timekeeper, it's the only one that
- * can safely hand over its duty. Also all online CPUs are in
- * stop machine, guaranteed not to be idle, therefore it's safe
- * to pick any online successor.
+ * If the current CPU is the timekeeper, it's the only one that can
+ * safely hand over its duty. Also all online CPUs are in stop
+ * machine, guaranteed not to be idle, therefore there is no
+ * concurrency and it's safe to pick any online successor.
*/
if (tick_do_timer_cpu == dying_cpu)
tick_do_timer_cpu = cpumask_first(cpu_online_mask);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1331216..71a792c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -8,6 +8,7 @@
*
* Started by: Thomas Gleixner and Ingo Molnar
*/
+#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
- int cpu = smp_processor_id();
+ int tick_cpu, cpu = smp_processor_id();
/*
* Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
* If nohz_full is enabled, this should not happen because the
* 'tick_do_timer_cpu' CPU never relinquishes.
*/
- if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
- unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
#ifdef CONFIG_NO_HZ_FULL
WARN_ON_ONCE(tick_nohz_full_running);
#endif
- tick_do_timer_cpu = cpu;
+ WRITE_ONCE(tick_do_timer_cpu, cpu);
+ tick_cpu = cpu;
}
/* Check if jiffies need an update */
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
tick_do_update_jiffies64(now);
/*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
* timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
- if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
return false;
return true;
}
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, delta, expires;
unsigned long basejiff;
+ int tick_cpu;
basemono = get_jiffies_update(&basejiff);
ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
- if (cpu != tick_do_timer_cpu &&
- (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
- !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu != cpu &&
+ (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
delta = KTIME_MAX;
/* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+ int tick_cpu;
u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
* do_timer() never gets invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
- if (cpu == tick_do_timer_cpu) {
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_cpu = READ_ONCE(tick_do_timer_cpu);
+ if (tick_cpu == cpu) {
+ WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
- } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+ } else if (tick_cpu != TICK_DO_TIMER_NONE) {
tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
}
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
if (tick_nohz_full_enabled()) {
+ int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
*/
- if (tick_do_timer_cpu == cpu)
+ if (tick_cpu == cpu)
return false;
/* Should not happen for nohz-full */
- if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+ if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
return false;
}
On Wed, Apr 10 2024 at 10:15, Ingo Molnar wrote:
>> @@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
>> !tick_nohz_full_cpu(cpu)) {
>> tick_take_do_timer_from_boot();
>> tick_do_timer_boot_cpu = -1;
>> - WARN_ON(tick_do_timer_cpu != cpu);
>> + WARN_ON(READ_ON_ONCE(tick_do_timer_cpu) != cpu);
>
> So there's no such thing as READ_ON_ONCE() - I've amended the commit with
> the fix below.
Can you hand me a brown paper bag please?