2022-03-24 11:51:57

by Artem Savkov

[permalink] [raw]
Subject: [PATCH 1/2] timer: introduce upper bound timers

Add TIMER_UPPER_BOUND flag which allows creation of timers that would
expire at most at specified time or earlier.

This was previously discussed here:
https://lore.kernel.org/all/20210302001054.4qgrvnkltvkgikzr@treble/T/#u

Suggested-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Artem Savkov <[email protected]>
---
include/linux/timer.h | 3 ++-
kernel/time/timer.c | 36 ++++++++++++++++++++++--------------
2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/linux/timer.h b/include/linux/timer.h
index fda13c9d1256..9b0963f49528 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -67,7 +67,8 @@ struct timer_list {
#define TIMER_DEFERRABLE 0x00080000
#define TIMER_PINNED 0x00100000
#define TIMER_IRQSAFE 0x00200000
-#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
+#define TIMER_UPPER_BOUND 0x00400000
+#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE | TIMER_UPPER_BOUND)
#define TIMER_ARRAYSHIFT 22
#define TIMER_ARRAYMASK 0xFFC00000

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 85f1021ad459..60253ad9ed86 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -491,7 +491,7 @@ static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
* time.
*/
static inline unsigned calc_index(unsigned long expires, unsigned lvl,
- unsigned long *bucket_expiry)
+ unsigned long *bucket_expiry, bool upper_bound)
{

/*
@@ -501,34 +501,39 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
* - Truncation of the expiry time in the outer wheel levels
*
* Round up with level granularity to prevent this.
+ * Do not perform round up in case of upper bound timer.
*/
- expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+ if (upper_bound)
+ expires = expires >> LVL_SHIFT(lvl);
+ else
+ expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
+
*bucket_expiry = expires << LVL_SHIFT(lvl);
return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

static int calc_wheel_index(unsigned long expires, unsigned long clk,
- unsigned long *bucket_expiry)
+ unsigned long *bucket_expiry, bool upper_bound)
{
unsigned long delta = expires - clk;
unsigned int idx;

if (delta < LVL_START(1)) {
- idx = calc_index(expires, 0, bucket_expiry);
+ idx = calc_index(expires, 0, bucket_expiry, upper_bound);
} else if (delta < LVL_START(2)) {
- idx = calc_index(expires, 1, bucket_expiry);
+ idx = calc_index(expires, 1, bucket_expiry, upper_bound);
} else if (delta < LVL_START(3)) {
- idx = calc_index(expires, 2, bucket_expiry);
+ idx = calc_index(expires, 2, bucket_expiry, upper_bound);
} else if (delta < LVL_START(4)) {
- idx = calc_index(expires, 3, bucket_expiry);
+ idx = calc_index(expires, 3, bucket_expiry, upper_bound);
} else if (delta < LVL_START(5)) {
- idx = calc_index(expires, 4, bucket_expiry);
+ idx = calc_index(expires, 4, bucket_expiry, upper_bound);
} else if (delta < LVL_START(6)) {
- idx = calc_index(expires, 5, bucket_expiry);
+ idx = calc_index(expires, 5, bucket_expiry, upper_bound);
} else if (delta < LVL_START(7)) {
- idx = calc_index(expires, 6, bucket_expiry);
+ idx = calc_index(expires, 6, bucket_expiry, upper_bound);
} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
- idx = calc_index(expires, 7, bucket_expiry);
+ idx = calc_index(expires, 7, bucket_expiry, upper_bound);
} else if ((long) delta < 0) {
idx = clk & LVL_MASK;
*bucket_expiry = clk;
@@ -540,7 +545,8 @@ static int calc_wheel_index(unsigned long expires, unsigned long clk,
if (delta >= WHEEL_TIMEOUT_CUTOFF)
expires = clk + WHEEL_TIMEOUT_MAX;

- idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
+ idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry,
+ upper_bound);
}
return idx;
}
@@ -607,7 +613,8 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
unsigned long bucket_expiry;
unsigned int idx;

- idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
+ idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry,
+ timer->flags & TIMER_UPPER_BOUND);
enqueue_timer(base, timer, idx, bucket_expiry);
}

@@ -1000,7 +1007,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
}

clk = base->clk;
- idx = calc_wheel_index(expires, clk, &bucket_expiry);
+ idx = calc_wheel_index(expires, clk, &bucket_expiry,
+ timer->flags & TIMER_UPPER_BOUND);

/*
* Retrieve and compare the array index of the pending
--
2.34.1


2022-03-25 06:12:52

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] timer: introduce upper bound timers

Artem,

On Wed, Mar 23 2022 at 12:16, Artem Savkov wrote:
> Add TIMER_UPPER_BOUND flag which allows creation of timers that would
> expire at most at specified time or earlier.
>
> This was previously discussed here:
> https://lore.kernel.org/all/20210302001054.4qgrvnkltvkgikzr@treble/T/#u

please add the context to the changelog. A link is only supplemental
information and does not replace content.

> static inline unsigned calc_index(unsigned long expires, unsigned lvl,
> - unsigned long *bucket_expiry)
> + unsigned long *bucket_expiry, bool upper_bound)
> {
>
> /*
> @@ -501,34 +501,39 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
> * - Truncation of the expiry time in the outer wheel levels
> *
> * Round up with level granularity to prevent this.
> + * Do not perform round up in case of upper bound timer.
> */
> - expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
> + if (upper_bound)
> + expires = expires >> LVL_SHIFT(lvl);
> + else
> + expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);

While this "works", I fundamentally hate this because it adds an extra
conditional into the common case. That affects every user of the timer
wheel. We went great length to optimize that code and I'm not really enthused
to sacrifice that just because of _one_ use case.

The resulting text bloat is +25% on x86/64 and a quick test case shows
that this is well measurable overhead. The first ones who will complain
about that are going to be - drumroll - the network people.

There must be smarter ways to solve that. Let me think about it.

Thanks,

tglx

2022-03-25 19:36:57

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 1/2] timer: introduce upper bound timers

On Wed, Mar 23, 2022 at 12:16:41PM +0100, Artem Savkov wrote:
> Add TIMER_UPPER_BOUND flag which allows creation of timers that would
> expire at most at specified time or earlier.
>
> This was previously discussed here:
> https://lore.kernel.org/all/20210302001054.4qgrvnkltvkgikzr@treble/T/#u
>
> Suggested-by: Josh Poimboeuf <[email protected]>
> Signed-off-by: Artem Savkov <[email protected]>

Thanks Artem, this approach looks good to me. Some kernel style nits...

The commit log could use some more background and motivation for the
change:

a) describe the current timer bound functionality and why it works for
most cases

b) describe the type of situation where it doesn't work and why

c) describe the solution

> ---
> include/linux/timer.h | 3 ++-
> kernel/time/timer.c | 36 ++++++++++++++++++++++--------------
> 2 files changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/include/linux/timer.h b/include/linux/timer.h
> index fda13c9d1256..9b0963f49528 100644
> --- a/include/linux/timer.h
> +++ b/include/linux/timer.h
> @@ -67,7 +67,8 @@ struct timer_list {
> #define TIMER_DEFERRABLE 0x00080000
> #define TIMER_PINNED 0x00100000
> #define TIMER_IRQSAFE 0x00200000
> -#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
> +#define TIMER_UPPER_BOUND 0x00400000
> +#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE | TIMER_UPPER_BOUND)
> #define TIMER_ARRAYSHIFT 22
> #define TIMER_ARRAYMASK 0xFFC00000

This new flag needs a comment above, where the other user flags are also
commented.

> }
> return idx;
> }
> @@ -607,7 +613,8 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
> unsigned long bucket_expiry;
> unsigned int idx;
>
> - idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
> + idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry,
> + timer->flags & TIMER_UPPER_BOUND);

Here and elsewhere, to follow kernel convention, the 2nd line needs
spaces after the tabs to align it with the previous line. That makes it
more obvious that the 2nd line is just another function argument. Like:

idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry,
timer->flags & TIMER_UPPER_BOUND);

--
Josh

2022-03-25 19:42:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] timer: introduce upper bound timers

On Thu, Mar 24 2022 at 13:28, Thomas Gleixner wrote:
> On Wed, Mar 23 2022 at 12:16, Artem Savkov wrote:
>> Add TIMER_UPPER_BOUND flag which allows creation of timers that would
>> expire at most at specified time or earlier.
>>
>> This was previously discussed here:
>> https://lore.kernel.org/all/20210302001054.4qgrvnkltvkgikzr@treble/T/#u
>
> please add the context to the changelog. A link is only supplemental
> information and does not replace content.
>
>> static inline unsigned calc_index(unsigned long expires, unsigned lvl,
>> - unsigned long *bucket_expiry)
>> + unsigned long *bucket_expiry, bool upper_bound)
>> {
>>
>> /*
>> @@ -501,34 +501,39 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
>> * - Truncation of the expiry time in the outer wheel levels
>> *
>> * Round up with level granularity to prevent this.
>> + * Do not perform round up in case of upper bound timer.
>> */
>> - expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
>> + if (upper_bound)
>> + expires = expires >> LVL_SHIFT(lvl);
>> + else
>> + expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
>
> While this "works", I fundamentally hate this because it adds an extra
> conditional into the common case. That affects every user of the timer
> wheel. We went great length to optimize that code and I'm not really enthused
> to sacrifice that just because of _one_ use case.

Aside of that this is not mathematically correct. Why?

The level selection makes the cutoff at: LEVEL_MAX(lvl) - 1. E.g. 62
instead of 63 for the first level.

The reason is that this accomodates for the + LVL_GRAN(lvl). Now with
surpressing the roundup this creates a gap. Not a horrible problem, but
not correct either.

Thanks,

tglx

2022-03-25 19:55:01

by kernel test robot

[permalink] [raw]
Subject: [timer] d41e0719d5: UBSAN:shift-out-of-bounds_in_lib/flex_proportions.c



Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: d41e0719d576bc515e6de2112fff2c6b20f357e8 ("[PATCH 1/2] timer: introduce upper bound timers")
url: https://github.com/intel-lab-lkp/linux/commits/Artem-Savkov/Upper-bound-mode-for-kernel-timers/20220323-191831
patch link: https://lore.kernel.org/netdev/[email protected]

in testcase: xfstests
version: xfstests-x86_64-1de1db8-1_20220217
with following parameters:

disk: 4HDD
fs: btrfs
test: generic-group-28
ucode: 0xec

test-description: xfstests is a regression test suite for xfs and other files ystems.
test-url: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git


on test machine: 4 threads Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz with 32G memory

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):



If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>


[ 42.401895][ C0] UBSAN: shift-out-of-bounds in lib/flex_proportions.c:80:20
[ 42.410963][ C0] shift exponent -1007885658 is negative
[ 42.416462][ C0] CPU: 0 PID: 330 Comm: sed Tainted: G I 5.17.0-rc6-00027-gd41e0719d576 #1
[ 42.426240][ C0] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.1.1 10/07/2015
[ 42.434363][ C0] Call Trace:
[ 42.437516][ C0] <TASK>
[ 42.440319][ C0] dump_stack_lvl (lib/dump_stack.c:107)
[ 42.444699][ C0] ubsan_epilogue (lib/ubsan.c:152)
[ 42.448985][ C0] __ubsan_handle_shift_out_of_bounds.cold (lib/ubsan.c:330)
[ 42.455618][ C0] ? cpumask_next (lib/cpumask.c:23)
[ 42.459996][ C0] ? __percpu_counter_sum (lib/percpu_counter.c:138)
[ 42.465248][ C0] fprop_new_period.cold (lib/flex_proportions.c:80 (discriminator 1))
[ 42.470224][ C0] writeout_period (mm/page-writeback.c:623)
[ 42.474768][ C0] ? wb_position_ratio (mm/page-writeback.c:617)
[ 42.479747][ C0] call_timer_fn (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/timer.h:125 kernel/time/timer.c:1430)
[ 42.484115][ C0] run_timer_softirq (kernel/time/timer.c:1475 kernel/time/timer.c:1742 kernel/time/timer.c:1718 kernel/time/timer.c:1757)
[ 42.489019][ C0] ? del_timer_sync (kernel/time/timer.c:1752)
[ 42.493561][ C0] ? __next_base (kernel/time/hrtimer.c:506)
[ 42.498014][ C0] ? sched_clock_cpu (kernel/sched/clock.c:371)
[ 42.502732][ C0] ? setup_local_APIC (arch/x86/kernel/apic/apic.c:475)
[ 42.507624][ C0] __do_softirq (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/irq.h:142 kernel/softirq.c:559)
[ 42.511990][ C0] irq_exit_rcu (kernel/softirq.c:432 kernel/softirq.c:637 kernel/softirq.c:649)
[ 42.516359][ C0] sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1097 (discriminator 13))
[ 42.521864][ C0] ? asm_sysvec_apic_timer_interrupt (arch/x86/include/asm/idtentry.h:638)
[ 42.527812][ C0] asm_sysvec_apic_timer_interrupt (arch/x86/include/asm/idtentry.h:638)
[ 42.533660][ C0] RIP: 0033:0x55e961adf002
[ 42.537942][ C0] Code: ff ff ff 41 b8 10 00 00 00 ba 01 00 00 00 48 89 c7 e8 a2 ed ff ff 49 8b 4d 08 eb 89 0f 1f 40 00 44 09 71 08 48 83 c4 08 5b 5d <41> 5c 41 5d 41 5e 41 5f c3 48 3b 6a 10 7d 08 48 89 e9 48 89 c2 eb
All code
========
0: ff (bad)
1: ff (bad)
2: ff 41 b8 incl -0x48(%rcx)
5: 10 00 adc %al,(%rax)
7: 00 00 add %al,(%rax)
9: ba 01 00 00 00 mov $0x1,%edx
e: 48 89 c7 mov %rax,%rdi
11: e8 a2 ed ff ff callq 0xffffffffffffedb8
16: 49 8b 4d 08 mov 0x8(%r13),%rcx
1a: eb 89 jmp 0xffffffffffffffa5
1c: 0f 1f 40 00 nopl 0x0(%rax)
20: 44 09 71 08 or %r14d,0x8(%rcx)
24: 48 83 c4 08 add $0x8,%rsp
28: 5b pop %rbx
29: 5d pop %rbp
2a:* 41 5c pop %r12 <-- trapping instruction
2c: 41 5d pop %r13
2e: 41 5e pop %r14
30: 41 5f pop %r15
32: c3 retq
33: 48 3b 6a 10 cmp 0x10(%rdx),%rbp
37: 7d 08 jge 0x41
39: 48 89 e9 mov %rbp,%rcx
3c: 48 89 c2 mov %rax,%rdx
3f: eb .byte 0xeb

Code starting with the faulting instruction
===========================================
0: 41 5c pop %r12
2: 41 5d pop %r13
4: 41 5e pop %r14
6: 41 5f pop %r15
8: c3 retq
9: 48 3b 6a 10 cmp 0x10(%rdx),%rbp
d: 7d 08 jge 0x17
f: 48 89 e9 mov %rbp,%rcx
12: 48 89 c2 mov %rax,%rdx
15: eb .byte 0xeb
[ 42.557457][ C0] RSP: 002b:00007fff12170f68 EFLAGS: 00000202
[ 42.563393][ C0] RAX: 000055e966fd5de0 RBX: 0000000000002ac0 RCX: 000055e966fe1ad0
[ 42.571245][ C0] RDX: 0000000000000bcf RSI: 0000000000000bd1 RDI: 0000000000000bd0
[ 42.579093][ C0] RBP: 00000000000019a0 R08: 000055e966fd5dd0 R09: 00007f781695a430
[ 42.586954][ C0] R10: 000055e961b62010 R11: 00007f781695a430 R12: 0000000000000bd0
[ 42.594805][ C0] R13: 00007fff12170fe0 R14: 00000000000001ff R15: 000055e962f771e0
[ 42.602664][ C0] </TASK>
[ 42.605562][ C0] ================================================================================
[ 42.619841][ T2170] BTRFS: device fsid c931cb40-7bb4-4aea-a481-3898abe22c7f devid 1 transid 5 /dev/sda2 scanned by mkfs.btrfs (2170)
[ 42.658545][ T2181] BTRFS info (device sda2): flagging fs with big metadata feature
[ 42.666306][ T2181] BTRFS info (device sda2): disk space caching is enabled
[ 42.673308][ T2181] BTRFS info (device sda2): has skinny extents
[ 42.685475][ T2181] BTRFS info (device sda2): checking UUID tree
[ 43.019857][ T2218] BTRFS: device fsid 196e6b2a-cd50-4fb7-9e22-b255906549fb devid 1 transid 5 /dev/sda2 scanned by mkfs.btrfs (2218)
[ 43.055315][ T2232] BTRFS info (device sda2): flagging fs with big metadata feature
[ 43.063092][ T2232] BTRFS info (device sda2): disk space caching is enabled
[ 43.070097][ T2232] BTRFS info (device sda2): has skinny extents
[ 43.082188][ T2232] BTRFS info (device sda2): checking UUID tree
[ 43.646926][ T330] 262144 bytes (262 kB, 256 KiB) copied, 0.0121294 s, 21.6 MB/s
[ 43.646936][ T330]
[ 43.657040][ T330] 512+0 records in
[ 43.657045][ T330]
[ 43.663189][ T330] 512+0 records out
[ 43.663194][ T330]
[ 43.670327][ T330] 262144 bytes (262 kB, 256 KiB) copied, 0.0340874 s, 7.7 MB/s
[ 43.670333][ T330]
[ 43.680303][ T330] 512+0 records in
[ 43.680308][ T330]
[ 43.686450][ T330] 512+0 records out
[ 43.686470][ T330]
[ 43.693603][ T330] 262144 bytes (262 kB, 256 KiB) copied, 0.0436927 s, 6.0 MB/s
[ 43.693609][ T330]
[ 75.600365][ T2983] BTRFS info (device sda2): flagging fs with big metadata feature
[ 75.608412][ T2983] BTRFS info (device sda2): disk space caching is enabled
[ 75.615416][ T2983] BTRFS info (device sda2): has skinny extents
[ 83.853890][ T3053] BTRFS info (device sda2): flagging fs with big metadata feature
[ 83.861582][ T3053] BTRFS info (device sda2): disk space caching is enabled
[ 83.868571][ T3053] BTRFS info (device sda2): has skinny extents
[ 84.030348][ T328] generic/560 _check_dmesg: something found in dmesg (see /lkp/benchmarks/xfstests/results//generic/560.dmesg)
[ 84.030358][ T328]
[ 84.044262][ T328]
[ 84.044269][ T328]
[ 84.088818][ T1625] run fstests generic/561 at 2022-03-24 12:37:26
[ 84.816005][ T3246] BTRFS info (device sda1): flagging fs with big metadata feature
[ 84.823700][ T3246] BTRFS info (device sda1): using free space tree
[ 84.830014][ T3246] BTRFS info (device sda1): has skinny extents
[ 85.074278][ T3295] BTRFS: device fsid b05a03cd-540a-4c2a-a470-7eb68b5ab847 devid 1 transid 5 /dev/sda2 scanned by mkfs.btrfs (3295)
[ 85.110629][ T3306] BTRFS info (device sda2): flagging fs with big metadata feature
[ 85.118389][ T3306] BTRFS info (device sda2): disk space caching is enabled
[ 85.125437][ T3306] BTRFS info (device sda2): has skinny extents
[ 85.138213][ T3306] BTRFS info (device sda2): checking UUID tree
[ 85.416006][ T3348] BTRFS: device fsid 0d6806aa-5e85-40dd-b571-5227cd8c2d80 devid 1 transid 5 /dev/sda2 scanned by mkfs.btrfs (3348)
[ 85.453695][ T3359] BTRFS info (device sda2): flagging fs with big metadata feature
[ 85.461394][ T3359] BTRFS info (device sda2): disk space caching is enabled
[ 85.468386][ T3359] BTRFS info (device sda2): has skinny extents
[ 85.480643][ T3359] BTRFS info (device sda2): checking UUID tree
[ 85.675145][ T3381] Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!
[ 85.675448][ T3385] Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!
[ 85.686755][ T3381] File: /fs/scratch/dir/p0/f1XX PID: 3381 Comm: fsstress
[ 85.705365][ T3385] File: /fs/scratch/dir/p2/f7XXXXXXXXXXXXX PID: 3385 Comm: fsstress
[ 86.009824][ T3383] Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!
[ 86.021432][ T3383] File: /fs/scratch/dir/p1/f8XX PID: 3383 Comm: fsstress
[ 87.014795][ T3381] Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!
[ 87.026427][ T3381] File: /fs/scratch/dir/p0/f14XXXXXXXXXXXX PID: 3381 Comm: fsstress
[ 98.693320][ T328] generic/561 IPMI BMC is not supported on this machine, skip bmc-watchdog setup!
[ 98.693329][ T328]
[ 103.829084][ T3381] Page cache invalidation failure on direct I/O. Possible data corruption due to collision with buffered I/O!
[ 103.840703][ T3381] File: /fs/scratch/dir/p0/d1eXXXXXXXXXXXX/d42XX/fabXXXXX PID: 3381 Comm: fsstress
[ 142.694608][ T4737] BTRFS info (device sda2): flagging fs with big metadata feature
[ 142.702316][ T4737] BTRFS info (device sda2): disk space caching is enabled
[ 142.709324][ T4737] BTRFS info (device sda2): has skinny extents
[ 159.500542][ T4838] BTRFS info (device sda2): flagging fs with big metadata feature
[ 159.508232][ T4838] BTRFS info (device sda2): disk space caching is enabled
[ 159.515223][ T4838] BTRFS info (device sda2): has skinny extents
[ 159.704146][ T328] 76s
[ 159.704155][ T328]
[ 159.748026][ T1625] run fstests generic/562 at 2022-03-24 12:38:42
[ 160.507334][ T5033] BTRFS info (device sda1): flagging fs with big metadata feature
[ 160.515029][ T5033] BTRFS info (device sda1): using free space tree
[ 160.521330][ T5033] BTRFS info (device sda1): has skinny extents
[ 160.777005][ T5095] BTRFS: device fsid 08444c95-7d09-450b-9e80-4897d2b3bfae devid 1 transid 5 /dev/sda2 scanned by mkfs.btrfs (5095)
[ 160.811850][ T5109] BTRFS info (device sda2): flagging fs with big metadata feature
[ 160.819593][ T5109] BTRFS info (device sda2): disk space caching is enabled
[ 160.827020][ T5109] BTRFS info (device sda2): has skinny extents


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.



--
0-DAY CI Kernel Test Service
https://01.org/lkp



Attachments:
(No filename) (11.67 kB)
config-5.17.0-rc6-00027-gd41e0719d576 (168.22 kB)
job-script (5.74 kB)
dmesg.xz (29.17 kB)
xfstests (2.12 kB)
job.yaml (4.72 kB)
reproduce (955.00 B)
Download all attachments

2022-03-25 20:26:48

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [timer] d41e0719d5: UBSAN:shift-out-of-bounds_in_lib/flex_proportions.c

On Fri, Mar 25 2022 at 15:38, kernel test robot wrote:
> [ 42.401895][ C0] UBSAN: shift-out-of-bounds in lib/flex_proportions.c:80:20
> [ 42.410963][ C0] shift exponent -1007885658 is negative

Cute.

> [ 42.416462][ C0] CPU: 0 PID: 330 Comm: sed Tainted: G I 5.17.0-rc6-00027-gd41e0719d576 #1
> [ 42.426240][ C0] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.1.1 10/07/2015
> [ 42.434363][ C0] Call Trace:
> [ 42.437516][ C0] <TASK>
> [ 42.440319][ C0] dump_stack_lvl (lib/dump_stack.c:107)
> [ 42.444699][ C0] ubsan_epilogue (lib/ubsan.c:152)
> [ 42.448985][ C0] __ubsan_handle_shift_out_of_bounds.cold (lib/ubsan.c:330)
> [ 42.455618][ C0] ? cpumask_next (lib/cpumask.c:23)
> [ 42.459996][ C0] ? __percpu_counter_sum (lib/percpu_counter.c:138)
> [ 42.465248][ C0] fprop_new_period.cold (lib/flex_proportions.c:80 (discriminator 1))
> [ 42.470224][ C0] writeout_period (mm/page-writeback.c:623)

So it seems a timer fired early. Which then makes writeout_period() go south:

int miss_periods = (jiffies - dom->period_time) / VM_COMPLETIONS_PERIOD_LEN;

If jiffies < dom->period_time the result is a very large negative
number.

This happens because of:

> @@ -67,7 +67,8 @@ struct timer_list {
> #define TIMER_DEFERRABLE 0x00080000
> #define TIMER_PINNED 0x00100000
> #define TIMER_IRQSAFE 0x00200000
> -#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE)
> +#define TIMER_UPPER_BOUND 0x00400000
> +#define TIMER_INIT_FLAGS (TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE | TIMER_UPPER_BOUND)
> #define TIMER_ARRAYSHIFT 22
> #define TIMER_ARRAYMASK 0xFFC00000

TIMER_UPPER_BOUND steals a bit from the ARRAYMASK. So if the timer is
armed and the stored arraymask happens to have bit 22 set, then on the
next arming of the timer it will be treated as upper bound timer,
expires early and all hell breaks lose. The same can happen the other
way round. So I really have to ask how this ever "worked".

Thanks,

tglx

2022-03-28 02:29:39

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/2] timer: introduce upper bound timers

Artem,

On Thu, Mar 24 2022 at 13:28, Thomas Gleixner wrote:
> On Wed, Mar 23 2022 at 12:16, Artem Savkov wrote:
>> * Round up with level granularity to prevent this.
>> + * Do not perform round up in case of upper bound timer.
>> */
>> - expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
>> + if (upper_bound)
>> + expires = expires >> LVL_SHIFT(lvl);
>> + else
>> + expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
>
> While this "works", I fundamentally hate this because it adds an extra

actually it cannot not work. At least not in a realiable and predictable
way.

The timer wheel is fundamentaly relying on moving the timer one bucket
out. Let's look how this all works.

The wheel has N levels of bucket arrays. Each level has 64 buckets and
the granularity increases by a factor of 8 per level, i.e. the worst
case deviation is 12.5% per level.

The original timer wheel was able to fire the timer at expiry time + one
tick for the price of cascading timers every so often from one level to
the next lower level. Here are a few pointers:

https://lwn.net/Articles/152436/
https://lwn.net/Articles/646950/

The accuracy of the original wheel implementation was weakened already
in course of the NOHZ development where the concept of slack
(algorithmic batching at enqueue time for the price of overhead) was
introduced. It had the same 12.5% worst case deviation of the resulting
granularity level, though the batching was not enforced and only worked
most of the time. So in theory the same issue could have been seen back
then already.

The enqueue placement and the expiry is driven by base::clock, which
is nothing else than jiffies. When a timer is queued then base::clock is
the time on which the next tick and expiry check happens.

Now let's look how the expiry mechanism works. The first level (0) is
obviously expired every tick. On every eigth tick the second level (1) is
expired, on every 64th tick the third level (2)...

IOW, the expiry events of a level happen at 8^index(level) intervals.

Let's assume that base::clk is 0. That means at the next tick (which
could be imminent) in _all_ levels bucket[0] is due for expiry.

Now let's enqueue a timer with expiry value of 64:

delta = 64 - 0 = 64

That means the timer ends up in the second level in bucket[0], which
makes it eligible for expiry at the next tick. The same is true for any
expiry value of 8^N.

Not what you are trying to achieve, right? You try to enforce an upper
bound, but you expect that the timer does not fire earlier than 12.5% of
the granularity level of that upper bound, right?

IOW, you created a expiry lottery and there is no way to prevent that
except with more conditionals and heuristics which are hardely welcomed.

You've also seen the outcome of a timer firing unexpectedly due to the
bit abuse, right?

Now let's take a step back and look at the problem at hand.

TCP alive timer, which is affected by the batching and the resulting
inaccuracy, is (re)armed with a relative timeout, which is known
upfront, right?

The important part is *relative* timeout. Why?

Because the timeout is relative it's trivial to calculate a relative
timeout value from the given accurate relative timeout value, which is
guaranteed to not expire late and within the timerwheel's error margin,
and use that for the actual rearming.

I'm pretty sure that you can come up with a conversion function for that
and use this function in the TCP code at the point where the TCP
keepalive timer is configured.

Hint 1: The output of calc_wheel_index() should be good enough to figure
that out.

Hint 2: If you get frustrated, try

git grep johnstul arch/x86/ | awk '{ $1=$2=$3=""; print $0 }'

Hint 3: Feel free to ask questions anytime

Thanks,

tglx

2022-03-30 20:49:49

by Artem Savkov

[permalink] [raw]
Subject: [PATCH v3 0/2] Upper bound kernel timers

As previously discussed [1] we had a report of a regression in TCP keepalive

timer where timers were up to 4 minutes late resulting in disconnects.



This patchset tries to fix the problem by introducing upper bound kernel timers

and making tcp keepalive timer use those.



[1] https://lore.kernel.org/all/20210302001054.4qgrvnkltvkgikzr@treble/T/#u



---

Changes in v3:

- A different approach: instead of introducing upper bound timers try to

account for timer wheen granularity on timer (re)arming step.

- Not sure whether moving lvl calculation into a separate function is worth

it.

- Had a hard time naming the upper_bount_timeout() function - any suggestions

welcome.



Changes in v2:

- TIMER_UPPER_BOUND flag description added as a comment in timer.h

- Code style fixes

- More elaborate commit message in timer commit



Artem Savkov (2):

timer: add a function to adjust timeouts to be upper bound

net: make tcp keepalive timer upper bound



include/linux/timer.h | 1 +

kernel/time/timer.c | 92 ++++++++++++++++++++++++++++-----

net/ipv4/inet_connection_sock.c | 2 +-

3 files changed, 81 insertions(+), 14 deletions(-)



--

2.34.1




2022-03-31 03:39:53

by Artem Savkov

[permalink] [raw]
Subject: [PATCH v3 2/2] net: make tcp keepalive timer upper bound

Make sure TCP keepalive timer does not expire late. Switching to upper
bound timers means it can fire off early but in case of keepalive
tcp_keepalive_timer() handler checks elapsed time and resets the timer
if it was triggered early. This results in timer "cascading" to a
higher precision and being just a couple of milliseconds off it's
original mark.

Signed-off-by: Artem Savkov <[email protected]>
---
net/ipv4/inet_connection_sock.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1e5b53c2bb267..bb2dbfb6f5b50 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -589,7 +589,7 @@ EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);

void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
- sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + upper_bound_timeout(len));
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);

--
2.34.1

2022-03-31 04:37:26

by David Laight

[permalink] [raw]
Subject: RE: [PATCH v3 0/2] Upper bound kernel timers

From: Artem Savkov
> Sent: 30 March 2022 09:21
>
> As previously discussed [1] we had a report of a regression in TCP keepalive
> timer where timers were up to 4 minutes late resulting in disconnects.
>
> This patchset tries to fix the problem by introducing upper bound kernel timers
> and making tcp keepalive timer use those.

Why not just fix the timer code to work properly (as it used to) so that the
timers expire within a short time of the requested interval.

This just requires that expiring 'long' timers get moved into the
higher precision 'wheels' (or whatever) before they actually expire.

The burden for this is minimal - it only affects long duration timers
that actually expire, and each timer only gets moved once for each
level of timer precision.

Perhaps you only need to move them two or three times in order to
get a reasonable accuracy.
No one is going to mind if a 5 minute timer is a second late.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2022-04-03 05:49:07

by kernel test robot

[permalink] [raw]
Subject: [net] 6ef3f95797: UBSAN:shift-out-of-bounds_in_kernel/time/timer.c



Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: 6ef3f95797546781829db3bb6228c9990ba1d49f ("[PATCH v3 2/2] net: make tcp keepalive timer upper bound")
url: https://github.com/intel-lab-lkp/linux/commits/Artem-Savkov/timer-add-a-function-to-adjust-timeouts-to-be-upper-bound/20220330-172140
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git b166e52541f2357ce126a92ce1d9a580fdca719d
patch link: https://lore.kernel.org/netdev/[email protected]

in testcase: kernel-selftests
version: kernel-selftests-x86_64-a17aac1b-1_20220328
with following parameters:

group: tc-testing
ucode: 0xec

test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
test-url: https://www.kernel.org/doc/Documentation/kselftest.txt


on test machine: 8 threads Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz with 16G memory

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):



If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>


[ 158.913672][ C1] UBSAN: shift-out-of-bounds in kernel/time/timer.c:584:32
[ 158.922603][ C1] shift exponent -3 is negative
[ 158.927346][ C1] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.17.0-rc6-00028-g6ef3f9579754 #1
[ 158.936050][ C1] Hardware name: HP HP Z240 SFF Workstation/802E, BIOS N51 Ver. 01.63 10/05/2017
[ 158.945011][ C1] Call Trace:
[ 158.948161][ C1] <IRQ>
[ 158.950872][ C1] dump_stack_lvl (lib/dump_stack.c:107)
[ 158.955242][ C1] ubsan_epilogue (lib/ubsan.c:152)
[ 158.959517][ C1] __ubsan_handle_shift_out_of_bounds.cold (lib/ubsan.c:330)
[ 158.966138][ C1] ? generic_powersave_bias_target (drivers/cpufreq/cpufreq_ondemand.c:68)
[ 158.972155][ C1] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4224 kernel/locking/lockdep.c:4292 kernel/locking/lockdep.c:4244)
[ 158.977994][ C1] ? ktime_get (arch/x86/include/asm/irqflags.h:45 arch/x86/include/asm/irqflags.h:80 arch/x86/include/asm/irqflags.h:138 include/linux/seqlock.h:105 kernel/time/timekeeping.c:827)
[ 158.982275][ C1] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:50 (discriminator 22))
[ 158.987164][ C1] upper_bound_timeout.cold (kernel/time/timer.c:584)
[ 158.992402][ C1] inet_csk_reset_keepalive_timer (net/ipv4/inet_connection_sock.c:592)
[ 158.998153][ C1] tcp_keepalive_timer (net/ipv4/tcp_timer.c:750)
[ 159.003132][ C1] ? tcp_compressed_ack_kick (net/ipv4/tcp_timer.c:674)
[ 159.008623][ C1] call_timer_fn (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/timer.h:125 kernel/time/timer.c:1488)
[ 159.013077][ C1] ? lock_release (kernel/locking/lockdep.c:5315 kernel/locking/lockdep.c:5659)
[ 159.017617][ C1] ? add_timer (kernel/time/timer.c:1464)
[ 159.021891][ C1] ? lock_downgrade (kernel/locking/lockdep.c:5647)
[ 159.026607][ C1] ? tcp_compressed_ack_kick (net/ipv4/tcp_timer.c:674)
[ 159.032095][ C1] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4224 kernel/locking/lockdep.c:4292 kernel/locking/lockdep.c:4244)
[ 159.034064][ T433] HDRINST usr/include/linux/arcfb.h
[ 159.037937][ C1] ? tcp_compressed_ack_kick (net/ipv4/tcp_timer.c:674)
[ 159.043186][ T433]
[ 159.048657][ C1] run_timer_softirq (kernel/time/timer.c:1533 kernel/time/timer.c:1800 kernel/time/timer.c:1776 kernel/time/timer.c:1813)
[ 159.048676][ C1] ? call_timer_fn (kernel/time/timer.c:1810)
[ 159.048681][ C1] ? __next_base (kernel/time/hrtimer.c:506)
[ 159.051777][ T433] HDRINST usr/include/linux/am437x-vpfe.h
[ 159.055760][ C1] ? rcu_read_lock_sched_held (include/linux/lockdep.h:283 kernel/rcu/update.c:125)
[ 159.060401][ T433]
[ 159.064827][ C1] ? rcu_read_lock_bh_held (kernel/rcu/update.c:120)
[ 159.064842][ C1] __do_softirq (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:212 include/trace/events/irq.h:142 kernel/softirq.c:559)
[ 159.064864][ C1] irq_exit_rcu (kernel/softirq.c:432 kernel/softirq.c:637 kernel/softirq.c:649)
[ 159.071477][ T433] HDRINST usr/include/linux/perf_event.h
[ 159.076075][ C1] sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1097 (discriminator 14))
[ 159.076082][ C1] </IRQ>
[ 159.078286][ T433]
[ 159.083411][ C1] <TASK>
[ 159.083414][ C1] asm_sysvec_apic_timer_interrupt (arch/x86/include/asm/idtentry.h:638)
[ 159.083421][ C1] RIP: 0010:cpuidle_enter_state (drivers/cpuidle/cpuidle.c:259)
[ 159.083428][ C1] Code: 00 00 31 ff e8 47 3f 43 fe 80 3c 24 00 74 12 9c 58 f6 c4 02 0f 85 1f 08 00 00 31 ff e8 af c1 5b fe e8 ea 43 71 fe fb 45 85 f6 <0f> 88 90 03 00 00 49 63 ee 48 83 fd 09 0f 87 62 09 00 00 48 8d 44
All code
========
0: 00 00 add %al,(%rax)
2: 31 ff xor %edi,%edi
4: e8 47 3f 43 fe callq 0xfffffffffe433f50
9: 80 3c 24 00 cmpb $0x0,(%rsp)
d: 74 12 je 0x21
f: 9c pushfq
10: 58 pop %rax
11: f6 c4 02 test $0x2,%ah
14: 0f 85 1f 08 00 00 jne 0x839
1a: 31 ff xor %edi,%edi
1c: e8 af c1 5b fe callq 0xfffffffffe5bc1d0
21: e8 ea 43 71 fe callq 0xfffffffffe714410
26: fb sti
27: 45 85 f6 test %r14d,%r14d
2a:* 0f 88 90 03 00 00 js 0x3c0 <-- trapping instruction
30: 49 63 ee movslq %r14d,%rbp
33: 48 83 fd 09 cmp $0x9,%rbp
37: 0f 87 62 09 00 00 ja 0x99f
3d: 48 rex.W
3e: 8d .byte 0x8d
3f: 44 rex.R

Code starting with the faulting instruction
===========================================
0: 0f 88 90 03 00 00 js 0x396
6: 49 63 ee movslq %r14d,%rbp
9: 48 83 fd 09 cmp $0x9,%rbp
d: 0f 87 62 09 00 00 ja 0x975
13: 48 rex.W
14: 8d .byte 0x8d
15: 44 rex.R
[ 159.088738][ T433] HDRINST usr/include/linux/netfilter_arp.h
[ 159.092147][ C1] RSP: 0018:ffffc9000014fd80 EFLAGS: 00000202
[ 159.092151][ C1] RAX: 000000000039d49f RBX: ffffe8ffffa82330 RCX: 1ffffffff0bf9cc1
[ 159.092154][ C1] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff830167d6
[ 159.097833][ T433]
[ 159.103298][ C1] RBP: 0000000000000004 R08: 0000000000000001 R09: 0000000000000001
[ 159.103301][ C1] R10: ffffffff85fd3c67 R11: fffffbfff0bfa78c R12: ffffffff85a74a40
[ 159.103303][ C1] R13: 00000024ff70d012 R14: 0000000000000004 R15: 0000000000000001
[ 159.103319][ C1] ? cpuidle_enter_state (arch/x86/include/asm/irqflags.h:45 arch/x86/include/asm/irqflags.h:80 drivers/cpuidle/cpuidle.c:257)
[ 159.107090][ T433] HDRINST usr/include/linux/virtio_console.h
[ 159.108317][ C1] ? menu_reflect (drivers/cpuidle/governors/menu.c:441)
[ 159.111135][ T433]
[ 159.116958][ C1] cpuidle_enter (drivers/cpuidle/cpuidle.c:353)
[ 159.116969][ C1] do_idle (kernel/sched/idle.c:158 kernel/sched/idle.c:239 kernel/sched/idle.c:306)
[ 159.116985][ C1] ? arch_cpu_idle_exit+0xc0/0xc0
[ 159.151856][ T433] HDRINST usr/include/linux/hdlcdrv.h
[ 159.153998][ C1] cpu_startup_entry (kernel/sched/idle.c:402 (discriminator 1))
[ 159.161840][ T433]
[ 159.169651][ C1] start_secondary (arch/x86/kernel/smpboot.c:224)
[ 159.169658][ C1] ? set_cpu_sibling_map (arch/x86/kernel/smpboot.c:224)
[ 159.169673][ C1] secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:300)
[ 159.169703][ C1] </TASK>
[ 159.197643][ T433] HDRINST usr/include/linux/posix_acl_xattr.h
[ 159.200540][ C1] ================================================================================
[ 159.206541][ T433]
[ 159.210985][ C1] ================================================================================


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.



--
0-DAY CI Kernel Test Service
https://01.org/lkp



Attachments:
(No filename) (8.41 kB)
config-5.17.0-rc6-00028-g6ef3f9579754 (56.63 kB)
dmesg.xz (53.55 kB)
job.yaml (4.98 kB)
Download all attachments

2022-04-05 00:08:11

by Artem Savkov

[permalink] [raw]
Subject: Re: [net] 6ef3f95797: UBSAN:shift-out-of-bounds_in_kernel/time/timer.c

On Sat, Apr 02, 2022 at 11:09:40AM +0800, kernel test robot wrote:
>
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 6ef3f95797546781829db3bb6228c9990ba1d49f ("[PATCH v3 2/2] net: make tcp keepalive timer upper bound")
> url: https://github.com/intel-lab-lkp/linux/commits/Artem-Savkov/timer-add-a-function-to-adjust-timeouts-to-be-upper-bound/20220330-172140
> base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git b166e52541f2357ce126a92ce1d9a580fdca719d
> patch link: https://lore.kernel.org/netdev/[email protected]
>
> in testcase: kernel-selftests
> version: kernel-selftests-x86_64-a17aac1b-1_20220328
> with following parameters:
>
> group: tc-testing
> ucode: 0xec
>
> test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
>
>
> on test machine: 8 threads Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz with 16G memory
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
>
> [ 158.913672][ C1] UBSAN: shift-out-of-bounds in kernel/time/timer.c:584:32
> [ 158.922603][ C1] shift exponent -3 is negative

This is caused by LVL_START(0). Levels 0 and 1 need to be handled
separately to insure we don't end up with negative values.

--
Artem