2024-03-08 13:15:46

by Adrian Hunter

Subject: [PATCH 00/19] timekeeping: Handle potential multiplication overflow

Hi

Kernel timekeeping calculates a clock value by keeping a base value and
adding the number of nanoseconds elapsed since that time. Those nanoseconds
are calculated from the clocksource delta. Periodically, the base value is
moved forwards (see timekeeping_advance()) by the local timer interrupt
handler. The design relies on a timer interrupt always arriving before the
delta becomes large enough to overflow the 64-bit multiplication used in
the conversion of delta to nanoseconds (see timekeeping_delta_to_ns()). If
timer interrupts are stopped, however, the multiplication does eventually
overflow.
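
As a rough sketch (illustrative only, not the kernel code), the conversion
at issue is:

        /*
         * Sketch of the delta-to-nanoseconds conversion. The 64-bit
         * product delta * mult wraps once delta exceeds roughly
         * U64_MAX / mult, which is the limit that the clocksource's
         * max_cycles guards against.
         */
        static u64 cycles_to_ns(u64 delta, u32 mult, u32 shift)
        {
                return (delta * mult) >> shift;
        }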

Timekeeping multiplication overflow results in a "time loop": because the
computed nanoseconds are effectively ((delta * mult) mod 2^64) >> shift,
the visible time wraps every 2^(64 - shift) ns, which for typical x86 TSC
mult/shift values is about every 15 minutes. For example, starting at
10:00:

10:00, 10:01, 10:02 ... 10:15, 10:00, 10:01, ... 10:15, 10:00, 10:01 ...

Because a VMM can deliberately stop timer interrupts for a guest, a virtual
machine can be exposed to this issue.

TDX maintains a monotonically increasing virtual TSC for a TDX guest, so
there the overflow allows a backwards movement of timekeeping that could
not happen otherwise.

It is considered that this could break the security of cryptographic
protocols that rely on timestamps for freshness / replay protection, and
consequently the kernel should prevent such a time loop.

Handle multiplication overflow by falling back to a higher precision
calculation when the possibility of an overflow is detected.
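
The higher precision path amounts to doing the multiply-add with a 128-bit
intermediate, along these lines (a sketch assuming __int128 support; the
series adds mul_u64_u32_add_u64_shr() with a fallback for architectures
without it):

        static u64 mul_add_shr128(u64 delta, u32 mult, u64 base, u32 shift)
        {
                /* The 96-bit product plus base cannot overflow 128 bits */
                return (u64)((((unsigned __int128)delta * mult) + base) >> shift);
        }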

Extend the facility also to the VDSO, gated by the new config option
GENERIC_VDSO_OVERFLOW_PROTECT, which is selected by x86 only, so other
architectures are not affected. The result is a calculation with
performance similar to before. Most machines showed a performance benefit,
except Skylake-based hardware such as Intel Kaby Lake, which was measured
at <1% worse.


Adrian Hunter (19):
vdso: Consolidate vdso_calc_delta()
vdso: Consolidate nanoseconds calculation
vdso: Add CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
math64: Tidy mul_u64_u32_shr()
vdso: math64: Provide mul_u64_u32_add_u64_shr()
vdso: Add vdso_data::max_cycles
vdso: Make delta calculation overflow safe
x86/vdso: Make delta calculation overflow safe
timekeeping: Move timekeeping helper functions
timekeeping: Rename fast_tk_get_delta_ns() to __timekeeping_get_ns()
timekeeping: Tidy timekeeping_cycles_to_ns() slightly
timekeeping: Reuse timekeeping_cycles_to_ns()
timekeeping: Refactor timekeeping helpers
timekeeping: Consolidate timekeeping helpers
timekeeping: Fold in timekeeping_delta_to_ns()
timekeeping: Prepare timekeeping_cycles_to_ns() for overflow safety
timekeeping: Make delta calculation overflow safe
timekeeping: Let timekeeping_cycles_to_ns() handle both under and overflow
clocksource: Make watchdog and suspend-timing multiplication overflow safe

arch/powerpc/include/asm/vdso/gettimeofday.h | 17 +----
arch/s390/include/asm/vdso/gettimeofday.h | 7 +-
arch/x86/Kconfig | 1 +
arch/x86/include/asm/vdso/gettimeofday.h | 42 +++++++----
include/linux/math64.h | 8 +-
include/vdso/datapage.h | 4 +
include/vdso/math64.h | 38 ++++++++++
kernel/time/clocksource.c | 42 +++++------
kernel/time/timekeeping.c | 106 ++++++++++++++-------------
kernel/time/vsyscall.c | 6 ++
lib/vdso/Kconfig | 7 ++
lib/vdso/gettimeofday.c | 55 +++++++++-----
12 files changed, 199 insertions(+), 134 deletions(-)


Regards
Adrian


2024-03-08 13:15:58

by Adrian Hunter

Subject: [PATCH 01/19] vdso: Consolidate vdso_calc_delta()

Consolidate vdso_calc_delta(), in preparation for further simplification.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
arch/powerpc/include/asm/vdso/gettimeofday.h | 17 ++---------------
arch/s390/include/asm/vdso/gettimeofday.h | 7 ++-----
lib/vdso/gettimeofday.c | 4 ++++
3 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
index f0a4cf01e85c..f4da8e18cdf3 100644
--- a/arch/powerpc/include/asm/vdso/gettimeofday.h
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -14,6 +14,8 @@

#define VDSO_HAS_TIME 1

+#define VDSO_DELTA_NOMASK 1
+
static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
const unsigned long _r4)
{
@@ -105,21 +107,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
}
#define vdso_clocksource_ok vdso_clocksource_ok

-/*
- * powerpc specific delta calculation.
- *
- * This variant removes the masking of the subtraction because the
- * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
- * which would result in a pointless operation. The compiler cannot
- * optimize it away as the mask comes from the vdso data and is not compile
- * time constant.
- */
-static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
- return (cycles - last) * mult;
-}
-#define vdso_calc_delta vdso_calc_delta
-
#ifndef __powerpc64__
static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
{
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
index db84942eb78f..7937765ccfa5 100644
--- a/arch/s390/include/asm/vdso/gettimeofday.h
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -6,16 +6,13 @@

#define VDSO_HAS_CLOCK_GETRES 1

+#define VDSO_DELTA_NOMASK 1
+
#include <asm/syscall.h>
#include <asm/timex.h>
#include <asm/unistd.h>
#include <linux/compiler.h>

-#define vdso_calc_delta __arch_vdso_calc_delta
-static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
- return (cycles - last) * mult;
-}

static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
{
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index ce2f69552003..042b95e8164d 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -13,7 +13,11 @@
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
+#ifdef VDSO_DELTA_NOMASK
+ return (cycles - last) * mult;
+#else
return ((cycles - last) & mask) * mult;
+#endif
}
#endif

--
2.34.1


2024-03-08 13:16:22

by Adrian Hunter

Subject: [PATCH 03/19] vdso: Add CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT

Add CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT in preparation for adding
multiplication overflow protection to the VDSO time getter functions.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
lib/vdso/Kconfig | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index d883ac299508..c46c2300517c 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -30,4 +30,11 @@ config GENERIC_VDSO_TIME_NS
Selected by architectures which support time namespaces in the
VDSO

+config GENERIC_VDSO_OVERFLOW_PROTECT
+ bool
+ help
+ Select to add multiplication overflow protection to the VDSO
+ time getter functions for the price of an extra conditional
+ in the hotpath.
+
endif
--
2.34.1


2024-03-08 13:16:34

by Adrian Hunter

Subject: [PATCH 04/19] math64: Tidy mul_u64_u32_shr()

Put together declaration and initialization of local variables.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
include/linux/math64.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/math64.h b/include/linux/math64.h
index bf74478926d4..fd13622b2056 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -179,16 +179,12 @@ static __always_inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift)
#ifndef mul_u64_u32_shr
static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
{
- u32 ah, al;
+ u32 ah = a >> 32, al = a;
u64 ret;

- al = a;
- ah = a >> 32;
-
ret = mul_u32_u32(al, mul) >> shift;
if (ah)
ret += mul_u32_u32(ah, mul) << (32 - shift);
-
return ret;
}
#endif /* mul_u64_u32_shr */
--
2.34.1


2024-03-08 13:16:46

by Adrian Hunter

Subject: [PATCH 05/19] vdso: math64: Provide mul_u64_u32_add_u64_shr()

Provide mul_u64_u32_add_u64_shr(), a calculation that will be used by
timekeeping and the VDSO.
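
For reference (not part of the patch), the helper computes
((a * mul) + b) >> shift with a full 96-bit intermediate product, so a
caller such as the later timekeeping patches can do:

        /* Illustrative use: overflow-safe (delta * mult + xtime_nsec) >> shift */
        ns = mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);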

Place #include <vdso/math64.h> after #include <asm/div64.h> to allow
architecture-specific overrides, at least for the kernel.

Signed-off-by: Adrian Hunter <[email protected]>
---
include/linux/math64.h | 2 +-
include/vdso/math64.h | 38 ++++++++++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/linux/math64.h b/include/linux/math64.h
index fd13622b2056..d34def7f9a8c 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -4,8 +4,8 @@

#include <linux/types.h>
#include <linux/math.h>
-#include <vdso/math64.h>
#include <asm/div64.h>
+#include <vdso/math64.h>

#if BITS_PER_LONG == 64

diff --git a/include/vdso/math64.h b/include/vdso/math64.h
index 7da703ee5561..22ae212f8b28 100644
--- a/include/vdso/math64.h
+++ b/include/vdso/math64.h
@@ -21,4 +21,42 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
return ret;
}

+#if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
+
+#ifndef mul_u64_u32_add_u64_shr
+static __always_inline u64 mul_u64_u32_add_u64_shr(u64 a, u32 mul, u64 b, unsigned int shift)
+{
+ return (u64)((((unsigned __int128)a * mul) + b) >> shift);
+}
+#endif /* mul_u64_u32_add_u64_shr */
+
+#else
+
+#ifndef mul_u64_u32_add_u64_shr
+#ifndef mul_u32_u32
+static inline u64 mul_u32_u32(u32 a, u32 b)
+{
+ return (u64)a * b;
+}
+#define mul_u32_u32 mul_u32_u32
+#endif
+static __always_inline u64 mul_u64_u32_add_u64_shr(u64 a, u32 mul, u64 b, unsigned int shift)
+{
+ u32 ah = a >> 32, al = a;
+ bool ovf;
+ u64 ret;
+
+ ovf = __builtin_add_overflow(mul_u32_u32(al, mul), b, &ret);
+ ret >>= shift;
+ if (ovf && shift)
+ ret += 1ULL << (64 - shift);
+ if (ah)
+ ret += mul_u32_u32(ah, mul) << (32 - shift);
+
+ return ret;
+}
+#endif /* mul_u64_u32_add_u64_shr */
+
+#endif
+
#endif /* __VDSO_MATH64_H */
--
2.34.1


2024-03-08 13:17:35

by Adrian Hunter

Subject: [PATCH 08/19] x86/vdso: Make delta calculation overflow safe

Kernel timekeeping is designed to keep the change in cycles (since the last
timer interrupt) below max_cycles, which prevents multiplication overflow
when converting cycles to nanoseconds. However, if timer interrupts stop,
the calculation will eventually overflow.

Add protection against that. Select GENERIC_VDSO_OVERFLOW_PROTECT so that
max_cycles is made available in the VDSO data page. Check against
max_cycles, falling back to a slower, higher-precision calculation. Take
the opportunity to move the masking and the negative motion check into the
slow path.

The result is a calculation with performance similar to before. Newer
machines showed a performance benefit, whereas older Skylake-based hardware
such as Intel Kaby Lake was measured at <1% worse.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/vdso/gettimeofday.h | 29 +++++++++++++++++-------
2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 720b96388191..200f80a36593 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -168,6 +168,7 @@ config X86
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
+ select GENERIC_VDSO_OVERFLOW_PROTECT
select GUP_GET_PXX_LOW_HIGH if X86_PAE
select HARDIRQS_SW_RESEND
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 5727dedd3549..0ef36190abe6 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -319,18 +319,31 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
*/
static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base)
{
+ u64 delta = cycles - vd->cycle_last;
+
/*
+ * Negative motion and deltas which can cause multiplication
+ * overflow require special treatment. This check covers both as
+ * negative motion is guaranteed to be greater than @vd::max_cycles
+ * due to unsigned comparison.
+ *
* Due to the MSB/Sign-bit being used as invalid marker (see
- * arch_vdso_cycles_valid() above), the effective mask is S64_MAX.
+ * arch_vdso_cycles_valid() above), the effective mask is S64_MAX,
+ * but that case is also unlikely and will also take the unlikely path
+ * here.
*/
- u64 delta = (cycles - vd->cycle_last) & S64_MAX;
+ if (unlikely(delta > vd->max_cycles)) {
+ /*
+ * Due to the above mentioned TSC wobbles, filter out
+ * negative motion. Per the above masking, the effective
+ * sign bit is now bit 62.
+ */
+ if (delta & (1ULL << 62))
+ return base >> vd->shift;

- /*
- * Due to the above mentioned TSC wobbles, filter out negative motion.
- * Per the above masking, the effective sign bit is now bit 62.
- */
- if (unlikely(delta & (1ULL << 62)))
- return base >> vd->shift;
+ /* Handle multiplication overflow gracefully */
+ return mul_u64_u32_add_u64_shr(delta & S64_MAX, vd->mult, base, vd->shift);
+ }

return ((delta * vd->mult) + base) >> vd->shift;
}
--
2.34.1


2024-03-08 13:17:35

by Adrian Hunter

Subject: [PATCH 07/19] vdso: Make delta calculation overflow safe

Kernel timekeeping is designed to keep the change in cycles (since the last
timer interrupt) below max_cycles, which prevents multiplication overflow
when converting cycles to nanoseconds. However, if timer interrupts stop,
the calculation will eventually overflow.

Add protection against that, enabled by the config option
CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT. Check against max_cycles, falling
back to a slower, higher-precision calculation.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
lib/vdso/gettimeofday.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 9fa90e0794c9..9c3a8d2440c9 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -13,6 +13,18 @@
# define VDSO_DELTA_MASK(vd) (vd->mask)
#endif

+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta)
+{
+ return delta < vd->max_cycles;
+}
+#else
+static __always_inline bool vdso_delta_ok(const struct vdso_data *vd, u64 delta)
+{
+ return true;
+}
+#endif
+
#ifndef vdso_shift_ns
static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
{
@@ -28,7 +40,10 @@ static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles,
{
u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd);

- return vdso_shift_ns((delta * vd->mult) + base, vd->shift);
+ if (likely(vdso_delta_ok(vd, delta)))
+ return vdso_shift_ns((delta * vd->mult) + base, vd->shift);
+
+ return mul_u64_u32_add_u64_shr(delta, vd->mult, base, vd->shift);
}
#endif /* vdso_calc_ns */

--
2.34.1


2024-03-08 13:17:59

by Adrian Hunter

Subject: [PATCH 10/19] timekeeping: Rename fast_tk_get_delta_ns() to __timekeeping_get_ns()

Rename fast_tk_get_delta_ns() to __timekeeping_get_ns() to prepare for its
reuse as a general timekeeping helper function.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3375f0a6400f..63061332a75c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -390,7 +390,7 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c
return timekeeping_delta_to_ns(tkr, delta);
}

-static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
{
u64 delta, cycles = tk_clock_read(tkr);

@@ -449,7 +449,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base);
- now += fast_tk_get_delta_ns(tkr);
+ now += __timekeeping_get_ns(tkr);
} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));

return now;
@@ -565,7 +565,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
tkr = tkf->base + (seq & 0x01);
basem = ktime_to_ns(tkr->base);
baser = ktime_to_ns(tkr->base_real);
- delta = fast_tk_get_delta_ns(tkr);
+ delta = __timekeeping_get_ns(tkr);
} while (raw_read_seqcount_latch_retry(&tkf->seq, seq));

if (mono)
--
2.34.1


2024-03-08 13:18:11

by Adrian Hunter

Subject: [PATCH 11/19] timekeeping: Tidy timekeeping_cycles_to_ns() slightly

Put together declaration and initialization of the local variable 'delta'.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 63061332a75c..c698219b152d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -383,10 +383,9 @@ static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 de

static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
- u64 delta;
+ /* Calculate the delta since the last update_wall_time() */
+ u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);

- /* calculate the delta since the last update_wall_time */
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
return timekeeping_delta_to_ns(tkr, delta);
}

--
2.34.1


2024-03-08 13:18:22

by Adrian Hunter

Subject: [PATCH 12/19] timekeeping: Reuse timekeeping_cycles_to_ns()

Simplify __timekeeping_get_ns() by reusing timekeeping_cycles_to_ns().

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index c698219b152d..f81d675291e0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -391,10 +391,7 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c

static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
{
- u64 delta, cycles = tk_clock_read(tkr);
-
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
- return timekeeping_delta_to_ns(tkr, delta);
+ return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr));
}

static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
--
2.34.1


2024-03-08 13:18:33

by Adrian Hunter

Subject: [PATCH 13/19] timekeeping: Refactor timekeeping helpers

Simplify use of timekeeping sanity checking, in preparation for
consolidating timekeeping helpers. This works towards eliminating
timekeeping_delta_to_ns() in favour of timekeeping_cycles_to_ns().

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 20 ++++++--------------
1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f81d675291e0..618328cd4bc4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -237,7 +237,7 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
}
}

-static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr)
{
struct timekeeper *tk = &tk_core.timekeeper;
u64 now, last, mask, max, delta;
@@ -281,17 +281,9 @@ static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
}
-static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr)
{
- u64 cycle_now, delta;
-
- /* read clocksource */
- cycle_now = tk_clock_read(tkr);
-
- /* calculate the delta since the last update_wall_time */
- delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
-
- return delta;
+ BUG();
}
#endif

@@ -396,10 +388,10 @@ static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)

static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
- u64 delta;
+ if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
+ return timekeeping_delta_to_ns(tkr, timekeeping_debug_get_delta(tkr));

- delta = timekeeping_get_delta(tkr);
- return timekeeping_delta_to_ns(tkr, delta);
+ return __timekeeping_get_ns(tkr);
}

/**
--
2.34.1


2024-03-08 13:18:46

by Adrian Hunter

Subject: [PATCH 14/19] timekeeping: Consolidate timekeeping helpers

Consolidate timekeeping helpers, making use of timekeeping_cycles_to_ns()
in preference to directly using timekeeping_delta_to_ns().

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 618328cd4bc4..1bbfe1ff8d24 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -237,7 +237,9 @@ static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
}
}

-static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles);
+
+static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
{
struct timekeeper *tk = &tk_core.timekeeper;
u64 now, last, mask, max, delta;
@@ -266,22 +268,22 @@ static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr)
*/
if (unlikely((~delta & mask) < (mask >> 3))) {
tk->underflow_seen = 1;
- delta = 0;
+ now = last;
}

/* Cap delta value to the max_cycles values to avoid mult overflows */
if (unlikely(delta > max)) {
tk->overflow_seen = 1;
- delta = tkr->clock->max_cycles;
+ now = last + max;
}

- return delta;
+ return timekeeping_cycles_to_ns(tkr, now);
}
#else
static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
{
}
-static inline u64 timekeeping_debug_get_delta(const struct tk_read_base *tkr)
+static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
{
BUG();
}
@@ -389,7 +391,7 @@ static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING))
- return timekeeping_delta_to_ns(tkr, timekeeping_debug_get_delta(tkr));
+ return timekeeping_debug_get_ns(tkr);

return __timekeeping_get_ns(tkr);
}
--
2.34.1


2024-03-08 13:19:08

by Adrian Hunter

Subject: [PATCH 16/19] timekeeping: Prepare timekeeping_cycles_to_ns() for overflow safety

Open code clocksource_delta() in timekeeping_cycles_to_ns() so that
overflow safety can be added efficiently.
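
For reference, the helper being open coded looks approximately like this
(kernel/time/timekeeping_internal.h):

        #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE
        static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
        {
                u64 ret = (now - last) & mask;

                /* Treat a delta with the MSB of the mask set as negative motion */
                return ret & ~(mask >> 1) ? 0 : ret;
        }
        #else
        static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
        {
                return (now - last) & mask;
        }
        #endif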

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 749387f22f0f..d17484082e2c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -367,7 +367,17 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
/* Calculate the delta since the last update_wall_time() */
- u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;
+
+ if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) {
+ /*
+ * Handle clocksource inconsistency between CPUs to prevent
+ * time from going backwards by checking for the MSB of the
+ * mask being set in the delta.
+ */
+ if (unlikely(delta & ~(mask >> 1)))
+ return tkr->xtime_nsec >> tkr->shift;
+ }

return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}
--
2.34.1


2024-03-08 13:19:20

by Adrian Hunter

Subject: [PATCH 17/19] timekeeping: Make delta calculation overflow safe

Kernel timekeeping is designed to keep the change in cycles (since the last
timer interrupt) below max_cycles, which prevents multiplication overflow
when converting cycles to nanoseconds. However, if timer interrupts stop,
the calculation will eventually overflow.

Add protection against that. In the timekeeping_cycles_to_ns() calculation,
check against max_cycles, falling back to a slower, higher-precision
calculation. In timekeeping_forward_now(), process the delta in chunks of
at most max_cycles: each chunk's multiplication then cannot overflow, and
tk_normalize_xtime() keeps xtime_nsec in range between chunks.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 40 ++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d17484082e2c..111dfdbd488f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -364,19 +364,32 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
}

/* Timekeeper helper functions. */
+static noinline u64 delta_to_ns_safe(const struct tk_read_base *tkr, u64 delta)
+{
+ return mul_u64_u32_add_u64_shr(delta, tkr->mult, tkr->xtime_nsec, tkr->shift);
+}
+
static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
/* Calculate the delta since the last update_wall_time() */
u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

- if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) {
- /*
- * Handle clocksource inconsistency between CPUs to prevent
- * time from going backwards by checking for the MSB of the
- * mask being set in the delta.
- */
- if (unlikely(delta & ~(mask >> 1)))
- return tkr->xtime_nsec >> tkr->shift;
+ /*
+ * This detects the case where the delta overflows the multiplication
+ * with tkr->mult.
+ */
+ if (unlikely(delta > tkr->clock->max_cycles)) {
+ if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) {
+ /*
+ * Handle clocksource inconsistency between CPUs to prevent
+ * time from going backwards by checking for the MSB of the
+ * mask being set in the delta.
+ */
+ if (unlikely(delta & ~(mask >> 1)))
+ return tkr->xtime_nsec >> tkr->shift;
+ }
+
+ return delta_to_ns_safe(tkr, delta);
}

return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
@@ -789,10 +802,15 @@ static void timekeeping_forward_now(struct timekeeper *tk)
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;

- tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
- tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+ while (delta > 0) {
+ u64 max = tk->tkr_mono.clock->max_cycles;
+ u64 incr = delta < max ? delta : max;

- tk_normalize_xtime(tk);
+ tk->tkr_mono.xtime_nsec += incr * tk->tkr_mono.mult;
+ tk->tkr_raw.xtime_nsec += incr * tk->tkr_raw.mult;
+ tk_normalize_xtime(tk);
+ delta -= incr;
+ }
}

/**
--
2.34.1


2024-03-08 13:22:10

by Adrian Hunter

Subject: [PATCH 02/19] vdso: Consolidate nanoseconds calculation

Consolidate the nanoseconds calculation to simplify it and reduce code
duplication.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
arch/x86/include/asm/vdso/gettimeofday.h | 17 +++++----
lib/vdso/gettimeofday.c | 44 +++++++++++-------------
2 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 8e048ca980df..5727dedd3549 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -300,7 +300,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
#define vdso_cycles_ok arch_vdso_cycles_ok

/*
- * x86 specific delta calculation.
+ * x86 specific calculation of nanoseconds for the current cycle count
*
* The regular implementation assumes that clocksource reads are globally
* monotonic. The TSC can be slightly off across sockets which can cause
@@ -308,8 +308,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* jump.
*
* Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
+ * @vd->cycle_last. If not then use @vd->cycle_last, which is the base
+ * time of the current conversion period.
*
* This variant also uses a custom mask because while the clocksource mask of
* all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
@@ -317,25 +317,24 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* declares everything with the MSB/Sign-bit set as invalid. Therefore the
* effective mask is S64_MAX.
*/
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base)
{
/*
* Due to the MSB/Sign-bit being used as invalid marker (see
* arch_vdso_cycles_valid() above), the effective mask is S64_MAX.
*/
- u64 delta = (cycles - last) & S64_MAX;
+ u64 delta = (cycles - vd->cycle_last) & S64_MAX;

/*
* Due to the above mentioned TSC wobbles, filter out negative motion.
* Per the above masking, the effective sign bit is now bit 62.
*/
if (unlikely(delta & (1ULL << 62)))
- return 0;
+ return base >> vd->shift;

- return delta * mult;
+ return ((delta * vd->mult) + base) >> vd->shift;
}
-#define vdso_calc_delta vdso_calc_delta
+#define vdso_calc_ns vdso_calc_ns

#endif /* !__ASSEMBLY__ */

diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 042b95e8164d..9fa90e0794c9 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -5,20 +5,12 @@
#include <vdso/datapage.h>
#include <vdso/helpers.h>

-#ifndef vdso_calc_delta
-/*
- * Default implementation which works for all sane clocksources. That
- * obviously excludes x86/TSC.
- */
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
-{
+#ifndef vdso_calc_ns
+
#ifdef VDSO_DELTA_NOMASK
- return (cycles - last) * mult;
+# define VDSO_DELTA_MASK(vd) U64_MAX
#else
- return ((cycles - last) & mask) * mult;
-#endif
-}
+# define VDSO_DELTA_MASK(vd) (vd->mask)
#endif

#ifndef vdso_shift_ns
@@ -28,6 +20,18 @@ static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
}
#endif

+/*
+ * Default implementation which works for all sane clocksources. That
+ * obviously excludes x86/TSC.
+ */
+static __always_inline u64 vdso_calc_ns(const struct vdso_data *vd, u64 cycles, u64 base)
+{
+ u64 delta = (cycles - vd->cycle_last) & VDSO_DELTA_MASK(vd);
+
+ return vdso_shift_ns((delta * vd->mult) + base, vd->shift);
+}
+#endif /* vdso_calc_ns */
+
#ifndef __arch_vdso_hres_capable
static inline bool __arch_vdso_hres_capable(void)
{
@@ -53,10 +57,10 @@ static inline bool vdso_cycles_ok(u64 cycles)
static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
struct __kernel_timespec *ts)
{
- const struct vdso_data *vd;
const struct timens_offset *offs = &vdns->offset[clk];
const struct vdso_timestamp *vdso_ts;
- u64 cycles, last, ns;
+ const struct vdso_data *vd;
+ u64 cycles, ns;
u32 seq;
s64 sec;

@@ -77,10 +81,7 @@ static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_
cycles = __arch_get_hw_counter(vd->clock_mode, vd);
if (unlikely(!vdso_cycles_ok(cycles)))
return -1;
- ns = vdso_ts->nsec;
- last = vd->cycle_last;
- ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
- ns = vdso_shift_ns(ns, vd->shift);
+ ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec);
sec = vdso_ts->sec;
} while (unlikely(vdso_read_retry(vd, seq)));

@@ -115,7 +116,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
struct __kernel_timespec *ts)
{
const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
- u64 cycles, last, sec, ns;
+ u64 cycles, sec, ns;
u32 seq;

/* Allows to compile the high resolution parts out */
@@ -148,10 +149,7 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
cycles = __arch_get_hw_counter(vd->clock_mode, vd);
if (unlikely(!vdso_cycles_ok(cycles)))
return -1;
- ns = vdso_ts->nsec;
- last = vd->cycle_last;
- ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
- ns = vdso_shift_ns(ns, vd->shift);
+ ns = vdso_calc_ns(vd, cycles, vdso_ts->nsec);
sec = vdso_ts->sec;
} while (unlikely(vdso_read_retry(vd, seq)));

--
2.34.1


2024-03-08 13:27:00

by Adrian Hunter

Subject: [PATCH 19/19] clocksource: Make watchdog and suspend-timing multiplication overflow safe

Kernel timekeeping is designed to keep the change in cycles (since the last
timer interrupt) below max_cycles, which prevents multiplication overflow
when converting cycles to nanoseconds. However, if timer interrupts stop,
the clocksource_cyc2ns() calculation will eventually overflow.

Add protection against that. Simplify by folding clocksource_delta() and
clocksource_cyc2ns() together into cycles_to_nsec_safe(), which checks
against max_cycles and falls back to a slower, higher-precision
calculation.
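
For reference, clocksource_cyc2ns() is approximately
(include/linux/clocksource.h):

        static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift)
        {
                return ((u64) cycles * mult) >> shift;
        }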

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/clocksource.c | 42 +++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e5b260aa0e02..4d50d53ac719 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -20,6 +20,16 @@
#include "tick-internal.h"
#include "timekeeping_internal.h"

+static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
+{
+ u64 delta = clocksource_delta(end, start, cs->mask);
+
+ if (likely(delta < cs->max_cycles))
+ return clocksource_cyc2ns(delta, cs->mult, cs->shift);
+
+ return mul_u64_u32_shr(delta, cs->mult, cs->shift);
+}
+
/**
* clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
* @mult: pointer to mult variable
@@ -222,8 +232,8 @@ enum wd_read_status {
static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
unsigned int nretries, max_retries;
- u64 wd_end, wd_end2, wd_delta;
int64_t wd_delay, wd_seq_delay;
+ u64 wd_end, wd_end2;

max_retries = clocksource_get_max_watchdog_retry();
for (nretries = 0; nretries <= max_retries; nretries++) {
@@ -234,9 +244,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
wd_end2 = watchdog->read(watchdog);
local_irq_enable();

- wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
- wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
- watchdog->shift);
+ wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
if (wd_delay <= WATCHDOG_MAX_SKEW) {
if (nretries > 1 || nretries >= max_retries) {
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
@@ -254,8 +262,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow,
* report system busy, reinit the watchdog and skip the current
* watchdog test.
*/
- wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
- wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
+ wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
goto skip_test;
}
@@ -366,8 +373,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
delta = (csnow_end - csnow_mid) & cs->mask;
if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end);
if (cs_nsec > cs_nsec_max)
cs_nsec_max = cs_nsec;
if (cs_nsec < cs_nsec_min)
@@ -398,8 +404,8 @@ static inline void clocksource_reset_watchdog(void)

static void clocksource_watchdog(struct timer_list *unused)
{
- u64 csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec, interval;
+ u64 csnow, wdnow, cslast, wdlast;
int next_cpu, reset_pending;
struct clocksource *cs;
enum wd_read_status read_ret;
@@ -456,12 +462,8 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

- delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
- wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
- watchdog->shift);
-
- delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow);
+ cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow);
wdlast = cs->wd_last; /* save these in case we print them */
cslast = cs->cs_last;
cs->cs_last = csnow;
@@ -832,7 +834,7 @@ void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
*/
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
- u64 now, delta, nsec = 0;
+ u64 now, nsec = 0;

if (!suspend_clocksource)
return 0;
@@ -847,12 +849,8 @@ u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
else
now = suspend_clocksource->read(suspend_clocksource);

- if (now > suspend_start) {
- delta = clocksource_delta(now, suspend_start,
- suspend_clocksource->mask);
- nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
- suspend_clocksource->shift);
- }
+ if (now > suspend_start)
+ nsec = cycles_to_nsec_safe(suspend_clocksource, suspend_start, now);

/*
* Disable the suspend timer to save power if current clocksource is
--
2.34.1


2024-03-08 13:27:21

by Adrian Hunter

Subject: [PATCH 18/19] timekeeping: Let timekeeping_cycles_to_ns() handle both under and overflow

For the case !CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE, forego overflow
protection in the range (mask >> 1) < delta <= mask, and interpret such
deltas always as an inconsistency between CPU clock values. Deltas up to
max_cycles still take the fast path, deltas in (max_cycles, mask >> 1]
take the overflow-safe path, and anything above mask >> 1 is treated as
negative motion. That allows slightly neater code, and it is on a slow
path so it has no effect on performance.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 31 +++++++++++++------------------
1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 111dfdbd488f..4e18db1819f8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -266,17 +266,14 @@ static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr)
* Try to catch underflows by checking if we are seeing small
* mask-relative negative values.
*/
- if (unlikely((~delta & mask) < (mask >> 3))) {
+ if (unlikely((~delta & mask) < (mask >> 3)))
tk->underflow_seen = 1;
- now = last;
- }

- /* Cap delta value to the max_cycles values to avoid mult overflows */
- if (unlikely(delta > max)) {
+ /* Check for multiplication overflows */
+ if (unlikely(delta > max))
tk->overflow_seen = 1;
- now = last + max;
- }

+ /* timekeeping_cycles_to_ns() handles both under and overflow */
return timekeeping_cycles_to_ns(tkr, now);
}
#else
@@ -375,19 +372,17 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c
u64 mask = tkr->mask, delta = (cycles - tkr->cycle_last) & mask;

/*
- * This detects the case where the delta overflows the multiplication
- * with tkr->mult.
+ * This detects both negative motion and the case where the delta
+ * overflows the multiplication with tkr->mult.
*/
if (unlikely(delta > tkr->clock->max_cycles)) {
- if (IS_ENABLED(CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE)) {
- /*
- * Handle clocksource inconsistency between CPUs to prevent
- * time from going backwards by checking for the MSB of the
- * mask being set in the delta.
- */
- if (unlikely(delta & ~(mask >> 1)))
- return tkr->xtime_nsec >> tkr->shift;
- }
+ /*
+ * Handle clocksource inconsistency between CPUs to prevent
+ * time from going backwards by checking for the MSB of the
+ * mask being set in the delta.
+ */
+ if (delta & ~(mask >> 1))
+ return tkr->xtime_nsec >> tkr->shift;

return delta_to_ns_safe(tkr, delta);
}
--
2.34.1


2024-03-08 13:28:28

by Adrian Hunter

Subject: [PATCH 15/19] timekeeping: Fold in timekeeping_delta_to_ns()

timekeeping_delta_to_ns() is now called only from
timekeeping_cycles_to_ns(), and it is not useful otherwise. Simplify by
folding it into timekeeping_cycles_to_ns().

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 1bbfe1ff8d24..749387f22f0f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -364,23 +364,12 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
}

/* Timekeeper helper functions. */
-
-static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
-{
- u64 nsec;
-
- nsec = delta * tkr->mult + tkr->xtime_nsec;
- nsec >>= tkr->shift;
-
- return nsec;
-}
-
static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
{
/* Calculate the delta since the last update_wall_time() */
u64 delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);

- return timekeeping_delta_to_ns(tkr, delta);
+ return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift;
}

static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr)
--
2.34.1


2024-03-08 13:28:31

by Adrian Hunter

Subject: [PATCH 09/19] timekeeping: Move timekeeping helper functions

Move timekeeping helper functions to prepare for their reuse.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
kernel/time/timekeeping.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b58dffc58a8f..3375f0a6400f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -381,6 +381,23 @@ static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 de
return nsec;
}

+static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
+{
+ u64 delta;
+
+ /* calculate the delta since the last update_wall_time */
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
+
+static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
+{
+ u64 delta, cycles = tk_clock_read(tkr);
+
+ delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
+ return timekeeping_delta_to_ns(tkr, delta);
+}
+
static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
{
u64 delta;
@@ -389,15 +406,6 @@ static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
return timekeeping_delta_to_ns(tkr, delta);
}

-static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
-{
- u64 delta;
-
- /* calculate the delta since the last update_wall_time */
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
- return timekeeping_delta_to_ns(tkr, delta);
-}
-
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update
@@ -431,14 +439,6 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
memcpy(base + 1, base, sizeof(*base));
}

-static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
-{
- u64 delta, cycles = tk_clock_read(tkr);
-
- delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
- return timekeeping_delta_to_ns(tkr, delta);
-}
-
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
--
2.34.1


2024-03-08 13:28:49

by Adrian Hunter

Subject: [PATCH 06/19] vdso: Add vdso_data::max_cycles

Add vdso_data::max_cycles in preparation for using it to detect potential
multiplication overflow.

Suggested-by: Thomas Gleixner <[email protected]>
Signed-off-by: Adrian Hunter <[email protected]>
---
include/vdso/datapage.h | 4 ++++
kernel/time/vsyscall.c | 6 ++++++
2 files changed, 10 insertions(+)

diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h
index 5d5c0b8efff2..6c3d67d6b758 100644
--- a/include/vdso/datapage.h
+++ b/include/vdso/datapage.h
@@ -67,6 +67,7 @@ struct vdso_timestamp {
* @seq: timebase sequence counter
* @clock_mode: clock mode
* @cycle_last: timebase at clocksource init
+ * @max_cycles: maximum cycles which won't overflow 64bit multiplication
* @mask: clocksource mask
* @mult: clocksource multiplier
* @shift: clocksource shift
@@ -98,6 +99,9 @@ struct vdso_data {

s32 clock_mode;
u64 cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ u64 max_cycles;
+#endif
u64 mask;
u32 mult;
u32 shift;
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index f0d5062d9cbc..9193d6133e5d 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -22,10 +22,16 @@ static inline void update_vdso_data(struct vdso_data *vdata,
u64 nsec, sec;

vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ vdata[CS_HRES_COARSE].max_cycles = tk->tkr_mono.clock->max_cycles;
+#endif
vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+#ifdef CONFIG_GENERIC_VDSO_OVERFLOW_PROTECT
+ vdata[CS_RAW].max_cycles = tk->tkr_raw.clock->max_cycles;
+#endif
vdata[CS_RAW].mask = tk->tkr_raw.mask;
vdata[CS_RAW].mult = tk->tkr_raw.mult;
vdata[CS_RAW].shift = tk->tkr_raw.shift;
--
2.34.1


2024-03-09 02:09:53

by John Stultz

Subject: Re: [PATCH 01/19] vdso: Consolidate vdso_calc_delta()

On Fri, Mar 8, 2024 at 5:15 AM Adrian Hunter <[email protected]> wrote:
>
> Consolidate vdso_calc_delta(), in preparation for further simplification.
>
> Suggested-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Adrian Hunter <[email protected]>
> ---
> arch/powerpc/include/asm/vdso/gettimeofday.h | 17 ++---------------
> arch/s390/include/asm/vdso/gettimeofday.h | 7 ++-----
> lib/vdso/gettimeofday.c | 4 ++++
> 3 files changed, 8 insertions(+), 20 deletions(-)
>
> --- a/lib/vdso/gettimeofday.c
> +++ b/lib/vdso/gettimeofday.c
> @@ -13,7 +13,11 @@
> static __always_inline
> u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> {
> +#ifdef VDSO_DELTA_NOMASK
> + return (cycles - last) * mult;
> +#else
> return ((cycles - last) & mask) * mult;
> +#endif
> }

Nit: Just for readability, usually we avoid #ifdefs inside of functions.

Instead maybe:
#ifdef VDSO_DELTA_NOMASK
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
return (cycles - last) * mult;
}
#else
static __always_inline
u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
return ((cycles - last) & mask) * mult;
}
#endif

2024-03-09 07:48:59

by Christophe Leroy

Subject: Re: [PATCH 01/19] vdso: Consolidate vdso_calc_delta()



On 08/03/2024 at 14:14, Adrian Hunter wrote:
>
> Consolidate vdso_calc_delta(), in preparation for further simplification.
>
> Suggested-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Adrian Hunter <[email protected]>
> ---
> arch/powerpc/include/asm/vdso/gettimeofday.h | 17 ++---------------
> arch/s390/include/asm/vdso/gettimeofday.h | 7 ++-----
> lib/vdso/gettimeofday.c | 4 ++++
> 3 files changed, 8 insertions(+), 20 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
> index f0a4cf01e85c..f4da8e18cdf3 100644
> --- a/arch/powerpc/include/asm/vdso/gettimeofday.h
> +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
> @@ -14,6 +14,8 @@
>
> #define VDSO_HAS_TIME 1
>
> +#define VDSO_DELTA_NOMASK 1
> +
> static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
> const unsigned long _r4)
> {
> @@ -105,21 +107,6 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
> }
> #define vdso_clocksource_ok vdso_clocksource_ok
>
> -/*
> - * powerpc specific delta calculation.
> - *
> - * This variant removes the masking of the subtraction because the
> - * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
> - * which would result in a pointless operation. The compiler cannot
> - * optimize it away as the mask comes from the vdso data and is not compile
> - * time constant.
> - */

Please keep the comment. You can move it close to VDSO_DELTA_NOMASK.

> -static __always_inline u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> -{
> - return (cycles - last) * mult;
> -}
> -#define vdso_calc_delta vdso_calc_delta
> -
> #ifndef __powerpc64__
> static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
> {
> diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
> index db84942eb78f..7937765ccfa5 100644
> --- a/arch/s390/include/asm/vdso/gettimeofday.h
> +++ b/arch/s390/include/asm/vdso/gettimeofday.h
> @@ -6,16 +6,13 @@
>
> #define VDSO_HAS_CLOCK_GETRES 1
>
> +#define VDSO_DELTA_NOMASK 1
> +
> #include <asm/syscall.h>
> #include <asm/timex.h>
> #include <asm/unistd.h>
> #include <linux/compiler.h>
>
> -#define vdso_calc_delta __arch_vdso_calc_delta
> -static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> -{
> - return (cycles - last) * mult;
> -}
>
> static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
> {
> diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
> index ce2f69552003..042b95e8164d 100644
> --- a/lib/vdso/gettimeofday.c
> +++ b/lib/vdso/gettimeofday.c
> @@ -13,7 +13,11 @@
> static __always_inline
> u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
> {
> +#ifdef VDSO_DELTA_NOMASK
> + return (cycles - last) * mult;
> +#else
> return ((cycles - last) & mask) * mult;
> +#endif

See
https://docs.kernel.org/process/coding-style.html#conditional-compilation

You don't need #ifdefs here.

One solution is to define VDSO_DELTA_NOMASK to 0 in
include/vdso/datapage.h after including asm/vdso/gettimeofday.h:

#ifndef VDSO_DELTA_NOMASK
#define VDSO_DELTA_NOMASK 0
#endif

Then

u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
if (VDSO_DELTA_NOMASK)
mask = ~0ULL;

return ((cycles - last) & mask) * mult;
}

or

u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
{
if (VDSO_DELTA_NOMASK)
return (cycles - last) * mult;

return ((cycles - last) & mask) * mult;
}




> }
> #endif
>
> --
> 2.34.1
>