2019-08-22 12:42:22

by Thomas Gleixner

[permalink] [raw]
Subject: [PATCH] timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

The VDSO update for CLOCK_BOOTTIME has an overflow issue as it shifts the
nanoseconds based boot time offset left by the clocksource shift. That
overflows once the boot time offset becomes large enough. As a consequence
CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
misbehave.

Fix it by storing a timespec64 representation of the offset when boot time
is adjusted and add that to the MONOTONIC base time value in the vdso data
page. Using the timespec64 representation avoids a 64bit division in the
update code.

Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
Reported-by: Chris Clayton <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
include/linux/timekeeper_internal.h | 5 +++++
kernel/time/timekeeping.c | 5 +++++
kernel/time/vsyscall.c | 22 +++++++++++++---------
3 files changed, 23 insertions(+), 9 deletions(-)

--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -57,6 +57,7 @@ struct tk_read_base {
* @cs_was_changed_seq: The sequence number of clocksource change events
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
* @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
@@ -84,6 +85,9 @@ struct tk_read_base {
*
* wall_to_monotonic is no longer the boot time, getboottime must be
* used instead.
+ *
+ * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
+ * accelerate the VDSO update for CLOCK_BOOTTIME.
*/
struct timekeeper {
struct tk_read_base tkr_mono;
@@ -99,6 +103,7 @@ struct timekeeper {
u8 cs_was_changed_seq;
ktime_t next_leap_ktime;
u64 raw_sec;
+ struct timespec64 monotonic_to_boot;

/* The following members are for timekeeping internal use */
u64 cycle_interval;
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct t
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
tk->offs_boot = ktime_add(tk->offs_boot, delta);
+ /*
+ * Timespec representation for VDSO update to avoid 64bit division
+ * on every update.
+ */
+ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}

/*
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -17,7 +17,7 @@ static inline void update_vdso_data(stru
struct timekeeper *tk)
{
struct vdso_timestamp *vdso_ts;
- u64 nsec;
+ u64 nsec, sec;

vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
@@ -45,23 +45,27 @@ static inline void update_vdso_data(stru
}
vdso_ts->nsec = nsec;

- /* CLOCK_MONOTONIC_RAW */
- vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
- vdso_ts->sec = tk->raw_sec;
- vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+ /* Copy MONOTONIC time for BOOTTIME */
+ sec = vdso_ts->sec;
+ /* Add the boot offset */
+ sec += tk->monotonic_to_boot.tv_sec;
+ nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;

/* CLOCK_BOOTTIME */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
- vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
- ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ vdso_ts->sec = sec;
+
while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
vdso_ts->sec++;
}
vdso_ts->nsec = nsec;

+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
/* CLOCK_TAI */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;


2019-08-22 18:23:31

by Chris Clayton

[permalink] [raw]
Subject: Re: [PATCH] timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

Thanks Thomas.

On 22/08/2019 12:00, Thomas Gleixner wrote:
> The VDSO update for CLOCK_BOOTTIME has a overflow issue as it shifts the
> nanoseconds based boot time offset left by the clocksource shift. That
> overflows once the boot time offset becomes large enough. As a consequence
> CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
> misbehave.
>
> Fix it by storing a timespec64 representation of the offset when boot time
> is adjusted and add that to the MONOTONIC base time value in the vdso data
> page. Using the timespec64 representation avoids a 64bit division in the
> update code.
>

I've tested resume from both suspend and hibernate and this patch fixes the problem I reported.

Tested-by: Chris Clayton <[email protected]>

> Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
> Reported-by: Chris Clayton <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> include/linux/timekeeper_internal.h | 5 +++++
> kernel/time/timekeeping.c | 5 +++++
> kernel/time/vsyscall.c | 22 +++++++++++++---------
> 3 files changed, 23 insertions(+), 9 deletions(-)
>
> --- a/include/linux/timekeeper_internal.h
> +++ b/include/linux/timekeeper_internal.h
> @@ -57,6 +57,7 @@ struct tk_read_base {
> * @cs_was_changed_seq: The sequence number of clocksource change events
> * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
> * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
> + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
> * @cycle_interval: Number of clock cycles in one NTP interval
> * @xtime_interval: Number of clock shifted nano seconds in one NTP
> * interval.
> @@ -84,6 +85,9 @@ struct tk_read_base {
> *
> * wall_to_monotonic is no longer the boot time, getboottime must be
> * used instead.
> + *
> + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
> + * accelerate the VDSO update for CLOCK_BOOTTIME.
> */
> struct timekeeper {
> struct tk_read_base tkr_mono;
> @@ -99,6 +103,7 @@ struct timekeeper {
> u8 cs_was_changed_seq;
> ktime_t next_leap_ktime;
> u64 raw_sec;
> + struct timespec64 monotonic_to_boot;
>
> /* The following members are for timekeeping internal use */
> u64 cycle_interval;
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct t
> static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
> {
> tk->offs_boot = ktime_add(tk->offs_boot, delta);
> + /*
> + * Timespec representation for VDSO update to avoid 64bit division
> + * on every update.
> + */
> + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
> }
>
> /*
> --- a/kernel/time/vsyscall.c
> +++ b/kernel/time/vsyscall.c
> @@ -17,7 +17,7 @@ static inline void update_vdso_data(stru
> struct timekeeper *tk)
> {
> struct vdso_timestamp *vdso_ts;
> - u64 nsec;
> + u64 nsec, sec;
>
> vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
> vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
> @@ -45,23 +45,27 @@ static inline void update_vdso_data(stru
> }
> vdso_ts->nsec = nsec;
>
> - /* CLOCK_MONOTONIC_RAW */
> - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
> - vdso_ts->sec = tk->raw_sec;
> - vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
> + /* Copy MONOTONIC time for BOOTTIME */
> + sec = vdso_ts->sec;
> + /* Add the boot offset */
> + sec += tk->monotonic_to_boot.tv_sec;
> + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
>
> /* CLOCK_BOOTTIME */
> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
> - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
> - nsec = tk->tkr_mono.xtime_nsec;
> - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
> - ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
> + vdso_ts->sec = sec;
> +
> while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
> nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
> vdso_ts->sec++;
> }
> vdso_ts->nsec = nsec;
>
> + /* CLOCK_MONOTONIC_RAW */
> + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
> + vdso_ts->sec = tk->raw_sec;
> + vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
> +
> /* CLOCK_TAI */
> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
> vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
>

2019-08-22 20:56:53

by Vincenzo Frascino

[permalink] [raw]
Subject: Re: [PATCH] timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

Hi Thomas,

On 22/08/2019 13:52, Chris Clayton wrote:
> Thanks Thomas.
>
> On 22/08/2019 12:00, Thomas Gleixner wrote:
>> The VDSO update for CLOCK_BOOTTIME has a overflow issue as it shifts the
>> nanoseconds based boot time offset left by the clocksource shift. That
>> overflows once the boot time offset becomes large enough. As a consequence
>> CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
>> misbehave.
>>
>> Fix it by storing a timespec64 representation of the offset when boot time
>> is adjusted and add that to the MONOTONIC base time value in the vdso data
>> page. Using the timespec64 representation avoids a 64bit division in the
>> update code.
>>
>
> I've tested resume from both suspend and hibernate and this patch fixes the problem I reported.
>
> Tested-by: Chris Clayton <[email protected]>
>

I can confirm what Chris reported. Please see below the scissors.

With this:

Tested-by: Vincenzo Frascino <[email protected]>

--->8---

Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:2697 489679147
2019-08-22 16:21:57.911
Clock test end

<...Suspend/Resume...>

Clock test start
clk_id: CLOCK_BOOTTIME
clock_getres: 0 1
clock_gettime:4489 684341925
2019-08-22 16:51:50.106
Clock test end


>> Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
>> Reported-by: Chris Clayton <[email protected]>
>> Signed-off-by: Thomas Gleixner <[email protected]>
>> ---
>> include/linux/timekeeper_internal.h | 5 +++++
>> kernel/time/timekeeping.c | 5 +++++
>> kernel/time/vsyscall.c | 22 +++++++++++++---------
>> 3 files changed, 23 insertions(+), 9 deletions(-)
>>
>> --- a/include/linux/timekeeper_internal.h
>> +++ b/include/linux/timekeeper_internal.h
>> @@ -57,6 +57,7 @@ struct tk_read_base {
>> * @cs_was_changed_seq: The sequence number of clocksource change events
>> * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
>> * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
>> + * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
>> * @cycle_interval: Number of clock cycles in one NTP interval
>> * @xtime_interval: Number of clock shifted nano seconds in one NTP
>> * interval.
>> @@ -84,6 +85,9 @@ struct tk_read_base {
>> *
>> * wall_to_monotonic is no longer the boot time, getboottime must be
>> * used instead.
>> + *
>> + * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
>> + * accelerate the VDSO update for CLOCK_BOOTTIME.
>> */
>> struct timekeeper {
>> struct tk_read_base tkr_mono;
>> @@ -99,6 +103,7 @@ struct timekeeper {
>> u8 cs_was_changed_seq;
>> ktime_t next_leap_ktime;
>> u64 raw_sec;
>> + struct timespec64 monotonic_to_boot;
>>
>> /* The following members are for timekeeping internal use */
>> u64 cycle_interval;
>> --- a/kernel/time/timekeeping.c
>> +++ b/kernel/time/timekeeping.c
>> @@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct t
>> static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
>> {
>> tk->offs_boot = ktime_add(tk->offs_boot, delta);
>> + /*
>> + * Timespec representation for VDSO update to avoid 64bit division
>> + * on every update.
>> + */
>> + tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
>> }
>>
>> /*
>> --- a/kernel/time/vsyscall.c
>> +++ b/kernel/time/vsyscall.c
>> @@ -17,7 +17,7 @@ static inline void update_vdso_data(stru
>> struct timekeeper *tk)
>> {
>> struct vdso_timestamp *vdso_ts;
>> - u64 nsec;
>> + u64 nsec, sec;
>>
>> vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
>> vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
>> @@ -45,23 +45,27 @@ static inline void update_vdso_data(stru
>> }
>> vdso_ts->nsec = nsec;
>>
>> - /* CLOCK_MONOTONIC_RAW */
>> - vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> - vdso_ts->sec = tk->raw_sec;
>> - vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
>> + /* Copy MONOTONIC time for BOOTTIME */
>> + sec = vdso_ts->sec;
>> + /* Add the boot offset */
>> + sec += tk->monotonic_to_boot.tv_sec;
>> + nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
>>
>> /* CLOCK_BOOTTIME */
>> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
>> - vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
>> - nsec = tk->tkr_mono.xtime_nsec;
>> - nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
>> - ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
>> + vdso_ts->sec = sec;
>> +
>> while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
>> nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
>> vdso_ts->sec++;
>> }
>> vdso_ts->nsec = nsec;
>>
>> + /* CLOCK_MONOTONIC_RAW */
>> + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
>> + vdso_ts->sec = tk->raw_sec;
>> + vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
>> +
>> /* CLOCK_TAI */
>> vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
>> vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
>>

--
Regards,
Vincenzo

2019-08-23 09:14:45

by tip-bot2 for Thomas Gleixner

[permalink] [raw]
Subject: [tip: timers/urgent] timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

The following commit has been merged into the timers/urgent branch of tip:

Commit-ID: b99328a60a482108f5195b4d611f90992ca016ba
Gitweb: https://git.kernel.org/tip/b99328a60a482108f5195b4d611f90992ca016ba
Author: Thomas Gleixner <[email protected]>
AuthorDate: Thu, 22 Aug 2019 13:00:15 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Fri, 23 Aug 2019 02:12:11 +02:00

timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

The VDSO update for CLOCK_BOOTTIME has an overflow issue as it shifts the
nanoseconds based boot time offset left by the clocksource shift. That
overflows once the boot time offset becomes large enough. As a consequence
CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
misbehave.

Fix it by storing a timespec64 representation of the offset when boot time
is adjusted and add that to the MONOTONIC base time value in the vdso data
page. Using the timespec64 representation avoids a 64bit division in the
update code.

Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
Reported-by: Chris Clayton <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Chris Clayton <[email protected]>
Tested-by: Vincenzo Frascino <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]

---
include/linux/timekeeper_internal.h | 5 +++++
kernel/time/timekeeping.c | 5 +++++
kernel/time/vsyscall.c | 22 +++++++++++++---------
3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 7acb953..84ff284 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -57,6 +57,7 @@ struct tk_read_base {
* @cs_was_changed_seq: The sequence number of clocksource change events
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
* @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
@@ -84,6 +85,9 @@ struct tk_read_base {
*
* wall_to_monotonic is no longer the boot time, getboottime must be
* used instead.
+ *
+ * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
+ * accelerate the VDSO update for CLOCK_BOOTTIME.
*/
struct timekeeper {
struct tk_read_base tkr_mono;
@@ -99,6 +103,7 @@ struct timekeeper {
u8 cs_was_changed_seq;
ktime_t next_leap_ktime;
u64 raw_sec;
+ struct timespec64 monotonic_to_boot;

/* The following members are for timekeeping internal use */
u64 cycle_interval;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d911c84..ca69290 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
tk->offs_boot = ktime_add(tk->offs_boot, delta);
+ /*
+ * Timespec representation for VDSO update to avoid 64bit division
+ * on every update.
+ */
+ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}

/*
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 8cf3596..4bc37ac 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata,
struct timekeeper *tk)
{
struct vdso_timestamp *vdso_ts;
- u64 nsec;
+ u64 nsec, sec;

vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
@@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata,
}
vdso_ts->nsec = nsec;

- /* CLOCK_MONOTONIC_RAW */
- vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
- vdso_ts->sec = tk->raw_sec;
- vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+ /* Copy MONOTONIC time for BOOTTIME */
+ sec = vdso_ts->sec;
+ /* Add the boot offset */
+ sec += tk->monotonic_to_boot.tv_sec;
+ nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;

/* CLOCK_BOOTTIME */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
- vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
- ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ vdso_ts->sec = sec;
+
while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
vdso_ts->sec++;
}
vdso_ts->nsec = nsec;

+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
/* CLOCK_TAI */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;