2017-08-25 13:40:54

by Chris Wilson

Subject: Re: [RFC][PATCH 4/4] time: Clean up CLOCK_MONOTONIC_RAW time handling

Quoting John Stultz (2017-05-27 04:33:55)
> Now that we've fixed the sub-ns handling for CLOCK_MONOTONIC_RAW,
> remove the duplicative tk->raw_time.tv_nsec, which can be
> stored in tk->tkr_raw.xtime_nsec (similarly to how it's handled
> for monotonic time).

This patch breaks tkr_raw pretty badly. It is now accumulating 2x faster
than tkr_mono, with the occasional wacky jump between two reads.

e.g.:

@@ -838,6 +840,11 @@ ktime_t ktime_get_raw(void)

} while (read_seqcount_retry(&tk_core.seq, seq));

+ pr_err("%s: raw.base=%llu, mono.base=%llu: nsecs=%llu, mono.nsecs=%llu\n",
+ __func__,
+ ktime_to_ns(base), ktime_to_ns(tk->tkr_mono.base),
+ nsecs, timekeeping_get_ns(&tk->tkr_mono));
+

gave
ktime_get_raw: raw.base=40255680121, mono.base=39940532364: nsecs=261321742, mono.nsecs=318630514
ktime_get_raw: raw.base=40339680124, mono.base=39940532364: nsecs=345836800, mono.nsecs=403109209
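
Working the numbers from the two samples: tkr_raw advanced by
(40339680124 - 40255680121) + (345836800 - 261321742) = 168515061 ns,
while tkr_mono advanced by only 403109209 - 318630514 = 84478695 ns over
the same interval, i.e. almost exactly twice as fast.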

https://bugs.freedesktop.org/show_bug.cgi?id=102336

The patch still reverts cleanly and, AIUI, was not part of the bugfixes
but a cleanup afterwards, so it can be reapplied at leisure.

> Cc: Thomas Gleixner <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Miroslav Lichvar <[email protected]>
> Cc: Richard Cochran <[email protected]>
> Cc: Prarit Bhargava <[email protected]>
> Cc: Stephen Boyd <[email protected]>
> Cc: Kevin Brodsky <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Daniel Mentz <[email protected]>
> Signed-off-by: John Stultz <[email protected]>
> ---
> arch/arm64/kernel/vdso.c | 6 ++---
> include/linux/timekeeper_internal.h | 4 ++--
> kernel/time/timekeeping.c | 47 ++++++++++++++++++++-----------------
> 3 files changed, 29 insertions(+), 28 deletions(-)
>
> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
> index d0cb007..7492d90 100644
> --- a/arch/arm64/kernel/vdso.c
> +++ b/arch/arm64/kernel/vdso.c
> @@ -220,10 +220,8 @@ void update_vsyscall(struct timekeeper *tk)
> if (!use_syscall) {
> /* tkr_mono.cycle_last == tkr_raw.cycle_last */
> vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
> - vdso_data->raw_time_sec = tk->raw_time.tv_sec;
> - vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
> - tk->tkr_raw.shift) +
> - tk->tkr_raw.xtime_nsec;
> + vdso_data->raw_time_sec = tk->raw_sec;
> + vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
> vdso_data->xtime_clock_sec = tk->xtime_sec;
> vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
> vdso_data->cs_mono_mult = tk->tkr_mono.mult;
> diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
> index 528cc86..8abf6df 100644
> --- a/include/linux/timekeeper_internal.h
> +++ b/include/linux/timekeeper_internal.h
> @@ -52,7 +52,7 @@ struct tk_read_base {
> * @clock_was_set_seq: The sequence number of clock was set events
> * @cs_was_changed_seq: The sequence number of clocksource change events
> * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
> - * @raw_time: Monotonic raw base time in timespec64 format
> + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
> * @cycle_interval: Number of clock cycles in one NTP interval
> * @xtime_interval: Number of clock shifted nano seconds in one NTP
> * interval.
> @@ -94,7 +94,7 @@ struct timekeeper {
> unsigned int clock_was_set_seq;
> u8 cs_was_changed_seq;
> ktime_t next_leap_ktime;
> - struct timespec64 raw_time;
> + u64 raw_sec;
>
> /* The following members are for timekeeping internal use */
> u64 cycle_interval;
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 35d3ba3..0c3b8c1 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -72,6 +72,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
> tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
> tk->xtime_sec++;
> }
> + while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
> + tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
> + tk->raw_sec++;
> + }
> }
>
> static inline struct timespec64 tk_xtime(struct timekeeper *tk)
> @@ -287,12 +291,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
> /* if changing clocks, convert xtime_nsec shift units */
> if (old_clock) {
> int shift_change = clock->shift - old_clock->shift;
> - if (shift_change < 0)
> + if (shift_change < 0) {
> tk->tkr_mono.xtime_nsec >>= -shift_change;
> - else
> + tk->tkr_raw.xtime_nsec >>= -shift_change;
> + } else {
> tk->tkr_mono.xtime_nsec <<= shift_change;
> + tk->tkr_raw.xtime_nsec <<= shift_change;
> + }
> }
> - tk->tkr_raw.xtime_nsec = 0;
>
> tk->tkr_mono.shift = clock->shift;
> tk->tkr_raw.shift = clock->shift;
> @@ -617,9 +623,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
> nsec = (u32) tk->wall_to_monotonic.tv_nsec;
> tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
>
> - /* Update the monotonic raw base */
> - tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
> -
> /*
> * The sum of the nanoseconds portions of xtime and
> * wall_to_monotonic can be greater/equal one second. Take
> @@ -629,6 +632,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
> if (nsec >= NSEC_PER_SEC)
> seconds++;
> tk->ktime_sec = seconds;
> +
> + /* Update the monotonic raw base */
> + seconds = tk->raw_sec;
> + nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
> + tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
> }
>
> /* must hold timekeeper_lock */
> @@ -670,7 +678,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
> static void timekeeping_forward_now(struct timekeeper *tk)
> {
> u64 cycle_now, delta;
> - u64 nsec;
>
> cycle_now = tk_clock_read(&tk->tkr_mono);
> delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
> @@ -682,10 +689,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
> /* If arch requires, add in get_arch_timeoffset() */
> tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
>
> - tk_normalize_xtime(tk);
>
> - nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
> - timespec64_add_ns(&tk->raw_time, nsec);
> + tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
> +
> + /* If arch requires, add in get_arch_timeoffset() */
> + tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
> +
> + tk_normalize_xtime(tk);
> }
>
> /**
> @@ -1371,19 +1381,18 @@ int timekeeping_notify(struct clocksource *clock)
> void getrawmonotonic64(struct timespec64 *ts)
> {
> struct timekeeper *tk = &tk_core.timekeeper;
> - struct timespec64 ts64;
> unsigned long seq;
> u64 nsecs;
>
> do {
> seq = read_seqcount_begin(&tk_core.seq);
> + ts->tv_sec = tk->raw_sec;
> nsecs = timekeeping_get_ns(&tk->tkr_raw);
> - ts64 = tk->raw_time;
>
> } while (read_seqcount_retry(&tk_core.seq, seq));
>
> - timespec64_add_ns(&ts64, nsecs);
> - *ts = ts64;
> + ts->tv_nsec = 0;
> + timespec64_add_ns(ts, nsecs);
> }
> EXPORT_SYMBOL(getrawmonotonic64);
>
> @@ -1507,8 +1516,7 @@ void __init timekeeping_init(void)
> tk_setup_internals(tk, clock);
>
> tk_set_xtime(tk, &now);
> - tk->raw_time.tv_sec = 0;
> - tk->raw_time.tv_nsec = 0;
> + tk->raw_sec = 0;
> if (boot.tv_sec == 0 && boot.tv_nsec == 0)
> boot = tk_xtime(tk);
>
> @@ -2009,17 +2017,12 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
> *clock_set |= accumulate_nsecs_to_secs(tk);
>
> /* Accumulate raw time */
> - tk->tkr_raw.xtime_nsec += (u64)tk->raw_time.tv_nsec
> - << tk->tkr_raw.shift;
> tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
> nsecps = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
> while (tk->tkr_raw.xtime_nsec >= nsecps) {
> tk->tkr_raw.xtime_nsec -= nsecps;
> - tk->raw_time.tv_sec++;
> + tk->raw_sec++;
> }
> - tk->raw_time.tv_nsec = tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift;
> - tk->tkr_raw.xtime_nsec -= (u64)tk->raw_time.tv_nsec
> - << tk->tkr_raw.shift;
>
> /* Accumulate error between NTP and clock interval */
> tk->ntp_error += tk->ntp_tick << shift;
> --
> 2.7.4
>


2017-08-25 18:55:22

by John Stultz

Subject: Re: [RFC][PATCH 4/4] time: Clean up CLOCK_MONOTONIC_RAW time handling

On Fri, Aug 25, 2017 at 6:40 AM, Chris Wilson <[email protected]> wrote:
> Quoting John Stultz (2017-05-27 04:33:55)
>> Now that we've fixed the sub-ns handling for CLOCK_MONOTONIC_RAW,
>> remove the duplicative tk->raw_time.tv_nsec, which can be
>> stored in tk->tkr_raw.xtime_nsec (similarly to how it's handled
>> for monotonic time).
>
> This patch breaks tkr_raw pretty badly. It is now accumulating 2x faster
> than tkr_mono, with the occasional wacky jump between two reads.
>
> e.g.:
>
> @@ -838,6 +840,11 @@ ktime_t ktime_get_raw(void)
>
> } while (read_seqcount_retry(&tk_core.seq, seq));
>
> + pr_err("%s: raw.base=%llu, mono.base=%llu: nsecs=%llu, mono.nsecs=%llu\n",
> + __func__,
> + ktime_to_ns(base), ktime_to_ns(tk->tkr_mono.base),
> + nsecs, timekeeping_get_ns(&tk->tkr_mono));
> +
>
> gave
> ktime_get_raw: raw.base=40255680121, mono.base=39940532364: nsecs=261321742, mono.nsecs=318630514
> ktime_get_raw: raw.base=40339680124, mono.base=39940532364: nsecs=345836800, mono.nsecs=403109209
>
> https://bugs.freedesktop.org/show_bug.cgi?id=102336
>
> The patch still reverts cleanly and, AIUI, was not part of the bugfixes
> but a cleanup afterwards, so it can be reapplied at leisure.

Thanks for the bug report!

It's odd, as I'm not seeing such behavior in my testing (using the
raw_skew or change_skew tests in kselftest/timers). Can you try
running those tests to see if they fail on your hardware?

From the bug report, it says it's BYT-specific, but I'm not sure what
that means. Are there any hardware-specific details you can share?
What clocksource is being used? Can you send a dmesg log?

I'll look over the code again to see if I can catch anything by
review. Worst case, if we can't get any traction on this in a day or so,
I'll submit a revert.

thanks
-john

2017-08-25 21:17:00

by John Stultz

Subject: Re: [RFC][PATCH 4/4] time: Clean up CLOCK_MONOTONIC_RAW time handling

On Fri, Aug 25, 2017 at 11:55 AM, John Stultz <[email protected]> wrote:
> I'll look over the code again to see if I can catch anything by
> review. Worse case if we can't get any traction on this in a day or so
> I'll submit a revert.

I think I found the issue. In tk_update_ktime_data() I add the raw_sec
and the shifted-down tk->tkr_raw.xtime_nsec to the base. But we already
add tk->tkr_raw.xtime_nsec to the offset and shift it all down in
timekeeping_delta_to_ns(), called from ktime_get_raw(), so we are
effectively accumulating the nsecs portion faster than we should.
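
To illustrate, here's a minimal standalone sketch (invented values, not
the kernel code itself):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	unsigned int shift = 8;                      /* stand-in for tkr_raw.shift */
	uint64_t raw_sec = 40;                       /* stand-in for tk->raw_sec */
	uint64_t xtime_nsec = 300000000ULL << shift; /* 0.3s, kept shifted up */

	/* update side, as merged: the shifted-down nsec portion gets
	 * folded into the base */
	uint64_t base = raw_sec * NSEC_PER_SEC + (xtime_nsec >> shift);

	/* read side: xtime_nsec (plus the cycle delta, omitted here) is
	 * shifted down and added to the base again */
	uint64_t now = base + (xtime_nsec >> shift);

	/* the 0.3s is counted twice: prints 40.600000000, not 40.300000000 */
	printf("%llu.%09llu\n", (unsigned long long)(now / NSEC_PER_SEC),
	       (unsigned long long)(now % NSEC_PER_SEC));
	return 0;
}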

This only crops up for internal ktime_get_raw() users, not for
getrawmonotonic64(), which uses the timespec generation rather than the
ktime method; that is why this wasn't seen by userspace time tests.
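
Schematically (my notation, not the literal code; "delta" is the
clocksource delta that both paths add the same way):

	ktime_get_raw():     tkr_raw.base + ((xtime_nsec + delta * mult) >> shift)
	                     /* base already contained xtime_nsec >> shift */

	getrawmonotonic64(): raw_sec * NSEC_PER_SEC
	                     + ((xtime_nsec + delta * mult) >> shift)
	                     /* built from raw_sec; never reads the base */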

I'll send a patch for testing shortly.

thanks
-john

2017-08-25 22:57:12

by John Stultz

Subject: [RFC][PATCH] time: Fix ktime_get_raw() issues caused by incorrect base accumulation

In commit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time
handling"), I mistakenly added the following:

/* Update the monotonic raw base */
seconds = tk->raw_sec;
nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

This adds the raw_sec value and the shifted-down raw xtime_nsec
to the base value.

This is problematic, as when calling ktime_get_raw() we add the
tk->tkr_raw.xtime_nsec and the current offset, shift it down, and add
it to the raw base.

This results in the shifted-down tk->tkr_raw.xtime_nsec being
added twice.

My mistake was that I was matching the monotonic base logic
above:

seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

This adds the wall_to_monotonic.tv_nsec value, but not the
tk->tkr_mono.xtime_nsec value, to the base.

The result of this is that ktime_get_raw() users (which are all
internal users) see the raw time move faster than it should (at a
rate that can vary with the current size of tkr_raw.xtime_nsec),
which has, at the least, resulted in problems with graphics
rendering performance.

To fix this, we simplify the tkr_raw.base accumulation to only
accumulate the raw_sec portion, and do not include the
tkr_raw.xtime_nsec portion, which will be added at read time.

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Miroslav Lichvar <[email protected]>
Cc: Richard Cochran <[email protected]>
Cc: Prarit Bhargava <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Kevin Brodsky <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Daniel Mentz <[email protected]>
Cc: Chris Wilson <[email protected]>
Reported-by: Chris Wilson <[email protected]>
Signed-off-by: John Stultz <[email protected]>
---
kernel/time/timekeeping.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa0..7e7e61c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
tk->ktime_sec = seconds;

/* Update the monotonic raw base */
- seconds = tk->raw_sec;
- nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
- tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}

/* must hold timekeeper_lock */
--
2.7.4

2017-08-26 10:21:23

by Chris Wilson

Subject: Re: [RFC][PATCH] time: Fix ktime_get_raw() issues caused by incorrect base accumulation

Quoting John Stultz (2017-08-25 23:57:04)
> In commit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time
> handling"), I mistakenly added the following:
>
> /* Update the monotonic raw base */
> seconds = tk->raw_sec;
> nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
> tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
>
> This adds the raw_sec value and the shifted-down raw xtime_nsec
> to the base value.
>
> This is problematic, as when calling ktime_get_raw() we add the
> tk->tkr_raw.xtime_nsec and the current offset, shift it down, and add
> it to the raw base.
>
> This results in the shifted-down tk->tkr_raw.xtime_nsec being
> added twice.
>
> My mistake was that I was matching the monotonic base logic
> above:
>
> seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
> nsec = (u32) tk->wall_to_monotonic.tv_nsec;
> tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
>
> This adds the wall_to_monotonic.tv_nsec value, but not the
> tk->tkr_mono.xtime_nsec value, to the base.
>
> The result of this is that ktime_get_raw() users (which are all
> internal users) see the raw time move faster than it should (at a
> rate that can vary with the current size of tkr_raw.xtime_nsec),
> which has, at the least, resulted in problems with graphics
> rendering performance.
>
> To fix this, we simplify the tkr_raw.base accumulation to only
> accumulate the raw_sec portion, and do not include the
> tkr_raw.xtime_nsec portion, which will be added at read time.

This fixes our testcase of using ktime_get_raw() deltas. Thanks,
Tested-by: Chris Wilson <[email protected]>
-Chris

Subject: [tip:timers/urgent] time: Fix ktime_get_raw() incorrect base accumulation

Commit-ID: 0bcdc0987cce9880436b70836c6a92bb8e744fd1
Gitweb: http://git.kernel.org/tip/0bcdc0987cce9880436b70836c6a92bb8e744fd1
Author: John Stultz <[email protected]>
AuthorDate: Fri, 25 Aug 2017 15:57:04 -0700
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sat, 26 Aug 2017 16:06:12 +0200

time: Fix ktime_get_raw() incorrect base accumulation

In commit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time
handling"), the following code got mistakenly added to the update of the
raw timekeeper:

/* Update the monotonic raw base */
seconds = tk->raw_sec;
nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

This adds the raw_sec value and the shifted-down raw xtime_nsec to the
base value.

But the read function adds the shifted-down tk->tkr_raw.xtime_nsec value
another time. The result of this is that ktime_get_raw() users (which are
all internal users) see the raw time move faster than it should (at a rate
that can vary with the current size of tkr_raw.xtime_nsec), which has, at
the least, resulted in problems with graphics rendering performance.

The change tried to match the monotonic base update logic:

seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

This adds the wall_to_monotonic.tv_nsec value, but not the
tk->tkr_mono.xtime_nsec value, to the base.

To fix this, simplify the tkr_raw.base accumulation to only accumulate the
raw_sec portion, and do not include the tkr_raw.xtime_nsec portion, which
will be added at read time.

Fixes: fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling")
Reported-and-tested-by: Chris Wilson <[email protected]>
Signed-off-by: John Stultz <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Prarit Bhargava <[email protected]>
Cc: Kevin Brodsky <[email protected]>
Cc: Richard Cochran <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Miroslav Lichvar <[email protected]>
Cc: Daniel Mentz <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]

---
kernel/time/timekeeping.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa0..7e7e61c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
tk->ktime_sec = seconds;

/* Update the monotonic raw base */
- seconds = tk->raw_sec;
- nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
- tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+ tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
}

/* must hold timekeeper_lock */