I am planning to add the boot clock as a trace clock that can account for
suspend time during tracing. However, ktime_get_with_offset() throws a warning
because the clocksource may be accessed while timekeeping is suspended.

Currently only the fast monotonic and raw timekeepers are halted; their cached
readout bases and the cycles at suspend are used to make them return time
during suspend. The ktime APIs, such as ktime_get_with_offset() used for the
boot clock, warn instead. Rather than warning, let's use the cached readouts
and the last cycles at suspend to return the time.
Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Rafael J. Wysocki <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/time/timekeeping.c | 72 ++++++++++++++++++++++++++++-------------------
1 file changed, 43 insertions(+), 29 deletions(-)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e..8ed43ce 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -434,29 +434,37 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
}
/**
- * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
- * @tk: Timekeeper to snapshot.
+ * halt_timekeepers - Prevent timekeepers from accessing clocksource.
*
* It generally is unsafe to access the clocksource after timekeeping has been
* suspended, so take a snapshot of the readout base of @tk and use it as the
* fast timekeeper's readout base while suspended. It will return the same
* number of cycles every time until timekeeping is resumed at which time the
- * proper readout base for the fast timekeeper will be restored automatically.
+ * proper readout base for the timekeeper will be restored automatically.
*/
-static void halt_fast_timekeeper(struct timekeeper *tk)
+
+static struct tk_read_base tkr_dummy_mono;
+static struct tk_read_base tkr_dummy_raw;
+
+static void halt_timekeepers(void)
{
- static struct tk_read_base tkr_dummy;
- struct tk_read_base *tkr = &tk->tkr_mono;
+ struct timekeeper *tk = &tk_core.timekeeper;
- memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- cycles_at_suspend = tkr->read(tkr->clock);
- tkr_dummy.read = dummy_clock_read;
- update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+ /*
+ * Save the last readouts of mono and raw and replace their
+ * read functions to return the last cycles at suspend.
+ */
+ cycles_at_suspend = tk->tkr_mono.cycle_last;
+ memcpy(&tkr_dummy_mono, &tk->tkr_mono, sizeof(tkr_dummy_mono));
+ memcpy(&tkr_dummy_raw, &tk->tkr_raw, sizeof(tkr_dummy_raw));
+ tkr_dummy_mono.read = dummy_clock_read;
+ tkr_dummy_raw.read = dummy_clock_read;
- tkr = &tk->tkr_raw;
- memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
- tkr_dummy.read = dummy_clock_read;
- update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
+ /*
+ * Halt the fast and slow timekeepers and use the cached readouts
+ */
+ update_fast_timekeeper(&tkr_dummy_mono, &tk_fast_mono);
+ update_fast_timekeeper(&tkr_dummy_raw, &tk_fast_raw);
}
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -689,17 +697,18 @@ EXPORT_SYMBOL(getnstimeofday64);
ktime_t ktime_get(void)
{
- struct timekeeper *tk = &tk_core.timekeeper;
+ struct tk_read_base *tkr;
unsigned int seq;
ktime_t base;
s64 nsecs;
- WARN_ON(timekeeping_suspended);
+ tkr = timekeeping_suspended ?
+ &tkr_dummy_mono : &tk_core.timekeeper.tkr_mono;
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->tkr_mono.base;
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ base = tkr->base;
+ nsecs = timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -709,15 +718,16 @@ EXPORT_SYMBOL_GPL(ktime_get);
u32 ktime_get_resolution_ns(void)
{
- struct timekeeper *tk = &tk_core.timekeeper;
+ struct tk_read_base *tkr;
unsigned int seq;
u32 nsecs;
- WARN_ON(timekeeping_suspended);
+ tkr = timekeeping_suspended ?
+ &tkr_dummy_mono : &tk_core.timekeeper.tkr_mono;
do {
seq = read_seqcount_begin(&tk_core.seq);
- nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
+ nsecs = tkr->mult >> tkr->shift;
} while (read_seqcount_retry(&tk_core.seq, seq));
return nsecs;
@@ -732,17 +742,18 @@ static ktime_t *offsets[TK_OFFS_MAX] = {
ktime_t ktime_get_with_offset(enum tk_offsets offs)
{
- struct timekeeper *tk = &tk_core.timekeeper;
+ struct tk_read_base *tkr;
unsigned int seq;
ktime_t base, *offset = offsets[offs];
s64 nsecs;
- WARN_ON(timekeeping_suspended);
+ tkr = timekeeping_suspended ?
+ &tkr_dummy_mono : &tk_core.timekeeper.tkr_mono;
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = ktime_add(tk->tkr_mono.base, *offset);
- nsecs = timekeeping_get_ns(&tk->tkr_mono);
+ base = ktime_add(tkr->base, *offset);
+ nsecs = timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -776,15 +787,18 @@ EXPORT_SYMBOL_GPL(ktime_mono_to_any);
*/
ktime_t ktime_get_raw(void)
{
- struct timekeeper *tk = &tk_core.timekeeper;
+ struct tk_read_base *tkr;
unsigned int seq;
ktime_t base;
s64 nsecs;
+ tkr = timekeeping_suspended ?
+ &tkr_dummy_raw : &tk_core.timekeeper.tkr_raw;
+
do {
seq = read_seqcount_begin(&tk_core.seq);
- base = tk->tkr_raw.base;
- nsecs = timekeeping_get_ns(&tk->tkr_raw);
+ base = tkr->base;
+ nsecs = timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1734,7 +1748,7 @@ int timekeeping_suspend(void)
}
timekeeping_update(tk, TK_MIRROR);
- halt_fast_timekeeper(tk);
+ halt_timekeepers();
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
--
2.8.0.rc3.226.g39d4020
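For context, the dummy read hook the patch installs already exists in
kernel/time/timekeeping.c; assuming it is unchanged from the version this
diff applies to, it simply hands back the cycles snapshotted at suspend:

static cycle_t dummy_clock_read(struct clocksource *cs)
{
	return cycles_at_suspend;
}

With ->read pointed at this, the delta against the saved cycle_last is zero,
so time appears frozen at the suspend point until resume restores the real
readout base.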
On Sat, 19 Nov 2016, Joel Fernandes wrote:
> I am planning to add boot clock as a trace clock that can account suspend time
> during tracing, however ktime_get_with_offset throws a warning as the
> clocksource is attempted to be accessed in suspend.
ktime_get_with_offset() cannot be used as a trace clock at all because it can
live lock in NMI context. That's why we have ktime_get_mono_fast_ns().
Thanks,
tglx
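His point, concretely: ktime_get_with_offset() brackets its readout with
read_seqcount_begin()/read_seqcount_retry() on tk_core.seq, so an NMI that
fires while the timekeeper is mid-update and then reads the clock spins on an
odd sequence count forever. The fast timekeepers avoid that with a seqcount
latch over two readout bases, so a reader always finds one stable copy; the
read side (mirroring __ktime_get_fast_ns() as it appears further below) is
roughly:

	do {
		seq = raw_read_seqcount_latch(&tkf->seq);
		tkr = tkf->base + (seq & 0x01);	/* stable copy, even mid-update */
		now = ktime_to_ns(tkr->base);
		now += timekeeping_delta_to_ns(tkr,
				clocksource_delta(tkr->read(tkr->clock),
						  tkr->cycle_last, tkr->mask));
	} while (read_seqcount_retry(&tkf->seq, seq));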
Hi Thomas,
On 11/20/2016 05:24 AM, Thomas Gleixner wrote:
> On Sat, 19 Nov 2016, Joel Fernandes wrote:
>
>> I am planning to add boot clock as a trace clock that can account suspend time
>> during tracing, however ktime_get_with_offset throws a warning as the
>> clocksource is attempted to be accessed in suspend.
>
>> ktime_get_with_offset() cannot be used as a trace clock at all because it can
>> live lock in NMI context. That's why we have ktime_get_mono_fast_ns().
But ktime_get_mono_fast_ns() doesn't account for suspend time; only the boot
clock (accessed with ktime_get_with_offset()) does, and for my use case it is
useful for the trace clock to account for time spent suspended.

Instead, would it be OK to introduce a fast boot clock derived from the fast
monotonic clock (boot time being monotonic time plus the offs_boot offset),
avoiding the NMI live lock issue you mentioned? Below is an untested patch
just to show the idea. Let me know your suggestions, and thanks,
Joel
----------8<--------------
From 78c4f89e6f39cdd32e91883f2d2a80c7d97e34cf Mon Sep 17 00:00:00 2001
From: Joel Fernandes <[email protected]>
Date: Sun, 20 Nov 2016 18:58:28 -0800
Subject: [RFC] timekeeping: Add a fast boot clock derived from fast monotonic clock
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/time/timekeeping.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e..41afa1e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -55,6 +55,12 @@ static struct timekeeper shadow_timekeeper;
*/
struct tk_fast {
seqcount_t seq;
+
+ /*
+ * first dimension is based on lower seq bit,
+ * second dimension is for offset type (real, boot, tai)
+ */
+ ktime_t offsets[2][3];
struct tk_read_base base[2];
};
@@ -350,14 +356,20 @@ static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
/* Force readers off to base[1] */
raw_write_seqcount_latch(&tkf->seq);
- /* Update base[0] */
+ /* Update base[0] and offsets */
memcpy(base, tkr, sizeof(*base));
+ tkf->offsets[0][TK_OFFS_REAL] = tk_core.timekeeper.offs_real;
+ tkf->offsets[0][TK_OFFS_BOOT] = tk_core.timekeeper.offs_boot;
+ tkf->offsets[0][TK_OFFS_TAI] = tk_core.timekeeper.offs_tai;
/* Force readers back to base[0] */
raw_write_seqcount_latch(&tkf->seq);
- /* Update base[1] */
+ /* Update base[1] and offsets */
memcpy(base + 1, base, sizeof(*base));
+ tkf->offsets[1][TK_OFFS_REAL] = tk_core.timekeeper.offs_real;
+ tkf->offsets[1][TK_OFFS_BOOT] = tk_core.timekeeper.offs_boot;
+ tkf->offsets[1][TK_OFFS_TAI] = tk_core.timekeeper.offs_tai;
}
/**
@@ -392,16 +404,23 @@ static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
-static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf, int offset)
{
struct tk_read_base *tkr;
unsigned int seq;
u64 now;
+ ktime_t *off;
do {
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
- now = ktime_to_ns(tkr->base);
+
+ if (offset >= 0) {
+ off = tkf->offsets[seq & 0x01];
+ now = ktime_to_ns(ktime_add(tkr->base, off[offset]));
+ } else {
+ now = ktime_to_ns(tkr->base);
+ }
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
@@ -415,16 +434,21 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
u64 ktime_get_mono_fast_ns(void)
{
- return __ktime_get_fast_ns(&tk_fast_mono);
+ return __ktime_get_fast_ns(&tk_fast_mono, -1);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
u64 ktime_get_raw_fast_ns(void)
{
- return __ktime_get_fast_ns(&tk_fast_raw);
+ return __ktime_get_fast_ns(&tk_fast_raw, -1);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+u64 ktime_get_boot_fast_ns(void)
+{
+ return __ktime_get_fast_ns(&tk_fast_mono, TK_OFFS_BOOT);
+}
+
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
--
2.8.0.rc3.226.g39d4020
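To illustrate the intended consumer, a trace clock wrapper over the new
helper could be as simple as the following (the name trace_clock_boot is
hypothetical, not part of this patch; notrace keeps the clock read itself out
of the trace):

/* Trace clock that also advances across suspend; NMI-safe via the latch */
u64 notrace trace_clock_boot(void)
{
	return ktime_get_boot_fast_ns();
}

which would then be wired into the trace_clocks[] table in
kernel/trace/trace.c.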
On Sun, 20 Nov 2016, joelaf wrote:
> diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
> index 37dec7e..41afa1e 100644
> --- a/kernel/time/timekeeping.c
> +++ b/kernel/time/timekeeping.c
> @@ -55,6 +55,12 @@ static struct timekeeper shadow_timekeeper;
> */
> struct tk_fast {
> seqcount_t seq;
> +
> + /*
> + * first dimension is based on lower seq bit,
> + * second dimension is for offset type (real, boot, tai)
> + */
> + ktime_t offsets[2][3];
s/3/TK_OFFS_MAX/ ?
> struct tk_read_base base[2];
The struct is cache line optimized, which this wrecks. If at all, we can put
the offsets at the end of the struct, but definitely not at the beginning.
Clock monotonic is the case we optimize for.
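A sketch of the layout that suggestion implies, with the hot monotonic
readout data kept up front and the offsets appended (using TK_OFFS_MAX per
the existing enum):

struct tk_fast {
	seqcount_t		seq;
	struct tk_read_base	base[2];
	ktime_t			offsets[2][TK_OFFS_MAX];
};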
> /**
> @@ -392,16 +404,23 @@ static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
> * of the following timestamps. Callers need to be aware of that and
> * deal with it.
> */
> -static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
> +static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf, int offset)
> {
> struct tk_read_base *tkr;
> unsigned int seq;
> u64 now;
> + ktime_t *off;
>
> do {
> seq = raw_read_seqcount_latch(&tkf->seq);
> tkr = tkf->base + (seq & 0x01);
> - now = ktime_to_ns(tkr->base);
> +
> + if (offset >= 0) {
This surely wants: unlikely() around the condition.
> + off = tkf->offsets[seq & 0x01];
> + now = ktime_to_ns(ktime_add(tkr->base, off[offset]));
> + } else {
> + now = ktime_to_ns(tkr->base);
> + }
Thanks,
tglx
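For reference, the condition with the suggested branch hint would read:

	if (unlikely(offset >= 0)) {
		off = tkf->offsets[seq & 0x01];
		now = ktime_to_ns(ktime_add(tkr->base, off[offset]));
	} else {
		now = ktime_to_ns(tkr->base);
	}

keeping the common, offset-less monotonic read on the fast path.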