2021-01-06 00:51:24

by Paul E. McKenney

Subject: [PATCH RFC clocksource] Do not mark clocks unstable due to delays

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 31 ++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 176 +++++++++++++++++++++---
5 files changed, 189 insertions(+), 25 deletions(-)


2021-01-06 00:51:31

by Paul E. McKenney

Subject: [PATCH RFC clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.
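
In other words, the measuring CPU reads the clocksource before and after
asking the other CPUs to read it, and any remote value falling outside that
window (modulo counter wrap) flags its CPU as ahead or behind. A minimal
standalone sketch of just that comparison, using fixed-width stdint types
in place of the kernel's u64/s64 (the sample values are made up):

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * begin and end are read on the measuring CPU, mid on the CPU being
 * verified.  The masked subtraction tolerates counter wrap; a result that
 * is negative when viewed as signed means mid lies outside [begin, end].
 */
static bool remote_clock_in_window(uint64_t begin, uint64_t mid,
				   uint64_t end, uint64_t mask)
{
	if ((int64_t)((mid - begin) & mask) < 0)
		return false;	/* that CPU's clock is behind */
	if ((int64_t)((end - mid) & mask) < 0)
		return false;	/* that CPU's clock is ahead */
	return true;
}

int main(void)
{
	uint64_t mask = ~0ULL;	/* a 64-bit clocksource, CLOCKSOURCE_MASK(64) */

	printf("in window: %d\n", remote_clock_in_window(1000, 1500, 2000, mask));
	printf("behind:    %d\n", remote_clock_in_window(1000,  900, 2000, mask));
	printf("ahead:     %d\n", remote_clock_in_window(1000, 2500, 2000, mask));
	return 0;
}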

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..337bb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4663b86..23bcefe 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-01-06 00:51:34

by Paul E. McKenney

Subject: [PATCH RFC clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.
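
For example, with the shift count set to four, bit 4 of the CPU number
selects the direction of the injected one-second offset. A small user-space
sketch of that arithmetic (the parameter value and CPU range are
illustrative only):

#include <stdio.h>

int main(void)
{
	int inject_delay_shift_percpu = 4;	/* example boot-parameter value */
	long long nsec_per_sec = 1000000000LL;

	for (int cpu = 0; cpu < 64; cpu += 16) {
		/* Same sign selection as the patch: bit 4 of the CPU number. */
		int sign = ((cpu >> inject_delay_shift_percpu) & 0x1) * 2 - 1;

		printf("CPUs %2d-%2d: injected offset %+lld ns\n",
		       cpu, cpu + 15, sign * nsec_per_sec);
	}
	return 0;
}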

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 15aa3fe..613ce32 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -593,6 +593,15 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Shift count to obtain bit from CPU number to
+ determine whether to shift the time of the per-CPU
+ clock under test ahead or behind. For example,
+ setting this to the value four will result in
+ alternating groups of 16 CPUs shifting ahead and
+ the rest of the CPUs shifting behind. The default
+ value of -1 disables this type of error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 23bcefe..67cf41c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-01-06 00:51:49

by Paul E. McKenney

Subject: [PATCH RFC clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides a clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is, single-call runs.
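
To see the resulting cadence concretely, the run/frequency selection
expression can be lifted into a small user-space sketch (the parameter
values and the main() harness, which prints 'D' for a delayed call and '.'
otherwise, are illustrative only and not part of the patch):

#include <stdio.h>
#include <limits.h>

static int inject_delay_freq = 3;	/* example: every 3rd run */
static int inject_delay_run = 2;	/* example: runs of 2 delayed calls */
static int injectfail = -1;

/* Nonzero when a given clocksource_watchdog() call would inject delay. */
static int would_inject_delay(void)
{
	if (inject_delay_freq <= 0 || inject_delay_run <= 0)
		return 0;
	if (injectfail < 0 || injectfail > INT_MAX / 2)
		injectfail = inject_delay_run;
	/* Delay when (count / run) lands on a multiple of freq. */
	return !(++injectfail / inject_delay_run % inject_delay_freq);
}

int main(void)
{
	for (int i = 0; i < 30; i++)
		putchar(would_inject_delay() ? 'D' : '.');
	putchar('\n');
	return 0;
}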

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 +++++++++++++++++++
kernel/time/clocksource.c | 28 +++++++++++++++++++++++++
2 files changed, 50 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c722ec1..15aa3fe 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -577,6 +577,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..a0d9d36 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,32 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ printk("%s(): Injecting delay.\n", __func__);
+ injectfail = 0;
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ printk("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +235,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-01-06 00:52:02

by Paul E. McKenney

Subject: [PATCH RFC clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.
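
Rough numbers make the point: with a single smp_call_function() bracket,
the detection window for every CPU is set by the slowest responder, whereas
per-CPU brackets confine each CPU's latency to its own measurement. A toy
user-space illustration with made-up latencies:

#include <stdio.h>

int main(void)
{
	/* Hypothetical per-CPU response latencies in nanoseconds. */
	long long latency_ns[8] = {
		20000, 25000, 22000, 500000,	/* CPU 3 is slow, e.g. a preempted vCPU */
		21000, 23000, 20000, 24000,
	};
	long long worst = 0;

	for (int cpu = 0; cpu < 8; cpu++)
		if (latency_ns[cpu] > worst)
			worst = latency_ns[cpu];

	/* One bracket around all remote reads: everyone pays for the slowest. */
	printf("global bracket: window ~%lld ns for all CPUs\n", worst);

	/* One bracket per remote read: each CPU pays only for itself. */
	for (int cpu = 0; cpu < 8; cpu++)
		printf("per-CPU bracket, CPU %d: window ~%lld ns\n",
		       cpu, latency_ns[cpu]);
	return 0;
}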

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 67cf41c..31560c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-01-06 00:53:23

by Paul E. McKenney

Subject: [PATCH RFC clocksource 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.
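
A user-space analog of the read-back pattern, with CLOCK_MONOTONIC standing
in for the watchdog clock and CLOCK_REALTIME for the clock under test (the
threshold and retry count mirror the patch's defaults; this is a sketch of
the idea, not the kernel code):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define MAX_SKEW_NS	(1000000000LL >> 6)	/* mirrors WATCHDOG_MAX_SKEW */
#define MAX_RETRIES	3			/* mirrors max_read_retries */

static int64_t read_ns(clockid_t id)
{
	struct timespec ts;

	clock_gettime(id, &ts);
	return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

int main(void)
{
	int nretries = 0;
	int64_t wd_before, wd_after, cs_now;

	do {
		wd_before = read_ns(CLOCK_MONOTONIC);	/* "watchdog" read */
		cs_now = read_ns(CLOCK_REALTIME);	/* "clock under test" */
		wd_after = read_ns(CLOCK_MONOTONIC);
		/* A small read-back gap means nothing delayed the paired reads. */
	} while (wd_after - wd_before > MAX_SKEW_NS && nretries++ < MAX_RETRIES);

	printf("watchdog gap %lld ns after %d retries, clock under test %lld ns\n",
	       (long long)(wd_after - wd_before), nretries, (long long)cs_now);
	return 0;
}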

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a0d9d36..4663b86 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -203,7 +204,6 @@ static void clocksource_watchdog_inject_delay(void)
injectfail = inject_delay_run;
if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
printk("%s(): Injecting delay.\n", __func__);
- injectfail = 0;
for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
udelay(1000);
printk("%s(): Done injecting delay.\n", __func__);
@@ -214,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -225,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -233,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-01-06 16:30:16

by Rik van Riel

Subject: Re: [PATCH RFC clocksource 2/5] clocksource: Retry clock read if long delays detected

On Tue, 2021-01-05 at 16:41 -0800, [email protected] wrote:
>
> @@ -203,7 +204,6 @@ static void clocksource_watchdog_inject_delay(void)
> injectfail = inject_delay_run;
> if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
> printk("%s(): Injecting delay.\n", __func__);
> - injectfail = 0;
> for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
> udelay(1000);

Wait, patch 1 just added that line?

Should patch 1 not add it and this patch go without this removal? :)

+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }

Given that clocksource_cyc2ns uses unsigned multiplication
followed by a right shift, do we need to test for <0?

2021-01-06 19:55:55

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH RFC clocksource 2/5] clocksource: Retry clock read if long delays detected

On Wed, Jan 06, 2021 at 11:28:00AM -0500, Rik van Riel wrote:
> On Tue, 2021-01-05 at 16:41 -0800, [email protected] wrote:
> >
> > @@ -203,7 +204,6 @@ static void clocksource_watchdog_inject_delay(void)
> > injectfail = inject_delay_run;
> > if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
> > printk("%s(): Injecting delay.\n", __func__);
> > - injectfail = 0;
> > for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
> > udelay(1000);
>
> Wait, patch 1 just added that line?
>
> Should patch 1 not add it and this patch go without this removal? :)

Good catch, will fix. ;-)

> + wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
> + if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
> + wderr_nsec = wdagain_nsec;
> + if (nretries++ < max_read_retries)
> + goto retry;
> + }
>
> Given that clocksource_cyc2ns uses unsigned multiplication
> followed by a right shift, do we need to test for <0?

I am worried about the possibility of the "shift" argument to
clocksource_cyc2ns() being zero. For example, unless I am missing
something, clocksource_tsc has a zero .shift field.
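
A small user-space illustration of that concern (the delta and mult values
below are made up, chosen only so the product fits in 64 bits): with a
nonzero shift the high bits are shifted away, but with a shift of zero a
large product can set bit 63 and come back negative when stored in a signed
64-bit variable, which is exactly what the "< 0" check catches.

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as clocksource_cyc2ns(): ((u64)cycles * mult) >> shift. */
static int64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (int64_t)((cycles * mult) >> shift);
}

int main(void)
{
	uint64_t delta = 1ULL << 62;	/* hypothetical bogus huge cycle delta */
	uint32_t mult = 2;		/* hypothetical mult; product is 2^63 */

	printf("shift=1: %lld\n", (long long)cyc2ns(delta, mult, 1));
	printf("shift=0: %lld\n", (long long)cyc2ns(delta, mult, 0));	/* negative */
	return 0;
}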

Thanx, Paul

2021-01-06 21:03:21

by Rik van Riel

Subject: Re: [PATCH RFC clocksource 2/5] clocksource: Retry clock read if long delays detected

On Wed, 2021-01-06 at 11:53 -0800, Paul E. McKenney wrote:
> On Wed, Jan 06, 2021 at 11:28:00AM -0500, Rik van Riel wrote:
>
> > + wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
> > + if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
> > + wderr_nsec = wdagain_nsec;
> > + if (nretries++ < max_read_retries)
> > + goto retry;
> > + }
> >
> > Given that clocksource_cyc2ns uses unsigned multiplication
> > followed by a right shift, do we need to test for <0?
>
> I am worried about the possibility of the "shift" argument to
> clocksource_cyc2ns() being zero. For example, unless I am missing
> something, clocksource_tsc has a zero .shift field.

Oh, good point!

2021-01-12 10:39:37

by Paul E. McKenney

Subject: [PATCH v2 clocksource] Do not mark clocks unstable due to delays

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 31 ++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 188 insertions(+), 24 deletions(-)

2021-01-12 10:39:54

by Paul E. McKenney

Subject: [PATCH v2 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..337bb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4663b86..23bcefe 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-01-12 10:40:21

by Paul E. McKenney

Subject: [PATCH v2 clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4c59813..ca64b0c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -593,6 +593,15 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Shift count to obtain bit from CPU number to
+ determine whether to shift the time of the per-CPU
+ clock under test ahead or behind. For example,
+ setting this to the value four will result in
+ alternating groups of 16 CPUs shifting ahead and
+ the rest of the CPUs shifting behind. The default
+ value of -1 disables this type of error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 23bcefe..67cf41c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-01-12 10:40:29

by Paul E. McKenney

Subject: [PATCH v2 clocksource 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 545889c..4663b86 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-01-12 10:40:29

by Paul E. McKenney

Subject: [PATCH v2 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 67cf41c..3bae5fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-01-12 10:40:59

by Paul E. McKenney

Subject: [PATCH v2 clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is, single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9e3cdb2..4c59813 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -577,6 +577,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..545889c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ printk("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ printk("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-02-02 23:26:27

by Paul E. McKenney

Subject: [PATCH v3 clocksource] Do not mark clocks unstable due to delays

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v2:

o Rebased to v5.11-rc6.

o Updated Cc: list.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 31 ++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 188 insertions(+), 24 deletions(-)

2021-02-02 23:27:24

by Paul E. McKenney

Subject: [PATCH clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is, single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545..9965266 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -577,6 +577,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..545889c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ printk("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ printk("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-02-02 23:28:45

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.
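
For illustration only (this is not part of the patch), the ahead/behind
classification reduces to the sign of a mask-wrapped delta between the remote
CPU's mid-point read and the local begin/end reads. A minimal userspace
sketch, assuming a 64-bit clocksource mask (as for TSC and kvm-clock) and
made-up cycle counts:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mask = ~0ULL;		/* 64-bit clocksource mask */
	uint64_t csnow_begin = 1000;	/* local read before the remote read */
	uint64_t csnow_mid = 900;	/* remote CPU's read, an earlier value */
	uint64_t csnow_end = 1100;	/* local read after the remote read */
	int64_t delta;

	delta = (int64_t)((csnow_mid - csnow_begin) & mask);
	if (delta < 0)
		printf("remote CPU behind by %lld cycles\n", (long long)-delta);
	delta = (int64_t)((csnow_end - csnow_mid) & mask);
	if (delta < 0)
		printf("remote CPU ahead\n");
	return 0;
}

Here the remote CPU's value lags csnow_begin, so it is reported as behind.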

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..337bb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4663b86..23bcefe 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-02-02 23:31:18

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.
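
For illustration only (this is not part of the patch), a minimal userspace
sketch of how the selected CPU-number bit partitions CPUs into "ahead" and
"behind" sets, assuming the example value 4 for
clocksource.inject_delay_shift_percpu:

#include <stdio.h>

int main(void)
{
	int inject_delay_shift_percpu = 4;	/* assumed example value */
	int cpu;

	for (cpu = 0; cpu < 48; cpu++) {
		int sign = ((cpu >> inject_delay_shift_percpu) & 0x1) * 2 - 1;

		printf("CPU %2d: clock shifted %s\n",
		       cpu, sign > 0 ? "ahead" : "behind");
	}
	return 0;
}

With this value, CPUs 0-15 and 32-47 are shifted behind while CPUs 16-31 are
shifted ahead, that is, alternating groups of 16 CPUs.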

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9965266..f561e94 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -593,6 +593,15 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Shift count to obtain bit from CPU number to
+ determine whether to shift the time of the per-CPU
+ clock under test ahead or behind. For example,
+ setting this to the value four will result in
+ alternating groups of 16 CPUs shifting ahead and
+ the rest of the CPUs shifting behind. The default
+ value of -1 disable this type of error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 23bcefe..67cf41c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-02-02 23:31:18

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 67cf41c..3bae5fb 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-02-02 23:32:10

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.
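
For illustration only (this is not part of the patch), a minimal userspace
sketch of the read-bracketing pattern, using clock_gettime() as a stand-in
for both the watchdog read and the read of the clock under test, with limits
mirroring WATCHDOG_MAX_SKEW and the default max_read_retries:

#define _POSIX_C_SOURCE 199309L
#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define MAX_SKEW_NS	(1000000000ULL >> 6)	/* mirrors WATCHDOG_MAX_SKEW */
#define MAX_RETRIES	3			/* mirrors max_read_retries */

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

int main(void)
{
	uint64_t wdnow, wdagain, csnow;
	int nretries = 0;

retry:
	wdnow = now_ns();	/* first watchdog read */
	csnow = now_ns();	/* stand-in for cs->read(cs) */
	wdagain = now_ns();	/* second watchdog read */
	if (wdagain - wdnow > MAX_SKEW_NS && nretries++ < MAX_RETRIES)
		goto retry;
	printf("clock read %llu ns after %d retries\n",
	       (unsigned long long)csnow, nretries);
	return 0;
}

If the two watchdog reads are too far apart, the whole triple of reads is
simply redone, up to the retry limit.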

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 545889c..4663b86 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-02-03 00:48:17

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

On 2/2/21 9:06 AM, [email protected] wrote:
> From: "Paul E. McKenney" <[email protected]>
>
> Code that checks for clock desynchronization must itself be tested, so
> this commit creates a new clocksource.inject_delay_shift_percpu= kernel
> boot parameter that adds or subtracts a large value from the check read,
> using the specified bit of the CPU ID to determine whether to add or
> to subtract.
>
> Cc: John Stultz <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Stephen Boyd <[email protected]>
> Cc: Jonathan Corbet <[email protected]>
> Cc: Mark Rutland <[email protected]>
> Cc: Marc Zyngier <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Reported-by: Chris Mason <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
> ---
> Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
> kernel/time/clocksource.c | 10 +++++++++-
> 2 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 9965266..f561e94 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -593,6 +593,15 @@
> times the value specified for inject_delay_freq
> of consecutive non-delays.
>
> + clocksource.inject_delay_shift_percpu= [KNL]
> + Shift count to obtain bit from CPU number to
> + determine whether to shift the time of the per-CPU
> + clock under test ahead or behind. For example,

It's a good thing that you give an example -- it helps a little bit.
That sentence above needs to be rewritten...

> + setting this to the value four will result in
> + alternating groups of 16 CPUs shifting ahead and
> + the rest of the CPUs shifting behind. The default
> + value of -1 disable this type of error injection.

disables

> +
> clocksource.max_read_retries= [KNL]
> Number of clocksource_watchdog() retries due to
> external delays before the clock will be marked


--
~Randy

2021-02-03 01:05:27

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

On Tue, Feb 02, 2021 at 11:51:02AM -0800, Randy Dunlap wrote:
> On 2/2/21 9:06 AM, [email protected] wrote:
> > From: "Paul E. McKenney" <[email protected]>
> >
> > Code that checks for clock desynchronization must itself be tested, so
> > this commit creates a new clocksource.inject_delay_shift_percpu= kernel
> > boot parameter that adds or subtracts a large value from the check read,
> > using the specified bit of the CPU ID to determine whether to add or
> > to subtract.
> >
> > Cc: John Stultz <[email protected]>
> > Cc: Thomas Gleixner <[email protected]>
> > Cc: Stephen Boyd <[email protected]>
> > Cc: Jonathan Corbet <[email protected]>
> > Cc: Mark Rutland <[email protected]>
> > Cc: Marc Zyngier <[email protected]>
> > Cc: Andi Kleen <[email protected]>
> > Reported-by: Chris Mason <[email protected]>
> > Signed-off-by: Paul E. McKenney <[email protected]>
> > ---
> > Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
> > kernel/time/clocksource.c | 10 +++++++++-
> > 2 files changed, 18 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index 9965266..f561e94 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -593,6 +593,15 @@
> > times the value specified for inject_delay_freq
> > of consecutive non-delays.
> >
> > + clocksource.inject_delay_shift_percpu= [KNL]
> > + Shift count to obtain bit from CPU number to
> > + determine whether to shift the time of the per-CPU
> > + clock under test ahead or behind. For example,
>
> It's a good thing that you give an example -- it helps a little bit.
> That sentence above needs to be rewritten...

That is a bit obscure, now that you mention it.

> > + setting this to the value four will result in
> > + alternating groups of 16 CPUs shifting ahead and
> > + the rest of the CPUs shifting behind. The default
> > + value of -1 disable this type of error injection.
>
> disables

Good eyes!

So how about like this?

clocksource.inject_delay_shift_percpu= [KNL]
Clocksource delay injection partitions the CPUs
into two sets, one whose clocks are moved ahead
and the other whose clocks are moved behind.
This kernel parameter selects the CPU-number
bit that determines which of these two sets the
corresponding CPU is placed into. For example,
setting this parameter to the value four will
result in the first set containing alternating
groups of 16 CPUs whose clocks are moved ahead,
while the second set will contain the rest of
the CPUs, whose clocks are moved behind.

The default value of -1 disables this type of
error injection.

Thanx, Paul

> > +
> > clocksource.max_read_retries= [KNL]
> > Number of clocksource_watchdog() retries due to
> > external delays before the clock will be marked
>
>
> --
> ~Randy
>

2021-02-03 01:34:21

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

On 2/2/21 4:50 PM, Paul E. McKenney wrote:
> On Tue, Feb 02, 2021 at 11:51:02AM -0800, Randy Dunlap wrote:
>> On 2/2/21 9:06 AM, [email protected] wrote:
>>> From: "Paul E. McKenney" <[email protected]>
>>>
>>> Code that checks for clock desynchronization must itself be tested, so
>>> this commit creates a new clocksource.inject_delay_shift_percpu= kernel
>>> boot parameter that adds or subtracts a large value from the check read,
>>> using the specified bit of the CPU ID to determine whether to add or
>>> to subtract.
>>>
>>> Cc: John Stultz <[email protected]>
>>> Cc: Thomas Gleixner <[email protected]>
>>> Cc: Stephen Boyd <[email protected]>
>>> Cc: Jonathan Corbet <[email protected]>
>>> Cc: Mark Rutland <[email protected]>
>>> Cc: Marc Zyngier <[email protected]>
>>> Cc: Andi Kleen <[email protected]>
>>> Reported-by: Chris Mason <[email protected]>
>>> Signed-off-by: Paul E. McKenney <[email protected]>
>>> ---
>>> Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
>>> kernel/time/clocksource.c | 10 +++++++++-
>>> 2 files changed, 18 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>>> index 9965266..f561e94 100644
>>> --- a/Documentation/admin-guide/kernel-parameters.txt
>>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>>> @@ -593,6 +593,15 @@
>>> times the value specified for inject_delay_freq
>>> of consecutive non-delays.
>>>
>>> + clocksource.inject_delay_shift_percpu= [KNL]
>>> + Shift count to obtain bit from CPU number to
>>> + determine whether to shift the time of the per-CPU
>>> + clock under test ahead or behind. For example,
>>
>> It's a good thing that you give an example -- it helps a little bit.
>> That sentence above needs to be rewritten...
>
> That is a bit obscure, now that you mention it.
>
>>> + setting this to the value four will result in
>>> + alternating groups of 16 CPUs shifting ahead and
>>> + the rest of the CPUs shifting behind. The default
>>> + value of -1 disable this type of error injection.
>>
>> disables
>
> Good eyes!
>
> So how about like this?

Much better, thanks.

> clocksource.inject_delay_shift_percpu= [KNL]
> Clocksource delay injection partitions the CPUs
> into two sets, one whose clocks are moved ahead
> and the other whose clocks are moved behind.
> This kernel parameter selects the CPU-number
> bit that determines which of these two sets the
> corresponding CPU is placed into. For example,
> setting this parameter to the value four will

I know that in "writing," "four" should be written out as you have it,
but IMO using "4" here would be much better. FWIW.

> result in the first set containing alternating
> groups of 16 CPUs whose clocks are moved ahead,
> while the second set will contain the rest of
> the CPUs, whose clocks are moved behind.
>
> The default value of -1 disables this type of
> error injection.
>
> Thanx, Paul
>
>>> +
>>> clocksource.max_read_retries= [KNL]
>>> Number of clocksource_watchdog() retries due to
>>> external delays before the clock will be marked


thanks!
--
~Randy

2021-02-03 01:42:55

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

On Tue, Feb 02, 2021 at 05:31:55PM -0800, Randy Dunlap wrote:
> On 2/2/21 4:50 PM, Paul E. McKenney wrote:
> > On Tue, Feb 02, 2021 at 11:51:02AM -0800, Randy Dunlap wrote:
> >> On 2/2/21 9:06 AM, [email protected] wrote:
> >>> From: "Paul E. McKenney" <[email protected]>
> >>>
> >>> Code that checks for clock desynchronization must itself be tested, so
> >>> this commit creates a new clocksource.inject_delay_shift_percpu= kernel
> >>> boot parameter that adds or subtracts a large value from the check read,
> >>> using the specified bit of the CPU ID to determine whether to add or
> >>> to subtract.
> >>>
> >>> Cc: John Stultz <[email protected]>
> >>> Cc: Thomas Gleixner <[email protected]>
> >>> Cc: Stephen Boyd <[email protected]>
> >>> Cc: Jonathan Corbet <[email protected]>
> >>> Cc: Mark Rutland <[email protected]>
> >>> Cc: Marc Zyngier <[email protected]>
> >>> Cc: Andi Kleen <[email protected]>
> >>> Reported-by: Chris Mason <[email protected]>
> >>> Signed-off-by: Paul E. McKenney <[email protected]>
> >>> ---
> >>> Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++
> >>> kernel/time/clocksource.c | 10 +++++++++-
> >>> 2 files changed, 18 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> >>> index 9965266..f561e94 100644
> >>> --- a/Documentation/admin-guide/kernel-parameters.txt
> >>> +++ b/Documentation/admin-guide/kernel-parameters.txt
> >>> @@ -593,6 +593,15 @@
> >>> times the value specified for inject_delay_freq
> >>> of consecutive non-delays.
> >>>
> >>> + clocksource.inject_delay_shift_percpu= [KNL]
> >>> + Shift count to obtain bit from CPU number to
> >>> + determine whether to shift the time of the per-CPU
> >>> + clock under test ahead or behind. For example,
> >>
> >> It's a good thing that you give an example -- it helps a little bit.
> >> That sentence above needs to be rewritten...
> >
> > That is a bit obscure, now that you mention it.
> >
> >>> + setting this to the value four will result in
> >>> + alternating groups of 16 CPUs shifting ahead and
> >>> + the rest of the CPUs shifting behind. The default
> >>> + value of -1 disable this type of error injection.
> >>
> >> disables
> >
> > Good eyes!
> >
> > So how about like this?
>
> Much better, thanks.
>
> > clocksource.inject_delay_shift_percpu= [KNL]
> > Clocksource delay injection partitions the CPUs
> > into two sets, one whose clocks are moved ahead
> > and the other whose clocks are moved behind.
> > This kernel parameter selects the CPU-number
> > bit that determines which of these two sets the
> > corresponding CPU is placed into. For example,
> > setting this parameter to the value four will
>
> I know that in "writing," "four" should be written out as you have it,
> but IMO using "4" here would be much better. FWIW.

As long as it is "four" and not "fore"! Updated as requested. ;-)

Thanx, Paul

> > result in the first set containing alternating
> > groups of 16 CPUs whose clocks are moved ahead,
> > while the second set will contain the rest of
> > the CPUs, whose clocks are moved behind.
> >
> > The default value of -1 disables this type of
> > error injection.
> >
> > Thanx, Paul
> >
> >>> +
> >>> clocksource.max_read_retries= [KNL]
> >>> Number of clocksource_watchdog() retries due to
> >>> external delays before the clock will be marked
>
>
> thanks!
> --
> ~Randy
>

2021-02-17 21:30:33

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v3 clocksource] Do not mark clocks unstable due to delays

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v3:

o Rebased to v5.11.

o Apply Randy Dunlap feedback.

Changes since v2:

o Rebased to v5.11-rc6.

o Updated Cc: list.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 38 +++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 195 insertions(+), 24 deletions(-)

2021-02-17 21:31:06

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides a clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is, single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545..9965266 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -577,6 +577,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..4be4391 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ pr_warn("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ pr_warn("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-02-17 21:31:33

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..337bb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3f734c6..663bc53 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-02-17 21:31:53

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Apply Randy Dunlap feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9965266..628e87f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -593,6 +593,22 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Clocksource delay injection partitions the CPUs
+ into two sets, one whose clocks are moved ahead
+ and the other whose clocks are moved behind.
+ This kernel parameter selects the CPU-number
+ bit that determines which of these two sets the
+ corresponding CPU is placed into. For example,
+ setting this parameter to the value 4 will result
+ in the first set containing alternating groups
+ of 16 CPUs whose clocks are moved ahead, while
+ the second set will contain the rest of the CPUs,
+ whose clocks are moved behind.
+
+ The default value of -1 disables this type of
+ error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 663bc53..df48416 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-02-17 21:32:00

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index df48416..4161c84 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-02-17 21:33:37

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH clocksource 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4be4391..3f734c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-03-04 14:17:53

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v5 clocksource] Do not mark clocks unstable due to delays for v5.13

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

Note that "sufficient delay" can be provided by SMIs, NMIs, and of course
vCPU preemption.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v4:

o Rebased to v5.12-rc1.

Changes since v3:

o Rebased to v5.11.

o Apply Randy Dunlap feedback.

Changes since v2:

o Rebased to v5.11-rc6.

o Updated Cc: list.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 38 +++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 195 insertions(+), 24 deletions(-)

2021-03-04 14:18:04

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH kernel/time 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.
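
A condensed sketch of the check (illustrative only; the authoritative
code is in the diff below): each other CPU samples the clock under test
between a pair of reads on the reporting CPU, and any CPU whose sample
falls outside that window is flagged.

    csnow_begin = cs->read(cs);                    /* reporting CPU */
    smp_call_function(clocksource_verify_one_cpu, cs, 1); /* others record csnow_mid */
    csnow_end = cs->read(cs);                      /* reporting CPU again */
    for_each_online_cpu(cpu) {
            if (cpu == smp_processor_id())
                    continue;
            if ((s64)((per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask) < 0)
                    cpumask_set_cpu(cpu, &cpus_behind); /* sampled "before" begin */
            if ((s64)((csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask) < 0)
                    cpumask_set_cpu(cpu, &cpus_ahead);  /* sampled "after" end */
    }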

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index aa59374..337bb2c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3f734c6..663bc53 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-03-04 14:20:01

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH kernel/time 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides a clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.
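
For example (an illustrative boot line, not part of the patch), booting
with:

    clocksource.inject_delay_freq=2 clocksource.inject_delay_run=3

asks that roughly every other run of three consecutive
clocksource_watchdog() invocations have delays injected, that is, three
delayed calls followed by three undelayed calls, repeating.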

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..fc57952 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -583,6 +583,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..4be4391 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ pr_warn("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ pr_warn("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-03-04 14:20:24

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH kernel/time 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.
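
In condensed form, the measurement pattern after this change is roughly
as follows (an illustrative sketch; see the diff below for the actual
code):

    for_each_online_cpu(cpu) {
            if (cpu == smp_processor_id())
                    continue;
            csnow_begin = cs->read(cs);   /* bracket only this CPU's sample */
            smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
            csnow_end = cs->read(cs);
            /* classify this CPU as ahead/behind, track min/max duration */
    }

A slow IPI round trip to one CPU therefore widens only that CPU's
measurement window instead of everyone's.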

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index df48416..4161c84 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-03-04 14:20:24

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH kernel/time 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.
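
For example (illustrative arithmetic based on the hunk below), with
clocksource.inject_delay_shift_percpu=4 the direction is chosen by bit 4
of the CPU number:

    sign = ((smp_processor_id() >> 4) & 0x1) * 2 - 1;
    /* CPU  7: (7 >> 4) & 1 == 0, so sign == -1: check read skewed a second behind */
    /* CPU 16: (16 >> 4) & 1 == 1, so sign == +1: check read skewed a second ahead */

so CPUs 0-15, 32-47, ... appear behind and CPUs 16-31, 48-63, ... appear
ahead, exercising both the cpus_behind and cpus_ahead reporting paths.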

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Apply Randy Dunlap feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fc57952..f9da90f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -599,6 +599,22 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Clocksource delay injection partitions the CPUs
+ into two sets, one whose clocks are moved ahead
+ and the other whose clocks are moved behind.
+ This kernel parameter selects the CPU-number
+ bit that determines which of these two sets the
+ corresponding CPU is placed into. For example,
+ setting this parameter to the value 4 will result
+ in the first set containing alternating groups
+ of 16 CPUs whose clocks are moved ahead, while
+ the second set will contain the rest of the CPUs,
+ whose clocks are moved behind.
+
+ The default value of -1 disables this type of
+ error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 663bc53..df48416 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-03-04 23:57:34

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH kernel/time 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.
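
The resulting read sequence is, in condensed form (an illustrative
sketch; the real code is in the diff below):

retry:
    local_irq_disable();
    wdnow   = watchdog->read(watchdog);   /* first watchdog read */
    csnow   = cs->read(cs);               /* clock under test */
    wdagain = watchdog->read(watchdog);   /* watchdog read-back */
    local_irq_enable();
    /* If the delta between the two watchdog reads is negative or exceeds
       WATCHDOG_MAX_SKEW, something delayed these reads; retry up to
       clocksource.max_read_retries times. */

If retries were needed, the pr_warn() added below reports how long the
reads were delayed and how many attempts were made.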

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4be4391..3f734c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-04-02 20:29:57

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v5 clocksource] Do not mark clocks unstable due to delays for v5.13

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

Note that "sufficient delay" can be provided by SMIs, NMIs, and of course
vCPU preemption.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v5:

o Rebased to v5.12-rc5.

Changes since v4:

o Rebased to v5.12-rc1.

Changes since v3:

o Rebased to v5.11.

o Apply Randy Dunlap feedback.

Changes since v2:

o Rebased to v5.11-rc6.

o Updated Cc: list.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 38 +++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 195 insertions(+), 24 deletions(-)

2021-04-02 20:32:11

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4be4391..3f734c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-04-02 20:32:33

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides a clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..fc57952 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -583,6 +583,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..4be4391 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ pr_warn("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ pr_warn("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-04-02 20:33:03

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Apply Randy Dunlap feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fc57952..f9da90f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -599,6 +599,22 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Clocksource delay injection partitions the CPUs
+ into two sets, one whose clocks are moved ahead
+ and the other whose clocks are moved behind.
+ This kernel parameter selects the CPU-number
+ bit that determines which of these two sets the
+ corresponding CPU is placed into. For example,
+ setting this parameter to the value 4 will result
+ in the first set containing alternating groups
+ of 16 CPUs whose clocks are moved ahead, while
+ the second set will contain the rest of the CPUs,
+ whose clocks are moved behind.
+
+ The default value of -1 disables this type of
+ error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 663bc53..df48416 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-04-02 20:34:25

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index df48416..4161c84 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-04-02 20:34:35

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1fc0962..97eeaf1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3f734c6..663bc53 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-04-02 22:23:17

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog

On Fri, Apr 02 2021 at 13:31, paulmck wrote:

The subsystem prefix does not parse:

[PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog

I look at the actual code changes after the easter break.

Thanks,

tglx

2021-04-02 22:38:07

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog

On Sat, Apr 03, 2021 at 12:22:40AM +0200, Thomas Gleixner wrote:
> On Fri, Apr 02 2021 at 13:31, paulmck wrote:
>
> The subsystem prefix does not parse:
>
> [PATCH v6 clocksource] Do not mark clocks unstable dueclocksource: Provide module parameters to inject delays in watchdog

Where is that brown bag when I need it? :-/

> I look at the actual code changes after the easter break.

I will resend a clean copy as a reply to your message.

Thanx, Paul

2021-04-02 22:49:42

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13

Hello!

If there is a sufficient delay between reading the watchdog clock and the
clock under test, the clock under test will be marked unstable through no
fault of its own. This series checks for this, doing limited retries
to get a good set of clock reads. If the clock is marked unstable
and is marked as being per-CPU, cross-CPU synchronization is checked.
This series also provides delay injection, which may be enabled via
kernel boot parameters to test the checking for delays.

Note that "sufficient delay" can be provided by SMIs, NMIs, and of course
vCPU preemption.

1. Provide module parameters to inject delays in watchdog.

2. Retry clock read if long delays detected.

3. Check per-CPU clock synchronization when marked unstable.

4. Provide a module parameter to fuzz per-CPU clock checking.

5. Do pairwise clock-desynchronization checking.

Changes since v6:

o Fix embarrassing git-format-patch operator error.

Changes since v5:

o Rebased to v5.12-rc5.

Changes since v4:

o Rebased to v5.12-rc1.

Changes since v3:

o Rebased to v5.11.

o Apply Randy Dunlap feedback.

Changes since v2:

o Rebased to v5.11-rc6.

o Updated Cc: list.

Changes since v1:

o Applied feedback from Rik van Riel.

o Rebased to v5.11-rc3.

o Stripped "RFC" from the subject lines.

Thanx, Paul

------------------------------------------------------------------------

Documentation/admin-guide/kernel-parameters.txt | 38 +++++
arch/x86/kernel/kvmclock.c | 2
arch/x86/kernel/tsc.c | 3
include/linux/clocksource.h | 2
kernel/time/clocksource.c | 174 +++++++++++++++++++++---
5 files changed, 195 insertions(+), 24 deletions(-)

2021-04-02 22:49:49

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource 2/5] clocksource: Retry clock read if long delays detected

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks. Yes, interrupts are
disabled across those two reads, but there is no shortage of things that
can delay interrupts-disabled regions of code ranging from SMI handlers
to vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

This commit therefore re-reads the watchdog clock on either side of
the read from the clock under test. If the watchdog clock shows an
excessive time delta between its pair of reads, the reads are retried.
The maximum number of retries is specified by a new kernel boot
parameter clocksource.max_read_retries, which defaults to three, that
is, up to four reads, one initial and up to three retries. If retries
were required, a message is printed on the console. If the number of
retries is exceeded, the clock under test will be marked unstable.
However, the probability of this happening due to various sorts of
delays is quite small. In addition, the reason (clock-read delays)
for the unstable marking will be apparent.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Per-clocksource retries per Neeraj Upadhyay feedback. ]
[ paulmck: Don't reset injectfail per Neeraj Upadhyay feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4be4391..3f734c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,7 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
- u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
+ u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
+ int64_t wd_nsec, wdagain_nsec, wderr_nsec = 0, cs_nsec;
int next_cpu, reset_pending;
+ int nretries;

spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
reset_pending = atomic_read(&watchdog_reset_pending);

list_for_each_entry(cs, &watchdog_list, wd_list) {
+ nretries = 0;

/* Clocksource already marked unstable? */
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -232,11 +235,23 @@ static void clocksource_watchdog(struct timer_list *unused)
continue;
}

+retry:
local_irq_disable();
- csnow = cs->read(cs);
- clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
+ clocksource_watchdog_inject_delay();
+ csnow = cs->read(cs);
+ wdagain = watchdog->read(watchdog);
local_irq_enable();
+ delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
+ wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
+ if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
+ wderr_nsec = wdagain_nsec;
+ if (nretries++ < max_read_retries)
+ goto retry;
+ }
+ if (nretries)
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
+ smp_processor_id(), watchdog->name, wderr_nsec, nretries);

/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
--
2.9.5

2021-04-02 22:49:58

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource 1/5] clocksource: Provide module parameters to inject delays in watchdog

From: "Paul E. McKenney" <[email protected]>

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks. Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can
delay interrupts-disabled regions of code ranging from SMI handlers to
vCPU preemption. It would be good to have some indication as to why
the clock was marked unstable.

The first step is a way of injecting such delays, and this
commit therefore provides a clocksource.inject_delay_freq and
clocksource.inject_delay_run kernel boot parameters that specify that
sufficient delay be injected to cause the clocksource_watchdog()
function to mark a clock unstable. This delay is injected every
Nth set of M calls to clocksource_watchdog(), where N is the value
specified for the inject_delay_freq boot parameter and M is the value
specified for the inject_delay_run boot parameter. Values of zero or
less for either parameter disable delay injection, and the default for
clocksource.inject_delay_freq is zero, that is, disabled. The default for
clocksource.inject_delay_run is the value one, that is single-call runs.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
[ paulmck: Apply Rik van Riel feedback. ]
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 22 ++++++++++++++++++++
kernel/time/clocksource.c | 27 +++++++++++++++++++++++++
2 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0454572..fc57952 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -583,6 +583,28 @@
loops can be debugged more effectively on production
systems.

+ clocksource.inject_delay_freq= [KNL]
+ Number of runs of calls to clocksource_watchdog()
+ before delays are injected between reads from the
+ two clocksources. Values less than or equal to
+ zero disable this delay injection. These delays
+ can cause clocks to be marked unstable, so use
+ of this parameter should therefore be avoided on
+ production systems. Defaults to zero (disabled).
+
+ clocksource.inject_delay_run= [KNL]
+ Run lengths of clocksource_watchdog() delay
+ injections. Specifying the value 8 will result
+ in eight consecutive delays followed by eight
+ times the value specified for inject_delay_freq
+ of consecutive non-delays.
+
+ clocksource.max_read_retries= [KNL]
+ Number of clocksource_watchdog() retries due to
+ external delays before the clock will be marked
+ unstable. Defaults to three retries, that is,
+ four attempts to read the clock under test.
+
clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cce484a..4be4391 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,7 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
+#include <linux/delay.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"
@@ -184,6 +185,31 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_unlock_irqrestore(&watchdog_lock, flags);
}

+static int inject_delay_freq;
+module_param(inject_delay_freq, int, 0644);
+static int inject_delay_run = 1;
+module_param(inject_delay_run, int, 0644);
+static int max_read_retries = 3;
+module_param(max_read_retries, int, 0644);
+
+static void clocksource_watchdog_inject_delay(void)
+{
+ int i;
+ static int injectfail = -1;
+
+ if (inject_delay_freq <= 0 || inject_delay_run <= 0)
+ return;
+ if (injectfail < 0 || injectfail > INT_MAX / 2)
+ injectfail = inject_delay_run;
+ if (!(++injectfail / inject_delay_run % inject_delay_freq)) {
+ pr_warn("%s(): Injecting delay.\n", __func__);
+ for (i = 0; i < 2 * WATCHDOG_THRESHOLD / NSEC_PER_MSEC; i++)
+ udelay(1000);
+ pr_warn("%s(): Done injecting delay.\n", __func__);
+ }
+ WARN_ON_ONCE(injectfail < 0);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -208,6 +234,7 @@ static void clocksource_watchdog(struct timer_list *unused)

local_irq_disable();
csnow = cs->read(cs);
+ clocksource_watchdog_inject_delay();
wdnow = watchdog->read(watchdog);
local_irq_enable();

--
2.9.5

2021-04-02 22:50:40

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource 4/5] clocksource: Provide a module parameter to fuzz per-CPU clock checking

From: "Paul E. McKenney" <[email protected]>

Code that checks for clock desynchronization must itself be tested, so
this commit creates a new clocksource.inject_delay_shift_percpu= kernel
boot parameter that adds or subtracts a large value from the check read,
using the specified bit of the CPU ID to determine whether to add or
to subtract.
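
As an illustration only (not part of this patch), the following user-space
sketch shows how the sign selection works out for
clocksource.inject_delay_shift_percpu=4, using the same expression that
clocksource_verify_one_cpu() gains below:

#include <stdio.h>

int main(void)
{
        int shift = 4;  /* clocksource.inject_delay_shift_percpu=4 */
        int cpu;

        for (cpu = 0; cpu < 48; cpu += 8) {
                /* -1: check read moved one second behind, +1: one second ahead. */
                int sign = ((cpu >> shift) & 0x1) * 2 - 1;

                printf("CPU %2d: delta = %+d second(s)\n", cpu, sign);
        }
        return 0;
}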

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Apply Randy Dunlap feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 16 ++++++++++++++++
kernel/time/clocksource.c | 10 +++++++++-
2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fc57952..f9da90f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -599,6 +599,22 @@
times the value specified for inject_delay_freq
of consecutive non-delays.

+ clocksource.inject_delay_shift_percpu= [KNL]
+ Clocksource delay injection partitions the CPUs
+ into two sets, one whose clocks are moved ahead
+ and the other whose clocks are moved behind.
+ This kernel parameter selects the CPU-number
+ bit that determines which of these two sets the
+ corresponding CPU is placed into. For example,
+ setting this parameter to the value 4 will result
+ in the first set containing alternating groups
+ of 16 CPUs whose clocks are moved ahead, while
+ the second set will contain the rest of the CPUs,
+ whose clocks are moved behind.
+
+ The default value of -1 disables this type of
+ error injection.
+
clocksource.max_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 663bc53..df48416 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -190,6 +190,8 @@ static int inject_delay_freq;
module_param(inject_delay_freq, int, 0644);
static int inject_delay_run = 1;
module_param(inject_delay_run, int, 0644);
+static int inject_delay_shift_percpu = -1;
+module_param(inject_delay_shift_percpu, int, 0644);
static int max_read_retries = 3;
module_param(max_read_retries, int, 0644);

@@ -219,8 +221,14 @@ static cpumask_t cpus_behind;
static void clocksource_verify_one_cpu(void *csin)
{
struct clocksource *cs = (struct clocksource *)csin;
+ s64 delta = 0;
+ int sign;

- __this_cpu_write(csnow_mid, cs->read(cs));
+ if (inject_delay_shift_percpu >= 0) {
+ sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
+ delta = sign * NSEC_PER_SEC;
+ }
+ __this_cpu_write(csnow_mid, cs->read(cs) + delta);
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
--
2.9.5

2021-04-02 22:52:03

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

From: "Paul E. McKenney" <[email protected]>

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other. However, this problem has purportedly
been solved in the past ten years. Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization. How would anyone know?

This commit therefore adds CPU-to-CPU synchronization checking
for newly unstable clocksources that are marked with the new
CLOCK_SOURCE_VERIFY_PERCPU flag. Lists of desynchronized CPUs are
printed, with the caveat that if it is the reporting CPU that is itself
desynchronized, it will appear that all the other clocks are wrong.
Just like in real life.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
Signed-off-by: Paul E. McKenney <[email protected]>
---
arch/x86/kernel/kvmclock.c | 2 +-
arch/x86/kernel/tsc.c | 3 +-
include/linux/clocksource.h | 2 +-
kernel/time/clocksource.c | 73 +++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1fc0962..97eeaf1 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -169,7 +169,7 @@ struct clocksource kvm_clock = {
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_VERIFY_PERCPU,
.enable = kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index f70dffc..5628917 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1151,7 +1151,8 @@ static struct clocksource clocksource_tsc = {
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VERIFY_PERCPU,
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
.enable = tsc_cs_enable,
.resume = tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 86d143d..83a3ebf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -131,7 +131,7 @@ struct clocksource {
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3f734c6..663bc53 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -211,6 +211,78 @@ static void clocksource_watchdog_inject_delay(void)
WARN_ON_ONCE(injectfail < 0);
}

+static struct clocksource *clocksource_verify_work_cs;
+static DEFINE_PER_CPU(u64, csnow_mid);
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ __this_cpu_write(csnow_mid, cs->read(cs));
+}
+
+static void clocksource_verify_percpu_wq(struct work_struct *unused)
+{
+ int cpu;
+ struct clocksource *cs;
+ int64_t cs_nsec;
+ u64 csnow_begin;
+ u64 csnow_end;
+ u64 delta;
+
+ cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
+ if (WARN_ON_ONCE(!cs))
+ return;
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
+ cs->name, smp_processor_id());
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ csnow_begin = cs->read(cs);
+ smp_call_function(clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
+ if ((s64)delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ }
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind),
+ smp_processor_id(), cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec, cs->name);
+ }
+ smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
+}
+
+static DECLARE_WORK(clocksource_verify_work, clocksource_verify_percpu_wq);
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ if (!(cs->flags & CLOCK_SOURCE_VERIFY_PERCPU))
+ return;
+ if (smp_load_acquire(&clocksource_verify_work_cs)) { // pairs with release.
+ pr_warn("Previous clocksource synchronization still in flight.\n");
+ return;
+ }
+ smp_store_release(&clocksource_verify_work_cs, cs); //pairs with acquire.
+ queue_work(system_highpri_wq, &clocksource_verify_work);
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
struct clocksource *cs;
@@ -284,6 +356,7 @@ static void clocksource_watchdog(struct timer_list *unused)
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
+ clocksource_verify_percpu(cs);
__clocksource_unstable(cs);
continue;
}
--
2.9.5

2021-04-02 22:53:02

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH v7 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

From: "Paul E. McKenney" <[email protected]>

Although smp_call_function() has the advantage of simplicity, using
it to check for cross-CPU clock desynchronization means that any CPU
being slow reduces the sensitivity of the checking across all CPUs.
And it is not uncommon for smp_call_function() latencies to be in the
hundreds of microseconds.

This commit therefore switches to smp_call_function_single(), so that
delays from a given CPU affect only those measurements involving that
particular CPU.

Cc: John Stultz <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Stephen Boyd <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Marc Zyngier <[email protected]>
Cc: Andi Kleen <[email protected]>
Reported-by: Chris Mason <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
---
kernel/time/clocksource.c | 41 +++++++++++++++++++++++++----------------
1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index df48416..4161c84 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -214,7 +214,7 @@ static void clocksource_watchdog_inject_delay(void)
}

static struct clocksource *clocksource_verify_work_cs;
-static DEFINE_PER_CPU(u64, csnow_mid);
+static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;

@@ -228,7 +228,7 @@ static void clocksource_verify_one_cpu(void *csin)
sign = ((smp_processor_id() >> inject_delay_shift_percpu) & 0x1) * 2 - 1;
delta = sign * NSEC_PER_SEC;
}
- __this_cpu_write(csnow_mid, cs->read(cs) + delta);
+ csnow_mid = cs->read(cs) + delta;
}

static void clocksource_verify_percpu_wq(struct work_struct *unused)
@@ -236,9 +236,12 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
int cpu;
struct clocksource *cs;
int64_t cs_nsec;
+ int64_t cs_nsec_max;
+ int64_t cs_nsec_min;
u64 csnow_begin;
u64 csnow_end;
- u64 delta;
+ s64 delta;
+ bool firsttime = 1;

cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
if (WARN_ON_ONCE(!cs))
@@ -247,19 +250,28 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
cs->name, smp_processor_id());
cpumask_clear(&cpus_ahead);
cpumask_clear(&cpus_behind);
- csnow_begin = cs->read(cs);
- smp_call_function(clocksource_verify_one_cpu, cs, 1);
- csnow_end = cs->read(cs);
+ preempt_disable();
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- delta = (per_cpu(csnow_mid, cpu) - csnow_begin) & cs->mask;
- if ((s64)delta < 0)
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_behind);
- delta = (csnow_end - per_cpu(csnow_mid, cpu)) & cs->mask;
- if ((s64)delta < 0)
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (firsttime || cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (firsttime || cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ firsttime = 0;
}
+ preempt_enable();
if (!cpumask_empty(&cpus_ahead))
pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_ahead),
@@ -268,12 +280,9 @@ static void clocksource_verify_percpu_wq(struct work_struct *unused)
pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
cpumask_pr_args(&cpus_behind),
smp_processor_id(), cs->name);
- if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)) {
- delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
- cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
- pr_warn(" CPU %d duration %lldns for clocksource %s.\n",
- smp_processor_id(), cs_nsec, cs->name);
- }
+ if (!firsttime && (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind)))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ smp_processor_id(), cs_nsec_min, cs_nsec_max, cs->name);
smp_store_release(&clocksource_verify_work_cs, NULL); // pairs with acquire.
}

--
2.9.5

2021-04-10 08:03:19

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13

On Fri, Apr 02 2021 at 15:48, Paul E. McKenney wrote:
> Hello!
>
> If there is a sufficient delay between reading the watchdog clock and the
> clock under test, the clock under test will be marked unstable through no
> fault of its own. This series checks for this, doing limited retries
> to get a good set of clock reads. If the clock is marked unstable
> and is marked as being per-CPU, cross-CPU synchronization is checked.
> This series also provides delay injection, which may be enabled via
> kernel boot parameters to test the checking for delays.
>
> Note that "sufficient delay" can be provided by SMIs, NMIs, and of course
> vCPU preemption.

I buy the vCPU preemption part and TBH guests should not have that
watchdog thing active at all for exactly this reason.

SMI, NMI injecting 62.5ms delay? If that happens then the performance of
the clocksource is the least of your worries.

Thanks,

tglx

2021-04-10 08:42:49

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 2/5] clocksource: Retry clock read if long delays detected

On Fri, Apr 02 2021 at 15:49, paulmck wrote:
> This commit therefore re-reads the watchdog clock on either side of

'This commit' is not any better than 'This patch' and this sentence
makes no sense. I might be missing something, but how exactly does "the
commit" re-read the watchdog clock?

git grep 'This patch' Documentation/process/

> the read from the clock under test. If the watchdog clock shows an
> +retry:
> local_irq_disable();
> - csnow = cs->read(cs);
> - clocksource_watchdog_inject_delay();
> wdnow = watchdog->read(watchdog);
> + clocksource_watchdog_inject_delay();
> + csnow = cs->read(cs);
> + wdagain = watchdog->read(watchdog);
> local_irq_enable();
> + delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
> + wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);

That variable naming is confusing as hell. This is about the delta and
not about the second readout of the watchdog.

> + if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {

How exactly is this going negative especially with clocksources which
have a limited bitwidth? See clocksource_delta().

> + wderr_nsec = wdagain_nsec;
> + if (nretries++ < max_read_retries)
> + goto retry;
> + }
> + if (nretries)
> + pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
> + smp_processor_id(), watchdog->name, wderr_nsec, nretries);

Lacks curly braces around the pr_warn() simply because it's not a single
line. Breaks my parser :)

But if this ever happens to exceed max_read_retries, then what's the
point of continuing at all? The data is known to be crap already.

> /* Clocksource initialized ? */
> if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||

Thanks,

tglx

2021-04-10 09:01:20

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Fri, Apr 02 2021 at 15:49, paulmck wrote:
>
> +static void clocksource_verify_percpu_wq(struct work_struct *unused)
> +{
> + int cpu;
> + struct clocksource *cs;
> + int64_t cs_nsec;
> + u64 csnow_begin;
> + u64 csnow_end;
> + u64 delta;

Please use reverse fir tree ordering and stick variables of the same
type together:

u64 csnow_begin, csnow_end, delta;
struct clocksource *cs;
s64 cs_nsec;
int cpu;
> +
> + cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release

Please don't use tail comments. They are a horrible distraction.

> + if (WARN_ON_ONCE(!cs))
> + return;
> + pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
> + cs->name, smp_processor_id());
> + cpumask_clear(&cpus_ahead);
> + cpumask_clear(&cpus_behind);
> + csnow_begin = cs->read(cs);

So this is invoked via work and the actual clocksource change is done
via work too. Once the clocksource is not longer actively used for
timekeeping it can go away. What's guaranteeing that this runs prior to
the clocksource change and 'cs' is valid throughout this function?

> + queue_work(system_highpri_wq, &clocksource_verify_work);

This does not guarantee anything. So why does this need an extra work
function which is scheduled seperately?

Thanks,

tglx

2021-04-10 09:05:37

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

On Fri, Apr 02 2021 at 15:49, paulmck wrote:
> From: "Paul E. McKenney" <[email protected]>
>
> Although smp_call_function() has the advantage of simplicity, using
> it to check for cross-CPU clock desynchronization means that any CPU
> being slow reduces the sensitivity of the checking across all CPUs.
> And it is not uncommon for smp_call_function() latencies to be in the
> hundreds of microseconds.
>
> This commit therefore switches to smp_call_function_single(), so that
> delays from a given CPU affect only those measurements involving that
> particular CPU.

Is there any reason I'm missing why this is not done right in patch 3/5
which introduces this synchronization check?

Thanks,

tglx


2021-04-10 23:30:47

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13

On Sat, Apr 10, 2021 at 10:01:58AM +0200, Thomas Gleixner wrote:
> On Fri, Apr 02 2021 at 15:48, Paul E. McKenney wrote:
> > Hello!
> >
> > If there is a sufficient delay between reading the watchdog clock and the
> > clock under test, the clock under test will be marked unstable through no
> > fault of its own. This series checks for this, doing limited retries
> > to get a good set of clock reads. If the clock is marked unstable
> > and is marked as being per-CPU, cross-CPU synchronization is checked.
> > This series also provides delay injection, which may be enabled via
> > kernel boot parameters to test the checking for delays.
> >
> > Note that "sufficient delay" can be provided by SMIs, NMIs, and of course
> > vCPU preemption.
>
> I buy the vCPU preemption part and TBH guests should not have that
> watchdog thing active at all for exactly this reason.

Agreed, one approach is to enable the clocksource watchdog only in
the hypervisor, and have some action on the guests triggered when the
host detects clock skew.

This works quite well, at least until something breaks in a way that
messes up clock reads from the guest but not from the host. And I
am sure that any number of hardware guys will tell me that this just
isn't possible, but if failing hardware operated according to their
expectations, that hardware wouldn't be considered to be failing.
Or it wouldn't be hardware, firmware, or clock-driver bringup, as the
case may be.

> SMI, NMI injecting 62.5ms delay? If that happens then the performance of
> the clocksource is the least of your worries.

I was kind of hoping that you would tell me why the skew must be all the
way up to 62.5ms before the clock is disabled. The watchdog currently
is quite happy with more than 10% skew between clocks.

100HZ clocks or some such?

Thanx, Paul

2021-04-10 23:53:09

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 2/5] clocksource: Retry clock read if long delays detected

On Sat, Apr 10, 2021 at 10:41:21AM +0200, Thomas Gleixner wrote:
> On Fri, Apr 02 2021 at 15:49, paulmck wrote:
> > This commit therefore re-reads the watchdog clock on either side of
>
> 'This commit' is not any better than 'This patch' and this sentence
> makes no sense. I might be missing something, but how exactly does "the
> commit" re-read the watchdog clock?
>
> git grep 'This patch' Documentation/process/

I will rework this.

> > the read from the clock under test. If the watchdog clock shows an
> > +retry:
> > local_irq_disable();
> > - csnow = cs->read(cs);
> > - clocksource_watchdog_inject_delay();
> > wdnow = watchdog->read(watchdog);
> > + clocksource_watchdog_inject_delay();
> > + csnow = cs->read(cs);
> > + wdagain = watchdog->read(watchdog);
> > local_irq_enable();
> > + delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
> > + wdagain_nsec = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
>
> That variable naming is confusing as hell. This is about the delta and
> not about the second readout of the watchdog.

How about wdagain_delta?

> > + if (wdagain_nsec < 0 || wdagain_nsec > WATCHDOG_MAX_SKEW) {
>
> How exactly is this going negative especially with clocksources which
> have a limited bitwidth? See clocksource_delta().

I thought that I had actually seen this happen, though it is of course
quite possible that it was due to a bug in an early version of my changes.

What I will do is to remove the less-than comparison and test with
a WARN_ON(). If that doesn't trigger, I will drop the WARN_ON().
If it does trigger, I will figure out why.

> > + wderr_nsec = wdagain_nsec;
> > + if (nretries++ < max_read_retries)
> > + goto retry;
> > + }
> > + if (nretries)
> > + pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
> > + smp_processor_id(), watchdog->name, wderr_nsec, nretries);
>
> Lacks curly braces around the pr_warn() simply because it's not a single
> line. Breaks my parser :)

OK, will fix. ;-)

> But if this ever happens to exceed max_read_retries, then what's the
> point of continuing at all? The data is known to be crap already.

If there are four delays in four consecutive attempts to read out the
clocks -- with interrupts disabled -- then it is quite possible that the
delay is actually caused by the attempt to read the clock. In which case,
marking the clock bad due to skew is a reasonable choice.

On the other hand, if the four consecutive delays are caused by something
like an NMI storm, then as you say, you have worse problems.

Thanx, Paul

> > /* Clocksource initialized ? */
> > if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
>
> Thanks,
>
> tglx

2021-04-11 00:25:27

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Sat, Apr 10, 2021 at 11:00:25AM +0200, Thomas Gleixner wrote:
> On Fri, Apr 02 2021 at 15:49, paulmck wrote:
> >
> > +static void clocksource_verify_percpu_wq(struct work_struct *unused)
> > +{
> > + int cpu;
> > + struct clocksource *cs;
> > + int64_t cs_nsec;
> > + u64 csnow_begin;
> > + u64 csnow_end;
> > + u64 delta;
>
> Please use reverse fir tree ordering and stick variables of the same
> type together:
>
> u64 csnow_begin, csnow_end, delta;
> struct clocksource *cs;
> s64 cs_nsec;
> int cpu;

Will do.

> > +
> > + cs = smp_load_acquire(&clocksource_verify_work_cs); // pairs with release
>
> Please don't use tail comments. They are a horrible distraction.

I will remove it.

> > + if (WARN_ON_ONCE(!cs))
> > + return;
> > + pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
> > + cs->name, smp_processor_id());
> > + cpumask_clear(&cpus_ahead);
> > + cpumask_clear(&cpus_behind);
> > + csnow_begin = cs->read(cs);
>
> So this is invoked via work and the actual clocksource change is done
> via work too. Once the clocksource is not longer actively used for
> timekeeping it can go away. What's guaranteeing that this runs prior to
> the clocksource change and 'cs' is valid throughout this function?

From what I can see, cs->read() doesn't care whether or not the
clocksource has been marked unstable. So it should be OK to call
cs->read() before, during, or after the call to __clocksource_unstable().

Also, this is only done on clocksources marked CLOCK_SOURCE_VERIFY_PERCPU,
so any clocksource that did not like cs->read() being invoked during
or after the call to __clocksource_unstable() should leave off the
CLOCK_SOURCE_VERIFY_PERCPU bit.

Or did I take a wrong turn somewhere in the pointers to functions?

> > + queue_work(system_highpri_wq, &clocksource_verify_work);
>
> This does not guarantee anything. So why does this need an extra work
> function which is scheduled seperately?

Because I was concerned about doing smp_call_function() while holding
watchdog_lock, which is also acquired elsewhere using spin_lock_irqsave().
And it still looks like on x86 that spin_lock_irqsave() spins with irqs
disabled, which could result in deadlock. The smp_call_function_single()
would wait for the target CPU to enable interrupts, which would not
happen until after the smp_call_function_single() returned due to its
caller holding watchdog_lock.

Or is there something that I am missing that prevents this deadlock
from occurring?

Thanx, Paul

2021-04-11 00:26:01

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 5/5] clocksource: Do pairwise clock-desynchronization checking

On Sat, Apr 10, 2021 at 11:04:54AM +0200, Thomas Gleixner wrote:
> On Fri, Apr 02 2021 at 15:49, paulmck wrote:
> > From: "Paul E. McKenney" <[email protected]>
> >
> > Although smp_call_function() has the advantage of simplicity, using
> > it to check for cross-CPU clock desynchronization means that any CPU
> > being slow reduces the sensitivity of the checking across all CPUs.
> > And it is not uncommon for smp_call_function() latencies to be in the
> > hundreds of microseconds.
> >
> > This commit therefore switches to smp_call_function_single(), so that
> > delays from a given CPU affect only those measurements involving that
> > particular CPU.
>
> Is there any reason I'm missing why this is not done right in patch 3/5
> which introduces this synchronization check?

None at all. I will merge this into 3/5.

Thanx, Paul

2021-04-11 10:35:30

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Sat, Apr 10 2021 at 17:20, Paul E. McKenney wrote:
> On Sat, Apr 10, 2021 at 11:00:25AM +0200, Thomas Gleixner wrote:
>> > + if (WARN_ON_ONCE(!cs))
>> > + return;
>> > + pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
>> > + cs->name, smp_processor_id());
>> > + cpumask_clear(&cpus_ahead);
>> > + cpumask_clear(&cpus_behind);
>> > + csnow_begin = cs->read(cs);
>>
>> So this is invoked via work and the actual clocksource change is done
>> via work too. Once the clocksource is not longer actively used for
>> timekeeping it can go away. What's guaranteeing that this runs prior to
>> the clocksource change and 'cs' is valid throughout this function?
>
> From what I can see, cs->read() doesn't care whether or not the
> clocksource has been marked unstable. So it should be OK to call
> cs->read() before, during, or after the call to __clocksource_unstable().
>
> Also, this is only done on clocksources marked CLOCK_SOURCE_VERIFY_PERCPU,
> so any clocksource that did not like cs->read() being invoked during
> or after the call to __clocksource_unstable() should leave off the
> CLOCK_SOURCE_VERIFY_PERCPU bit.
>
> Or did I take a wrong turn somewhere in the pointers to functions?

Right. cs->read() does not care, but what guarantees that cs is valid
and not freed yet? It's not an issue with TSC and KVMCLOCK, but
conceptually the following is possible:

watchdog()
   queue_work(synccheck);
   queue_work(clocksource_change);

work:
   synccheck()                    clocksource_change()
     preemption                     ...
     ...
                                    some_other_code():
                                      unregister_clocksource(cs)
                                      free(cs)
     cs->read()  <- UAF

>> > + queue_work(system_highpri_wq, &clocksource_verify_work);
>>
>> This does not guarantee anything. So why does this need an extra work
>> function which is scheduled seperately?
>
> Because I was concerned about doing smp_call_function() while holding
> watchdog_lock, which is also acquired elsewhere using spin_lock_irqsave().
> And it still looks like on x86 that spin_lock_irqsave() spins with irqs
> disabled, which could result in deadlock. The smp_call_function_single()
> would wait for the target CPU to enable interrupts, which would not
> happen until after the smp_call_function_single() returned due to its
> caller holding watchdog_lock.
>
> Or is there something that I am missing that prevents this deadlock
> from occurring?

The unstable mechanism is:

watchdog()
   __clocksource_unstable()
     schedule_work(&watchdog_work);

watchdog_work()
   kthread_run(clocksource_watchdog_thread);

cs_watchdog_thread()
   mutex_lock(&clocksource_mutex);
   if (__clocksource_watchdog_kthread())
      clocksource_select();
   mutex_unlock(&clocksource_mutex);

So what prevents you from doing that right in watchdog_work() or even in
cs_watchdog_thread() properly ordered against the actual clocksource
switch?

Hmm?

Thanks,

tglx

2021-04-11 10:59:16

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13

On Sat, Apr 10 2021 at 16:26, Paul E. McKenney wrote:
> On Sat, Apr 10, 2021 at 10:01:58AM +0200, Thomas Gleixner wrote:
>> On Fri, Apr 02 2021 at 15:48, Paul E. McKenney wrote:
>> I buy the vCPU preemption part and TBH guests should not have that
>> watchdog thing active at all for exactly this reason.
>
> Agreed, one approch is to enable the the clocksource watchdog only in
> the hypervisor, and have some action on the guests triggered when the
> host detects clock skew.
>
> This works quite well, at least until something breaks in a way that
> messes up clock reads from the guest but not from the host. And I
> am sure that any number of hardware guys will tell me that this just
> isn't possible, but if failing hardware operated according to their
> expectations, that hardware wouldn't be considered to be failing.
> Or it wouldn't be hardware, firmware, or clock-driver bringup, as the
> case may be.

Don't tell me. The fact that this code exists at all is a horror on its
own.

>> SMI, NMI injecting 62.5ms delay? If that happens then the performance of
>> the clocksource is the least of your worries.
>
> I was kind of hoping that you would tell me why the skew must be all the
> way up to 62.5ms before the clock is disabled. The watchdog currently
> is quite happy with more than 10% skew between clocks.
>
> 100HZ clocks or some such?

Histerical raisins. When the clocksource watchdog was introduced it
replaced an x86-specific validation which was jiffies based. I have faint
memories that we wanted to have at least jiffies based checks preserved
in absence of other hardware, which had other problems and we gave up on
it. But obviously nobody thought about revisiting the threshold.

Yes, it's way too big. The slowest watchdog frequency on x86 is ~3.5 MHz
(ACPI PMtimer). Don't know about the reference frequency on MIPS which
is the only other user of this.

Thanks,

tglx

2021-04-11 17:53:09

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Sun, Apr 11, 2021 at 12:33:44PM +0200, Thomas Gleixner wrote:
> On Sat, Apr 10 2021 at 17:20, Paul E. McKenney wrote:
> > On Sat, Apr 10, 2021 at 11:00:25AM +0200, Thomas Gleixner wrote:
> >> > + if (WARN_ON_ONCE(!cs))
> >> > + return;
> >> > + pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
> >> > + cs->name, smp_processor_id());
> >> > + cpumask_clear(&cpus_ahead);
> >> > + cpumask_clear(&cpus_behind);
> >> > + csnow_begin = cs->read(cs);
> >>
> >> So this is invoked via work and the actual clocksource change is done
> >> via work too. Once the clocksource is not longer actively used for
> >> timekeeping it can go away. What's guaranteeing that this runs prior to
> >> the clocksource change and 'cs' is valid throughout this function?
> >
> > From what I can see, cs->read() doesn't care whether or not the
> > clocksource has been marked unstable. So it should be OK to call
> > cs->read() before, during, or after the call to __clocksource_unstable().
> >
> > Also, this is only done on clocksources marked CLOCK_SOURCE_VERIFY_PERCPU,
> > so any clocksource that did not like cs->read() being invoked during
> > or after the call to __clocksource_unstable() should leave off the
> > CLOCK_SOURCE_VERIFY_PERCPU bit.
> >
> > Or did I take a wrong turn somewhere in the pointers to functions?
>
> Right. cs->read() does not care, but what guarantees that cs is valid
> and not freed yet? It's not an issue with TSC and KVMCLOCK, but
> conceptually the following is possible:
>
> watchdog()
> queue_work(synccheck);
> queue_work(clocksource_change);
>
> work:
> synccheck() clocksource_change()
> preemption ...
> ...
> some_other_code():
> unregister_clocksource(cs)
> free(cs)
> cs->read() <- UAF

Got it, with the ingenic_tcu_init() function being a case in point.
It invokes clocksource_unregister() shortly followed by clk_put(), which,
if I found the correct clk_put(), can kfree() it.

Thank you!

> >> > + queue_work(system_highpri_wq, &clocksource_verify_work);
> >>
> >> This does not guarantee anything. So why does this need an extra work
> >> function which is scheduled seperately?
> >
> > Because I was concerned about doing smp_call_function() while holding
> > watchdog_lock, which is also acquired elsewhere using spin_lock_irqsave().
> > And it still looks like on x86 that spin_lock_irqsave() spins with irqs
> > disabled, which could result in deadlock. The smp_call_function_single()
> > would wait for the target CPU to enable interrupts, which would not
> > happen until after the smp_call_function_single() returned due to its
> > caller holding watchdog_lock.
> >
> > Or is there something that I am missing that prevents this deadlock
> > from occurring?
>
> The unstable mechanism is:
>
> watchdog()
> __clocksource_unstable()
> schedule_work(&watchdog_work);
>
> watchdog_work()
> kthread_run(clocksource_watchdog_thread);
>
> cs_watchdog_thread()
> mutex_lock(&clocksource_mutex);
> if (__clocksource_watchdog_kthread())
> clocksource_select();
> mutex_unlock(&clocksource_mutex);
>
> So what prevents you from doing that right in watchdog_work() or even in
> cs_watchdog_thread() properly ordered against the actual clocksource
> switch?
>
> Hmm?

My own confusion, apparently. :-/

So what I need to do is inline clocksource_verify_percpu_wq()
into clocksource_verify_percpu() and then move the call to
clocksource_verify_percpu() to __clocksource_watchdog_kthread(), right
before the existing call to list_del_init(). Will do!

Thanx, Paul

2021-04-11 19:35:27

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource] Do not mark clocks unstable due to delays for v5.13

On Sun, Apr 11, 2021 at 12:58:31PM +0200, Thomas Gleixner wrote:
> On Sat, Apr 10 2021 at 16:26, Paul E. McKenney wrote:
> > On Sat, Apr 10, 2021 at 10:01:58AM +0200, Thomas Gleixner wrote:
> >> On Fri, Apr 02 2021 at 15:48, Paul E. McKenney wrote:
> >> I buy the vCPU preemption part and TBH guests should not have that
> >> watchdog thing active at all for exactly this reason.
> >
> > Agreed, one approch is to enable the the clocksource watchdog only in
> > the hypervisor, and have some action on the guests triggered when the
> > host detects clock skew.
> >
> > This works quite well, at least until something breaks in a way that
> > messes up clock reads from the guest but not from the host. And I
> > am sure that any number of hardware guys will tell me that this just
> > isn't possible, but if failing hardware operated according to their
> > expectations, that hardware wouldn't be considered to be failing.
> > Or it wouldn't be hardware, firmware, or clock-driver bringup, as the
> > case may be.
>
> Don't tell me. The fact that this code exists at all is a horror on it's
> own.

Let's just say that I really did consider the option of just disabling
the watchdog for guest OSes, but it will likely be at least a few years
before per-CPU hardware clocks regain my trust. :-/

> >> SMI, NMI injecting 62.5ms delay? If that happens then the performance of
> >> the clocksource is the least of your worries.
> >
> > I was kind of hoping that you would tell me why the skew must be all the
> > way up to 62.5ms before the clock is disabled. The watchdog currently
> > is quite happy with more than 10% skew between clocks.
> >
> > 100HZ clocks or some such?
>
> Histerical raisins. When the clocksource watchdog was introduced it
> replaced a x86 specific validation which was jiffies based. I have faint
> memories that we wanted to have at least jiffies based checks preserved
> in absence of other hardware, which had other problems and we gave up on
> it. But obviously nobody thought about revisiting the threshold.
>
> Yes, it's way too big. The slowest watchdog frequency on x86 is ~3.5 Mhz
> (ACPI PMtimer). Don't know about the reference frequency on MIPS which
> is the only other user of this.

Whew! I will try reducing the permitted skew. My (perhaps naive) guess
is that with the delay rejection, there is no reason that it cannot
be decreased at least to 500us. If that goes well, I will send along
another patch.

Thanx, Paul

2021-04-12 04:25:29

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Sun, Apr 11, 2021 at 09:46:12AM -0700, Paul E. McKenney wrote:
> On Sun, Apr 11, 2021 at 12:33:44PM +0200, Thomas Gleixner wrote:
> > On Sat, Apr 10 2021 at 17:20, Paul E. McKenney wrote:
> > > On Sat, Apr 10, 2021 at 11:00:25AM +0200, Thomas Gleixner wrote:
> > >> > + if (WARN_ON_ONCE(!cs))
> > >> > + return;
> > >> > + pr_warn("Checking clocksource %s synchronization from CPU %d.\n",
> > >> > + cs->name, smp_processor_id());
> > >> > + cpumask_clear(&cpus_ahead);
> > >> > + cpumask_clear(&cpus_behind);
> > >> > + csnow_begin = cs->read(cs);
> > >>
> > >> So this is invoked via work and the actual clocksource change is done
> > >> via work too. Once the clocksource is not longer actively used for
> > >> timekeeping it can go away. What's guaranteeing that this runs prior to
> > >> the clocksource change and 'cs' is valid throughout this function?
> > >
> > > From what I can see, cs->read() doesn't care whether or not the
> > > clocksource has been marked unstable. So it should be OK to call
> > > cs->read() before, during, or after the call to __clocksource_unstable().
> > >
> > > Also, this is only done on clocksources marked CLOCK_SOURCE_VERIFY_PERCPU,
> > > so any clocksource that did not like cs->read() being invoked during
> > > or after the call to __clocksource_unstable() should leave off the
> > > CLOCK_SOURCE_VERIFY_PERCPU bit.
> > >
> > > Or did I take a wrong turn somewhere in the pointers to functions?
> >
> > Right. cs->read() does not care, but what guarantees that cs is valid
> > and not freed yet? It's not an issue with TSC and KVMCLOCK, but
> > conceptually the following is possible:
> >
> > watchdog()
> > queue_work(synccheck);
> > queue_work(clocksource_change);
> >
> > work:
> > synccheck() clocksource_change()
> > preemption ...
> > ...
> > some_other_code():
> > unregister_clocksource(cs)
> > free(cs)
> > cs->read() <- UAF
>
> Got it, with the ingenic_tcu_init() function being case in point.
> It invokes clcoksource_unregister() shortly followed by clk_put(), which,
> if I found the correct clk_put(), can kfree() it.
>
> Thank you!
>
> > >> > + queue_work(system_highpri_wq, &clocksource_verify_work);
> > >>
> > >> This does not guarantee anything. So why does this need an extra work
> > >> function which is scheduled seperately?
> > >
> > > Because I was concerned about doing smp_call_function() while holding
> > > watchdog_lock, which is also acquired elsewhere using spin_lock_irqsave().
> > > And it still looks like on x86 that spin_lock_irqsave() spins with irqs
> > > disabled, which could result in deadlock. The smp_call_function_single()
> > > would wait for the target CPU to enable interrupts, which would not
> > > happen until after the smp_call_function_single() returned due to its
> > > caller holding watchdog_lock.
> > >
> > > Or is there something that I am missing that prevents this deadlock
> > > from occurring?
> >
> > The unstable mechanism is:
> >
> > watchdog()
> > __clocksource_unstable()
> > schedule_work(&watchdog_work);
> >
> > watchdog_work()
> > kthread_run(clocksource_watchdog_thread);
> >
> > cs_watchdog_thread()
> > mutex_lock(&clocksource_mutex);
> > if (__clocksource_watchdog_kthread())
> > clocksource_select();
> > mutex_unlock(&clocksource_mutex);
> >
> > So what prevents you from doing that right in watchdog_work() or even in
> > cs_watchdog_thread() properly ordered against the actual clocksource
> > switch?
> >
> > Hmm?
>
> My own confusion, apparently. :-/
>
> So I need to is inline clocksource_verify_percpu_wq()
> into clocksource_verify_percpu() and then move the call to
> clocksource_verify_percpu() to __clocksource_watchdog_kthread(), right
> before the existing call to list_del_init(). Will do!

Except that this triggers the WARN_ON_ONCE() in smp_call_function_single()
due to interrupts being disabled across that list_del_init().

Possibilities include:

1. Figure out why interrupts must be disabled only sometimes while
holding watchdog_lock, in the hope that they need not be across
the entire critical section for __clocksource_watchdog_kthread().
As in:

local_irq_restore(flags);
clocksource_verify_percpu(cs);
local_irq_save(flags);

Trying this first with lockdep enabled. Might be spectacular.

2. Invoke clocksource_verify_percpu() from its original
location in clocksource_watchdog(), just before the call to
__clocksource_unstable(). This relies on the fact that
clocksource_watchdog() acquires watchdog_lock without
disabling interrupts.

3. Restrict CLOCK_SOURCE_VERIFY_PERCPU to clocksource structures
that are statically allocated, thus avoiding the use-after-free
problem. Rely on KASAN to enforce this restriction.

4. Add reference counting or some such to clock sources.

5. Your ideas here.

I will give this more thought, but #2 is looking pretty good at this point.

Thanx, Paul

2021-04-12 13:09:06

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Sun, Apr 11 2021 at 21:21, Paul E. McKenney wrote:
> On Sun, Apr 11, 2021 at 09:46:12AM -0700, Paul E. McKenney wrote:
>> So I need to is inline clocksource_verify_percpu_wq()
>> into clocksource_verify_percpu() and then move the call to
>> clocksource_verify_percpu() to __clocksource_watchdog_kthread(), right
>> before the existing call to list_del_init(). Will do!
>
> Except that this triggers the WARN_ON_ONCE() in smp_call_function_single()
> due to interrupts being disabled across that list_del_init().
>
> Possibilities include:
>
> 1. Figure out why interrupts must be disabled only sometimes while
> holding watchdog_lock, in the hope that they need not be across
> the entire critical section for __clocksource_watchdog_kthread().
> As in:
>
> local_irq_restore(flags);
> clocksource_verify_percpu(cs);
> local_irq_save(flags);
>
> Trying this first with lockdep enabled. Might be spectacular.

Yes, it's a possible deadlock against the watchdog timer firing ...

The reason for irqsave is again historical AFAICT and nobody bothered to
clean it up. spin_lock_bh() should be sufficient to serialize against
the watchdog timer, though I haven't looked at all possible scenarios.

> 2. Invoke clocksource_verify_percpu() from its original
> location in clocksource_watchdog(), just before the call to
> __clocksource_unstable(). This relies on the fact that
> clocksource_watchdog() acquires watchdog_lock without
> disabling interrupts.

That should be fine, but this might cause the softirq to 'run' for a
very long time which is not pretty either.

Aside of that, do we really need to check _all_ online CPUs? What you
are trying to figure out is whether the wreckage is CPU local or global,
right?

Wouldn't a shirt-sleeve approach of just querying _one_ CPU be good
enough? Either the other CPU has the same wreckage, then it's global or
it hasn't which points to a per CPU local issue.

Sure it does not catch the case where a subset (>1) of all CPUs is
affected, but I'm not seeing how that really buys us anything.

Thanks,

tglx

2021-04-13 06:24:01

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Mon, Apr 12, 2021 at 03:08:16PM +0200, Thomas Gleixner wrote:
> On Sun, Apr 11 2021 at 21:21, Paul E. McKenney wrote:
> > On Sun, Apr 11, 2021 at 09:46:12AM -0700, Paul E. McKenney wrote:
> >> So I need to is inline clocksource_verify_percpu_wq()
> >> into clocksource_verify_percpu() and then move the call to
> >> clocksource_verify_percpu() to __clocksource_watchdog_kthread(), right
> >> before the existing call to list_del_init(). Will do!
> >
> > Except that this triggers the WARN_ON_ONCE() in smp_call_function_single()
> > due to interrupts being disabled across that list_del_init().
> >
> > Possibilities include:
> >
> > 1. Figure out why interrupts must be disabled only sometimes while
> > holding watchdog_lock, in the hope that they need not be across
> > the entire critical section for __clocksource_watchdog_kthread().
> > As in:
> >
> > local_irq_restore(flags);
> > clocksource_verify_percpu(cs);
> > local_irq_save(flags);
> >
> > Trying this first with lockdep enabled. Might be spectacular.
>
> Yes, it's a possible deadlock against the watchdog timer firing ...

And lockdep most emphatically agrees with you. ;-)

> The reason for irqsave is again historical AFAICT and nobody bothered to
> clean it up. spin_lock_bh() should be sufficient to serialize against
> the watchdog timer, though I haven't looked at all possible scenarios.

Though if BH is disabled, there is not so much advantage to
invoking it from __clocksource_watchdog_kthread(). Might as
well just invoke it directly from clocksource_watchdog().

> > 2. Invoke clocksource_verify_percpu() from its original
> > location in clocksource_watchdog(), just before the call to
> > __clocksource_unstable(). This relies on the fact that
> > clocksource_watchdog() acquires watchdog_lock without
> > disabling interrupts.
>
> That should be fine, but this might cause the softirq to 'run' for a
> very long time which is not pretty either.
>
> Aside of that, do we really need to check _all_ online CPUs? What you
> are trying to figure out is whether the wreckage is CPU local or global,
> right?
>
> Wouldn't a shirt-sleeve approach of just querying _one_ CPU be good
> enough? Either the other CPU has the same wreckage, then it's global or
> it hasn't which points to a per CPU local issue.
>
> Sure it does not catch the case where a subset (>1) of all CPUs is
> affected, but I'm not seing how that really buys us anything.

Good point! My thought is to randomly pick eight CPUs to keep the
duration reasonable while having a good chance of hitting "interesting"
CPU choices in multicore and multisocket systems.

However, if a hard-to-reproduce problem occurred, it would be good to take
the hit and scan all the CPUs. Additionally, there are some workloads
for which the switch from TSC to HPET is fatal anyway due to increased
overhead. For these workloads, the full CPU scan is no additional pain.

So I am thinking in terms of a default that probes eight randomly selected
CPUs without worrying about duplicates (as in there would be some chance
that fewer CPUs would actually be probed), but with a boot-time flag
that does all CPUs. I would add the (default) random selection as a
separate patch.
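
Something along the lines of the following rough sketch, where the helper
name and the prandom_u32()-based selection are placeholders rather than
what the eventual patch will necessarily use, and where duplicate picks
are simply tolerated:

static void clocksource_verify_some_cpus(struct clocksource *cs)
{
        u64 csnow_begin, csnow_end;
        int cpu, i;

        for (i = 0; i < 8; i++) {
                cpu = prandom_u32() % nr_cpu_ids;
                if (!cpu_online(cpu) || cpu == smp_processor_id())
                        continue;
                csnow_begin = cs->read(cs);
                smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
                csnow_end = cs->read(cs);
                /* Compare csnow_mid against csnow_begin/csnow_end as in 3/5. */
        }
}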

I will send a new series out later today, Pacific Time.

Thanx, Paul

2021-04-13 07:00:37

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Mon, Apr 12, 2021 at 08:54:03PM +0200, Thomas Gleixner wrote:
> Paul,
>
> On Mon, Apr 12 2021 at 11:20, Paul E. McKenney wrote:
> > On Mon, Apr 12, 2021 at 03:08:16PM +0200, Thomas Gleixner wrote:
> >> The reason for irqsave is again historical AFAICT and nobody bothered to
> >> clean it up. spin_lock_bh() should be sufficient to serialize against
> >> the watchdog timer, though I haven't looked at all possible scenarios.
> >
> > Though if BH is disabled, there is not so much advantage to
> > invoking it from __clocksource_watchdog_kthread(). Might as
> > well just invoke it directly from clocksource_watchdog().
> >
> >> > 2. Invoke clocksource_verify_percpu() from its original
> >> > location in clocksource_watchdog(), just before the call to
> >> > __clocksource_unstable(). This relies on the fact that
> >> > clocksource_watchdog() acquires watchdog_lock without
> >> > disabling interrupts.
> >>
> >> That should be fine, but this might cause the softirq to 'run' for a
> >> very long time which is not pretty either.
> >>
> >> Aside of that, do we really need to check _all_ online CPUs? What you
> >> are trying to figure out is whether the wreckage is CPU local or global,
> >> right?
> >>
> >> Wouldn't a shirt-sleeve approach of just querying _one_ CPU be good
> >> enough? Either the other CPU has the same wreckage, then it's global or
> >> it hasn't which points to a per CPU local issue.
> >>
> >> Sure it does not catch the case where a subset (>1) of all CPUs is
> >> affected, but I'm not seing how that really buys us anything.
> >
> > Good point! My thought is to randomly pick eight CPUs to keep the
> > duration reasonable while having a good chance of hitting "interesting"
> > CPU choices in multicore and multisocket systems.
> >
> > However, if a hard-to-reproduce problem occurred, it would be good to take
> > the hit and scan all the CPUs. Additionally, there are some workloads
> > for which the switch from TSC to HPET is fatal anyway due to increased
> > overhead. For these workloads, the full CPU scan is no additional pain.
> >
> > So I am thinking in terms of a default that probes eight randomly selected
> > CPUs without worrying about duplicates (as in there would be some chance
> > that fewer CPUs would actually be probed), but with a boot-time flag
> > that does all CPUs. I would add the (default) random selection as a
> > separate patch.
>
> You can't do without making it complex, right? Keep it simple is not an
> option for a RCU hacker it seems :)

But it was simple! It just hit all the CPUs.

However, you (quite rightly) pointed out that this simple approach had
a few shortcomings. ;-)

> > I will send a new series out later today, Pacific Time.
>
> Can you do me a favour and send it standalone and not as yet another
> reply to this existing thread maze. A trivial lore link to the previous
> version gives enough context.

Will do!

Of course, it turns out that lockdep also doesn't like waited-on
smp_call_function_single() invocations from timer handlers,
so I am currently looking at other options for dealing with that
potential use-after-free. I am starting to like the looks of "only set
CLOCK_SOURCE_VERIFY_PERCPU on statically allocated clocksource structures
and let KASAN enforce this restriction", but I have not quite given up
on making it more general.

Thanx, Paul

2021-04-13 07:06:19

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Mon, Apr 12 2021 at 12:57, Paul E. McKenney wrote:
> On Mon, Apr 12, 2021 at 08:54:03PM +0200, Thomas Gleixner wrote:
>> > I will send a new series out later today, Pacific Time.
>>
>> Can you do me a favour and send it standalone and not as yet another
>> reply to this existing thread maze. A trivial lore link to the previous
>> version gives enough context.
>
> Will do!
>
> Of course, it turns out that lockdep also doesn't like waited-on
> smp_call_function_single() invocations from timer handlers,
> so I am currently looking at other options for dealing with that
> potential use-after-free. I am starting to like the looks of "only set
> CLOCK_SOURCE_VERIFY_PERCPU on statically allocated clocksource structures
> and let KASAN enforce this restriction", but I have not quite given up
> on making it more general.

The simplest point is in the thread under the clocksource_mutex which
prevents anything from vanishing under your feet.

Thanks,

tglx

2021-04-13 11:01:16

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

Paul,

On Mon, Apr 12 2021 at 11:20, Paul E. McKenney wrote:
> On Mon, Apr 12, 2021 at 03:08:16PM +0200, Thomas Gleixner wrote:
>> The reason for irqsave is again historical AFAICT and nobody bothered to
>> clean it up. spin_lock_bh() should be sufficient to serialize against
>> the watchdog timer, though I haven't looked at all possible scenarios.
>
> Though if BH is disabled, there is not so much advantage to
> invoking it from __clocksource_watchdog_kthread(). Might as
> well just invoke it directly from clocksource_watchdog().
>
>> > 2. Invoke clocksource_verify_percpu() from its original
>> > location in clocksource_watchdog(), just before the call to
>> > __clocksource_unstable(). This relies on the fact that
>> > clocksource_watchdog() acquires watchdog_lock without
>> > disabling interrupts.
>>
>> That should be fine, but this might cause the softirq to 'run' for a
>> very long time which is not pretty either.
>>
>> Aside of that, do we really need to check _all_ online CPUs? What you
>> are trying to figure out is whether the wreckage is CPU local or global,
>> right?
>>
>> Wouldn't a shirt-sleeve approach of just querying _one_ CPU be good
>> enough? Either the other CPU has the same wreckage, then it's global or
>> it hasn't which points to a per CPU local issue.
>>
>> Sure it does not catch the case where a subset (>1) of all CPUs is
>> affected, but I'm not seeing how that really buys us anything.
>
> Good point! My thought is to randomly pick eight CPUs to keep the
> duration reasonable while having a good chance of hitting "interesting"
> CPU choices in multicore and multisocket systems.
>
> However, if a hard-to-reproduce problem occurred, it would be good to take
> the hit and scan all the CPUs. Additionally, there are some workloads
> for which the switch from TSC to HPET is fatal anyway due to increased
> overhead. For these workloads, the full CPU scan is no additional pain.
>
> So I am thinking in terms of a default that probes eight randomly selected
> CPUs without worrying about duplicates (as in there would be some chance
> that fewer CPUs would actually be probed), but with a boot-time flag
> that does all CPUs. I would add the (default) random selection as a
> separate patch.

You can't do without making it complex, right? Keep it simple is not an
option for an RCU hacker, it seems :)

> I will send a new series out later today, Pacific Time.

Can you do me a favour and send it standalone and not as yet another
reply to this existing thread maze. A trivial lore link to the previous
version gives enough context.

Thanks,

tglx

2021-04-13 11:56:48

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Mon, Apr 12, 2021 at 10:37:10PM +0200, Thomas Gleixner wrote:
> On Mon, Apr 12 2021 at 12:57, Paul E. McKenney wrote:
> > On Mon, Apr 12, 2021 at 08:54:03PM +0200, Thomas Gleixner wrote:
> >> > I will send a new series out later today, Pacific Time.
> >>
> >> Can you do me a favour and send it standalone and not as yet another
> >> reply to this existing thread maze. A trivial lore link to the previous
> >> version gives enough context.
> >
> > Will do!
> >
> > Of course, it turns out that lockdep also doesn't like waited-on
> > smp_call_function_single() invocations from timer handlers,
> > so I am currently looking at other options for dealing with that
> > potential use-after-free. I am starting to like the looks of "only set
> > CLOCK_SOURCE_VERIFY_PERCPU on statically allocated clocksource structures
> > and let KASAN enforce this restriction", but I have not quite given up
> > on making it more general.
>
> The simplest point is in the thread under the clocksource_mutex which
> prevents anything from vanishing under your feet.

And lockdep is -much- happier with the setup shown below, so thank
you again!

Thanx, Paul

------------------------------------------------------------------------

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f047c6cb056c..34dc38b6b923 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -519,6 +515,13 @@ static int __clocksource_watchdog_kthread(void)
         unsigned long flags;
         int select = 0;
 
+        /* Do any required per-CPU skew verification. */
+        list_for_each_entry(cs, &watchdog_list, wd_list) {
+                if ((cs->flags & (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU)) ==
+                    (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU))
+                        clocksource_verify_percpu(cs);
+        }
+
         spin_lock_irqsave(&watchdog_lock, flags);
         list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {

2021-04-14 08:14:28

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

Paul,

On Mon, Apr 12 2021 at 16:18, Paul E. McKenney wrote:
> On Mon, Apr 12, 2021 at 10:37:10PM +0200, Thomas Gleixner wrote:
>> On Mon, Apr 12 2021 at 12:57, Paul E. McKenney wrote:
>> > On Mon, Apr 12, 2021 at 08:54:03PM +0200, Thomas Gleixner wrote:
>> >> > I will send a new series out later today, Pacific Time.
>> >>
>> >> Can you do me a favour and send it standalone and not as yet another
>> >> reply to this existing thread maze. A trivial lore link to the previous
>> >> version gives enough context.
>> >
>> > Will do!
>> >
>> > Of course, it turns out that lockdep also doesn't like waited-on
>> > smp_call_function_single() invocations from timer handlers,
>> > so I am currently looking at other options for dealing with that
>> > potential use-after-free. I am starting to like the looks of "only set
>> > CLOCK_SOURCE_VERIFY_PERCPU on statically allocated clocksource structures
>> > and let KASAN enforce this restriction", but I have not quite given up
>> > on making it more general.
>>
>> The simplest point is in the thread under the clocksource_mutex which
>> prevents anything from vanishing under your feet.
>
> And lockdep is -much- happier with the setup shown below, so thank
> you again!

But it is too simple now :) ...

> diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
> index f047c6cb056c..34dc38b6b923 100644
> --- a/kernel/time/clocksource.c
> +++ b/kernel/time/clocksource.c
> @@ -519,6 +515,13 @@ static int __clocksource_watchdog_kthread(void)
>          unsigned long flags;
>          int select = 0;
>
> +        /* Do any required per-CPU skew verification. */
> +        list_for_each_entry(cs, &watchdog_list, wd_list) {
> +                if ((cs->flags & (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU)) ==
> +                    (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU))
> +                        clocksource_verify_percpu(cs);
> +        }

because that list is _NOT_ protected by the clocksource_mutex as you
noticed yourself already.

But you don't have to walk that list at all because the only interesting
thing is the currently active clocksource, which is about to be changed
in case the watchdog marked it unstable and cannot be changed by any
other code concurrently because clocksource_mutex is held.

So all you need is:

        if (curr_clocksource &&
            curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
            curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
                clocksource_verify_percpu_wreckage(curr_clocksource);

Hmm?

Thanks,

tglx


2021-04-14 12:35:10

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v7 clocksource 3/5] clocksource: Check per-CPU clock synchronization when marked unstable

On Tue, Apr 13, 2021 at 10:49:11PM +0200, Thomas Gleixner wrote:
> Paul,
>
> On Mon, Apr 12 2021 at 16:18, Paul E. McKenney wrote:
> > On Mon, Apr 12, 2021 at 10:37:10PM +0200, Thomas Gleixner wrote:
> >> On Mon, Apr 12 2021 at 12:57, Paul E. McKenney wrote:
> >> > On Mon, Apr 12, 2021 at 08:54:03PM +0200, Thomas Gleixner wrote:
> >> >> > I will send a new series out later today, Pacific Time.
> >> >>
> >> >> Can you do me a favour and send it standalone and not as yet another
> >> >> reply to this existing thread maze. A trivial lore link to the previous
> >> >> version gives enough context.
> >> >
> >> > Will do!
> >> >
> >> > Of course, it turns out that lockdep also doesn't like waited-on
> >> > smp_call_function_single() invocations from timer handlers,
> >> > so I am currently looking at other options for dealing with that
> >> > potential use-after-free. I am starting to like the looks of "only set
> >> > CLOCK_SOURCE_VERIFY_PERCPU on statically allocated clocksource structures
> >> > and let KASAN enforce this restriction", but I have not quite given up
> >> > on making it more general.
> >>
> >> The simplest point is in the thread under the clocksource_mutex which
> >> prevents anything from vanishing under your feet.
> >
> > And lockdep is -much- happier with the setup shown below, so thank
> > you again!
>
> But it is too simple now :) ...
>
> > diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
> > index f047c6cb056c..34dc38b6b923 100644
> > --- a/kernel/time/clocksource.c
> > +++ b/kernel/time/clocksource.c
> > @@ -519,6 +515,13 @@ static int __clocksource_watchdog_kthread(void)
> >          unsigned long flags;
> >          int select = 0;
> >
> > +        /* Do any required per-CPU skew verification. */
> > +        list_for_each_entry(cs, &watchdog_list, wd_list) {
> > +                if ((cs->flags & (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU)) ==
> > +                    (CLOCK_SOURCE_UNSTABLE | CLOCK_SOURCE_VERIFY_PERCPU))
> > +                        clocksource_verify_percpu(cs);
> > +        }
>
> because that list is _NOT_ protected by the clocksource_mutex as you
> noticed yourself already.
>
> But you don't have to walk that list at all because the only interesting
> thing is the currently active clocksource, which is about to be changed
> in case the watchdog marked it unstable and cannot be changed by any
> other code concurrently because clocksource_mutex is held.
>
> So all you need is:
>
>         if (curr_clocksource &&
>             curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
>             curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
>                 clocksource_verify_percpu_wreckage(curr_clocksource);
>
> Hmm?

With the addition of a clocksource=tsc boot parameter, this approach
does appear to work, thank you! I sent out the updated series.
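
For reference, the per-CPU check now sits at the top of
__clocksource_watchdog_kthread(), which runs under clocksource_mutex,
roughly as follows (sketch of the new placement; the posted series has
the final form):

        /* Do any required per-CPU skew verification. */
        if (curr_clocksource &&
            (curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE) &&
            (curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU))
                clocksource_verify_percpu(curr_clocksource);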

Thanx, Paul