2023-01-05 01:28:03

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()

CPUs ignoring NMIs is often a sign of those CPUs going bad, but there
are quite a few other reasons why a CPU might ignore NMIs. Therefore,
accumulate evidence within exc_nmi() as to what might be preventing a
given CPU from responding to an NMI.

Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
---
arch/x86/kernel/nmi.c | 31 ++++++++++++++++++++++++++++++-
lib/Kconfig.debug | 11 +++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3bc04f..4f1651dc65b3a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -69,6 +69,15 @@ struct nmi_stats {
unsigned int unknown;
unsigned int external;
unsigned int swallow;
+ unsigned long recv_jiffies;
+ unsigned long idt_seq;
+ unsigned long idt_nmi_seq;
+ unsigned long idt_ignored;
+ atomic_long_t idt_calls;
+ unsigned long idt_seq_snap;
+ unsigned long idt_nmi_seq_snap;
+ unsigned long idt_ignored_snap;
+ long idt_calls_snap;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
@@ -479,12 +488,15 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
irqentry_state_t irq_state;
+ struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);

/*
* Re-enable NMIs right here when running as an SEV-ES guest. This might
* cause nested NMIs, but those can be handled safely.
*/
sev_es_nmi_complete();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
+ arch_atomic_long_inc(&nsp->idt_calls);

if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -495,6 +507,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
}
this_cpu_write(nmi_state, NMI_EXECUTING);
this_cpu_write(nmi_cr2, read_cr2());
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
nmi_restart:

/*
@@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)

inc_irq_stat(__nmi_count);

- if (!ignore_nmis)
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
+ WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
+ } else if (!ignore_nmis) {
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
default_do_nmi(regs);
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
+ }

irqentry_nmi_exit(regs, irq_state);

@@ -525,6 +549,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)

if (user_mode(regs))
mds_user_clear_cpu_buffers();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(nsp->idt_seq & 0x1);
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
}

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 881c3f84e88a3..dad99197d2695 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1552,6 +1552,17 @@ config TRACE_IRQFLAGS_NMI
depends on TRACE_IRQFLAGS
depends on TRACE_IRQFLAGS_NMI_SUPPORT

+config NMI_CHECK_CPU
+ bool "Debugging for CPUs failing to respond to backtrace requests"
+ depends on DEBUG_KERNEL
+ depends on X86
+ default n
+ help
+ Enables debug prints when a CPU fails to respond to a given
+ backtrace NMI. These prints provide some reasons why a CPU
+ might legitimately be failing to respond, for example, if it
is offline or if ignore_nmis is set.
+
config DEBUG_IRQFLAGS
bool "Debug IRQ flag manipulation"
help
--
2.31.1.189.g2e36527f23


2023-01-05 01:46:28

by Paul E. McKenney

[permalink] [raw]
Subject: [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored

Instrument nmi_trigger_cpumask_backtrace() to dump out diagnostics based
on evidence accumulated by exc_nmi(). These diagnostics are dumped for
CPUs that ignored an NMI backtrace request for more than 10 seconds.

Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: <[email protected]>
---
arch/x86/kernel/nmi.c | 73 +++++++++++++++++++++++++++++++++++++++++++
include/linux/nmi.h | 8 +++++
lib/nmi_backtrace.c | 2 ++
3 files changed, 83 insertions(+)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4f1651dc65b3a..4a58ea6a806e1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -566,6 +566,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
#endif

+#ifdef CONFIG_NMI_CHECK_CPU
+
+static char *nmi_check_stall_msg[] = {
+/* */
+/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* | +------ cpu_is_offline(cpu) */
+/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
+/* | | | NMI handler has been invoked. */
+/* | | | */
+/* V V V */
+/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
+/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
+/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
+/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
+/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
+/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
+/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
+/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",
+};
+
+void nmi_backtrace_stall_snap(const struct cpumask *btp)
+{
+ int cpu;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
+ nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
+ nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
+ nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
+ }
+}
+
+void nmi_backtrace_stall_check(const struct cpumask *btp)
+{
+ int cpu;
+ int idx;
+ unsigned long nmi_seq;
+ unsigned long j = jiffies;
+ char *modp;
+ char *msgp;
+ char *msghp;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ modp = "";
+ msghp = "";
+ nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
+ msgp = "CPU entered NMI handler function, but has not exited";
+ } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
+ msgp = "CPU is handling NMIs";
+ } else {
+ idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ (cpu_is_offline(cpu) << 1) |
+ (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
+ msgp = nmi_check_stall_msg[idx];
+ if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
+ modp = ", but OK because ignore_nmis was set";
+ if (nmi_seq & ~0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ msghp = " (CPU exited one NMI handler function)";
+ }
+ pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
+ __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ }
+}
+
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index f700ff2df074e..048c0b9aa623d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
#include <asm/nmi.h>
#endif

+#ifdef CONFIG_NMI_CHECK_CPU
+void nmi_backtrace_stall_snap(const struct cpumask *btp);
+void nmi_backtrace_stall_check(const struct cpumask *btp);
+#else
+static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {}
+static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {}
+#endif
+
#endif
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index d01aec6ae15c8..5274bbb026d79 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
+ nmi_backtrace_stall_snap(to_cpumask(backtrace_mask));
raise(to_cpumask(backtrace_mask));
}

@@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
mdelay(1);
touch_softlockup_watchdog();
}
+ nmi_backtrace_stall_check(to_cpumask(backtrace_mask));

/*
* Force flush any remote buffers that might be stuck in IRQ context
--
2.31.1.189.g2e36527f23

2023-01-05 10:54:24

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored


* Paul E. McKenney <[email protected]> wrote:

> +#ifdef CONFIG_NMI_CHECK_CPU
> +
> +static char *nmi_check_stall_msg[] = {
> +/* */
> +/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
> +/* | +------ cpu_is_offline(cpu) */
> +/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
> +/* | | | NMI handler has been invoked. */
> +/* | | | */
> +/* V V V */
> +/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
> +/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
> +/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
> +/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
> +/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
> +/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
> +/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
> +/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",

That kind of disambiguation of why a CPU is stuck looks really useful:

Reviewed-by: Ingo Molnar <[email protected]>

One small suggestion would be to do this in the messages:

s/exc_nmi handler
/exc_nmi() handler

... to make it clear that it's a regular kernel function [well, hw entry
handler], not a function pointer or some other indirection? No strong
feelings though.

Thanks,

Ingo

2023-01-05 19:38:47

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored

On Thu, Jan 05, 2023 at 11:40:30AM +0100, Ingo Molnar wrote:
>
> * Paul E. McKenney <[email protected]> wrote:
>
> > +#ifdef CONFIG_NMI_CHECK_CPU
> > +
> > +static char *nmi_check_stall_msg[] = {
> > +/* */
> > +/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
> > +/* | +------ cpu_is_offline(cpu) */
> > +/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
> > +/* | | | NMI handler has been invoked. */
> > +/* | | | */
> > +/* V V V */
> > +/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
> > +/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
> > +/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
> > +/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
> > +/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
> > +/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
> > +/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
> > +/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",
>
> That kind of disambiguation of why a CPU is stuck looks really useful:
>
> Reviewed-by: Ingo Molnar <[email protected]>

Glad you like it and thank you! I will apply this on the next rebase.

> One small suggestion would be to do this in the messages:
>
> s/exc_nmi handler
> /exc_nmi() handler
>
> ... to make it clear that it's a regular kernel function [well, hw entry
> handler], not a function pointer or some other indirection? No strong
> feelings though.

Will do! I would balk at "DEFINE_IDTENTRY_RAW(exc_nmi)", though. ;-)

Thanx, Paul

2023-01-09 16:45:53

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()

On Wed, Jan 04, 2023 at 05:15:36PM -0800, Paul E. McKenney wrote:

> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> index cec0bfa3bc04f..4f1651dc65b3a 100644
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -69,6 +69,15 @@ struct nmi_stats {
> unsigned int unknown;
> unsigned int external;
> unsigned int swallow;
> + unsigned long recv_jiffies;
> + unsigned long idt_seq;
> + unsigned long idt_nmi_seq;
> + unsigned long idt_ignored;
> + atomic_long_t idt_calls;
> + unsigned long idt_seq_snap;
> + unsigned long idt_nmi_seq_snap;
> + unsigned long idt_ignored_snap;
> + long idt_calls_snap;
> };

Urgh, this is more than a whole cacheline of extra data :/ Can't we make
this #ifdef CONFIG_NMI_CHECK_CPU ?

> @@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
>
> inc_irq_stat(__nmi_count);
>
> - if (!ignore_nmis)
> + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
> + WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
> + } else if (!ignore_nmis) {
> + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> + WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
> default_do_nmi(regs);
> + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> + WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
> + }
>
> irqentry_nmi_exit(regs, irq_state);

That is not a NO-OP when !CONFIG_NMI_CHECK_CPU :/

2023-01-09 17:12:00

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()

On Mon, Jan 09, 2023 at 05:19:59PM +0100, Peter Zijlstra wrote:
> On Wed, Jan 04, 2023 at 05:15:36PM -0800, Paul E. McKenney wrote:
>
> > diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> > index cec0bfa3bc04f..4f1651dc65b3a 100644
> > --- a/arch/x86/kernel/nmi.c
> > +++ b/arch/x86/kernel/nmi.c
> > @@ -69,6 +69,15 @@ struct nmi_stats {
> > unsigned int unknown;
> > unsigned int external;
> > unsigned int swallow;
> > + unsigned long recv_jiffies;
> > + unsigned long idt_seq;
> > + unsigned long idt_nmi_seq;
> > + unsigned long idt_ignored;
> > + atomic_long_t idt_calls;
> > + unsigned long idt_seq_snap;
> > + unsigned long idt_nmi_seq_snap;
> > + unsigned long idt_ignored_snap;
> > + long idt_calls_snap;
> > };
>
> Urgh, this is more than a whole cacheline of extra data :/ Can't we make
> this #ifdef CONFIG_NMI_CHECK_CPU ?

We can. However, the new data is at the end of the structure. Plus doing
that will also require converting the IS_ENABLED() checks to #ifdef or
to lots of tiny functions, neither of which will be at all pretty.

Another approach would be to have a #else that created a union of all
of these fields, thus reducing this unused space so that of a single
unsigned long, while still permitting IS_ENABLED() checks. Thoughts?

> > @@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
> >
> > inc_irq_stat(__nmi_count);
> >
> > - if (!ignore_nmis)
> > + if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
> > + WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
> > + } else if (!ignore_nmis) {
> > + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> > + WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
> > default_do_nmi(regs);
> > + WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> > + WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
> > + }
> >
> > irqentry_nmi_exit(regs, irq_state);
>
> That is not a NO-OP when !CONFIG_NMI_CHECK_CPU :/

Good catch, will fix!

Thanx, Paul