2012-08-03 22:29:51

by Luck, Tony

[permalink] [raw]
Subject: Re: [RESEND PATCH 0/5 V2] x86: mce: Bugfixes, cleanups and a new CMCI poll version

I applied this series on top of v3.6-rc1 and took it for
a test drive with a little storm of 20 corrected interrupts.

The series worked ... but the console log was entirely unhelpful
in letting me know what had just happened to my system. All I saw
was:

mce: [Hardware Error]: Machine check events logged
mce: [Hardware Error]: Machine check events logged
... several seconds pass ...
CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11
mce_notify_irq: 3 callbacks suppressed
CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3
mce: [Hardware Error]: Machine check events logged

No mention of the storm, no mention that we switched to polling
mode (and so missed some of the reports). Just the cryptic output
as the kernel re-established the CMCI on processors that had been
affected by the storm.

I tried the patch below to log the start/end of the storm. But I
may be doing something wrong with printk_timed_ratelimit() because
I saw two "storm detected" and two "storm subsided" messages.

It would also be nice to avoid all the "CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3"
messages.

-Tony

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 693bc7d..236f60e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -87,6 +87,8 @@ void mce_intel_hcpu_update(unsigned long cpu)

unsigned long mce_intel_adjust_timer(unsigned long interval)
{
+ static unsigned long jiffie_state;
+
if (interval < CMCI_POLL_INTERVAL)
return interval;

@@ -108,6 +110,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
*/
if (!atomic_read(&cmci_storm_on_cpus)) {
__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+ if (printk_timed_ratelimit(&jiffie_state, CMCI_STORM_INTERVAL/HZ*1000))
+ pr_notice("CMCI storm subsided, switching to interrupt mode\n");
cmci_reenable();
cmci_recheck();
}
@@ -126,6 +130,7 @@ static bool cmci_storm_detect(void)
unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
unsigned long ts = __this_cpu_read(cmci_time_stamp);
unsigned long now = jiffies;
+ static unsigned long jiffie_state;

if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
return true;
@@ -145,6 +150,9 @@ static bool cmci_storm_detect(void)
__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
atomic_inc(&cmci_storm_on_cpus);
mce_timer_kick(CMCI_POLL_INTERVAL);
+
+ if (printk_timed_ratelimit(&jiffie_state, CMCI_STORM_INTERVAL/HZ*1000))
+ pr_notice("CMCI storm detected, switching to poll mode\n");
return true;
}


2012-08-06 21:42:53

by Chen, Gong

[permalink] [raw]
Subject: Re: [RESEND PATCH 0/5 V2] x86: mce: Bugfixes, cleanups and a new CMCI poll version

On Fri, Aug 03, 2012 at 03:29:50PM -0700, Luck, Tony wrote:
> Date: Fri, 03 Aug 2012 15:29:50 -0700
> From: "Luck, Tony" <[email protected]>
> To: Chen Gong <[email protected]>
> Cc: [email protected], [email protected], [email protected]
> Subject: Re: [RESEND PATCH 0/5 V2] x86: mce: Bugfixes, cleanups and a new
> CMCI poll version
>
> I applied this series on top of v3.6-rc1 and took it for
> a test drive with a little storm of 20 corrected interrupts.
>
> The series worked ... but the console log was entirely unhelpful
> in letting me know what had just happened to my system. All I saw
> was:
>
> mce: [Hardware Error]: Machine check events logged
> mce: [Hardware Error]: Machine check events logged
> ... several seconds pass ...
> CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11
> mce_notify_irq: 3 callbacks suppressed
> CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3
> CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3
> mce: [Hardware Error]: Machine check events logged
>
> No mention of the storm, no mention that we switched to polling
> mode (and so missed some of the reports). Just the cryptic output
> as the kernel re-established the CMCI on processors that had been
> affected by the storm.
>
> I tried the patch below to log the start/end of the storm. But I
> may be doing something wrong with printk_timed_ratelimit() because
> I saw two "storm detected" and two "storm subsided" messages.

You saw two times because the injection speed is not quick enough so that
the poll timer thinks during the expected time it doesn't meet new CMC, and
then double this timer, again, no CMC... until restore for poll timer to
INT mode. Under the real situation, thousands of CMCI come in so this
situation wil not happen. In fact, during the test I met this kind of
situations many times.

>
> It would also be nice to avoid all the "CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3"
> messages.
>
> -Tony
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
> index 693bc7d..236f60e 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
> @@ -87,6 +87,8 @@ void mce_intel_hcpu_update(unsigned long cpu)
>
> unsigned long mce_intel_adjust_timer(unsigned long interval)
> {
> + static unsigned long jiffie_state;
> +
> if (interval < CMCI_POLL_INTERVAL)
> return interval;
>
> @@ -108,6 +110,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
> */
> if (!atomic_read(&cmci_storm_on_cpus)) {
> __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
> + if (printk_timed_ratelimit(&jiffie_state, CMCI_STORM_INTERVAL/HZ*1000))
> + pr_notice("CMCI storm subsided, switching to interrupt mode\n");
> cmci_reenable();
> cmci_recheck();
> }
> @@ -126,6 +130,7 @@ static bool cmci_storm_detect(void)
> unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
> unsigned long ts = __this_cpu_read(cmci_time_stamp);
> unsigned long now = jiffies;
> + static unsigned long jiffie_state;
>
> if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
> return true;
> @@ -145,6 +150,9 @@ static bool cmci_storm_detect(void)
> __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
> atomic_inc(&cmci_storm_on_cpus);
> mce_timer_kick(CMCI_POLL_INTERVAL);
> +
> + if (printk_timed_ratelimit(&jiffie_state, CMCI_STORM_INTERVAL/HZ*1000))
> + pr_notice("CMCI storm detected, switching to poll mode\n");
> return true;
> }
>


Attachments:
(No filename) (3.63 kB)
signature.asc (836.00 B)
Digital signature
Download all attachments

2012-08-07 23:16:06

by Luck, Tony

[permalink] [raw]
Subject: [PATCH 5/6] x86/mce: Provide an option to keep cmci_reenable() quiet

cmci_reenable() calls cmci_discover() to look at which machine check
banks are shared between processors. It ensure that only one cpu takes
ownership of each shared bank. At boot time cmci_discover() is muted,
but during hot add events it provides some output which may be helpful
to ensure that all banks have an owner.

We want to use cmci_reenable() when a CMCI storm subsides. In this case
the topology has not changed, so we do not need any commentary as it
goes about its business.

Add a "quiet" argument to cmci_reenable() that it passes to cmci_discover().

Signed-off-by: Tony Luck <[email protected]>
---

[Patches 1-4 remain as previously posted. This is a new patch to
help tidy console messages. Old patch 5 becomes patch 6 (and has
a few cleanups]

arch/x86/include/asm/mce.h | 4 ++--
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
arch/x86/kernel/cpu/mcheck/mce_intel.c | 10 +++++-----
3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 441520e..bf79a0f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -165,13 +165,13 @@ extern int mce_cmci_disabled;
extern int mce_ignore_ce;
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
-void cmci_reenable(void);
+void cmci_reenable(int quiet);
void cmci_rediscover(int dying);
void cmci_recheck(void);
#else
static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
static inline void cmci_clear(void) {}
-static inline void cmci_reenable(void) {}
+static inline void cmci_reenable(int quiet) {}
static inline void cmci_rediscover(int dying) {}
static inline void cmci_recheck(void) {}
#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b4dde15..826dd21 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1994,7 +1994,7 @@ static void mce_enable_ce(void *all)
{
if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
- cmci_reenable();
+ cmci_reenable(0);
cmci_recheck();
if (all)
__mcheck_cpu_init_timer();
@@ -2246,7 +2246,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
return;

if (!(action & CPU_TASKS_FROZEN))
- cmci_reenable();
+ cmci_reenable(0);
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc..e652cde 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -78,7 +78,7 @@ static void print_update(char *type, int *hdr, int num)
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks, int quiet)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
unsigned long flags;
@@ -96,7 +96,7 @@ static void cmci_discover(int banks, int boot)

/* Already owned by someone else? */
if (val & MCI_CTL2_CMCI_EN) {
- if (test_and_clear_bit(i, owned) && !boot)
+ if (test_and_clear_bit(i, owned) && !quiet)
print_update("SHD", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
@@ -109,7 +109,7 @@ static void cmci_discover(int banks, int boot)

/* Did the enable bit stick? -- the bank supports CMCI */
if (val & MCI_CTL2_CMCI_EN) {
- if (!test_and_set_bit(i, owned) && !boot)
+ if (!test_and_set_bit(i, owned) && !quiet)
print_update("CMCI", &hdr, i);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
} else {
@@ -196,11 +196,11 @@ void cmci_rediscover(int dying)
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
-void cmci_reenable(void)
+void cmci_reenable(int quiet)
{
int banks;
if (cmci_supported(&banks))
- cmci_discover(banks, 0);
+ cmci_discover(banks, quiet);
}

static void intel_init_cmci(void)
--
1.7.10.2.552.gaa3bb87

2012-08-07 23:16:10

by Luck, Tony

[permalink] [raw]
Subject: [PATCH 6/6] x86/mce: Add CMCI poll mode

From: Chen Gong <[email protected]>

On Intel systems corrected machine check interrupts (CMCI) may be sent to
multiple logical processors; possibly to all processors on the affected
socket (SDM Volume 3B "15.5.1 CMCI Local APIC Interface"). This means
that a persistent error (such as a stuck bit in ECC memory) may cause
a storm of interrupts that greatly hinders or prevents forward progress
(probably on many processors).

To solve this we keep track of the rate at which each processor sees
CMCI. If we exceed a threshold, we disable CMCI delivery and switch to
polling the machine check banks. If the storm subsides (none of the
affected processors see any more errors for a complete poll interval) we
re-enable CMCI.

Signed-off-by: Chen Gong <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Chen Gong <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
---

Changes (w.r.t. old patch 5/5):
+ New commit message
+ Print messages when storm starts/ends
+ Suppress messages from cmci_discover()
+ Some spelling fixes
+ Increased storm threshold from 5 to 15 (so we are
have a few more samples for pattern detection to
identify the source of the storm).

arch/x86/kernel/cpu/mcheck/mce-internal.h | 12 ++++
arch/x86/kernel/cpu/mcheck/mce.c | 47 +++++++++++--
arch/x86/kernel/cpu/mcheck/mce_intel.c | 108 +++++++++++++++++++++++++++++-
3 files changed, 160 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a..6a05c1d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;

extern struct mce_bank *mce_banks;

+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
#ifdef CONFIG_ACPI_APEI
int apei_write_mce(struct mce *m);
ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 826dd21..ee57a8f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1260,6 +1260,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);

+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+ return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+ mce_adjust_timer_default;
+
static void mce_timer_fn(unsigned long data)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1270,6 +1278,7 @@ static void mce_timer_fn(unsigned long data)
if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
+ mce_intel_cmci_poll();
}

/*
@@ -1277,14 +1286,38 @@ static void mce_timer_fn(unsigned long data)
* polling interval, otherwise increase the polling interval.
*/
iv = __this_cpu_read(mce_next_interval);
- if (mce_notify_irq())
+ if (mce_notify_irq()) {
iv = max(iv / 2, (unsigned long) HZ/100);
- else
+ } else {
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+ iv = mce_adjust_timer(iv);
+ }
__this_cpu_write(mce_next_interval, iv);
+ /* Might have become 0 after CMCI storm subsided */
+ if (iv) {
+ t->expires = jiffies + iv;
+ add_timer_on(t, smp_processor_id());
+ }
+}

- t->expires = jiffies + iv;
- add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long when = jiffies + interval;
+ unsigned long iv = __this_cpu_read(mce_next_interval);
+
+ if (timer_pending(t)) {
+ if (time_before(when, t->expires))
+ mod_timer_pinned(t, when);
+ } else {
+ t->expires = round_jiffies(when);
+ add_timer_on(t, smp_processor_id());
+ }
+ if (interval < iv)
+ __this_cpu_write(mce_next_interval, interval);
}

/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1548,6 +1581,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
+ mce_adjust_timer = mce_intel_adjust_timer;
break;
case X86_VENDOR_AMD:
mce_amd_feature_init(c);
@@ -1559,7 +1593,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)

static void mce_start_timer(unsigned int cpu, struct timer_list *t)
{
- unsigned long iv = check_interval * HZ;
+ unsigned long iv = mce_adjust_timer(check_interval * HZ);

__this_cpu_write(mce_next_interval, iv);

@@ -2272,10 +2306,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
mce_device_remove(cpu);
+ mce_intel_hcpu_update(cpu);
break;
case CPU_DOWN_PREPARE:
- del_timer_sync(t);
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+ del_timer_sync(t);
break;
case CPU_DOWN_FAILED:
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e652cde..a573033 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
#include <asm/msr.h>
#include <asm/mce.h>

+#include "mce-internal.h"
+
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
*/
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD 1
+#define CMCI_POLL_INTERVAL (30 * HZ)
+#define CMCI_STORM_INTERVAL (1 * HZ)
+#define CMCI_STORM_THRESHOLD 15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+ CMCI_STORM_NONE,
+ CMCI_STORM_ACTIVE,
+ CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;

static int cmci_supported(int *banks)
{
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
return !!(cap & MCG_CMCI_P);
}

+void mce_intel_cmci_poll(void)
+{
+ if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+ return;
+ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+ if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+ atomic_dec(&cmci_storm_on_cpus);
+
+ per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+ int r;
+
+ if (interval < CMCI_POLL_INTERVAL)
+ return interval;
+
+ switch (__this_cpu_read(cmci_storm_state)) {
+ case CMCI_STORM_ACTIVE:
+ /*
+ * We switch back to interrupt mode once the poll timer has
+ * silenced itself. That means no events recorded and the
+ * timer interval is back to our poll interval.
+ */
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+ r = atomic_sub_return(1, &cmci_storm_on_cpus);
+ if (r == 0)
+ pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+ /* FALLTHROUGH */
+
+ case CMCI_STORM_SUBSIDED:
+ /*
+ * We wait for all cpus to go back to SUBSIDED
+ * state. When that happens we switch back to
+ * interrupt mode.
+ */
+ if (!atomic_read(&cmci_storm_on_cpus)) {
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+ cmci_reenable(1);
+ cmci_recheck();
+ }
+ return CMCI_POLL_INTERVAL;
+ default:
+ /*
+ * We have shiny weather. Let the poll do whatever it
+ * thinks.
+ */
+ return interval;
+ }
+}
+
+static bool cmci_storm_detect(void)
+{
+ unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+ unsigned long ts = __this_cpu_read(cmci_time_stamp);
+ unsigned long now = jiffies;
+ int r;
+
+ if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+ return true;
+
+ if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+ cnt++;
+ } else {
+ cnt = 1;
+ __this_cpu_write(cmci_time_stamp, now);
+ }
+ __this_cpu_write(cmci_storm_cnt, cnt);
+
+ if (cnt <= CMCI_STORM_THRESHOLD)
+ return false;
+
+ cmci_clear();
+ __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+ r = atomic_add_return(1, &cmci_storm_on_cpus);
+ mce_timer_kick(CMCI_POLL_INTERVAL);
+
+ if (r == 1)
+ pr_notice("CMCI storm detected: switching to poll mode\n");
+ return true;
+}
+
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
@@ -61,6 +165,8 @@ static int cmci_supported(int *banks)
*/
static void intel_threshold_interrupt(void)
{
+ if (cmci_storm_detect())
+ return;
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
mce_notify_irq();
}
--
1.7.10.2.552.gaa3bb87

2012-08-09 18:36:29

by Luck, Tony

[permalink] [raw]
Subject: [PATCH 5/6] x86/mce: Make cmci_discover() quiet

cmci_discover() works out which machine check banks support CMCI, and
which of those are shared by multiple logical processors. It uses this
information to ensure that exactly one cpu is designated the owner of
each bank so that when interrupts are broadcast to multiple cpus, only one
of them will look in a shared bank to log the error and clear the bank.

At boot time cmci_discover() performs this task silently. But during
certain cpu hotplug operations it prints out a set of summary lines
like this:

CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11
CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3

The value of these messages seems very low. A user might painstakingly
cross-check against the data sheet for a processor to ensure that all
CMCI supported banks are correctly reported, but this seems improbable.
If users really wanted to do this, we should print the information at
boot time too.

Remove the messages.

Signed-off-by: Tony Luck <[email protected]>
---

Gong pointed out to me offline that my previous "patch 5/6" would not
do what I said it did in the case where a processor is taken offline
during a CMCI storm. We'd have a topology change, but would suppress
the bank attribution messages when the storm ended. I took a longer
look at the messages, and decided that we can live without them.

arch/x86/kernel/cpu/mcheck/mce_intel.c | 25 ++++++-------------------
1 file changed, 6 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc..59648e4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -65,24 +65,15 @@ static void intel_threshold_interrupt(void)
mce_notify_irq();
}

-static void print_update(char *type, int *hdr, int num)
-{
- if (*hdr == 0)
- printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
- *hdr = 1;
- printk(KERN_CONT " %s:%d", type, num);
-}
-
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
{
unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
unsigned long flags;
- int hdr = 0;
int i;

raw_spin_lock_irqsave(&cmci_discover_lock, flags);
@@ -96,8 +87,7 @@ static void cmci_discover(int banks, int boot)

/* Already owned by someone else? */
if (val & MCI_CTL2_CMCI_EN) {
- if (test_and_clear_bit(i, owned) && !boot)
- print_update("SHD", &hdr, i);
+ clear_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
continue;
}
@@ -109,16 +99,13 @@ static void cmci_discover(int banks, int boot)

/* Did the enable bit stick? -- the bank supports CMCI */
if (val & MCI_CTL2_CMCI_EN) {
- if (!test_and_set_bit(i, owned) && !boot)
- print_update("CMCI", &hdr, i);
+ set_bit(i, owned);
__clear_bit(i, __get_cpu_var(mce_poll_banks));
} else {
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
- if (hdr)
- printk(KERN_CONT "\n");
}

/*
@@ -186,7 +173,7 @@ void cmci_rediscover(int dying)
continue;
/* Recheck banks in case CPUs don't all have the same */
if (cmci_supported(&banks))
- cmci_discover(banks, 0);
+ cmci_discover(banks);
}

set_cpus_allowed_ptr(current, old);
@@ -200,7 +187,7 @@ void cmci_reenable(void)
{
int banks;
if (cmci_supported(&banks))
- cmci_discover(banks, 0);
+ cmci_discover(banks);
}

static void intel_init_cmci(void)
@@ -211,7 +198,7 @@ static void intel_init_cmci(void)
return;

mce_threshold_vector = intel_threshold_interrupt;
- cmci_discover(banks, 1);
+ cmci_discover(banks);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
--
1.7.10.2.552.gaa3bb87