2009-09-28 01:21:21

by Huang, Ying

[permalink] [raw]
Subject: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

In the current implementation, injected MCE is valid from the point
the MCE is injected to the point the MCE is processed by the faked
handler call.

This has an undesired side-effect: it is possible for it to be
consumed by real machine_check_poll. This may confuse a real system
error and may confuse the mce test suite.

To fix this, this patch introduces another flag MCJ_VALID to indicate
that the MCE entry is valid for injector but not for the
handler. Another flag, mce.finished is used to indicate the MCE entry
is valid for the handler.

mce.finished is enabled only during faked MCE handler call and
protected by IRQ disabling. This make it impossible for real
machine_check_poll to consume it.

Signed-off-by: Huang Ying <[email protected]>

v2:
- Revise commit changelog (Thanks Ingo)
- Change naming (XX_BIT for bit definition)

---
arch/x86/include/asm/mce.h | 17 +++++++++++------
arch/x86/kernel/cpu/mcheck/mce-inject.c | 23 ++++++++++++++++-------
2 files changed, 27 insertions(+), 13 deletions(-)

--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -38,13 +38,18 @@
#define MCM_ADDR_MEM 3 /* memory address */
#define MCM_ADDR_GENERIC 7 /* generic */

-#define MCJ_CTX_MASK 3
+#define MCJ_NMI_BROADCAST_BIT 2 /* do NMI broadcasting */
+#define MCJ_EXCEPTION_BIT 3 /* raise as exception */
+#define MCJ_LOADED_BIT 4 /* entry is valid for injector */
+
+#define MCJ_CTX_MASK 0x03
#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
-#define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 1 /* inject context: process */
-#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 8 /* raise as exception */
+#define MCJ_CTX_RANDOM 0x00 /* inject context: random */
+#define MCJ_CTX_PROCESS 0x01 /* inject context: process */
+#define MCJ_CTX_IRQ 0x02 /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST (1 << MCJ_NMI_BROADCAST_BIT)
+#define MCJ_EXCEPTION (1 << MCJ_EXCEPTION_BIT)
+#define MCJ_LOADED (1 << MCJ_LOADED_BIT)

/* Fields are zero when not available */
struct mce {
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -32,16 +32,16 @@ static void inject_mce(struct mce *m)

/* Make sure noone reads partially written injectm */
i->finished = 0;
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
mb();
m->finished = 0;
- /* First set the fields after finished */
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
i->extcpu = m->extcpu;
mb();
- /* Now write record in order, finished last (except above) */
memcpy(i, m, sizeof(struct mce));
/* Finally activate it */
mb();
- i->finished = 1;
+ set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
}

static void raise_poll(struct mce *m)
@@ -51,9 +51,11 @@ static void raise_poll(struct mce *m)

memset(&b, 0xff, sizeof(mce_banks_t));
local_irq_save(flags);
+ m->finished = 1;
machine_check_poll(0, &b);
- local_irq_restore(flags);
m->finished = 0;
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
+ local_irq_restore(flags);
}

static void raise_exception(struct mce *m, struct pt_regs *pregs)
@@ -69,9 +71,11 @@ static void raise_exception(struct mce *
}
/* in mcheck exeception handler, irq will be disabled */
local_irq_save(flags);
+ m->finished = 1;
do_machine_check(pregs, 0);
- local_irq_restore(flags);
m->finished = 0;
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
+ local_irq_restore(flags);
}

static cpumask_t mce_inject_cpumask;
@@ -89,6 +93,8 @@ static int mce_raise_notify(struct notif
raise_exception(m, args->regs);
else if (m->status)
raise_poll(m);
+ else
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
return NOTIFY_STOP;
}

@@ -129,7 +135,7 @@ static int raise_local(void)
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
} else
- m->finished = 0;
+ clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);

return ret;
}
@@ -152,10 +158,13 @@ static void raise_mce(struct mce *m)
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
- if (!mcpu->finished ||
+ if (!test_bit(MCJ_LOADED_BIT,
+ (unsigned long *)&mcpu->inject_flags) ||
MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
+ /* make sure needed data is available on other CPUs */
+ smp_mb();
if (!cpus_empty(mce_inject_cpumask))
apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
start = jiffies;


2009-09-28 06:41:22

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

Hi Huang,

Huang Ying wrote:
> In the current implementation, injected MCE is valid from the point
> the MCE is injected to the point the MCE is processed by the faked
> handler call.
>
> This has an undesired side-effect: it is possible for it to be
> consumed by real machine_check_poll. This may confuse a real system
> error and may confuse the mce test suite.
>
> To fix this, this patch introduces another flag MCJ_VALID to indicate
^^^^^^^^^
MCJ_LOADED?

> that the MCE entry is valid for injector but not for the
> handler. Another flag, mce.finished is used to indicate the MCE entry
> is valid for the handler.
>
> mce.finished is enabled only during faked MCE handler call and
> protected by IRQ disabling. This make it impossible for real
> machine_check_poll to consume it.

Are there the reverse case - is it possible that the faked handler
call might consume real error which is not handled yet by the real
machine_check_poll?

>
> Signed-off-by: Huang Ying <[email protected]>
>
> v2:
> - Revise commit changelog (Thanks Ingo)
> - Change naming (XX_BIT for bit definition)
>
> ---
> arch/x86/include/asm/mce.h | 17 +++++++++++------
> arch/x86/kernel/cpu/mcheck/mce-inject.c | 23 ++++++++++++++++-------
> 2 files changed, 27 insertions(+), 13 deletions(-)
>
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -38,13 +38,18 @@
> #define MCM_ADDR_MEM 3 /* memory address */
> #define MCM_ADDR_GENERIC 7 /* generic */
>
> -#define MCJ_CTX_MASK 3
> +#define MCJ_NMI_BROADCAST_BIT 2 /* do NMI broadcasting */
> +#define MCJ_EXCEPTION_BIT 3 /* raise as exception */
> +#define MCJ_LOADED_BIT 4 /* entry is valid for injector */
> +
> +#define MCJ_CTX_MASK 0x03
> #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
> -#define MCJ_CTX_RANDOM 0 /* inject context: random */
> -#define MCJ_CTX_PROCESS 1 /* inject context: process */
> -#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
> -#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
> -#define MCJ_EXCEPTION 8 /* raise as exception */
> +#define MCJ_CTX_RANDOM 0x00 /* inject context: random */
> +#define MCJ_CTX_PROCESS 0x01 /* inject context: process */
> +#define MCJ_CTX_IRQ 0x02 /* inject context: IRQ */
> +#define MCJ_NMI_BROADCAST (1 << MCJ_NMI_BROADCAST_BIT)
> +#define MCJ_EXCEPTION (1 << MCJ_EXCEPTION_BIT)
> +#define MCJ_LOADED (1 << MCJ_LOADED_BIT)

I'd like to see a patch to replace MCJ_* to MCE_INJ_* before
adding new flag.

>
> /* Fields are zero when not available */
> struct mce {
> --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> @@ -32,16 +32,16 @@ static void inject_mce(struct mce *m)
>
> /* Make sure noone reads partially written injectm */
> i->finished = 0;
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
> mb();
> m->finished = 0;
> - /* First set the fields after finished */
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> i->extcpu = m->extcpu;
> mb();
> - /* Now write record in order, finished last (except above) */
> memcpy(i, m, sizeof(struct mce));
> /* Finally activate it */
> mb();
> - i->finished = 1;
> + set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);

Why
clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
cannot be
m->inject_flags &= ~MCJ_LOADED;
m->inject_flags |= MCJ_LOADED;
?

If it can, defined *_BIT will not be necessary here.

> }
>
> static void raise_poll(struct mce *m)
> @@ -51,9 +51,11 @@ static void raise_poll(struct mce *m)
>
> memset(&b, 0xff, sizeof(mce_banks_t));
> local_irq_save(flags);
> + m->finished = 1;
> machine_check_poll(0, &b);
> - local_irq_restore(flags);
> m->finished = 0;
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> + local_irq_restore(flags);
> }

I think the "finished" is not good name. (I suppose it is named
after "loading data to structure have been finished" or so.)
And also I think "MCJ_LOADED" is not good name, because I could not
figure out the difference between "loading finished" and "loaded."

OTOH in the course of nature mce_rdmsrl() returns injected data
anytime if data is loaded, because it is defined to do so.
304 /* MSR access wrappers used for error injection */
305 static u64 mce_rdmsrl(u32 msr)
306 {
307 u64 v;
308
309 if (__get_cpu_var(injectm).finished) {

I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
to refer faked data only during faked handler call."
Then what we have to do is making a flag to indicate that "now
in faked handler call," for an example:

309 if (__get_cpu_var(mce_fake_in_progress)) {

and:
local_irq_save(flags);
__get_cpu_var(mce_fake_in_progress) = 1;
machine_check_poll(0, &b);
__get_cpu_var(mce_fake_in_progress) = 0;
local_irq_restore(flags);

>
> static void raise_exception(struct mce *m, struct pt_regs *pregs)
> @@ -69,9 +71,11 @@ static void raise_exception(struct mce *
> }
> /* in mcheck exeception handler, irq will be disabled */
> local_irq_save(flags);
> + m->finished = 1;
> do_machine_check(pregs, 0);
> - local_irq_restore(flags);
> m->finished = 0;
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> + local_irq_restore(flags);
> }
>
> static cpumask_t mce_inject_cpumask;
> @@ -89,6 +93,8 @@ static int mce_raise_notify(struct notif
> raise_exception(m, args->regs);
> else if (m->status)
> raise_poll(m);
> + else
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> return NOTIFY_STOP;
> }
>
> @@ -129,7 +135,7 @@ static int raise_local(void)
> mce_notify_irq();
> printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
> } else
> - m->finished = 0;
> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
>
> return ret;
> }
> @@ -152,10 +158,13 @@ static void raise_mce(struct mce *m)
> cpu_clear(get_cpu(), mce_inject_cpumask);
> for_each_online_cpu(cpu) {
> struct mce *mcpu = &per_cpu(injectm, cpu);
> - if (!mcpu->finished ||
> + if (!test_bit(MCJ_LOADED_BIT,
> + (unsigned long *)&mcpu->inject_flags) ||
> MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
> cpu_clear(cpu, mce_inject_cpumask);
> }
> + /* make sure needed data is available on other CPUs */
> + smp_mb();

What data are you taking care here for?


Thanks,
H.Seto

2009-09-28 06:46:56

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 0/5] x86, mce-inject: misc fix

Here is a patch set inspired by Huang's patch.
Based on 2.6.32-rc1.

Thanks,
H.Seto

Hidetoshi Seto (5):
mce-inject: replace MCJ_ to MCE_INJ_
x86, mce: rename finished to valid in struct mce
mce-inject: make injected mce valid only during faked handler call
mce-inject: no wait on write with MCE_INJ_CTX_RANDOM
mce-inject: allow injecting status=0 to poll handler

arch/x86/include/asm/mce.h | 21 +++++----
arch/x86/kernel/cpu/mcheck/mce-inject.c | 69 ++++++++++++++++++-------------
arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++----
3 files changed, 59 insertions(+), 47 deletions(-)

2009-09-28 06:51:37

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 1/5] mce-inject: replace MCJ_ to MCE_INJ_

MCE_INJ_ is better than MCJ_ standing for "MCe inJect."

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 15 ++++++++-------
arch/x86/kernel/cpu/mcheck/mce-inject.c | 20 ++++++++++----------
2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64..4a48344 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -38,13 +38,14 @@
#define MCM_ADDR_MEM 3 /* memory address */
#define MCM_ADDR_GENERIC 7 /* generic */

-#define MCJ_CTX_MASK 3
-#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
-#define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 1 /* inject context: process */
-#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 8 /* raise as exception */
+#define MCE_INJ_CTX_MASK 0x03
+#define MCE_INJ_CTX_RANDOM 0x00 /* inject context: random */
+#define MCE_INJ_CTX_PROCESS 0x01 /* inject context: process */
+#define MCE_INJ_CTX_IRQ 0x02 /* inject context: IRQ */
+#define MCE_INJ_CTX(flags) ((flags) & MCE_INJ_CTX_MASK)
+
+#define MCE_INJ_NMI_BROADCAST (1 << 2) /* do NMI broadcasting */
+#define MCE_INJ_EXCEPTION (1 << 3) /* raise as exception */

/* Fields are zero when not available */
struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 472763d..2c1fc5a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -85,7 +85,7 @@ static int mce_raise_notify(struct notifier_block *self,
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);
- if (m->inject_flags & MCJ_EXCEPTION)
+ if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
else if (m->status)
raise_poll(m);
@@ -101,21 +101,21 @@ static struct notifier_block mce_raise_nb = {
static int raise_local(void)
{
struct mce *m = &__get_cpu_var(injectm);
- int context = MCJ_CTX(m->inject_flags);
+ int context = MCE_INJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;

- if (m->inject_flags & MCJ_EXCEPTION) {
+ if (m->inject_flags & MCE_INJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
- case MCJ_CTX_IRQ:
+ case MCE_INJ_CTX_IRQ:
/*
* Could do more to fake interrupts like
* calling irq_enter, but the necessary
* machinery isn't exported currently.
*/
/*FALL THROUGH*/
- case MCJ_CTX_PROCESS:
+ case MCE_INJ_CTX_PROCESS:
raise_exception(m, NULL);
break;
default:
@@ -136,15 +136,15 @@ static int raise_local(void)

static void raise_mce(struct mce *m)
{
- int context = MCJ_CTX(m->inject_flags);
+ int context = MCE_INJ_CTX(m->inject_flags);

inject_mce(m);

- if (context == MCJ_CTX_RANDOM)
+ if (context == MCE_INJ_CTX_RANDOM)
return;

#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & MCJ_NMI_BROADCAST) {
+ if (m->inject_flags & MCE_INJ_NMI_BROADCAST) {
unsigned long start;
int cpu;
get_online_cpus();
@@ -152,8 +152,8 @@ static void raise_mce(struct mce *m)
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
- if (!mcpu->finished ||
- MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
+ if (!mcpu->finished || MCE_INJ_CTX(mcpu->inject_flags)
+ != MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
if (!cpus_empty(mce_inject_cpumask))
--
1.6.4.3

2009-09-28 06:52:14

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 2/5] x86, mce: rename finished to valid in struct mce

The straightforward name is easier to understand.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 6 +++---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 14 +++++++-------
arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++++++--------
3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4a48344..995dfd2 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -58,12 +58,12 @@ struct mce {
__u64 time; /* wall time_t when error was detected */
__u8 cpuvendor; /* cpu vendor as encoded in system.h */
__u8 inject_flags; /* software inject flags */
- __u16 pad;
+ __u16 pad;
__u32 cpuid; /* CPUID 1 EAX */
- __u8 cs; /* code segment */
+ __u8 cs; /* code segment */
__u8 bank; /* machine check bank */
__u8 cpu; /* cpu number; obsolete; use extcpu now */
- __u8 finished; /* entry is valid */
+ __u8 valid; /* entry is valid */
__u32 extcpu; /* linux cpu number that detected the error */
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 2c1fc5a..5bac818 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -31,9 +31,9 @@ static void inject_mce(struct mce *m)
struct mce *i = &per_cpu(injectm, m->extcpu);

/* Make sure noone reads partially written injectm */
- i->finished = 0;
+ i->valid = 0;
mb();
- m->finished = 0;
+ m->valid = 0;
/* First set the fields after finished */
i->extcpu = m->extcpu;
mb();
@@ -41,7 +41,7 @@ static void inject_mce(struct mce *m)
memcpy(i, m, sizeof(struct mce));
/* Finally activate it */
mb();
- i->finished = 1;
+ i->valid = 1;
}

static void raise_poll(struct mce *m)
@@ -53,7 +53,7 @@ static void raise_poll(struct mce *m)
local_irq_save(flags);
machine_check_poll(0, &b);
local_irq_restore(flags);
- m->finished = 0;
+ m->valid = 0;
}

static void raise_exception(struct mce *m, struct pt_regs *pregs)
@@ -71,7 +71,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
local_irq_save(flags);
do_machine_check(pregs, 0);
local_irq_restore(flags);
- m->finished = 0;
+ m->valid = 0;
}

static cpumask_t mce_inject_cpumask;
@@ -129,7 +129,7 @@ static int raise_local(void)
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
} else
- m->finished = 0;
+ m->valid = 0;

return ret;
}
@@ -152,7 +152,7 @@ static void raise_mce(struct mce *m)
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
- if (!mcpu->finished || MCE_INJ_CTX(mcpu->inject_flags)
+ if (!mcpu->valid || MCE_INJ_CTX(mcpu->inject_flags)
!= MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4b2af86..ac4f478 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -129,7 +129,7 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;

- mce->finished = 0;
+ mce->valid = 0;
wmb();
for (;;) {
entry = rcu_dereference(mcelog.next);
@@ -145,7 +145,7 @@ void mce_log(struct mce *mce)
return;
}
/* Old left over entry. Skip: */
- if (mcelog.entry[entry].finished) {
+ if (mcelog.entry[entry].valid) {
entry++;
continue;
}
@@ -158,10 +158,10 @@ void mce_log(struct mce *mce)
}
memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
wmb();
- mcelog.entry[entry].finished = 1;
+ mcelog.entry[entry].valid = 1;
wmb();

- mce->finished = 1;
+ mce->valid = 1;
set_bit(0, &mce_need_notify);
}

@@ -306,7 +306,7 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;

- if (__get_cpu_var(injectm).finished) {
+ if (__get_cpu_var(injectm).valid) {
int offset = msr_to_offset(msr);

if (offset < 0)
@@ -329,7 +329,7 @@ static u64 mce_rdmsrl(u32 msr)

static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).finished) {
+ if (__get_cpu_var(injectm).valid) {
int offset = msr_to_offset(msr);

if (offset >= 0)
@@ -1488,7 +1488,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
for (i = prev; i < next; i++) {
unsigned long start = jiffies;

- while (!mcelog.entry[i].finished) {
+ while (!mcelog.entry[i].valid) {
if (time_after_eq(jiffies, start + 2)) {
memset(mcelog.entry + i, 0,
sizeof(struct mce));
@@ -1519,7 +1519,7 @@ timeout:
on_each_cpu(collect_tscs, cpu_tsc, 1);

for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
+ if (mcelog.entry[i].valid &&
mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
err |= copy_to_user(buf, mcelog.entry+i,
sizeof(struct mce));
--
1.6.4.3

2009-09-28 06:53:30

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 3/5] mce-inject: make injected mce valid only during faked handler call

In the current implementation, injected MCE is valid from the point
the MCE is injected to the point the MCE is processed by the faked
handler call.

This has an undesired side-effect: it is possible for it to be
consumed by real machine_check_poll. This may confuse a real system
error and may confuse the mce test suite.

To fix this, this patch changes mce_rdmsrl/wemsrl() to refer injected
data only when injectm.valid states 3rd state "2", which indicates that
the injected MCE entry is valid and ready for the handler.

The injectm.valid becomes "2" only during faked MCE handler call
and protected by IRQ disabling. This make it impossible for real
machine_check_poll to consume it.

Reported-by: Huang Ying <[email protected]>
Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 2 +-
arch/x86/kernel/cpu/mcheck/mce-inject.c | 17 +++++++++++++++--
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 995dfd2..fd9c3ca 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -63,7 +63,7 @@ struct mce {
__u8 cs; /* code segment */
__u8 bank; /* machine check bank */
__u8 cpu; /* cpu number; obsolete; use extcpu now */
- __u8 valid; /* entry is valid */
+ __u8 valid; /* 1: entry is valid, 2: valid as fake (injectm) */
__u32 extcpu; /* linux cpu number that detected the error */
__u32 socketid; /* CPU socket ID */
__u32 apicid; /* CPU initial apic ID */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 5bac818..702f712 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -50,10 +50,12 @@ static void raise_poll(struct mce *m)
mce_banks_t b;

memset(&b, 0xff, sizeof(mce_banks_t));
+
local_irq_save(flags);
+ m->valid = 2;
machine_check_poll(0, &b);
- local_irq_restore(flags);
m->valid = 0;
+ local_irq_restore(flags);
}

static void raise_exception(struct mce *m, struct pt_regs *pregs)
@@ -67,11 +69,13 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
regs.cs = m->cs;
pregs = &regs;
}
+
/* in mcheck exeception handler, irq will be disabled */
local_irq_save(flags);
+ m->valid = 2;
do_machine_check(pregs, 0);
- local_irq_restore(flags);
m->valid = 0;
+ local_irq_restore(flags);
}

static cpumask_t mce_inject_cpumask;
@@ -82,13 +86,19 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
+
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);
+
+ if (!m->valid)
+ return NOTIFY_STOP;
+
if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
else if (m->status)
raise_poll(m);
+
return NOTIFY_STOP;
}

@@ -105,6 +115,9 @@ static int raise_local(void)
int ret = 0;
int cpu = m->extcpu;

+ if (!m->valid)
+ return 0;
+
if (m->inject_flags & MCE_INJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ac4f478..aeab37a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -306,7 +306,7 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;

- if (__get_cpu_var(injectm).valid) {
+ if (__get_cpu_var(injectm).valid > 1) {
int offset = msr_to_offset(msr);

if (offset < 0)
@@ -329,7 +329,7 @@ static u64 mce_rdmsrl(u32 msr)

static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).valid) {
+ if (__get_cpu_var(injectm).valid > 1) {
int offset = msr_to_offset(msr);

if (offset >= 0)
--
1.6.4.3

2009-09-28 06:54:10

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 4/5] mce-inject: no wait on write with MCE_INJ_CTX_RANDOM

Skip schedule_timeout() on write with MCE_INJ_CTX_RANDOM.
And call raise_mce() only when it is required.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 23 +++++++++++------------
1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 702f712..7691c52 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -149,12 +149,11 @@ static int raise_local(void)

static void raise_mce(struct mce *m)
{
- int context = MCE_INJ_CTX(m->inject_flags);
-
- inject_mce(m);
-
- if (context == MCE_INJ_CTX_RANDOM)
- return;
+ /*
+ * Need to give user space some time to set everything up,
+ * so do it a jiffie or two later everywhere.
+ */
+ schedule_timeout(2);

#ifdef CONFIG_X86_LOCAL_APIC
if (m->inject_flags & MCE_INJ_NMI_BROADCAST) {
@@ -212,12 +211,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
return -EINVAL;

- /*
- * Need to give user space some time to set everything up,
- * so do it a jiffie or two later everywhere.
- */
- schedule_timeout(2);
- raise_mce(&m);
+ /* Copy to percpu */
+ inject_mce(&m);
+
+ if (MCE_INJ_CTX(m.inject_flags) != MCE_INJ_CTX_RANDOM)
+ raise_mce(&m);
+
return usize;
}

--
1.6.4.3

2009-09-28 06:54:50

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 5/5] mce-inject: allow injecting status=0 to poll handler

We can inject void data to exception handler by writing data with
status=0 and flag=MCE_INJ_EXCEPTION. But without this patch we cannot
do same to poll handler by status=0 and flag=!MCE_INJ_EXCEPTION.

This change will allow us to test behavior of poll handler with no
data.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7691c52..2eeb034 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -96,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,

if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
- else if (m->status)
+ else
raise_poll(m);

return NOTIFY_STOP;
@@ -136,13 +136,12 @@ static int raise_local(void)
ret = -EINVAL;
}
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
- } else if (m->status) {
+ } else {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
- } else
- m->valid = 0;
+ }

return ret;
}
--
1.6.4.3

2009-09-28 07:09:13

by Huang, Ying

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

On Mon, 2009-09-28 at 14:40 +0800, Hidetoshi Seto wrote:
> Hi Huang,
>
> Huang Ying wrote:
> > In the current implementation, injected MCE is valid from the point
> > the MCE is injected to the point the MCE is processed by the faked
> > handler call.
> >
> > This has an undesired side-effect: it is possible for it to be
> > consumed by real machine_check_poll. This may confuse a real system
> > error and may confuse the mce test suite.
> >
> > To fix this, this patch introduces another flag MCJ_VALID to indicate
> ^^^^^^^^^
> MCJ_LOADED?

Sorry, will fix this.

> > that the MCE entry is valid for injector but not for the
> > handler. Another flag, mce.finished is used to indicate the MCE entry
> > is valid for the handler.
> >
> > mce.finished is enabled only during faked MCE handler call and
> > protected by IRQ disabling. This make it impossible for real
> > machine_check_poll to consume it.
>
> Are there the reverse case - is it possible that the faked handler
> call might consume real error which is not handled yet by the real
> machine_check_poll?

Yes. It's possible at least in theory. But whole mce-inject.c is used
for testing only. The faked handler call will not occur on real system.

> > Signed-off-by: Huang Ying <[email protected]>
> >
> > v2:
> > - Revise commit changelog (Thanks Ingo)
> > - Change naming (XX_BIT for bit definition)
> >
> > ---
> > arch/x86/include/asm/mce.h | 17 +++++++++++------
> > arch/x86/kernel/cpu/mcheck/mce-inject.c | 23 ++++++++++++++++-------
> > 2 files changed, 27 insertions(+), 13 deletions(-)
> >
> > --- a/arch/x86/include/asm/mce.h
> > +++ b/arch/x86/include/asm/mce.h
> > @@ -38,13 +38,18 @@
> > #define MCM_ADDR_MEM 3 /* memory address */
> > #define MCM_ADDR_GENERIC 7 /* generic */
> >
> > -#define MCJ_CTX_MASK 3
> > +#define MCJ_NMI_BROADCAST_BIT 2 /* do NMI broadcasting */
> > +#define MCJ_EXCEPTION_BIT 3 /* raise as exception */
> > +#define MCJ_LOADED_BIT 4 /* entry is valid for injector */
> > +
> > +#define MCJ_CTX_MASK 0x03
> > #define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
> > -#define MCJ_CTX_RANDOM 0 /* inject context: random */
> > -#define MCJ_CTX_PROCESS 1 /* inject context: process */
> > -#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
> > -#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
> > -#define MCJ_EXCEPTION 8 /* raise as exception */
> > +#define MCJ_CTX_RANDOM 0x00 /* inject context: random */
> > +#define MCJ_CTX_PROCESS 0x01 /* inject context: process */
> > +#define MCJ_CTX_IRQ 0x02 /* inject context: IRQ */
> > +#define MCJ_NMI_BROADCAST (1 << MCJ_NMI_BROADCAST_BIT)
> > +#define MCJ_EXCEPTION (1 << MCJ_EXCEPTION_BIT)
> > +#define MCJ_LOADED (1 << MCJ_LOADED_BIT)
>
> I'd like to see a patch to replace MCJ_* to MCE_INJ_* before
> adding new flag.

MCX_ prefix is the naming convention used all over the mce.h, such as
MCG_, MCI_, MCM_, if we want to change MCJ_ into MCE_INJ_, we should
consider changing all these into similar style to keep consistent.

> > /* Fields are zero when not available */
> > struct mce {
> > --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> > @@ -32,16 +32,16 @@ static void inject_mce(struct mce *m)
> >
> > /* Make sure noone reads partially written injectm */
> > i->finished = 0;
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
> > mb();
> > m->finished = 0;
> > - /* First set the fields after finished */
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> > i->extcpu = m->extcpu;
> > mb();
> > - /* Now write record in order, finished last (except above) */
> > memcpy(i, m, sizeof(struct mce));
> > /* Finally activate it */
> > mb();
> > - i->finished = 1;
> > + set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
>
> Why
> clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
> cannot be
> m->inject_flags &= ~MCJ_LOADED;
> m->inject_flags |= MCJ_LOADED;
> ?

Because they may be write on one CPU and read on another CPU, atomic
operation is safer for this.

> If it can, defined *_BIT will not be necessary here.
>
> > }
> >
> > static void raise_poll(struct mce *m)
> > @@ -51,9 +51,11 @@ static void raise_poll(struct mce *m)
> >
> > memset(&b, 0xff, sizeof(mce_banks_t));
> > local_irq_save(flags);
> > + m->finished = 1;
> > machine_check_poll(0, &b);
> > - local_irq_restore(flags);
> > m->finished = 0;
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> > + local_irq_restore(flags);
> > }
>
> I think the "finished" is not good name. (I suppose it is named
> after "loading data to structure have been finished" or so.)

No. Its name is not invented for injecting. It stands for the MCE record
writing to mce log buffer has finished. That is, it is named according
to normal path, not testing path.

> And also I think "MCJ_LOADED" is not good name, because I could not
> figure out the difference between "loading finished" and "loaded."
>
> OTOH in the course of nature mce_rdmsrl() returns injected data
> anytime if data is loaded, because it is defined to do so.
> 304 /* MSR access wrappers used for error injection */
> 305 static u64 mce_rdmsrl(u32 msr)
> 306 {
> 307 u64 v;
> 308
> 309 if (__get_cpu_var(injectm).finished) {
>
> I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
> to refer faked data only during faked handler call."
> Then what we have to do is making a flag to indicate that "now
> in faked handler call," for an example:
>
> 309 if (__get_cpu_var(mce_fake_in_progress)) {
>
> and:
> local_irq_save(flags);
> __get_cpu_var(mce_fake_in_progress) = 1;
> machine_check_poll(0, &b);
> __get_cpu_var(mce_fake_in_progress) = 0;
> local_irq_restore(flags);

I don't think this method is better than the original one. They are just
equivalent.

> > static void raise_exception(struct mce *m, struct pt_regs *pregs)
> > @@ -69,9 +71,11 @@ static void raise_exception(struct mce *
> > }
> > /* in mcheck exeception handler, irq will be disabled */
> > local_irq_save(flags);
> > + m->finished = 1;
> > do_machine_check(pregs, 0);
> > - local_irq_restore(flags);
> > m->finished = 0;
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> > + local_irq_restore(flags);
> > }
> >
> > static cpumask_t mce_inject_cpumask;
> > @@ -89,6 +93,8 @@ static int mce_raise_notify(struct notif
> > raise_exception(m, args->regs);
> > else if (m->status)
> > raise_poll(m);
> > + else
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> > return NOTIFY_STOP;
> > }
> >
> > @@ -129,7 +135,7 @@ static int raise_local(void)
> > mce_notify_irq();
> > printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
> > } else
> > - m->finished = 0;
> > + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> >
> > return ret;
> > }
> > @@ -152,10 +158,13 @@ static void raise_mce(struct mce *m)
> > cpu_clear(get_cpu(), mce_inject_cpumask);
> > for_each_online_cpu(cpu) {
> > struct mce *mcpu = &per_cpu(injectm, cpu);
> > - if (!mcpu->finished ||
> > + if (!test_bit(MCJ_LOADED_BIT,
> > + (unsigned long *)&mcpu->inject_flags) ||
> > MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
> > cpu_clear(cpu, mce_inject_cpumask);
> > }
> > + /* make sure needed data is available on other CPUs */
> > + smp_mb();
>
> What data are you taking care here for?

For mce_inject_cpumask.

Best Regards,
Huang Ying

2009-09-28 07:27:03

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 3/5] mce-inject: make injected mce valid only during faked handler call

On Mon, 2009-09-28 at 14:52 +0800, Hidetoshi Seto wrote:
> In the current implementation, injected MCE is valid from the point
> the MCE is injected to the point the MCE is processed by the faked
> handler call.
>
> This has an undesired side-effect: it is possible for it to be
> consumed by real machine_check_poll. This may confuse a real system
> error and may confuse the mce test suite.
>
> To fix this, this patch changes mce_rdmsrl/wemsrl() to refer injected
> data only when injectm.valid states 3rd state "2", which indicates that
> the injected MCE entry is valid and ready for the handler.
>
> The injectm.valid becomes "2" only during faked MCE handler call
> and protected by IRQ disabling. This make it impossible for real
> machine_check_poll to consume it.

Anyway, I don't think it is a good idea to use a magic number like "2"
here. A meaningful name is better.

And I think my original method is clearer. Because injection related
flags go inject_flags, not be mixed with normal path flag "finished".

Best Regards,
Huang Ying

2009-09-28 08:03:03

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

Huang Ying wrote:
>>> mce.finished is enabled only during faked MCE handler call and
>>> protected by IRQ disabling. This make it impossible for real
>>> machine_check_poll to consume it.
>> Are there the reverse case - is it possible that the faked handler
>> call might consume real error which is not handled yet by the real
>> machine_check_poll?
>
> Yes. It's possible at least in theory. But whole mce-inject.c is used
> for testing only. The faked handler call will not occur on real system.

Just I concerned that it may confuse the mce test suite.

>>> +#define MCJ_LOADED (1 << MCJ_LOADED_BIT)
>> I'd like to see a patch to replace MCJ_* to MCE_INJ_* before
>> adding new flag.
>
> MCX_ prefix is the naming convention used all over the mce.h, such as
> MCG_, MCI_, MCM_, if we want to change MCJ_ into MCE_INJ_, we should
> consider changing all these into similar style to keep consistent.

That is bad naming convention, isn't it?
I don't mind considering changing all those.

>> Why
>> clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
>> set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
>> cannot be
>> m->inject_flags &= ~MCJ_LOADED;
>> m->inject_flags |= MCJ_LOADED;
>> ?
>
> Because they may be write on one CPU and read on another CPU, atomic
> operation is safer for this.

I think such read should not happen while write is on flight.
We already have many barriers all around.

>> I think the "finished" is not good name. (I suppose it is named
>> after "loading data to structure have been finished" or so.)
>
> No. Its name is not invented for injecting. It stands for the MCE record
> writing to mce log buffer has finished. That is, it is named according
> to normal path, not testing path.

I know it.
I just point that there is a bad name since early times.

>> I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
>> to refer faked data only during faked handler call."
>> Then what we have to do is making a flag to indicate that "now
>> in faked handler call," for an example:
>>
>> 309 if (__get_cpu_var(mce_fake_in_progress)) {
>>
>> and:
>> local_irq_save(flags);
>> __get_cpu_var(mce_fake_in_progress) = 1;
>> machine_check_poll(0, &b);
>> __get_cpu_var(mce_fake_in_progress) = 0;
>> local_irq_restore(flags);
>
> I don't think this method is better than the original one. They are just
> equivalent.

No, you changed usage of .finished, and transfer the functionality of the
flag to newly introduced MCJ_LOADED.
We can keep .finished as is, and introduce one new flag for this.

>>> static void raise_exception(struct mce *m, struct pt_regs *pregs)
>>> @@ -69,9 +71,11 @@ static void raise_exception(struct mce *
>>> }
>>> /* in mcheck exeception handler, irq will be disabled */
>>> local_irq_save(flags);
>>> + m->finished = 1;
>>> do_machine_check(pregs, 0);
>>> - local_irq_restore(flags);
>>> m->finished = 0;
>>> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
>>> + local_irq_restore(flags);
>>> }
>>>
>>> static cpumask_t mce_inject_cpumask;
>>> @@ -89,6 +93,8 @@ static int mce_raise_notify(struct notif
>>> raise_exception(m, args->regs);
>>> else if (m->status)
>>> raise_poll(m);
>>> + else
>>> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
>>> return NOTIFY_STOP;
>>> }
>>>
>>> @@ -129,7 +135,7 @@ static int raise_local(void)
>>> mce_notify_irq();
>>> printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
>>> } else
>>> - m->finished = 0;
>>> + clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
>>>
>>> return ret;
>>> }
>>> @@ -152,10 +158,13 @@ static void raise_mce(struct mce *m)
>>> cpu_clear(get_cpu(), mce_inject_cpumask);
>>> for_each_online_cpu(cpu) {
>>> struct mce *mcpu = &per_cpu(injectm, cpu);
>>> - if (!mcpu->finished ||
>>> + if (!test_bit(MCJ_LOADED_BIT,
>>> + (unsigned long *)&mcpu->inject_flags) ||
>>> MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
>>> cpu_clear(cpu, mce_inject_cpumask);
>>> }
>>> + /* make sure needed data is available on other CPUs */
>>> + smp_mb();
>> What data are you taking care here for?
>
> For mce_inject_cpumask.

OK, it seems fair enough.
I'd like to see this change in a separate patch with proper description.


Thanks,
H.Seto

2009-09-28 08:27:36

by Huang, Ying

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

On Mon, 2009-09-28 at 16:02 +0800, Hidetoshi Seto wrote:
> Huang Ying wrote:
> >>> mce.finished is enabled only during faked MCE handler call and
> >>> protected by IRQ disabling. This make it impossible for real
> >>> machine_check_poll to consume it.
> >> Are there the reverse case - is it possible that the faked handler
> >> call might consume real error which is not handled yet by the real
> >> machine_check_poll?
> >
> > Yes. It's possible at least in theory. But whole mce-inject.c is used
> > for testing only. The faked handler call will not occur on real system.
>
> Just I concerned that it may confuse the mce test suite.

I don't think that is a big issue. Real MCE is very rare for a normal
machine.

> >>> +#define MCJ_LOADED (1 << MCJ_LOADED_BIT)
> >> I'd like to see a patch to replace MCJ_* to MCE_INJ_* before
> >> adding new flag.
> >
> > MCX_ prefix is the naming convention used all over the mce.h, such as
> > MCG_, MCI_, MCM_, if we want to change MCJ_ into MCE_INJ_, we should
> > consider changing all these into similar style to keep consistent.
>
> That is bad naming convention, isn't it?
> I don't mind considering changing all those.

MCG_ and MCI_ (MCi) comes from "Intel Software developer's manual Vol
3A", I think keep consistent is more important.

> >> Why
> >> clear_bit(MCJ_LOADED_BIT, (unsigned long *)&m->inject_flags);
> >> set_bit(MCJ_LOADED_BIT, (unsigned long *)&i->inject_flags);
> >> cannot be
> >> m->inject_flags &= ~MCJ_LOADED;
> >> m->inject_flags |= MCJ_LOADED;
> >> ?
> >
> > Because they may be write on one CPU and read on another CPU, atomic
> > operation is safer for this.
>
> I think such read should not happen while write is on flight.
> We already have many barriers all around.
>
> >> I think the "finished" is not good name. (I suppose it is named
> >> after "loading data to structure have been finished" or so.)
> >
> > No. Its name is not invented for injecting. It stands for the MCE record
> > writing to mce log buffer has finished. That is, it is named according
> > to normal path, not testing path.
>
> I know it.
> I just point that there is a bad name since early times.

It is not a bad time when it is used for mce_log and mce_read. Only
finished mce can be read out.

> >> I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
> >> to refer faked data only during faked handler call."
> >> Then what we have to do is making a flag to indicate that "now
> >> in faked handler call," for an example:
> >>
> >> 309 if (__get_cpu_var(mce_fake_in_progress)) {
> >>
> >> and:
> >> local_irq_save(flags);
> >> __get_cpu_var(mce_fake_in_progress) = 1;
> >> machine_check_poll(0, &b);
> >> __get_cpu_var(mce_fake_in_progress) = 0;
> >> local_irq_restore(flags);
> >
> > I don't think this method is better than the original one. They are just
> > equivalent.
>
> No, you changed usage of .finished, and transfer the functionality of the
> flag to newly introduced MCJ_LOADED.
> We can keep .finished as is, and introduce one new flag for this.

You just use .finished as MCJ_LOADED and mce_fake_in_progress
as .finished.

Use a per-CPU variable mce_fake_in_progress make it hard to add support
to inject multiple MCEs in one CPU.

Best Regards,
Huang Ying

2009-09-28 08:59:51

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

Huang Ying wrote:
>>>> Are there the reverse case - is it possible that the faked handler
>>>> call might consume real error which is not handled yet by the real
>>>> machine_check_poll?
>>> Yes. It's possible at least in theory. But whole mce-inject.c is used
>>> for testing only. The faked handler call will not occur on real system.
>> Just I concerned that it may confuse the mce test suite.
>
> I don't think that is a big issue. Real MCE is very rare for a normal
> machine.

It's true.
However it is better not to touch the real data during the test, to
minimize the confusion.

>>> MCX_ prefix is the naming convention used all over the mce.h, such as
>>> MCG_, MCI_, MCM_, if we want to change MCJ_ into MCE_INJ_, we should
>>> consider changing all these into similar style to keep consistent.
>> That is bad naming convention, isn't it?
>> I don't mind considering changing all those.
>
> MCG_ and MCI_ (MCi) comes from "Intel Software developer's manual Vol
> 3A", I think keep consistent is more important.

Keeping consistent for defined in spec is good, but I don't think using
same convention between defined and undefined is required.
So now I think MCM_ should be changed, while MCG_ and MCI_ should not.

>>>> I think the "finished" is not good name. (I suppose it is named
>>>> after "loading data to structure have been finished" or so.)
>>> No. Its name is not invented for injecting. It stands for the MCE record
>>> writing to mce log buffer has finished. That is, it is named according
>>> to normal path, not testing path.
>> I know it.
>> I just point that there is a bad name since early times.
>
> It is not a bad time when it is used for mce_log and mce_read. Only
> finished mce can be read out.

It would be good time to rename/restructure all when your "ring buffer"
patch is applied.

>>>> I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
>>>> to refer faked data only during faked handler call."
>>>> Then what we have to do is making a flag to indicate that "now
>>>> in faked handler call," for an example:
>>>>
>>>> 309 if (__get_cpu_var(mce_fake_in_progress)) {
>>>>
>>>> and:
>>>> local_irq_save(flags);
>>>> __get_cpu_var(mce_fake_in_progress) = 1;
>>>> machine_check_poll(0, &b);
>>>> __get_cpu_var(mce_fake_in_progress) = 0;
>>>> local_irq_restore(flags);
>>> I don't think this method is better than the original one. They are just
>>> equivalent.
>> No, you changed usage of .finished, and transfer the functionality of the
>> flag to newly introduced MCJ_LOADED.
>> We can keep .finished as is, and introduce one new flag for this.
>
> You just use .finished as MCJ_LOADED and mce_fake_in_progress
> as .finished.

I just recommends you to keep .finished as a flag indicates "loading finished."

> Use a per-CPU variable mce_fake_in_progress make it hard to add support
> to inject multiple MCEs in one CPU.

Why? I think it can with such per-cpu flag.


Thanks,
H.Seto

2009-09-28 09:15:15

by Huang, Ying

[permalink] [raw]
Subject: Re: [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

On Mon, 2009-09-28 at 16:59 +0800, Hidetoshi Seto wrote:
[snip]
> >>>> I believe what you want to do here is "make mce_rdmsrl()/mce_wrmsrl()
> >>>> to refer faked data only during faked handler call."
> >>>> Then what we have to do is making a flag to indicate that "now
> >>>> in faked handler call," for an example:
> >>>>
> >>>> 309 if (__get_cpu_var(mce_fake_in_progress)) {
> >>>>
> >>>> and:
> >>>> local_irq_save(flags);
> >>>> __get_cpu_var(mce_fake_in_progress) = 1;
> >>>> machine_check_poll(0, &b);
> >>>> __get_cpu_var(mce_fake_in_progress) = 0;
> >>>> local_irq_restore(flags);
> >>> I don't think this method is better than the original one. They are just
> >>> equivalent.
> >> No, you changed usage of .finished, and transfer the functionality of the
> >> flag to newly introduced MCJ_LOADED.
> >> We can keep .finished as is, and introduce one new flag for this.
> >
> > You just use .finished as MCJ_LOADED and mce_fake_in_progress
> > as .finished.
>
> I just recommends you to keep .finished as a flag indicates "loading finished."

.finished can be "injecting finished, ready to be consumed". So I don't
think there is much difference.

> > Use a per-CPU variable mce_fake_in_progress make it hard to add support
> > to inject multiple MCEs in one CPU.
>
> Why? I think it can with such per-cpu flag.

Oh. It works too.

Best Regards,
Huang Ying

2009-09-28 18:24:18

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce

Hidetoshi Seto wrote:
> The straightforward name is easier to understand.

struct mce is exported to user space. I don't think it's a good idea
to rename fields in exported structures like this. That just causes confusion.

-Andi

2009-09-28 18:50:40

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 3/5] mce-inject: make injected mce valid only during faked handler call

Hidetoshi Seto wrote:

> This has an undesired side-effect: it is possible for it to be
> consumed by real machine_check_poll. This may confuse a real system
> error and may confuse the mce test suite.

You can just disable the poll handler in the test suite. That's
what we did.

> To fix this, this patch changes mce_rdmsrl/wemsrl() to refer injected
> data only when injectm.valid states 3rd state "2", which indicates that
> the injected MCE entry is valid and ready for the handler.
>
> The injectm.valid becomes "2" only during faked MCE handler call
> and protected by IRQ disabling. This make it impossible for real
> machine_check_poll to consume it.

Using magic numbers for this seems very hackish.

-Andi

2009-09-29 08:41:23

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce

Andi Kleen wrote:
> struct mce is exported to user space. I don't think it's a good idea
> to rename fields in exported structures like this. That just causes confusion.

I don't know any other real user of this struct than the mcelog and the
mce test suite. In other words, I expected that you (Huang and Andi) are
only users who may be confused by this kind of change.

I believe this exported struct is not well defined.
At least I could not figure out why we need to pass the value of .finished
from/to user space.

However, this change is not urgent thing. I'll put aside this for now.


Thanks,
H.Seto

2009-09-29 08:43:25

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 3/5] mce-inject: make injected mce valid only during faked handler call

Andi Kleen wrote:
>> This has an undesired side-effect: it is possible for it to be
>> consumed by real machine_check_poll. This may confuse a real system
>> error and may confuse the mce test suite.
>
> You can just disable the poll handler in the test suite. That's
> what we did.

I'd like to fix this simply, rather than providing documentation like
"you should turn off polling before you use the mce-inject."

>> The injectm.valid becomes "2" only during faked MCE handler call
>> and protected by IRQ disabling. This make it impossible for real
>> machine_check_poll to consume it.
>
> Using magic numbers for this seems very hackish.

Right. I'll take other approach in next time.


Thanks,
H.Seto

2009-09-29 20:45:26

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 3/5] mce-inject: make injected mce valid only during faked handler call

Hidetoshi Seto wrote:
> Andi Kleen wrote:
>>> This has an undesired side-effect: it is possible for it to be
>>> consumed by real machine_check_poll. This may confuse a real system
>>> error and may confuse the mce test suite.
>> You can just disable the poll handler in the test suite. That's
>> what we did.
>
> I'd like to fix this simply, rather than providing documentation like
> "you should turn off polling before you use the mce-inject."

We already fixed the documentation to include that. mce-inject is not expected
to be used by unskilled users, so relying in documentation is ok.

-Andi

2009-09-29 23:02:48

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce



> At least I could not figure out why we need to pass the value of .finished
> from/to user space.

You shouldn't afaik. The injector overwrites it in the kernel anyways.

-Andi

2009-09-29 23:04:08

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce II

Hidetoshi Seto wrote:
> Andi Kleen wrote:
>> struct mce is exported to user space. I don't think it's a good idea
>> to rename fields in exported structures like this. That just causes confusion.
>
> I don't know any other real user of this struct than the mcelog and the
> mce test suite. In other words, I expected that you (Huang and Andi) are
> only users who may be confused by this kind of change.

Also I should add there are other consumers of /dev/mcelog, not just
mcelog/mce-inject.

-Andi

2009-09-29 23:20:57

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce II

On 09/29/2009 04:04 PM, Andi Kleen wrote:
> Hidetoshi Seto wrote:
>> Andi Kleen wrote:
>>> struct mce is exported to user space. I don't think it's a good idea
>>> to rename fields in exported structures like this. That just causes confusion.
>>
>> I don't know any other real user of this struct than the mcelog and the
>> mce test suite. In other words, I expected that you (Huang and Andi) are
>> only users who may be confused by this kind of change.
>
> Also I should add there are other consumers of /dev/mcelog, not just
> mcelog/mce-inject.
>

Details?

-hpa

2009-09-30 04:39:49

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce II

H. Peter Anvin wrote:
> On 09/29/2009 04:04 PM, Andi Kleen wrote:
>> Hidetoshi Seto wrote:
>>> Andi Kleen wrote:
>>>> struct mce is exported to user space. I don't think it's a good idea
>>>> to rename fields in exported structures like this. That just causes confusion.
>>> I don't know any other real user of this struct than the mcelog and the
>>> mce test suite. In other words, I expected that you (Huang and Andi) are
>>> only users who may be confused by this kind of change.
>> Also I should add there are other consumers of /dev/mcelog, not just
>> mcelog/mce-inject.
>>
>
> Details?

e.g. mced

-Andi

2009-10-01 11:08:56

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/5] x86, mce: rename finished to valid in struct mce II


* Andi Kleen <[email protected]> wrote:

> H. Peter Anvin wrote:
>> On 09/29/2009 04:04 PM, Andi Kleen wrote:
>>> Hidetoshi Seto wrote:
>>>> Andi Kleen wrote:
>>>>> struct mce is exported to user space. I don't think it's a good idea
>>>>> to rename fields in exported structures like this. That just causes confusion.
>>>> I don't know any other real user of this struct than the mcelog and the
>>>> mce test suite. In other words, I expected that you (Huang and Andi) are
>>>> only users who may be confused by this kind of change.
>>> Also I should add there are other consumers of /dev/mcelog, not just
>>> mcelog/mce-inject.
>>>
>>
>> Details?
>
> e.g. mced

Am i the only one who finds Andi's reply and discussion methods in such
matters utterly unproductive and intentionally obstructive?

Firstly, the initial sentence of:

" Also I should add there are other consumers of /dev/mcelog, not just
mcelog/mce-inject. "

Was IMO designed to be as unhelpful as possible, while still making his
point. It would have been _so_ easy for Andi to be a bit more helpful to
add this little bit of information to the sentence:

"..., such as 'mced'."

A communication style like that sucks on lkml, a lot. It's vasteful
(costs time) and is deceptive as well and holds up the technical
discussion.

The "which are those?" basic question was to be expected, especially
given that mced is a very, very obscure project barely findable via
googling around. It's not packaged up and not available in rpmfind.net's
vast package repo. So Andi cannot credibly claim a "oh, I thought that's
obvious" defense.

Nor is the second (again maximally minimal) reply from Andi helpful:

"e.g. mced"

... which still implies that "there might be others". But it does not
tell us what _Andi_ thinks about that. A fair, honest, helpful reply
would have been to write the truth straight away, via something like:

" Also I should add there are other consumers of /dev/mcelog, not just
mcelog/mce-inject. There's a tiny, still-obscure project called
'mced' - and there might be others although i dont know of any.
Granted, for our purposes, mcelog/mce-inject are the main ones to
care about. "

And look how such an honest and helpful reply would convey the right
kind of technical message, instead of this multiple (and avoidable)
ping-pong trying to spoon-feed information which still conveys a
deceptive technical message? And that's all just because Andi apparently
disagrees with the direction of the discussion?

This lack of basic honestly and helpfulnesss is one of the reasons why i
have to ignore most of Andi's mails btw. Such exchanges show the attempt
of trying to treat information as a weapon - giving it out
conservatively while taking it in liberally and hoarding it. That's very
much against basic FOSS principles. It is also very hard for me to trust
a person who is willing to hurt (stand in the way of) others with such
an ease of mind.

Ingo

2009-10-05 02:53:14

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 0/6] x86, mce, mce-inject: misc fix v2

Here is v2 patch set for small improvements, inspired by Huang's patch.

* Short summaries v1 -> v2:
dropped:
x86, mce: rename finished to valid in struct mce
added:
x86, mce: replace MCM_ to MCI_MISC_
mce-inject: add a barrier to raise_mce()
modified:
mce-inject: use injected mce only during faked handler call

Based on 2.6.32-rc3.

Thanks,
H.Seto

Hidetoshi Seto (6):
x86, mce: replace MCJ_ to MCE_INJ_
x86, mce: replace MCM_ to MCI_MISC_
mce-inject: no wait on write with MCE_INJ_CTX_RANDOM
mce-inject: allow injecting status=0 to poll handler
mce-inject: add a barrier to raise_mce()
mce-inject: use injected mce only during faked handler call

arch/x86/include/asm/mce.h | 46 +++++++++++++------
arch/x86/kernel/cpu/mcheck/mce-inject.c | 72 +++++++++++++++++++------------
arch/x86/kernel/cpu/mcheck/mce.c | 20 +++++----
3 files changed, 86 insertions(+), 52 deletions(-)

2009-10-05 03:06:21

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 1/6] x86, mce: replace MCJ_ to MCE_INJ_

MCE_INJ_ is better than MCJ_ standing for "MCe inJect."

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 15 ++++++++-------
arch/x86/kernel/cpu/mcheck/mce-inject.c | 20 ++++++++++----------
2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64..a937e90 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -38,13 +38,14 @@
#define MCM_ADDR_MEM 3 /* memory address */
#define MCM_ADDR_GENERIC 7 /* generic */

-#define MCJ_CTX_MASK 3
-#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
-#define MCJ_CTX_RANDOM 0 /* inject context: random */
-#define MCJ_CTX_PROCESS 1 /* inject context: process */
-#define MCJ_CTX_IRQ 2 /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST 4 /* do NMI broadcasting */
-#define MCJ_EXCEPTION 8 /* raise as exception */
+/* inject_flags defines */
+#define MCE_INJ_CTX_MASK 0x03
+#define MCE_INJ_CTX_RANDOM 0x00 /* inject context: random */
+#define MCE_INJ_CTX_PROCESS 0x01 /* inject context: process */
+#define MCE_INJ_CTX_IRQ 0x02 /* inject context: IRQ */
+#define MCE_INJ_CTX(flags) ((flags) & MCE_INJ_CTX_MASK)
+#define MCE_INJ_NMI_BROADCAST (1 << 2) /* do NMI broadcasting */
+#define MCE_INJ_EXCEPTION (1 << 3) /* raise as exception */

/* Fields are zero when not available */
struct mce {
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 472763d..2c1fc5a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -85,7 +85,7 @@ static int mce_raise_notify(struct notifier_block *self,
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);
- if (m->inject_flags & MCJ_EXCEPTION)
+ if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
else if (m->status)
raise_poll(m);
@@ -101,21 +101,21 @@ static struct notifier_block mce_raise_nb = {
static int raise_local(void)
{
struct mce *m = &__get_cpu_var(injectm);
- int context = MCJ_CTX(m->inject_flags);
+ int context = MCE_INJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;

- if (m->inject_flags & MCJ_EXCEPTION) {
+ if (m->inject_flags & MCE_INJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
- case MCJ_CTX_IRQ:
+ case MCE_INJ_CTX_IRQ:
/*
* Could do more to fake interrupts like
* calling irq_enter, but the necessary
* machinery isn't exported currently.
*/
/*FALL THROUGH*/
- case MCJ_CTX_PROCESS:
+ case MCE_INJ_CTX_PROCESS:
raise_exception(m, NULL);
break;
default:
@@ -136,15 +136,15 @@ static int raise_local(void)

static void raise_mce(struct mce *m)
{
- int context = MCJ_CTX(m->inject_flags);
+ int context = MCE_INJ_CTX(m->inject_flags);

inject_mce(m);

- if (context == MCJ_CTX_RANDOM)
+ if (context == MCE_INJ_CTX_RANDOM)
return;

#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & MCJ_NMI_BROADCAST) {
+ if (m->inject_flags & MCE_INJ_NMI_BROADCAST) {
unsigned long start;
int cpu;
get_online_cpus();
@@ -152,8 +152,8 @@ static void raise_mce(struct mce *m)
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
struct mce *mcpu = &per_cpu(injectm, cpu);
- if (!mcpu->finished ||
- MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
+ if (!mcpu->finished || MCE_INJ_CTX(mcpu->inject_flags)
+ != MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
if (!cpus_empty(mce_inject_cpumask))
--
1.6.4.3

2009-10-05 03:07:35

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 2/6] x86, mce: replace MCM_ to MCI_MISC_

Plus define MCI_MISC_ADDR_LSB() and MCI_MISC_ADDR_MODE().

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 17 +++++++++++------
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a937e90..d051abd 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -8,6 +8,7 @@
* Machine Check support for x86
*/

+/* MCG_CAP register defines */
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
@@ -17,10 +18,12 @@
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */

+/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */

+/* MCi_STATUS register defines */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
@@ -31,12 +34,14 @@
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */

-/* MISC register defines */
-#define MCM_ADDR_SEGOFF 0 /* segment offset */
-#define MCM_ADDR_LINEAR 1 /* linear address */
-#define MCM_ADDR_PHYS 2 /* physical address */
-#define MCM_ADDR_MEM 3 /* memory address */
-#define MCM_ADDR_GENERIC 7 /* generic */
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
+#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
+#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
+#define MCI_MISC_ADDR_PHYS 2 /* physical address */
+#define MCI_MISC_ADDR_MEM 3 /* memory address */
+#define MCI_MISC_ADDR_GENERIC 7 /* generic */

/* inject_flags defines */
#define MCE_INJ_CTX_MASK 0x03
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 183c345..413aba8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -843,9 +843,9 @@ static int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
return 0;
- if ((m->misc & 0x3f) > PAGE_SHIFT)
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
return 0;
- if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
return 0;
return 1;
}
--
1.6.4.3

2009-10-05 03:08:15

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 3/6] mce-inject: no wait on write with MCE_INJ_CTX_RANDOM

Skip schedule_timeout() on write with MCE_INJ_CTX_RANDOM.
And call raise_mce() only when it is required.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 23 +++++++++++------------
1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 2c1fc5a..e9379b6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -136,12 +136,11 @@ static int raise_local(void)

static void raise_mce(struct mce *m)
{
- int context = MCE_INJ_CTX(m->inject_flags);
-
- inject_mce(m);
-
- if (context == MCE_INJ_CTX_RANDOM)
- return;
+ /*
+ * Need to give user space some time to set everything up,
+ * so do it a jiffie or two later everywhere.
+ */
+ schedule_timeout(2);

#ifdef CONFIG_X86_LOCAL_APIC
if (m->inject_flags & MCE_INJ_NMI_BROADCAST) {
@@ -199,12 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
return -EINVAL;

- /*
- * Need to give user space some time to set everything up,
- * so do it a jiffie or two later everywhere.
- */
- schedule_timeout(2);
- raise_mce(&m);
+ /* Copy to percpu */
+ inject_mce(&m);
+
+ if (MCE_INJ_CTX(m.inject_flags) != MCE_INJ_CTX_RANDOM)
+ raise_mce(&m);
+
return usize;
}

--
1.6.4.3

2009-10-05 03:08:50

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 4/6] mce-inject: allow injecting status=0 to poll handler

We can inject void data to exception handler by writing data with
status=0 and flag=MCE_INJ_EXCEPTION. But without this patch we cannot
do same to poll handler by status=0 and flag=!MCE_INJ_EXCEPTION.

This change will allow us to test behavior of poll handler with no
data.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e9379b6..105e527 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -87,7 +87,7 @@ static int mce_raise_notify(struct notifier_block *self,
cpu_clear(cpu, mce_inject_cpumask);
if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
- else if (m->status)
+ else
raise_poll(m);
return NOTIFY_STOP;
}
@@ -123,13 +123,12 @@ static int raise_local(void)
ret = -EINVAL;
}
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
- } else if (m->status) {
+ } else {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
- } else
- m->finished = 0;
+ }

return ret;
}
--
1.6.4.3

2009-10-05 03:09:36

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 5/6] mce-inject: add a barrier to raise_mce()

The prepared mce_inject_cpumask needs to be referred soon in NMI
on other CPUs.

Reported-by: Huang Ying <[email protected]>
Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 105e527..4fb5b78 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -154,6 +154,9 @@ static void raise_mce(struct mce *m)
!= MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
+ /* make sure mce_inject_cpumask is visible on other CPUs */
+ smp_mb();
+
if (!cpus_empty(mce_inject_cpumask))
apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
start = jiffies;
--
1.6.4.3

2009-10-05 03:11:05

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

In the current implementation, injected MCE is valid from the point
the MCE is injected to the point the MCE is processed by the faked
handler call.

This has an undesired side-effect: it is possible for it to be
consumed by real machine_check_poll. This may confuse a real system
error and may confuse the mce test suite.

To fix this, this patch introduces struct mce_fake_banks to hold
injected data and a flag which indicates that the injected data is
ready for the handler.

The mce_fake_banks.valid becomes 1 only during faked MCE handler call
and protected by IRQ disabling. This make it impossible for real
machine_check_poll to consume it.

(I suppose that in the near future the mce_fake_banks will be patched
to support injecting multiple errors on a cpu.)

Reported-by: Huang Ying <[email protected]>
Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 12 +++++++++++-
arch/x86/kernel/cpu/mcheck/mce-inject.c | 25 ++++++++++++++++++++-----
arch/x86/kernel/cpu/mcheck/mce.c | 16 +++++++++-------
3 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index d051abd..2f1c0ef 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -192,7 +192,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
int mce_notify_irq(void);
void mce_notify_process(void);

-DECLARE_PER_CPU(struct mce, injectm);
extern struct file_operations mce_chrdev_ops;

/*
@@ -218,5 +217,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c);

void mce_log_therm_throt_event(__u64 status);

+/*
+ * For error injection
+ */
+
+struct mce_fake_banks {
+ int valid;
+ struct mce injectm;
+};
+
+DECLARE_PER_CPU(struct mce_fake_banks, mce_fake_banks);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4fb5b78..a481291 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -28,7 +28,7 @@
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
- struct mce *i = &per_cpu(injectm, m->extcpu);
+ struct mce *i = &per_cpu(mce_fake_banks, m->extcpu).injectm;

/* Make sure noone reads partially written injectm */
i->finished = 0;
@@ -50,8 +50,11 @@ static void raise_poll(struct mce *m)
mce_banks_t b;

memset(&b, 0xff, sizeof(mce_banks_t));
+
local_irq_save(flags);
+ __get_cpu_var(mce_fake_banks).valid = 1;
machine_check_poll(0, &b);
+ __get_cpu_var(mce_fake_banks).valid = 0;
local_irq_restore(flags);
m->finished = 0;
}
@@ -67,9 +70,12 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
regs.cs = m->cs;
pregs = &regs;
}
+
/* in mcheck exeception handler, irq will be disabled */
local_irq_save(flags);
+ __get_cpu_var(mce_fake_banks).valid = 1;
do_machine_check(pregs, 0);
+ __get_cpu_var(mce_fake_banks).valid = 0;
local_irq_restore(flags);
m->finished = 0;
}
@@ -81,14 +87,20 @@ static int mce_raise_notify(struct notifier_block *self,
{
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
- struct mce *m = &__get_cpu_var(injectm);
+ struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
+
if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);
+
+ if (!m->finished)
+ return NOTIFY_STOP;
+
if (m->inject_flags & MCE_INJ_EXCEPTION)
raise_exception(m, args->regs);
else
raise_poll(m);
+
return NOTIFY_STOP;
}

@@ -100,11 +112,14 @@ static struct notifier_block mce_raise_nb = {
/* Inject mce on current CPU */
static int raise_local(void)
{
- struct mce *m = &__get_cpu_var(injectm);
+ struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
int context = MCE_INJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;

+ if (!m->finished)
+ return 0;
+
if (m->inject_flags & MCE_INJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
switch (context) {
@@ -149,8 +164,8 @@ static void raise_mce(struct mce *m)
mce_inject_cpumask = cpu_online_map;
cpu_clear(get_cpu(), mce_inject_cpumask);
for_each_online_cpu(cpu) {
- struct mce *mcpu = &per_cpu(injectm, cpu);
- if (!mcpu->finished || MCE_INJ_CTX(mcpu->inject_flags)
+ struct mce *m = &per_cpu(mce_fake_banks, cpu).injectm;
+ if (!m->finished || MCE_INJ_CTX(m->inject_flags)
!= MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 413aba8..a6d5d4a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -110,8 +110,8 @@ void mce_setup(struct mce *m)
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
}

-DEFINE_PER_CPU(struct mce, injectm);
-EXPORT_PER_CPU_SYMBOL_GPL(injectm);
+DEFINE_PER_CPU(struct mce_fake_banks, mce_fake_banks);
+EXPORT_PER_CPU_SYMBOL_GPL(mce_fake_banks);

/*
* Lockless MCE logging infrastructure.
@@ -284,7 +284,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)

static int msr_to_offset(u32 msr)
{
- unsigned bank = __get_cpu_var(injectm.bank);
+ unsigned bank = __get_cpu_var(mce_fake_banks).injectm.bank;
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -303,12 +303,13 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;

- if (__get_cpu_var(injectm).finished) {
+ if (__get_cpu_var(mce_fake_banks).valid) {
int offset = msr_to_offset(msr);
+ char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;

if (offset < 0)
return 0;
- return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+ return *(u64 *)(m + offset);
}

if (rdmsrl_safe(msr, &v)) {
@@ -326,11 +327,12 @@ static u64 mce_rdmsrl(u32 msr)

static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).finished) {
+ if (__get_cpu_var(mce_fake_banks).valid) {
int offset = msr_to_offset(msr);
+ char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;

if (offset >= 0)
- *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
+ *(u64 *)(m + offset) = v;
return;
}
wrmsrl(msr, v);
--
1.6.4.3

2009-10-09 01:55:39

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

On Mon, 2009-10-05 at 11:10 +0800, Hidetoshi Seto wrote:
> +/*
> + * For error injection
> + */
> +
> +struct mce_fake_banks {
> + int valid;
> + struct mce injectm;
> +};

There are some bits available in mce.inject_flags. I don't think it is
necessary to add another variable just for another flag.

Best Regards,
Huang Ying

2009-10-09 05:39:11

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

Huang Ying wrote:
> On Mon, 2009-10-05 at 11:10 +0800, Hidetoshi Seto wrote:
>> +/*
>> + * For error injection
>> + */
>> +
>> +struct mce_fake_banks {
>> + int valid;
>> + struct mce injectm;
>> +};
>
> There are some bits available in mce.inject_flags. I don't think it is
> necessary to add another variable just for another flag.

I disagreed. I think it is necessary.

If we try to support injecting multiple event to a cpu, we should have a flag
like this sooner or later. Or we have to manage injectm[].inject_flags.

And I think the inject_flags should not have bits for kernel internal use.
All bits in inject_flags should be used by tools to tell attributes of data
injected with the flag.

As a sample I made an additional patch set to support injecting multiple event
to a CPU. These change doesn't break current functionality.
Some improve will be required on mce-inject tool to use this feature.

Check my patches and comment on it.


Thanks,
H.Seto

2009-10-09 05:44:50

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 1/4] mce-inject: make raise_global()

Move large block into a function. No code changed.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-inject.c | 73 ++++++++++++++++++-------------
1 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a481291..835c072 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -148,6 +148,45 @@ static int raise_local(void)
return ret;
}

+#ifdef CONFIG_X86_LOCAL_APIC
+static void raise_global(void)
+{
+ unsigned long start;
+ int cpu;
+
+ get_online_cpus();
+ mce_inject_cpumask = cpu_online_map;
+ cpu_clear(get_cpu(), mce_inject_cpumask);
+
+ for_each_online_cpu(cpu) {
+ struct mce *m = &per_cpu(mce_fake_banks, cpu).injectm;
+ if (!m->finished || MCE_INJ_CTX(m->inject_flags)
+ != MCE_INJ_CTX_RANDOM)
+ cpu_clear(cpu, mce_inject_cpumask);
+ }
+ /* make sure mce_inject_cpumask is visible on other CPUs */
+ smp_mb();
+
+ if (!cpus_empty(mce_inject_cpumask))
+ apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
+
+ start = jiffies;
+ while (!cpus_empty(mce_inject_cpumask)) {
+ if (!time_before(jiffies, start + 2*HZ)) {
+ printk(KERN_ERR
+ "Timeout waiting for mce inject NMI %lx\n",
+ *cpus_addr(mce_inject_cpumask));
+ break;
+ }
+ cpu_relax();
+ }
+ raise_local();
+
+ put_cpu();
+ put_online_cpus();
+}
+#endif
+
static void raise_mce(struct mce *m)
{
/*
@@ -157,37 +196,9 @@ static void raise_mce(struct mce *m)
schedule_timeout(2);

#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & MCE_INJ_NMI_BROADCAST) {
- unsigned long start;
- int cpu;
- get_online_cpus();
- mce_inject_cpumask = cpu_online_map;
- cpu_clear(get_cpu(), mce_inject_cpumask);
- for_each_online_cpu(cpu) {
- struct mce *m = &per_cpu(mce_fake_banks, cpu).injectm;
- if (!m->finished || MCE_INJ_CTX(m->inject_flags)
- != MCE_INJ_CTX_RANDOM)
- cpu_clear(cpu, mce_inject_cpumask);
- }
- /* make sure mce_inject_cpumask is visible on other CPUs */
- smp_mb();
-
- if (!cpus_empty(mce_inject_cpumask))
- apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
- start = jiffies;
- while (!cpus_empty(mce_inject_cpumask)) {
- if (!time_before(jiffies, start + 2*HZ)) {
- printk(KERN_ERR
- "Timeout waiting for mce inject NMI %lx\n",
- *cpus_addr(mce_inject_cpumask));
- break;
- }
- cpu_relax();
- }
- raise_local();
- put_cpu();
- put_online_cpus();
- } else
+ if (m->inject_flags & MCE_INJ_NMI_BROADCAST)
+ raise_global();
+ else
#endif
raise_local();
}
--
1.6.2.2

2009-10-09 05:45:51

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 2/4] mce-inject: use individual members instead of struct mce

struct mce have many members and almost half of them are not used
for mce injection. So make struct mce_fake_banks to have required
members instead of having struct mce in it.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 15 +++++-
arch/x86/kernel/cpu/mcheck/mce-inject.c | 79 ++++++++++++++++--------------
arch/x86/kernel/cpu/mcheck/mce.c | 16 +++---
3 files changed, 63 insertions(+), 47 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4b5ef3c..0668044 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -235,8 +235,19 @@ void mce_log_therm_throt_event(__u64 status);
*/

struct mce_fake_banks {
- int valid;
- struct mce injectm;
+ int valid:1; /* 1 if injected data is ready for consume */
+ int loaded:1; /* 1 if injected data is available */
+
+ __u64 mcgstatus;
+ __u64 ip;
+ __u8 cs;
+
+ __u8 inject_flags; /* software inject flags */
+
+ __u8 bank;
+ __u64 status;
+ __u64 misc;
+ __u64 addr;
};

DECLARE_PER_CPU(struct mce_fake_banks, mce_fake_banks);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 835c072..6275318 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -28,23 +28,27 @@
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
- struct mce *i = &per_cpu(mce_fake_banks, m->extcpu).injectm;
+ struct mce_fake_banks *banks = &per_cpu(mce_fake_banks, m->extcpu);
+
+ banks->mcgstatus = m->mcgstatus;
+ banks->ip = m->ip;
+ banks->cs = m->cs;
+ banks->inject_flags = m->inject_flags;
+ banks->bank = m->bank;
+ banks->status = m->status;
+ banks->addr = m->addr;
+ banks->misc = m->misc;

- /* Make sure noone reads partially written injectm */
- i->finished = 0;
- mb();
- m->finished = 0;
- /* First set the fields after finished */
- i->extcpu = m->extcpu;
- mb();
- /* Now write record in order, finished last (except above) */
- memcpy(i, m, sizeof(struct mce));
- /* Finally activate it */
mb();
- i->finished = 1;
+ banks->loaded = 1;
}

-static void raise_poll(struct mce *m)
+static void clean_injected(struct mce_fake_banks *banks)
+{
+ banks->loaded = 0;
+}
+
+static void raise_poll(struct mce_fake_banks *banks)
{
unsigned long flags;
mce_banks_t b;
@@ -52,32 +56,34 @@ static void raise_poll(struct mce *m)
memset(&b, 0xff, sizeof(mce_banks_t));

local_irq_save(flags);
- __get_cpu_var(mce_fake_banks).valid = 1;
+ banks->valid = 1;
machine_check_poll(0, &b);
- __get_cpu_var(mce_fake_banks).valid = 0;
+ banks->valid = 0;
local_irq_restore(flags);
- m->finished = 0;
+
+ clean_injected(banks);
}

-static void raise_exception(struct mce *m, struct pt_regs *pregs)
+static void raise_exception(struct mce_fake_banks *banks, struct pt_regs *pregs)
{
struct pt_regs regs;
unsigned long flags;

if (!pregs) {
memset(&regs, 0, sizeof(struct pt_regs));
- regs.ip = m->ip;
- regs.cs = m->cs;
+ regs.ip = banks->ip;
+ regs.cs = banks->cs;
pregs = &regs;
}

/* in mcheck exeception handler, irq will be disabled */
local_irq_save(flags);
- __get_cpu_var(mce_fake_banks).valid = 1;
+ banks->valid = 1;
do_machine_check(pregs, 0);
- __get_cpu_var(mce_fake_banks).valid = 0;
+ banks->valid = 0;
local_irq_restore(flags);
- m->finished = 0;
+
+ clean_injected(banks);
}

static cpumask_t mce_inject_cpumask;
@@ -87,19 +93,19 @@ static int mce_raise_notify(struct notifier_block *self,
{
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
- struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
+ struct mce_fake_banks *banks = &__get_cpu_var(mce_fake_banks);

if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpu_clear(cpu, mce_inject_cpumask);

- if (!m->finished)
+ if (!banks->loaded)
return NOTIFY_STOP;

- if (m->inject_flags & MCE_INJ_EXCEPTION)
- raise_exception(m, args->regs);
+ if (banks->inject_flags & MCE_INJ_EXCEPTION)
+ raise_exception(banks, args->regs);
else
- raise_poll(m);
+ raise_poll(banks);

return NOTIFY_STOP;
}
@@ -112,17 +118,16 @@ static struct notifier_block mce_raise_nb = {
/* Inject mce on current CPU */
static int raise_local(void)
{
- struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
- int context = MCE_INJ_CTX(m->inject_flags);
+ struct mce_fake_banks *banks = &__get_cpu_var(mce_fake_banks);
+ int cpu = smp_processor_id();
int ret = 0;
- int cpu = m->extcpu;

- if (!m->finished)
+ if (!banks->loaded)
return 0;

- if (m->inject_flags & MCE_INJ_EXCEPTION) {
+ if (banks->inject_flags & MCE_INJ_EXCEPTION) {
printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
- switch (context) {
+ switch (MCE_INJ_CTX(banks->inject_flags)) {
case MCE_INJ_CTX_IRQ:
/*
* Could do more to fake interrupts like
@@ -131,7 +136,7 @@ static int raise_local(void)
*/
/*FALL THROUGH*/
case MCE_INJ_CTX_PROCESS:
- raise_exception(m, NULL);
+ raise_exception(banks, NULL);
break;
default:
printk(KERN_INFO "Invalid MCE context\n");
@@ -140,7 +145,7 @@ static int raise_local(void)
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
} else {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
- raise_poll(m);
+ raise_poll(banks);
mce_notify_irq();
printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
}
@@ -159,8 +164,8 @@ static void raise_global(void)
cpu_clear(get_cpu(), mce_inject_cpumask);

for_each_online_cpu(cpu) {
- struct mce *m = &per_cpu(mce_fake_banks, cpu).injectm;
- if (!m->finished || MCE_INJ_CTX(m->inject_flags)
+ struct mce_fake_banks *banks = &per_cpu(mce_fake_banks, cpu);
+ if (!banks->loaded || MCE_INJ_CTX(banks->inject_flags)
!= MCE_INJ_CTX_RANDOM)
cpu_clear(cpu, mce_inject_cpumask);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fad3daa..5a6f17d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -303,17 +303,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)

static int msr_to_offset(u32 msr)
{
- unsigned bank = __get_cpu_var(mce_fake_banks).injectm.bank;
+ unsigned bank = __get_cpu_var(mce_fake_banks).bank;
if (msr == rip_msr)
- return offsetof(struct mce, ip);
+ return offsetof(struct mce_fake_banks, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
- return offsetof(struct mce, status);
+ return offsetof(struct mce_fake_banks, status);
if (msr == MSR_IA32_MCx_ADDR(bank))
- return offsetof(struct mce, addr);
+ return offsetof(struct mce_fake_banks, addr);
if (msr == MSR_IA32_MCx_MISC(bank))
- return offsetof(struct mce, misc);
+ return offsetof(struct mce_fake_banks, misc);
if (msr == MSR_IA32_MCG_STATUS)
- return offsetof(struct mce, mcgstatus);
+ return offsetof(struct mce_fake_banks, mcgstatus);
return -1;
}

@@ -324,7 +324,7 @@ static u64 mce_rdmsrl(u32 msr)

if (__get_cpu_var(mce_fake_banks).valid) {
int offset = msr_to_offset(msr);
- char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;
+ char *m = (char *)&__get_cpu_var(mce_fake_banks);

if (offset < 0)
return 0;
@@ -348,7 +348,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(mce_fake_banks).valid) {
int offset = msr_to_offset(msr);
- char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;
+ char *m = (char *)&__get_cpu_var(mce_fake_banks);

if (offset >= 0)
*(u64 *)(m + offset) = v;
--
1.6.2.2

2009-10-09 05:46:35

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 3/4] mce-inject: change msr_to_offset() to mce_get_fake_reg()

Introduce mce_get_fake_reg() which returns address of variable for
faked register.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 30 +++++++++++++++---------------
1 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a6f17d..edd2a82 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -301,20 +301,22 @@ static void mce_panic(char *msg, struct mce *final, char *exp)

/* Support code for software error injection */

-static int msr_to_offset(u32 msr)
+static u64 *mce_get_fake_reg(u32 msr)
{
unsigned bank = __get_cpu_var(mce_fake_banks).bank;
+
if (msr == rip_msr)
- return offsetof(struct mce_fake_banks, ip);
+ return &__get_cpu_var(mce_fake_banks).ip;
if (msr == MSR_IA32_MCx_STATUS(bank))
- return offsetof(struct mce_fake_banks, status);
+ return &__get_cpu_var(mce_fake_banks).status;
if (msr == MSR_IA32_MCx_ADDR(bank))
- return offsetof(struct mce_fake_banks, addr);
+ return &__get_cpu_var(mce_fake_banks).addr;
if (msr == MSR_IA32_MCx_MISC(bank))
- return offsetof(struct mce_fake_banks, misc);
+ return &__get_cpu_var(mce_fake_banks).misc;
if (msr == MSR_IA32_MCG_STATUS)
- return offsetof(struct mce_fake_banks, mcgstatus);
- return -1;
+ return &__get_cpu_var(mce_fake_banks).mcgstatus;
+
+ return NULL;
}

/* MSR access wrappers used for error injection */
@@ -323,12 +325,11 @@ static u64 mce_rdmsrl(u32 msr)
u64 v;

if (__get_cpu_var(mce_fake_banks).valid) {
- int offset = msr_to_offset(msr);
- char *m = (char *)&__get_cpu_var(mce_fake_banks);
+ u64 *reg = mce_get_fake_reg(msr);

- if (offset < 0)
+ if (!reg)
return 0;
- return *(u64 *)(m + offset);
+ return *reg;
}

if (rdmsrl_safe(msr, &v)) {
@@ -347,11 +348,10 @@ static u64 mce_rdmsrl(u32 msr)
static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(mce_fake_banks).valid) {
- int offset = msr_to_offset(msr);
- char *m = (char *)&__get_cpu_var(mce_fake_banks);
+ u64 *reg = mce_get_fake_reg(msr);

- if (offset >= 0)
- *(u64 *)(m + offset) = v;
+ if (reg)
+ *reg = v;
return;
}
wrmsrl(msr, v);
--
1.6.2.2

2009-10-09 05:47:36

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 4/4] mce-inject: support injecting multiple error to a CPU

There is only one register set in structure mce_fake_banks, so
we cannot inject more than one event to a CPU. For example, we
cannot emulate situation that bank 1 of CPU X has a error while
bank 3 of same CPU X has an another error.

This patch make mce_fake_banks to have a list of register set,
and allow us to test more various error combination.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 6 +++++
arch/x86/kernel/cpu/mcheck/mce-inject.c | 32 +++++++++++++++++++++++++++---
arch/x86/kernel/cpu/mcheck/mce.c | 22 ++++++++++++++------
3 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 0668044..8776ab1 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -244,6 +244,12 @@ struct mce_fake_banks {

__u8 inject_flags; /* software inject flags */

+ struct list_head list;
+};
+
+struct mce_fake_bank {
+ struct list_head list;
+ __u8 loaded;
__u8 bank;
__u64 status;
__u64 misc;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 6275318..943fc93 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -29,15 +29,30 @@
static void inject_mce(struct mce *m)
{
struct mce_fake_banks *banks = &per_cpu(mce_fake_banks, m->extcpu);
+ struct mce_fake_bank *b, *temp;

banks->mcgstatus = m->mcgstatus;
banks->ip = m->ip;
banks->cs = m->cs;
banks->inject_flags = m->inject_flags;
- banks->bank = m->bank;
- banks->status = m->status;
- banks->addr = m->addr;
- banks->misc = m->misc;
+
+ list_for_each_entry(temp, &banks->list, list) {
+ if (temp->bank == m->bank)
+ b = temp;
+ }
+ if (!b) {
+ b = kzalloc(sizeof(struct mce_fake_bank), GFP_KERNEL);
+ if (!b)
+ return; /* -ENOMEM */
+ INIT_LIST_HEAD(&b->list);
+ b->bank = m->bank;
+ list_add(&b->list, &banks->list);
+ }
+
+ b->status = m->status;
+ b->addr = m->addr;
+ b->misc = m->misc;
+ b->loaded = 1;

mb();
banks->loaded = 1;
@@ -45,6 +60,15 @@ static void inject_mce(struct mce *m)

static void clean_injected(struct mce_fake_banks *banks)
{
+ struct mce_fake_bank *b;
+
+ list_for_each_entry(b, &banks->list, list) {
+ /*
+ * Might be in NMI context, so avoid doing kfree here.
+ * Allocated fake bank will be reused in next injections.
+ */
+ b->loaded = 0;
+ }
banks->loaded = 0;
}

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index edd2a82..327c72d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -303,19 +303,26 @@ static void mce_panic(char *msg, struct mce *final, char *exp)

static u64 *mce_get_fake_reg(u32 msr)
{
- unsigned bank = __get_cpu_var(mce_fake_banks).bank;
+ struct mce_fake_bank *b;

if (msr == rip_msr)
return &__get_cpu_var(mce_fake_banks).ip;
- if (msr == MSR_IA32_MCx_STATUS(bank))
- return &__get_cpu_var(mce_fake_banks).status;
- if (msr == MSR_IA32_MCx_ADDR(bank))
- return &__get_cpu_var(mce_fake_banks).addr;
- if (msr == MSR_IA32_MCx_MISC(bank))
- return &__get_cpu_var(mce_fake_banks).misc;
if (msr == MSR_IA32_MCG_STATUS)
return &__get_cpu_var(mce_fake_banks).mcgstatus;

+ list_for_each_entry(b, &__get_cpu_var(mce_fake_banks).list, list) {
+ unsigned bank = b->bank;
+
+ if (!b->loaded)
+ continue;
+ if (msr == MSR_IA32_MCx_STATUS(bank))
+ return &b->status;
+ if (msr == MSR_IA32_MCx_ADDR(bank))
+ return &b->addr;
+ if (msr == MSR_IA32_MCx_MISC(bank))
+ return &b->misc;
+ }
+
return NULL;
}

@@ -1456,6 +1463,7 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
mce_cpu_features(c);
mce_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+ INIT_LIST_HEAD(&__get_cpu_var(mce_fake_banks).list);
}

/*
--
1.6.2.2

2009-10-09 06:51:19

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 2/4] mce-inject: use individual members instead of struct mce

On Fri, 2009-10-09 at 13:45 +0800, Hidetoshi Seto wrote:
> struct mce have many members and almost half of them are not used
> for mce injection. So make struct mce_fake_banks to have required
> members instead of having struct mce in it.

I don't think this is necessary. struct mce works fine.

Best Regards,
Huang Ying

2009-10-09 07:15:23

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

This is another example for you to use my idea, implement it in a
similar way and send it out as your own.

Best Regards,
Huang Ying

On Mon, 2009-10-05 at 11:10 +0800, Hidetoshi Seto wrote:
> In the current implementation, injected MCE is valid from the point
> the MCE is injected to the point the MCE is processed by the faked
> handler call.
>
> This has an undesired side-effect: it is possible for it to be
> consumed by real machine_check_poll. This may confuse a real system
> error and may confuse the mce test suite.
>
> To fix this, this patch introduces struct mce_fake_banks to hold
> injected data and a flag which indicates that the injected data is
> ready for the handler.
>
> The mce_fake_banks.valid becomes 1 only during faked MCE handler call
> and protected by IRQ disabling. This make it impossible for real
> machine_check_poll to consume it.
>
> (I suppose that in the near future the mce_fake_banks will be patched
> to support injecting multiple errors on a cpu.)
>
> Reported-by: Huang Ying <[email protected]>
> Signed-off-by: Hidetoshi Seto <[email protected]>
> ---
> arch/x86/include/asm/mce.h | 12 +++++++++++-
> arch/x86/kernel/cpu/mcheck/mce-inject.c | 25 ++++++++++++++++++++-----
> arch/x86/kernel/cpu/mcheck/mce.c | 16 +++++++++-------
> 3 files changed, 40 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index d051abd..2f1c0ef 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -192,7 +192,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
> int mce_notify_irq(void);
> void mce_notify_process(void);
>
> -DECLARE_PER_CPU(struct mce, injectm);
> extern struct file_operations mce_chrdev_ops;
>
> /*
> @@ -218,5 +217,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
>
> void mce_log_therm_throt_event(__u64 status);
>
> +/*
> + * For error injection
> + */
> +
> +struct mce_fake_banks {
> + int valid;
> + struct mce injectm;
> +};
> +
> +DECLARE_PER_CPU(struct mce_fake_banks, mce_fake_banks);
> +
> #endif /* __KERNEL__ */
> #endif /* _ASM_X86_MCE_H */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> index 4fb5b78..a481291 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> @@ -28,7 +28,7 @@
> /* Update fake mce registers on current CPU. */
> static void inject_mce(struct mce *m)
> {
> - struct mce *i = &per_cpu(injectm, m->extcpu);
> + struct mce *i = &per_cpu(mce_fake_banks, m->extcpu).injectm;
>
> /* Make sure noone reads partially written injectm */
> i->finished = 0;
> @@ -50,8 +50,11 @@ static void raise_poll(struct mce *m)
> mce_banks_t b;
>
> memset(&b, 0xff, sizeof(mce_banks_t));
> +
> local_irq_save(flags);
> + __get_cpu_var(mce_fake_banks).valid = 1;
> machine_check_poll(0, &b);
> + __get_cpu_var(mce_fake_banks).valid = 0;
> local_irq_restore(flags);
> m->finished = 0;
> }
> @@ -67,9 +70,12 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
> regs.cs = m->cs;
> pregs = &regs;
> }
> +
> /* in mcheck exeception handler, irq will be disabled */
> local_irq_save(flags);
> + __get_cpu_var(mce_fake_banks).valid = 1;
> do_machine_check(pregs, 0);
> + __get_cpu_var(mce_fake_banks).valid = 0;
> local_irq_restore(flags);
> m->finished = 0;
> }
> @@ -81,14 +87,20 @@ static int mce_raise_notify(struct notifier_block *self,
> {
> struct die_args *args = (struct die_args *)data;
> int cpu = smp_processor_id();
> - struct mce *m = &__get_cpu_var(injectm);
> + struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
> +
> if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
> return NOTIFY_DONE;
> cpu_clear(cpu, mce_inject_cpumask);
> +
> + if (!m->finished)
> + return NOTIFY_STOP;
> +
> if (m->inject_flags & MCE_INJ_EXCEPTION)
> raise_exception(m, args->regs);
> else
> raise_poll(m);
> +
> return NOTIFY_STOP;
> }
>
> @@ -100,11 +112,14 @@ static struct notifier_block mce_raise_nb = {
> /* Inject mce on current CPU */
> static int raise_local(void)
> {
> - struct mce *m = &__get_cpu_var(injectm);
> + struct mce *m = &__get_cpu_var(mce_fake_banks).injectm;
> int context = MCE_INJ_CTX(m->inject_flags);
> int ret = 0;
> int cpu = m->extcpu;
>
> + if (!m->finished)
> + return 0;
> +
> if (m->inject_flags & MCE_INJ_EXCEPTION) {
> printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
> switch (context) {
> @@ -149,8 +164,8 @@ static void raise_mce(struct mce *m)
> mce_inject_cpumask = cpu_online_map;
> cpu_clear(get_cpu(), mce_inject_cpumask);
> for_each_online_cpu(cpu) {
> - struct mce *mcpu = &per_cpu(injectm, cpu);
> - if (!mcpu->finished || MCE_INJ_CTX(mcpu->inject_flags)
> + struct mce *m = &per_cpu(mce_fake_banks, cpu).injectm;
> + if (!m->finished || MCE_INJ_CTX(m->inject_flags)
> != MCE_INJ_CTX_RANDOM)
> cpu_clear(cpu, mce_inject_cpumask);
> }
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 413aba8..a6d5d4a 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -110,8 +110,8 @@ void mce_setup(struct mce *m)
> rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
> }
>
> -DEFINE_PER_CPU(struct mce, injectm);
> -EXPORT_PER_CPU_SYMBOL_GPL(injectm);
> +DEFINE_PER_CPU(struct mce_fake_banks, mce_fake_banks);
> +EXPORT_PER_CPU_SYMBOL_GPL(mce_fake_banks);
>
> /*
> * Lockless MCE logging infrastructure.
> @@ -284,7 +284,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
>
> static int msr_to_offset(u32 msr)
> {
> - unsigned bank = __get_cpu_var(injectm.bank);
> + unsigned bank = __get_cpu_var(mce_fake_banks).injectm.bank;
> if (msr == rip_msr)
> return offsetof(struct mce, ip);
> if (msr == MSR_IA32_MCx_STATUS(bank))
> @@ -303,12 +303,13 @@ static u64 mce_rdmsrl(u32 msr)
> {
> u64 v;
>
> - if (__get_cpu_var(injectm).finished) {
> + if (__get_cpu_var(mce_fake_banks).valid) {
> int offset = msr_to_offset(msr);
> + char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;
>
> if (offset < 0)
> return 0;
> - return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
> + return *(u64 *)(m + offset);
> }
>
> if (rdmsrl_safe(msr, &v)) {
> @@ -326,11 +327,12 @@ static u64 mce_rdmsrl(u32 msr)
>
> static void mce_wrmsrl(u32 msr, u64 v)
> {
> - if (__get_cpu_var(injectm).finished) {
> + if (__get_cpu_var(mce_fake_banks).valid) {
> int offset = msr_to_offset(msr);
> + char *m = (char *)&__get_cpu_var(mce_fake_banks).injectm;
>
> if (offset >= 0)
> - *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
> + *(u64 *)(m + offset) = v;
> return;
> }
> wrmsrl(msr, v);

2009-10-09 07:18:57

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 2/4] mce-inject: use individual members instead of struct mce

Huang Ying wrote:
> On Fri, 2009-10-09 at 13:45 +0800, Hidetoshi Seto wrote:
>> struct mce have many members and almost half of them are not used
>> for mce injection. So make struct mce_fake_banks to have required
>> members instead of having struct mce in it.
>
> I don't think this is necessary. struct mce works fine.

Why do you think so?
I think having unused space in precious per-CPU is not good idea.

And this change is required to reduces the size of allocation unit
in later patch to support multiple error injection.


Thanks,
H.Seto

2009-10-09 07:28:22

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

Huang Ying wrote:
> This is another example for you to use my idea, implement it in a
> similar way and send it out as your own.

I used "Reported-by:" for this patch, having some intent to indicate
that this patch uses an idea different from the original one, and that
this patch aims at the problem certainly reported by you.

> On Mon, 2009-10-05 at 11:10 +0800, Hidetoshi Seto wrote:
>> Reported-by: Huang Ying <[email protected]>
>> Signed-off-by: Hidetoshi Seto <[email protected]>


Thanks,
H.Seto

2009-10-09 07:45:27

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

On Fri, 2009-10-09 at 15:27 +0800, Hidetoshi Seto wrote:
> Huang Ying wrote:
> > This is another example for you to use my idea, implement it in a
> > similar way and send it out as your own.
>
> I used "Reported-by:" for this patch, having some intent to indicate
> that this patch uses an idea different from the original one, and that
> this patch aims at the problem certainly reported by you.

The point of the idea is to use two flags instead of one flag, not the
name of the flag or they are inside/outside of struct mce.

Best Regards,
Huang Ying

2009-10-09 08:32:13

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

Huang Ying wrote:
> On Fri, 2009-10-09 at 15:27 +0800, Hidetoshi Seto wrote:
>> Huang Ying wrote:
>>> This is another example for you to use my idea, implement it in a
>>> similar way and send it out as your own.
>> I used "Reported-by:" for this patch, having some intent to indicate
>> that this patch uses an idea different from the original one, and that
>> this patch aims at the problem certainly reported by you.
>
> The point of the idea is to use two flags instead of one flag, not the
> name of the flag or they are inside/outside of struct mce.

Humm, I could be wrong and could misread your comment...

The point of my idea is to use two separated flags, "inject_flags" for
inject tool and "valid" for kernel, instead of one flag as you proposed,
"inject_flags" shared by both.
And I think the flag for kernel should be outside of struct mce.

Once I complained about the name of flag ".finished", because you were
trying to add "LOADED" flag. I thought that "if .finished but !LOADED, it
means the loading data to struct is finished but not loaded.. ???what???"

So at first I tried to add 3rd state to ".finished", but soon I agreed
that it is not good idea.

After that I started to think about having two separated flags, and the
result is this [6/6] patch in -v2.

In short, I believe that my idea is "use two flags" and your idea is
"share one flag" ... right?


The only thing what I want to do here is merge your "fix" into upstream.


Thanks,
H.Seto

2009-10-09 09:12:05

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

On Fri, 2009-10-09 at 16:31 +0800, Hidetoshi Seto wrote:
> Huang Ying wrote:
> > On Fri, 2009-10-09 at 15:27 +0800, Hidetoshi Seto wrote:
> >> Huang Ying wrote:
> >>> This is another example for you to use my idea, implement it in a
> >>> similar way and send it out as your own.
> >> I used "Reported-by:" for this patch, having some intent to indicate
> >> that this patch uses an idea different from the original one, and that
> >> this patch aims at the problem certainly reported by you.
> >
> > The point of the idea is to use two flags instead of one flag, not the
> > name of the flag or they are inside/outside of struct mce.
>
> Humm, I could be wrong and could misread your comment...
>
> The point of my idea is to use two separated flags, "inject_flags" for
> inject tool and "valid" for kernel, instead of one flag as you proposed,
> "inject_flags" shared by both.
> And I think the flag for kernel should be outside of struct mce.
>
> Once I complained about the name of flag ".finished", because you were
> trying to add "LOADED" flag. I thought that "if .finished but !LOADED, it
> means the loading data to struct is finished but not loaded.. ???what???"

We have full control on these flags. In my original patch, I
set .finished = 0, before set MCJ_LOADED in .inject_flags. And I will
not set .finished = 1, before set MCJ_LOADED in inject_flags.

> So at first I tried to add 3rd state to ".finished", but soon I agreed
> that it is not good idea.
>
> After that I started to think about having two separated flags, and the
> result is this [6/6] patch in -v2.
>
> In short, I believe that my idea is "use two flags" and your idea is
> "share one flag" ... right?

No. There are many flags in .inject_flags (MCJ_EXCEPTION, MCJ_LOADED,
etc), .finished is just another flag.

The issue of original version is that .finished is used to indicate both
there is some data in injectm and injectm can be consumed. What I do is
to use two flags, .finished is used to indicate injectm can be consumed,
MCJ_LOADED is used to indicate there is some data in injectm. You just
change the name and the place of the two flags. In [6/6], you
use .finished to indicate there is some data in injectm and use
mce_fake_banks.valid to indicate injectm can be consumed.

It is OK to discuss the name and places of the two flags, but you should
not send out a similar patch and declare it is your idea.

> The only thing what I want to do here is merge your "fix" into upstream.

You should provide comment, instead of sending out a similar patch and
declaring it is your idea.

Best Regards,
Huang Ying

2009-10-13 02:36:28

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

Huang Ying wrote:
> On Fri, 2009-10-09 at 16:31 +0800, Hidetoshi Seto wrote:
>> In short, I believe that my idea is "use two flags" and your idea is
>> "share one flag" ... right?
>
> No. There are many flags in .inject_flags (MCJ_EXCEPTION, MCJ_LOADED,
> etc), .finished is just another flag.

Maybe I should have said "to indicate 'loaded' and 'ready for consume'",
"use two variables" and "share one variable."

> It is OK to discuss the name and places of the two flags, but you should
> not send out a similar patch and declare it is your idea.

I have experienced that some person received an alternative patch from
another person saying that "hey, how about this patch to do same thing in
an other way?" and that the original author replied "oh, it looks much
better, use it instead of mine" with his Acked-by:.

Now I have learned you are not one of such open-minded people, so I will
never try to make such patch for you again. Please throw all of my patches
posted last week in the trash.

>> The only thing what I want to do here is merge your "fix" into upstream.
>
> You should provide comment, instead of sending out a similar patch and
> declaring it is your idea.

I hope you could flexibly reflect comments from others, without saying like
"I don't think this is necessary because it works fine."

How long do I have to wait your next post, which will fix my urgent issue on
Nehalem?


Thanks,
H.Seto

2009-10-13 03:28:54

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

On Tue, 2009-10-13 at 10:34 +0800, Hidetoshi Seto wrote:
> Huang Ying wrote:
> > On Fri, 2009-10-09 at 16:31 +0800, Hidetoshi Seto wrote:
> >> In short, I believe that my idea is "use two flags" and your idea is
> >> "share one flag" ... right?
> >
> > No. There are many flags in .inject_flags (MCJ_EXCEPTION, MCJ_LOADED,
> > etc), .finished is just another flag.
>
> Maybe I should have said "to indicate 'loaded' and 'ready for consume'",
> "use two variables" and "share one variable."

.inject_flags and .finished are two variables. I think you try to avoid
using .inject_flags. But I think it is OK to use .inject_flags.

> > It is OK to discuss the name and places of the two flags, but you should
> > not send out a similar patch and declare it is your idea.
>
> I have experienced that some person received an alternative patch from
> another person saying that "hey, how about this patch to do same thing in
> an other way?" and that the original author replied "oh, it looks much
> better, use it instead of mine" with his Acked-by:.

I don't do that because I don't think your patch is in another way and
much better.

> Now I have learned you are not one of such open-minded people, so I will
> never try to make such patch for you again. Please throw all of my patches
> posted last week in the trash.
>
> >> The only thing what I want to do here is merge your "fix" into upstream.
> >
> > You should provide comment, instead of sending out a similar patch and
> > declaring it is your idea.
>
> I hope you could flexibly reflect comments from others, without saying like
> "I don't think this is necessary because it works fine."

When I say: "I don't think it is necessary". I do think so really.

> How long do I have to wait your next post, which will fix my urgent issue on
> Nehalem?

I think my patch with title:

[BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call

fixes this issue correctly.

Hi, Ingo and Peter,

what are your idea about the patch above? Maybe you don't like the
naming style of MCJ_XXX. But I think if they need to be changed, it
should be in another patch. And H.Seto has posted the patch for it.

Best Regards,
Huang Ying

2009-10-13 06:01:47

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

Huang Ying wrote:
> On Tue, 2009-10-13 at 10:34 +0800, Hidetoshi Seto wrote:
>> How long do I have to wait your next post, which will fix my urgent issue on
>> Nehalem?
>
> I think my patch with title:
>
> [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call
>
> fixes this issue correctly.

I should note that there are 2 pending patches from you:

[BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call
for issue A) injected mce could be consumed by real poll handler

[BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer
for issue B) log buffer is too small
and for issue C) log buffer is not ring buffer
(I'm not sure but there might be other targeting issues D), E) and so on.)

And the only one what I believe it is urgent is issue B).


Thanks,
H.Seto

2009-10-13 06:20:11

by Huang, Ying

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call

On Tue, 2009-10-13 at 14:00 +0800, Hidetoshi Seto wrote:
> Huang Ying wrote:
> > On Tue, 2009-10-13 at 10:34 +0800, Hidetoshi Seto wrote:
> >> How long do I have to wait your next post, which will fix my urgent issue on
> >> Nehalem?
> >
> > I think my patch with title:
> >
> > [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call
> >
> > fixes this issue correctly.
>
> I should note that there are 2 pending patches from you:
>
> [BUGFIX -v2] x86, mce, inject: Make injected mce valid only during faked handler call
> for issue A) injected mce could be consumed by real poll handler
>
> [BUGFIX -v7] x86, MCE: Fix bugs and issues of MCE log ring buffer
> for issue B) log buffer is too small
> and for issue C) log buffer is not ring buffer
> (I'm not sure but there might be other targeting issues D), E) and so on.)
>
> And the only one what I believe it is urgent is issue B).

I have talked with Ingo about this patch. But he has different idea
about MCE log ring buffer and he didn't want to merge the patch even as
an urgent bug fixes. It seems that another re-post can not convince him.

Best Regards,
Huang Ying

2009-10-13 06:30:32

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 6/6] mce-inject: use injected mce only during faked handler call


* Huang Ying <[email protected]> wrote:

> I have talked with Ingo about this patch. But he has different idea
> about MCE log ring buffer and he didn't want to merge the patch even
> as an urgent bug fixes. It seems that another re-post can not convince
> him.

Correct. The fixes are beyond what we can do in .32 - and for .33 i
outlined (with a patch) that we should be using not just the ftrace
ring-buffer (like your patch did) but perf events to expose MCE events.

That brings MCE events to a whole new level of functionality.

Event injection support would be an interesting new addition to
kernel/perf_event.c: non-MCE user-space wants to inject events as well -
both to simulate rare events, and to define their own user-space events.

Is there any technical reason why we wouldnt want to take this far
superior approach?

Ingo

2009-10-13 07:20:36

by Hidetoshi Seto

[permalink] [raw]
Subject: [RFC] x86, mce: use of TRACE_EVENT for mce

Ingo Molnar wrote:
> * Huang Ying <[email protected]> wrote:
>
>> I have talked with Ingo about this patch. But he has different idea
>> about MCE log ring buffer and he didn't want to merge the patch even
>> as an urgent bug fixes. It seems that another re-post can not convince
>> him.
>
> Correct. The fixes are beyond what we can do in .32 - and for .33 i
> outlined (with a patch) that we should be using not just the ftrace
> ring-buffer (like your patch did) but perf events to expose MCE events.
>
> That brings MCE events to a whole new level of functionality.
>
> Event injection support would be an interesting new addition to
> kernel/perf_event.c: non-MCE user-space wants to inject events as well -
> both to simulate rare events, and to define their own user-space events.
>
> Is there any technical reason why we wouldnt want to take this far
> superior approach?
>
> Ingo

We could have more aggressive discussion if there is a real patch.
This is an example.


Thanks,
H.Seto

===

Subject: [PATCH] [RFC] x86, mce: trial use of TRACE_EVENT for mce

(As a response to "x86: mce: New MCE logging design" from Ingo)

Just forward record to trace buffer.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 6 +++
include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+), 0 deletions(-)
create mode 100644 include/trace/events/mce.h

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1598a9..2d563cd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@

#include "mce-internal.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
int mce_disabled __read_mostly;

#define MISC_MCELOG_MINOR 227
@@ -141,6 +144,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;

+ /* Forward record to trace buffer for several use */
+ trace_mce_record(mce);
+
mce->finished = 0;
wmb();
for (;;) {
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644
index 0000000..7eee778
--- /dev/null
+++ b/include/trace/events/mce.h
@@ -0,0 +1,69 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mce
+
+#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MCE_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <asm/mce.h>
+
+TRACE_EVENT(mce_record,
+
+ TP_PROTO(struct mce *m),
+
+ TP_ARGS(m),
+
+ TP_STRUCT__entry(
+ __field( u64, mcgcap )
+ __field( u64, mcgstatus )
+ __field( u8, bank )
+ __field( u64, status )
+ __field( u64, addr )
+ __field( u64, misc )
+ __field( u64, ip )
+ __field( u8, cs )
+ __field( u64, tsc )
+ __field( u64, walltime )
+ __field( u32, cpu )
+ __field( u32, cpuid )
+ __field( u32, apicid )
+ __field( u32, socketid )
+ __field( u8, cpuvendor )
+ ),
+
+ TP_fast_assign(
+ __entry->mcgcap = m->mcgcap;
+ __entry->mcgstatus = m->mcgstatus;
+ __entry->bank = m->bank;
+ __entry->status = m->status;
+ __entry->addr = m->addr;
+ __entry->misc = m->misc;
+ __entry->ip = m->ip;
+ __entry->cs = m->cs;
+ __entry->tsc = m->tsc;
+ __entry->walltime = m->time;
+ __entry->cpu = m->extcpu;
+ __entry->cpuid = m->cpuid;
+ __entry->apicid = m->apicid;
+ __entry->socketid = m->socketid;
+ __entry->cpuvendor = m->cpuvendor;
+ ),
+
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ __entry->cpu,
+ __entry->mcgcap, __entry->mcgstatus,
+ __entry->bank, __entry->status,
+ __entry->addr, __entry->misc,
+ __entry->cs, __entry->ip,
+ __entry->tsc,
+ __entry->cpuvendor, __entry->cpuid,
+ __entry->walltime,
+ __entry->socketid,
+ __entry->apicid)
+);
+
+#endif /* _TRACE_MCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
1.6.4.4

2009-10-13 08:44:12

by Ingo Molnar

[permalink] [raw]
Subject: Re: [RFC] x86, mce: use of TRACE_EVENT for mce


* Hidetoshi Seto <[email protected]> wrote:

> Ingo Molnar wrote:
> > * Huang Ying <[email protected]> wrote:
> >
> >> I have talked with Ingo about this patch. But he has different idea
> >> about MCE log ring buffer and he didn't want to merge the patch even
> >> as an urgent bug fixes. It seems that another re-post can not convince
> >> him.
> >
> > Correct. The fixes are beyond what we can do in .32 - and for .33 i
> > outlined (with a patch) that we should be using not just the ftrace
> > ring-buffer (like your patch did) but perf events to expose MCE events.
> >
> > That brings MCE events to a whole new level of functionality.
> >
> > Event injection support would be an interesting new addition to
> > kernel/perf_event.c: non-MCE user-space wants to inject events as well -
> > both to simulate rare events, and to define their own user-space events.
> >
> > Is there any technical reason why we wouldnt want to take this far
> > superior approach?
> >
> > Ingo
>
> We could have more aggressive discussion if there is a real patch.
> This is an example.

That's the right attitude :-)

I've created a new topic tree for this approach: tip:perf/mce, and i've
committed your patch with a changelog outlining the approach, and pushed
it out. Please send delta patches against latest tip:master.

I think the next step should be to determine the rough 'event structure'
we want to map out. The mce_record event you added should be split up
some more.

For example we definitely want thermal events to be separate. One
approach would be the RFC patch i sent in "[PATCH] x86: mce: New MCE
logging design" - feel free to pick that up and iterate it.

A question would be whether each MCA/MCE bank should have a separate
event enumerated. I.e. right now 'perf list' shows:

mce:mce_record [Tracepoint event]

It might make sense to do something like:

mce:mce_bank_2 [Tracepoint event]
mce:mce_bank_3 [Tracepoint event]
mce:mce_bank_5 [Tracepoint event]
mce:mce_bank_6 [Tracepoint event]
mce:mce_bank_8 [Tracepoint event]

But this is pretty static and meaningless - so what i'd like to see is
to enumerate the _logical purpose_ of the MCE events, largely driven by
the physical source of the event:

$ perf list 2>&1 | grep mce
mce:mce_cpu [Tracepoint event]
mce:mce_thermal [Tracepoint event]
mce:mce_cache [Tracepoint event]
mce:mce_memory [Tracepoint event]
mce:mce_bus [Tracepoint event]
mce:mce_device [Tracepoint event]
mce:mce_other [Tracepoint event]

etc. - with a few simple rules about what type of event goes into which
category, such as:

- CPU internal errors go into mce_cpu
- memory or L3 cache related errors go into mce_memory
- L2 and lower level cache errors go into mce_cache
- general IO / bus / interconnect errors go into mce_bus
- specific device faults go into mce_device
- the rest goes into mce_other

Note - this is just a first rough guesstimate list - more can be added
and the definition can be made stricter. (Please suggest modifications
to this categorization.) Each event still has finegrained fields that
allows further disambiguation of precisely which event the CPU
generated.

Note that these categories will be largely CPU independent. Certain
models will offer events in all of these categories, some models will
only provide events in a very limited subset of these events.

The logical structure remains CPU model independent and tools, admins
and users can standardize on this generic 'logical overview' event
structure - instead of the current maze of model specific MCE decoding
with no real structure over it.

Once we have this higher level logging structure (while still preserving
the fine details as well), we can go a step forward and attach things
like the ability to panic the box to individual events.

[ Note, we might also still keep a 'generic' event like mce_record as
well, if that still makes sense once we've split up the events
properly. ]

Then the next step would be clean and generic event injection support
that uses perf events.

Hm? Looks like pretty exciting stuff to me - there's a _lot_ of
expressive potential in the hardware, we have myriads of interesting
details that can be logged - we just need to free it up and make it
available properly.

Ingo

2009-10-13 08:47:56

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:perf/mce] perf_event, x86, mce: Use TRACE_EVENT() for MCE logging

Commit-ID: 8968f9d3dc23d9a1821d97c6f11e72a59382e56c
Gitweb: http://git.kernel.org/tip/8968f9d3dc23d9a1821d97c6f11e72a59382e56c
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Tue, 13 Oct 2009 16:19:41 +0900
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 13 Oct 2009 09:43:38 +0200

perf_event, x86, mce: Use TRACE_EVENT() for MCE logging

This approach is the first baby step towards solving many of the
structural problems the x86 MCE logging code is having today:

- It has a private ring-buffer implementation that has a number
of limitations and has been historically fragile and buggy.

- It is using a quirky /dev/mcelog ioctl driven ABI that is MCE
specific. /dev/mcelog is not part of any larger logging
framework and hence has remained on the fringes for many years.

- The MCE logging code is still very unclean partly due to its ABI
limitations. Fields are being reused for multiple purposes, and
the whole message structure is limited and x86 specific to begin
with.

All in one, the x86 tree would like to move away from this private
implementation of an event logging facility to a broader framework.

By using perf events we gain the following advantages:

- Multiple user-space agents can access MCE events. We can have an
mcelog daemon running but also a system-wide tracer capturing
important events in flight-recorder mode.

- Sampling support: the kernel and the user-space call-chain of MCE
events can be stored and analyzed as well. This way actual patterns
of bad behavior can be matched to precisely what kind of activity
happened in the kernel (and/or in the app) around that moment in
time.

- Coupling with other hardware and software events: the PMU can track a
number of other anomalies - monitoring software might chose to
monitor those plus the MCE events as well - in one coherent stream of
events.

- Discovery of MCE sources - tracepoints are enumerated and tools can
act upon the existence (or non-existence) of various channels of MCE
information.

- Filtering support: we just subscribe to and act upon the events we
are interested in. Then even on a per event source basis there's
in-kernel filter expressions available that can restrict the amount
of data that hits the event channel.

- Arbitrary deep per cpu buffering of events - we can buffer 32
entries or we can buffer as much as we want, as long as we have
the RAM.

- An NMI-safe ring-buffer implementation - mappable to user-space.

- Built-in support for timestamping of events, PID markers, CPU
markers, etc.

- A rich ABI accessible over system call interface. Per cpu, per task
and per workload monitoring of MCE events can be done this way. The
ABI itself has a nice, meaningful structure.

- Extensible ABI: new fields can be added without breaking tooling.
New tracepoints can be added as the hardware side evolves. There's
various parsers that can be used.

- Lots of scheduling/buffering/batching modes of operandi for MCE
events. poll() support. mmap() support. read() support. You name it.

- Rich tooling support: even without any MCE specific extensions added
the 'perf' tool today offers various views of MCE data: perf report,
perf stat, perf trace can all be used to view logged MCE events and
perhaps correlate them to certain user-space usage patterns. But it
can be used directly as well, for user-space agents and policy action
in mcelog, etc.

With this we hope to achieve significant code cleanup and feature
improvements in the MCE code, and we hope to be able to drop the
/dev/mcelog facility in the end.

This patch is just a plain dumb dump of mce_log() records to
the tracepoints / perf events framework - a first proof of
concept step.

Signed-off-by: Hidetoshi Seto <[email protected]>
Cc: Huang Ying <[email protected]>
Cc: Andi Kleen <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 6 +++
include/trace/events/mce.h | 69 ++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1598a9..39caea3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -46,6 +46,9 @@

#include "mce-internal.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/mce.h>
+
int mce_disabled __read_mostly;

#define MISC_MCELOG_MINOR 227
@@ -141,6 +144,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;

+ /* Emit the trace record: */
+ trace_mce_record(mce);
+
mce->finished = 0;
wmb();
for (;;) {
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
new file mode 100644
index 0000000..7eee778
--- /dev/null
+++ b/include/trace/events/mce.h
@@ -0,0 +1,69 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mce
+
+#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MCE_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+#include <asm/mce.h>
+
+TRACE_EVENT(mce_record,
+
+ TP_PROTO(struct mce *m),
+
+ TP_ARGS(m),
+
+ TP_STRUCT__entry(
+ __field( u64, mcgcap )
+ __field( u64, mcgstatus )
+ __field( u8, bank )
+ __field( u64, status )
+ __field( u64, addr )
+ __field( u64, misc )
+ __field( u64, ip )
+ __field( u8, cs )
+ __field( u64, tsc )
+ __field( u64, walltime )
+ __field( u32, cpu )
+ __field( u32, cpuid )
+ __field( u32, apicid )
+ __field( u32, socketid )
+ __field( u8, cpuvendor )
+ ),
+
+ TP_fast_assign(
+ __entry->mcgcap = m->mcgcap;
+ __entry->mcgstatus = m->mcgstatus;
+ __entry->bank = m->bank;
+ __entry->status = m->status;
+ __entry->addr = m->addr;
+ __entry->misc = m->misc;
+ __entry->ip = m->ip;
+ __entry->cs = m->cs;
+ __entry->tsc = m->tsc;
+ __entry->walltime = m->time;
+ __entry->cpu = m->extcpu;
+ __entry->cpuid = m->cpuid;
+ __entry->apicid = m->apicid;
+ __entry->socketid = m->socketid;
+ __entry->cpuvendor = m->cpuvendor;
+ ),
+
+ TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+ __entry->cpu,
+ __entry->mcgcap, __entry->mcgstatus,
+ __entry->bank, __entry->status,
+ __entry->addr, __entry->misc,
+ __entry->cs, __entry->ip,
+ __entry->tsc,
+ __entry->cpuvendor, __entry->cpuid,
+ __entry->walltime,
+ __entry->socketid,
+ __entry->apicid)
+);
+
+#endif /* _TRACE_MCE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>