2011-06-08 01:49:59

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 00/12] x86: minor cleanups/fixes for MCE codes (v2)

This is a set of minor changes for MCE codes.

Though I know there are many people expecting new RAS architecture
such as one that integrates RAS/EDAC with MCE, I hope these small
steps will be a help for future works.

v2:
reflect comments, update patch 3,4,5,7,10,12.
rebased on 3.0-rc2.

Thanks,
H.Seto

arch/x86/include/asm/entry_arch.h | 4 -
arch/x86/include/asm/hw_irq.h | 1 -
arch/x86/include/asm/irq_vectors.h | 5 -
arch/x86/include/asm/mce.h | 19 ++-
arch/x86/kernel/cpu/mcheck/mce-severity.c | 152 ++++++++++------
arch/x86/kernel/cpu/mcheck/mce.c | 288 ++++++++++++++---------------
arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +-
arch/x86/kernel/entry_64.S | 5 -
arch/x86/kernel/irqinit.c | 3 -
9 files changed, 251 insertions(+), 236 deletions(-)

Hidetoshi Seto (11):
mce-severity: cleanup severity table, prep
mce-severity: cleanup severity table
mce-severity: trivial cleanups
x86, mce: replace MCE_SELF_VECTOR by irq_work
x86, mce: replace MCM_ to MCI_MISC_
x86, mce: introduce mce_gather_info()
x86, mce: check the result of ancient_init()
x86, mce: cleanup mce_create/remove_device
x86, mce: cleanup mce_read
x86, mce: use prefix mce_chrdev_ to group functions
x86, mce: use prefix mce_sysdev_ to group functions

Tony Luck (1):
mce-severity: fixes for mce severity table


2011-06-08 01:52:20

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 01/12] mce-severity: fixes for mce severity table

From: Tony Luck <[email protected]>

The "Spurious not enabled" entry is redundant. The "Not enabled"
entry earlier in the table will cover this case.

The "Action required; unknown MCACOD" entry shouldn't specify
MCACOD in the .mask field. Current code will only match for
mcacod==0 rather than all AR=1 entries.

Signed-off-by: Tony Luck <[email protected]>
Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 1e8d66c..352d16a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -69,8 +69,6 @@ static struct severity {
MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
KERNEL),
BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
- MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
- "Spurious not enabled", SER),

/* ignore OVER for UCNA */
MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
@@ -82,7 +80,7 @@ static struct severity {
/* AR add known MCACODs here */
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
"Action required with lost events", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
"Action required; unknown MCACOD", SER),

/* known AO MCACODs: */
--
1.7.1

2011-06-08 01:53:10

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 02/12] mce-severity: cleanup severity table, prep

It looks very complicated and hard to read for people other than
skilled developers. So let make it a bit clean.
At first, change format to ease reading elements in the table.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 116 +++++++++++++++++++++-------
1 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 352d16a..eaf5a43 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -58,44 +58,102 @@ static struct severity {
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCACOD 0xffff

- BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
- BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
- BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+ BITCLR(
+ MCI_STATUS_VAL,
+ NO, "Invalid"
+ ),
+ BITCLR(
+ MCI_STATUS_EN,
+ NO, "Not enabled"
+ ),
+ BITSET(
+ MCI_STATUS_PCC,
+ PANIC, "Processor context corrupt"
+ ),
/* When MCIP is not set something is very confused */
- MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+ MCGMASK(
+ MCG_STATUS_MCIP, 0,
+ PANIC, "MCIP not set in MCA handler"
+ ),
/* Neither return not error IP -- no chance to recover -> PANIC */
- MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
- "Neither restart nor error IP"),
- MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
- KERNEL),
- BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+ MCGMASK(
+ MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0,
+ PANIC, "Neither restart nor error IP"
+ ),
+ MCGMASK(
+ MCG_STATUS_RIPV, 0,
+ PANIC, "In kernel and no restart IP",
+ KERNEL
+ ),
+ BITCLR(
+ MCI_STATUS_UC,
+ KEEP, "Corrected error",
+ NOSER
+ ),

/* ignore OVER for UCNA */
- MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
- "Uncorrected no action required", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
- "Illegal combination (UCNA with AR=1)", SER),
- MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+ MASK(
+ MCI_UC_SAR, MCI_STATUS_UC,
+ KEEP, "Uncorrected no action required",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR,
+ PANIC, "Illegal combination (UCNA with AR=1)",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_S, 0,
+ KEEP, "Non signalled machine check",
+ SER
+ ),

/* AR add known MCACODs here */
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
- "Action required with lost events", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
- "Action required; unknown MCACOD", SER),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR,
+ PANIC, "Action required with lost events",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR,
+ PANIC, "Action required; unknown MCACOD",
+ SER
+ ),

/* known AO MCACODs: */
- MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
- "Action optional: memory scrubbing error", SER),
- MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
- "Action optional: last level cache writeback error", SER),
-
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
- "Action optional unknown MCACOD", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
- "Action optional with lost events", SER),
- BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
- BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
- BITSET(0, SOME, "No match") /* always matches. keep at end */
+ MASK(
+ MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0,
+ AO, "Action optional: memory scrubbing error",
+ SER
+ ),
+ MASK(
+ MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a,
+ AO, "Action optional: last level cache writeback error",
+ SER
+ ),
+
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S,
+ SOME, "Action optional unknown MCACOD",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER,
+ SOME, "Action optional with lost events",
+ SER
+ ),
+ BITSET(
+ MCI_STATUS_UC|MCI_STATUS_OVER,
+ PANIC, "Overflowed uncorrected"
+ ),
+ BITSET(
+ MCI_STATUS_UC,
+ UC, "Uncorrected"
+ ),
+ BITSET(
+ 0,
+ SOME, "No match"
+ ) /* always matches. keep at end */
};

/*
--
1.7.1

2011-06-08 01:54:00

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 03/12] mce-severity: cleanup severity table

Current format of item in this table is:
condition(param, ..., level, message [, condition2 ...])

So we have to check both of head and tail of the item to know
conditions to match the item.

Make them in straight forward form:
item(level, message, condition [, condition2 ...])

v2:
postpone minor sorting to next patch

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 127 +++++++++++++----------------
1 files changed, 58 insertions(+), 69 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index eaf5a43..27e778e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -43,116 +43,105 @@ static struct severity {
unsigned char covered;
char *msg;
} severities[] = {
-#define KERNEL .context = IN_KERNEL
-#define USER .context = IN_USER
-#define SER .ser = SER_REQUIRED
-#define NOSER .ser = NO_SER
-#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
-#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
-#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
-#define MCGMASK(x, res, s, m, r...) \
- { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
-#define MASK(x, y, s, m, r...) \
- { .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
+#define BITCLR(x) .mask = x, .result = 0
+#define BITSET(x) .mask = x, .result = x
+#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
+#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCACOD 0xffff

- BITCLR(
- MCI_STATUS_VAL,
- NO, "Invalid"
+ MCESEV(
+ NO, "Invalid",
+ BITCLR(MCI_STATUS_VAL)
),
- BITCLR(
- MCI_STATUS_EN,
- NO, "Not enabled"
+ MCESEV(
+ NO, "Not enabled",
+ BITCLR(MCI_STATUS_EN)
),
- BITSET(
- MCI_STATUS_PCC,
- PANIC, "Processor context corrupt"
+ MCESEV(
+ PANIC, "Processor context corrupt",
+ BITSET(MCI_STATUS_PCC)
),
/* When MCIP is not set something is very confused */
- MCGMASK(
- MCG_STATUS_MCIP, 0,
- PANIC, "MCIP not set in MCA handler"
+ MCESEV(
+ PANIC, "MCIP not set in MCA handler",
+ MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
- MCGMASK(
- MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0,
- PANIC, "Neither restart nor error IP"
+ MCESEV(
+ PANIC, "Neither restart nor error IP",
+ MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
- MCGMASK(
- MCG_STATUS_RIPV, 0,
+ MCESEV(
PANIC, "In kernel and no restart IP",
- KERNEL
+ KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
- BITCLR(
- MCI_STATUS_UC,
+ MCESEV(
KEEP, "Corrected error",
- NOSER
+ NOSER, BITCLR(MCI_STATUS_UC)
),

/* ignore OVER for UCNA */
- MASK(
- MCI_UC_SAR, MCI_STATUS_UC,
+ MCESEV(
KEEP, "Uncorrected no action required",
- SER
+ SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR,
+ MCESEV(
PANIC, "Illegal combination (UCNA with AR=1)",
- SER
+ SER,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
),
- MASK(
- MCI_STATUS_S, 0,
+ MCESEV(
KEEP, "Non signalled machine check",
- SER
+ SER, MASK(MCI_STATUS_S, 0)
),

/* AR add known MCACODs here */
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR,
+ MCESEV(
PANIC, "Action required with lost events",
- SER
+ SER,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR,
+ MCESEV(
PANIC, "Action required; unknown MCACOD",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),

/* known AO MCACODs: */
- MASK(
- MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0,
+ MCESEV(
AO, "Action optional: memory scrubbing error",
- SER
+ SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0)
),
- MASK(
- MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a,
+ MCESEV(
AO, "Action optional: last level cache writeback error",
- SER
+ SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a)
),
-
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S,
+ MCESEV(
SOME, "Action optional unknown MCACOD",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER,
+ MCESEV(
SOME, "Action optional with lost events",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER)
),
- BITSET(
- MCI_STATUS_UC|MCI_STATUS_OVER,
- PANIC, "Overflowed uncorrected"
+
+ MCESEV(
+ PANIC, "Overflowed uncorrected",
+ BITSET(MCI_STATUS_UC|MCI_STATUS_OVER)
),
- BITSET(
- MCI_STATUS_UC,
- UC, "Uncorrected"
+ MCESEV(
+ UC, "Uncorrected",
+ BITSET(MCI_STATUS_UC)
),
- BITSET(
- 0,
- SOME, "No match"
+ MCESEV(
+ SOME, "No match",
+ BITSET(0)
) /* always matches. keep at end */
};

--
1.7.1

2011-06-08 01:55:30

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 04/12] mce-severity: trivial cleanups

This patch does:
- sort bits in macro
- use BITCLR/BITSET
- coordinate message pattern
- use m for struct mce
- minor cleanup for severities_debugfs_init()

Logics are not changed.

v2:
add sorting that was in previous patch

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 39 ++++++++++++++---------------
1 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 27e778e..7395d5f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -99,41 +99,40 @@ static struct severity {
),
MCESEV(
KEEP, "Non signalled machine check",
- SER, MASK(MCI_STATUS_S, 0)
+ SER, BITCLR(MCI_STATUS_S)
),

/* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
- SER,
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR)
+ SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
MCESEV(
- PANIC, "Action required; unknown MCACOD",
+ PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),

/* known AO MCACODs: */
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
),
MCESEV(
- SOME, "Action optional unknown MCACOD",
+ SOME, "Action optional: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
),
MCESEV(
SOME, "Action optional with lost events",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
),

MCESEV(
PANIC, "Overflowed uncorrected",
- BITSET(MCI_STATUS_UC|MCI_STATUS_OVER)
+ BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
),
MCESEV(
UC, "Uncorrected",
@@ -157,15 +156,15 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}

-int mce_severity(struct mce *a, int tolerant, char **msg)
+int mce_severity(struct mce *m, int tolerant, char **msg)
{
- enum context ctx = error_context(a);
+ enum context ctx = error_context(m);
struct severity *s;

for (s = severities;; s++) {
- if ((a->status & s->mask) != s->result)
+ if ((m->status & s->mask) != s->result)
continue;
- if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+ if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
if (s->ser == SER_REQUIRED && !mce_ser)
continue;
@@ -242,15 +241,15 @@ static const struct file_operations severities_coverage_fops = {

static int __init severities_debugfs_init(void)
{
- struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+ struct dentry *dmce, *fsev;

dmce = mce_get_debugfs_dir();
- if (dmce == NULL)
+ if (!dmce)
goto err_out;
- fseverities_coverage = debugfs_create_file("severities-coverage",
- 0444, dmce, NULL,
- &severities_coverage_fops);
- if (fseverities_coverage == NULL)
+
+ fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+ &severities_coverage_fops);
+ if (!fsev)
goto err_out;

return 0;
@@ -259,4 +258,4 @@ err_out:
return -ENOMEM;
}
late_initcall(severities_debugfs_init);
-#endif
+#endif /* CONFIG_DEBUG_FS */
--
1.7.1

2011-06-08 01:56:25

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 05/12] x86, mce: replace MCE_SELF_VECTOR by irq_work

MCE handler uses a special vector for self IPI to invoke post-emergency
processing in an interrupt context, e.g. calling NMI-unsafe function,
wakeup loggers and scheduling time-consuming work for recovery etc.

However this mechanism is now generalized by the following commit:

> e360adbe29241a0194e10e20595360dd7b98a2b3
> Author: Peter Zijlstra <[email protected]>
> Date: Thu Oct 14 14:01:34 2010 +0800
>
> irq_work: Add generic hardirq context callbacks
>
> Provide a mechanism that allows running code in IRQ context. It is
> most useful for NMI code that needs to interact with the rest of the
> system -- like wakeup a task to drain buffers.
:

So change to use provided generic mechanism.

v2:
update patch description

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/entry_arch.h | 4 ---
arch/x86/include/asm/hw_irq.h | 1 -
arch/x86/include/asm/irq_vectors.h | 5 ----
arch/x86/kernel/cpu/mcheck/mce.c | 47 ++++-------------------------------
arch/x86/kernel/entry_64.S | 5 ----
arch/x86/kernel/irqinit.c | 3 --
6 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 1cd6d26..0baa628 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -53,8 +53,4 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif

-#ifdef CONFIG_X86_MCE
-BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
-#endif
-
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index bb9efe8..13f5504 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -34,7 +34,6 @@ extern void irq_work_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);
-extern void mce_self_interrupt(void);

extern void invalidate_interrupt(void);
extern void invalidate_interrupt0(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6e976ee..6665026 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,11 +109,6 @@

#define UV_BAU_MESSAGE 0xf5

-/*
- * Self IPI vector for machine checks
- */
-#define MCE_SELF_VECTOR 0xf4
-
/* Xen vector callback to receive events in a HVM domain */
#define XEN_HVM_EVTCHN_CALLBACK 0xf3

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff1ae9b..e81d48b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -10,7 +10,6 @@
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
-#include <linux/interrupt.h>
#include <linux/ratelimit.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
@@ -38,12 +37,9 @@
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/edac_mce.h>
+#include <linux/irq_work.h>

#include <asm/processor.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/idle.h>
-#include <asm/ipi.h>
#include <asm/mce.h>
#include <asm/msr.h>

@@ -461,22 +457,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}

-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Called after interrupts have been reenabled again
- * when a MCE happened during an interrupts off region
- * in the kernel.
- */
-asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+
+static void mce_irq_work_cb(struct irq_work *entry)
{
- ack_APIC_irq();
- exit_idle();
- irq_enter();
mce_notify_irq();
mce_schedule_work();
- irq_exit();
}
-#endif

static void mce_report_event(struct pt_regs *regs)
{
@@ -492,29 +479,7 @@ static void mce_report_event(struct pt_regs *regs)
return;
}

-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Without APIC do not notify. The event will be picked
- * up eventually.
- */
- if (!cpu_has_apic)
- return;
-
- /*
- * When interrupts are disabled we cannot use
- * kernel services safely. Trigger an self interrupt
- * through the APIC to instead do the notification
- * after interrupts are reenabled again.
- */
- apic->send_IPI_self(MCE_SELF_VECTOR);
-
- /*
- * Wait for idle afterwards again so that we don't leave the
- * APIC in a non idle state because the normal APIC writes
- * cannot exclude us.
- */
- apic_wait_icr_idle();
-#endif
+ irq_work_queue(&__get_cpu_var(mce_irq_work));
}

DEFINE_PER_CPU(unsigned, mce_poll_count);
@@ -1444,7 +1409,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
-
+ init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
}

/*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0..9fa6546 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -991,11 +991,6 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt

-#ifdef CONFIG_X86_MCE
-apicinterrupt MCE_SELF_VECTOR \
- mce_self_interrupt smp_mce_self_interrupt
-#endif
-
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f470e4e..f09d4bb 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -272,9 +272,6 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
- alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
-#endif

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
--
1.7.1

2011-06-08 01:57:18

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 06/12] x86, mce: replace MCM_ to MCI_MISC_

Follow other MCi_* register defines.
Plus define MCI_MISC_ADDR_LSB() and MCI_MISC_ADDR_MODE().

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 17 +++++++++++------
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 021979a..bbb328f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -8,6 +8,7 @@
* Machine Check support for x86
*/

+/* MCG_CAP register defines */
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
@@ -17,10 +18,12 @@
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */

+/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */

+/* MCi_STATUS register defines */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
@@ -31,12 +34,14 @@
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */

-/* MISC register defines */
-#define MCM_ADDR_SEGOFF 0 /* segment offset */
-#define MCM_ADDR_LINEAR 1 /* linear address */
-#define MCM_ADDR_PHYS 2 /* physical address */
-#define MCM_ADDR_MEM 3 /* memory address */
-#define MCM_ADDR_GENERIC 7 /* generic */
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
+#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
+#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
+#define MCI_MISC_ADDR_PHYS 2 /* physical address */
+#define MCI_MISC_ADDR_MEM 3 /* memory address */
+#define MCI_MISC_ADDR_GENERIC 7 /* generic */

/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN (1ULL << 30)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e81d48b..e807508 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -844,9 +844,9 @@ static int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
return 0;
- if ((m->misc & 0x3f) > PAGE_SHIFT)
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
return 0;
- if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
return 0;
return 1;
}
--
1.7.1

2011-06-08 01:58:17

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 07/12] x86, mce: introduce mce_gather_info()

This patch introduces new function mce_gather_info() to be called
at beginning of error handling, to gather minimum error information
from proper error registers (and saved registers).

As the result the mce_get_rip() is integrated and unnecessary
zero-ing is removed. This also fix an expected issue that RIP is
required to make some decision about error severity (for SRAR errors)
but it was retrieved later in the handler.

v2:
update comment (Thanks Tony)

Signed-off-by: Hidetoshi Seto <[email protected]>
CC: Tony Luck <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 50 ++++++++++++++++++++-----------------
1 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e807508..a182875 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -369,6 +369,31 @@ static void mce_wrmsrl(u32 msr, u64 v)
}

/*
+ * Collect all global (w.r.t. this processor) status about this machine
+ * check into our "mce" struct so that we can use it later to assess
+ * the severity of the problem as we read per-bank specific details.
+ */
+static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
+{
+ mce_setup(m);
+
+ m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (regs) {
+ /*
+ * Get the address of the instruction at the time of
+ * the machine check error.
+ */
+ if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+ }
+ /* Use accurate RIP reporting if available. */
+ if (rip_msr)
+ m->ip = mce_rdmsrl(rip_msr);
+ }
+}
+
+/*
* Simple lockless ring to communicate PFNs from the exception handler with the
* process context work function. This is vastly simplified because there's
* only a single reader and a single writer.
@@ -439,24 +464,6 @@ static void mce_schedule_work(void)
}
}

-/*
- * Get the address of the instruction at the time of the machine check
- * error.
- */
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
-
- if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
- m->ip = regs->ip;
- m->cs = regs->cs;
- } else {
- m->ip = 0;
- m->cs = 0;
- }
- if (rip_msr)
- m->ip = mce_rdmsrl(rip_msr);
-}
-
DEFINE_PER_CPU(struct irq_work, mce_irq_work);

static void mce_irq_work_cb(struct irq_work *entry)
@@ -506,9 +513,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

percpu_inc(mce_poll_count);

- mce_setup(&m);
+ mce_gather_info(&m, NULL);

- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
for (i = 0; i < banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -907,9 +913,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (!banks)
goto out;

- mce_setup(&m);
+ mce_gather_info(&m, regs);

- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
final = &__get_cpu_var(mces_seen);
*final = m;

@@ -993,7 +998,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);

- mce_get_rip(&m, regs);
mce_log(&m);

if (severity > worst) {
--
1.7.1

2011-06-08 01:58:57

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 08/12] x86, mce: check the result of ancient_init()

Because "ancient cpu" like p5 and winchip don't have X86_FEATURE_MCA
(I suppose so), mcheck_cpu_init() on such cpu will return at check
of mce_available() after __mcheck_cpu_ancient_init().

It is hard to know this implicit behavior without knowing cpus well.
So make it clear that we leave mcheck_cpu_init() when the cpu is
initialized in __mcheck_cpu_ancient_init().

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++---
1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a182875..2cc98d5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1332,18 +1332,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
return 0;
}

-static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
- return;
+ return 0;
+
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
intel_p5_mcheck_init(c);
+ return 1;
break;
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
+ return 1;
break;
}
+
+ return 0;
}

static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
@@ -1397,7 +1402,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
if (mce_disabled)
return;

- __mcheck_cpu_ancient_init(c);
+ if (__mcheck_cpu_ancient_init(c))
+ return;

if (!mce_available(c))
return;
--
1.7.1

2011-06-08 01:59:44

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 09/12] x86, mce: cleanup mce_create/remove_device

Use temporary local sysdev to make code simple.
No change in logic.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 27 ++++++++++++++-------------
1 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2cc98d5..f3f648c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1925,28 +1925,28 @@ static cpumask_var_t mce_dev_initialized;
/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
static __cpuinit int mce_create_device(unsigned int cpu)
{
+ struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
int err;
int i, j;

if (!mce_available(&boot_cpu_data))
return -EIO;

- memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
- per_cpu(mce_dev, cpu).id = cpu;
- per_cpu(mce_dev, cpu).cls = &mce_sysclass;
+ memset(&sysdev->kobj, 0, sizeof(struct kobject));
+ sysdev->id = cpu;
+ sysdev->cls = &mce_sysclass;

- err = sysdev_register(&per_cpu(mce_dev, cpu));
+ err = sysdev_register(sysdev);
if (err)
return err;

for (i = 0; mce_attrs[i]; i++) {
- err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ err = sysdev_create_file(sysdev, mce_attrs[i]);
if (err)
goto error;
}
for (j = 0; j < banks; j++) {
- err = sysdev_create_file(&per_cpu(mce_dev, cpu),
- &mce_banks[j].attr);
+ err = sysdev_create_file(sysdev, &mce_banks[j].attr);
if (err)
goto error2;
}
@@ -1955,30 +1955,31 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
+ sysdev_remove_file(sysdev, &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_attrs[i]);

- sysdev_unregister(&per_cpu(mce_dev, cpu));
+ sysdev_unregister(sysdev);

return err;
}

static __cpuinit void mce_remove_device(unsigned int cpu)
{
+ struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
int i;

if (!cpumask_test_cpu(cpu, mce_dev_initialized))
return;

for (i = 0; mce_attrs[i]; i++)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_attrs[i]);

for (i = 0; i < banks; i++)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
+ sysdev_remove_file(sysdev, &mce_banks[i].attr);

- sysdev_unregister(&per_cpu(mce_dev, cpu));
+ sysdev_unregister(sysdev);
cpumask_clear_cpu(cpu, mce_dev_initialized);
}

--
1.7.1

2011-06-08 02:00:31

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 10/12] x86, mce: cleanup mce_read

Use temporary local m to make code simple.
No change in logic.

v2:
use m for struct mce

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 23 +++++++++++------------
1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f3f648c..54360e8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1537,19 +1537,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
do {
for (i = prev; i < next; i++) {
unsigned long start = jiffies;
+ struct mce *m = &mcelog.entry[i];

- while (!mcelog.entry[i].finished) {
+ while (!m->finished) {
if (time_after_eq(jiffies, start + 2)) {
- memset(mcelog.entry + i, 0,
- sizeof(struct mce));
+ memset(m, 0, sizeof(*m));
goto timeout;
}
cpu_relax();
}
smp_rmb();
- err |= copy_to_user(buf, mcelog.entry + i,
- sizeof(struct mce));
- buf += sizeof(struct mce);
+ err |= copy_to_user(buf, m, sizeof(*m));
+ buf += sizeof(*m);
timeout:
;
}
@@ -1569,13 +1568,13 @@ timeout:
on_each_cpu(collect_tscs, cpu_tsc, 1);

for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
- mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
- err |= copy_to_user(buf, mcelog.entry+i,
- sizeof(struct mce));
+ struct mce *m = &mcelog.entry[i];
+
+ if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
+ err |= copy_to_user(buf, m, sizeof(*m));
smp_rmb();
- buf += sizeof(struct mce);
- memset(&mcelog.entry[i], 0, sizeof(struct mce));
+ buf += sizeof(*m);
+ memset(m, 0, sizeof(*m));
}
}

--
1.7.1

2011-06-08 02:01:10

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 11/12] x86, mce: use prefix mce_chrdev_ to group functions

There are many functions named mce_*, so use new prefix for
subset of functions that related to character device /dev/mcelog.

This change doesn't impact mce-inject module, because the exported
symbol mce_chrdev_ops already has the prefix so not changed.

Before: After:
mce_wait mce_chrdev_wait
mce_state_lock mce_chrdev_state_lock
open_count mce_chrdev_open_count
open_exclu mce_chrdev_open_exclu
mce_open mce_chrdev_open
mce_release mce_chrdev_release
mce_read_mutex mce_chrdev_read_mutex
mce_read mce_chrdev_read
mce_poll mce_chrdev_poll
mce_ioctl mce_chrdev_ioctl
mce_log_device mce_chrdev_device

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 77 ++++++++++++++++++++------------------
1 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 54360e8..9a73516 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -45,12 +45,12 @@

#include "mce-internal.h"

-static DEFINE_MUTEX(mce_read_mutex);
+static DEFINE_MUTEX(mce_chrdev_read_mutex);

#define rcu_dereference_check_mce(p) \
rcu_dereference_index_check((p), \
rcu_read_lock_sched_held() || \
- lockdep_is_held(&mce_read_mutex))
+ lockdep_is_held(&mce_chrdev_read_mutex))

#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -90,7 +90,8 @@ static unsigned long mce_need_notify;
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
+
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;

@@ -1159,7 +1160,8 @@ int mce_notify_irq(void)
clear_thread_flag(TIF_MCE_NOTIFY);

if (test_and_clear_bit(0, &mce_need_notify)) {
- wake_up_interruptible(&mce_wait);
+ /* wake processes polling /dev/mcelog */
+ wake_up_interruptible(&mce_chrdev_wait);

/*
* There is no risk of missing notifications because
@@ -1423,40 +1425,41 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
}

/*
- * Character device to read and clear the MCE log.
+ * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
*/

-static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count; /* #times opened */
-static int open_exclu; /* already open exclusive? */
+static DEFINE_SPINLOCK(mce_chrdev_state_lock);
+static int mce_chrdev_open_count; /* #times opened */
+static int mce_chrdev_open_exclu; /* already open exclusive? */

-static int mce_open(struct inode *inode, struct file *file)
+static int mce_chrdev_open(struct inode *inode, struct file *file)
{
- spin_lock(&mce_state_lock);
+ spin_lock(&mce_chrdev_state_lock);

- if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
- spin_unlock(&mce_state_lock);
+ if (mce_chrdev_open_exclu ||
+ (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&mce_chrdev_state_lock);

return -EBUSY;
}

if (file->f_flags & O_EXCL)
- open_exclu = 1;
- open_count++;
+ mce_chrdev_open_exclu = 1;
+ mce_chrdev_open_count++;

- spin_unlock(&mce_state_lock);
+ spin_unlock(&mce_chrdev_state_lock);

return nonseekable_open(inode, file);
}

-static int mce_release(struct inode *inode, struct file *file)
+static int mce_chrdev_release(struct inode *inode, struct file *file)
{
- spin_lock(&mce_state_lock);
+ spin_lock(&mce_chrdev_state_lock);

- open_count--;
- open_exclu = 0;
+ mce_chrdev_open_count--;
+ mce_chrdev_open_exclu = 0;

- spin_unlock(&mce_state_lock);
+ spin_unlock(&mce_chrdev_state_lock);

return 0;
}
@@ -1505,8 +1508,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
return 0;
}

-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
- loff_t *off)
+static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
+ size_t usize, loff_t *off)
{
char __user *buf = ubuf;
unsigned long *cpu_tsc;
@@ -1517,7 +1520,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
if (!cpu_tsc)
return -ENOMEM;

- mutex_lock(&mce_read_mutex);
+ mutex_lock(&mce_chrdev_read_mutex);

if (!mce_apei_read_done) {
err = __mce_read_apei(&buf, usize);
@@ -1582,15 +1585,15 @@ timeout:
err = -EFAULT;

out:
- mutex_unlock(&mce_read_mutex);
+ mutex_unlock(&mce_chrdev_read_mutex);
kfree(cpu_tsc);

return err ? err : buf - ubuf;
}

-static unsigned int mce_poll(struct file *file, poll_table *wait)
+static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
- poll_wait(file, &mce_wait, wait);
+ poll_wait(file, &mce_chrdev_wait, wait);
if (rcu_access_index(mcelog.next))
return POLLIN | POLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
@@ -1598,7 +1601,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
return 0;
}

-static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
+ unsigned long arg)
{
int __user *p = (int __user *)arg;

@@ -1626,16 +1630,16 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)

/* Modified in mce-inject.c, so not static or const */
struct file_operations mce_chrdev_ops = {
- .open = mce_open,
- .release = mce_release,
- .read = mce_read,
- .poll = mce_poll,
- .unlocked_ioctl = mce_ioctl,
- .llseek = no_llseek,
+ .open = mce_chrdev_open,
+ .release = mce_chrdev_release,
+ .read = mce_chrdev_read,
+ .poll = mce_chrdev_poll,
+ .unlocked_ioctl = mce_chrdev_ioctl,
+ .llseek = no_llseek,
};
EXPORT_SYMBOL_GPL(mce_chrdev_ops);

-static struct miscdevice mce_log_device = {
+static struct miscdevice mce_chrdev_device = {
MISC_MCELOG_MINOR,
"mcelog",
&mce_chrdev_ops,
@@ -2107,11 +2111,12 @@ static __init int mcheck_init_device(void)

register_syscore_ops(&mce_syscore_ops);
register_hotcpu_notifier(&mce_cpu_notifier);
- misc_register(&mce_log_device);
+
+ /* register character device /dev/mcelog */
+ misc_register(&mce_chrdev_device);

return err;
}
-
device_initcall(mcheck_init_device);

/*
--
1.7.1

2011-06-08 02:02:31

by Hidetoshi Seto

[permalink] [raw]
Subject: [PATCH 12/12] x86, mce: use prefix mce_sysdev_ to group functions

There are many functions named mce_*, so use new prefix for
subset of functions to make it clear that the function is
related to sysfs support.

And since f3c6ea1b06c71b43f751b36bd99345369fe911af introduces
syscore_ops, use prefix mce_syscore for some functions related
to power management, that were in sysdev_class.

Before: After:
mce_device mce_sysdev
mce_sysclass mce_sysdev_class
mce_attrs mce_sysdev_attrs
mce_dev_initialized mce_sysdev_initialized
mce_create_device mce_sysdev_create
mce_remove_device mce_sysdev_remove

mce_suspend mce_syscore_suspend
mce_shutdown mce_syscore_shutdown
mce_resume mce_syscore_resume

v2:
use prefix mce_syscore for some PM funcs.

Signed-off-by: Hidetoshi Seto <[email protected]>
---
arch/x86/include/asm/mce.h | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 62 ++++++++++++++++++----------------
arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++---
3 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index bbb328f..716b48a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -149,7 +149,7 @@ static inline void enable_p5_mce(void) {}

void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, mce_dev);
+DECLARE_PER_CPU(struct sys_device, mce_sysdev);

/*
* Maximum banks number.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9a73516..08363b0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1697,7 +1697,7 @@ int __init mcheck_init(void)
}

/*
- * Sysfs support
+ * mce_syscore: PM support
*/

/*
@@ -1717,12 +1717,12 @@ static int mce_disable_error_reporting(void)
return 0;
}

-static int mce_suspend(void)
+static int mce_syscore_suspend(void)
{
return mce_disable_error_reporting();
}

-static void mce_shutdown(void)
+static void mce_syscore_shutdown(void)
{
mce_disable_error_reporting();
}
@@ -1732,18 +1732,22 @@ static void mce_shutdown(void)
* Only one CPU is active at this time, the others get re-added later using
* CPU hotplug:
*/
-static void mce_resume(void)
+static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
}

static struct syscore_ops mce_syscore_ops = {
- .suspend = mce_suspend,
- .shutdown = mce_shutdown,
- .resume = mce_resume,
+ .suspend = mce_syscore_suspend,
+ .shutdown = mce_syscore_shutdown,
+ .resume = mce_syscore_resume,
};

+/*
+ * mce_sysdev: Sysfs support
+ */
+
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1779,11 +1783,11 @@ static void mce_enable_ce(void *all)
__mcheck_cpu_init_timer();
}

-static struct sysdev_class mce_sysclass = {
+static struct sysdev_class mce_sysdev_class = {
.name = "machinecheck",
};

-DEFINE_PER_CPU(struct sys_device, mce_dev);
+DEFINE_PER_CPU(struct sys_device, mce_sysdev);

__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -1912,7 +1916,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = {
&mce_cmci_disabled
};

-static struct sysdev_attribute *mce_attrs[] = {
+static struct sysdev_attribute *mce_sysdev_attrs[] = {
&attr_tolerant.attr,
&attr_check_interval.attr,
&attr_trigger,
@@ -1923,12 +1927,12 @@ static struct sysdev_attribute *mce_attrs[] = {
NULL
};

-static cpumask_var_t mce_dev_initialized;
+static cpumask_var_t mce_sysdev_initialized;

/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_create_device(unsigned int cpu)
+static __cpuinit int mce_sysdev_create(unsigned int cpu)
{
- struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
+ struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
int err;
int i, j;

@@ -1937,14 +1941,14 @@ static __cpuinit int mce_create_device(unsigned int cpu)

memset(&sysdev->kobj, 0, sizeof(struct kobject));
sysdev->id = cpu;
- sysdev->cls = &mce_sysclass;
+ sysdev->cls = &mce_sysdev_class;

err = sysdev_register(sysdev);
if (err)
return err;

- for (i = 0; mce_attrs[i]; i++) {
- err = sysdev_create_file(sysdev, mce_attrs[i]);
+ for (i = 0; mce_sysdev_attrs[i]; i++) {
+ err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
if (err)
goto error;
}
@@ -1953,7 +1957,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err)
goto error2;
}
- cpumask_set_cpu(cpu, mce_dev_initialized);
+ cpumask_set_cpu(cpu, mce_sysdev_initialized);

return 0;
error2:
@@ -1961,29 +1965,29 @@ error2:
sysdev_remove_file(sysdev, &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(sysdev, mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);

sysdev_unregister(sysdev);

return err;
}

-static __cpuinit void mce_remove_device(unsigned int cpu)
+static __cpuinit void mce_sysdev_remove(unsigned int cpu)
{
- struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
+ struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
int i;

- if (!cpumask_test_cpu(cpu, mce_dev_initialized))
+ if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
return;

- for (i = 0; mce_attrs[i]; i++)
- sysdev_remove_file(sysdev, mce_attrs[i]);
+ for (i = 0; mce_sysdev_attrs[i]; i++)
+ sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);

for (i = 0; i < banks; i++)
sysdev_remove_file(sysdev, &mce_banks[i].attr);

sysdev_unregister(sysdev);
- cpumask_clear_cpu(cpu, mce_dev_initialized);
+ cpumask_clear_cpu(cpu, mce_sysdev_initialized);
}

/* Make sure there are no machine checks on offlined CPUs. */
@@ -2033,7 +2037,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
+ mce_sysdev_create(cpu);
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
break;
@@ -2041,7 +2045,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD_FROZEN:
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
- mce_remove_device(cpu);
+ mce_sysdev_remove(cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
@@ -2095,16 +2099,16 @@ static __init int mcheck_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;

- zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+ zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);

mce_init_banks();

- err = sysdev_class_register(&mce_sysclass);
+ err = sysdev_class_register(&mce_sysdev_class);
if (err)
return err;

for_each_online_cpu(i) {
- err = mce_create_device(i);
+ err = mce_sysdev_create(i);
if (err)
return err;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bb0adad..f547421 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (!b)
goto out;

- err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
+ err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj,
b->kobj, name);
if (err)
goto out;
@@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}

- b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
+ b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj);
if (!b->kobj)
goto out_free;

@@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;

- err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
+ err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj,
b->kobj, name);
if (err)
goto out;
@@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;

return;
@@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;

- sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
}

--
1.7.1

2011-06-08 10:20:14

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 00/12] x86: minor cleanups/fixes for MCE codes (v2)

On Tue, Jun 07, 2011 at 09:48:57PM -0400, Hidetoshi Seto wrote:
> This is a set of minor changes for MCE codes.
>
> Though I know there are many people expecting new RAS architecture
> such as one that integrates RAS/EDAC with MCE, I hope these small
> steps will be a help for future works.
>
> v2:
> reflect comments, update patch 3,4,5,7,10,12.
> rebased on 3.0-rc2.
>
> Thanks,
> H.Seto
>
> arch/x86/include/asm/entry_arch.h | 4 -
> arch/x86/include/asm/hw_irq.h | 1 -
> arch/x86/include/asm/irq_vectors.h | 5 -
> arch/x86/include/asm/mce.h | 19 ++-
> arch/x86/kernel/cpu/mcheck/mce-severity.c | 152 ++++++++++------
> arch/x86/kernel/cpu/mcheck/mce.c | 288 ++++++++++++++---------------
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +-
> arch/x86/kernel/entry_64.S | 5 -
> arch/x86/kernel/irqinit.c | 3 -
> 9 files changed, 251 insertions(+), 236 deletions(-)
>
> Hidetoshi Seto (11):
> mce-severity: cleanup severity table, prep
> mce-severity: cleanup severity table
> mce-severity: trivial cleanups
> x86, mce: replace MCE_SELF_VECTOR by irq_work
> x86, mce: replace MCM_ to MCI_MISC_
> x86, mce: introduce mce_gather_info()
> x86, mce: check the result of ancient_init()
> x86, mce: cleanup mce_create/remove_device
> x86, mce: cleanup mce_read
> x86, mce: use prefix mce_chrdev_ to group functions
> x86, mce: use prefix mce_sysdev_ to group functions
>
> Tony Luck (1):
> mce-severity: fixes for mce severity table

Well, I can't say I'm crazy about the severities mechanism but I guess
this is fine for a cleanup patchset. Btw, the problem of making the
severities readable will come up again when we implement this into perf
filters and the complexity will be moved into the filter function...

Other than that,

Reviewed-by: Borislav Petkov <[email protected]>

Thanks.

--
Regards/Gruss,
Boris.

Advanced Micro Devices GmbH
Einsteinring 24, 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

2011-06-10 08:25:10

by Hidetoshi Seto

[permalink] [raw]
Subject: Re: [PATCH 00/12] x86: minor cleanups/fixes for MCE codes (v2)

(2011/06/08 19:19), Borislav Petkov wrote:
> Well, I can't say I'm crazy about the severities mechanism but I guess
> this is fine for a cleanup patchset. Btw, the problem of making the
> severities readable will come up again when we implement this into perf
> filters and the complexity will be moved into the filter function...
>
> Other than that,
>
> Reviewed-by: Borislav Petkov <[email protected]>

Thank you again for your review!
I hope we do much better with the perf filter version.

Thanks,
H.Seto

2011-06-16 11:16:22

by Luck, Tony

[permalink] [raw]
Subject: [tip:ras/core] x86, mce, severity: Fix two severities table signatures

Commit-ID: 880a317abccf60b989951bf1515964cd73245970
Gitweb: http://git.kernel.org/tip/880a317abccf60b989951bf1515964cd73245970
Author: Tony Luck <[email protected]>
AuthorDate: Tue, 7 Jun 2011 21:51:56 -0400
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 11:37:57 +0200

x86, mce, severity: Fix two severities table signatures

The "Spurious not enabled" entry is redundant: the "Not enabled" entry
earlier in the table will cover this case.

The "Action required; unknown MCACOD" entry shouldn't specify MCACOD in
the .mask field. Current code will only match for mcacod==0 rather than
all AR=1 entries.

Signed-off-by: Tony Luck <[email protected]>
Signed-off-by: Hidetoshi Seto <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 1e8d66c..352d16a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -69,8 +69,6 @@ static struct severity {
MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
KERNEL),
BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
- MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
- "Spurious not enabled", SER),

/* ignore OVER for UCNA */
MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
@@ -82,7 +80,7 @@ static struct severity {
/* AR add known MCACODs here */
MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
"Action required with lost events", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
"Action required; unknown MCACOD", SER),

/* known AO MCACODs: */

2011-06-16 11:16:42

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce, severity: Make formatting a bit more readable

Commit-ID: 901d7691d3238ad68c80a567b88b1e5d614137fb
Gitweb: http://git.kernel.org/tip/901d7691d3238ad68c80a567b88b1e5d614137fb
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:52:43 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 11:40:21 +0200

x86, mce, severity: Make formatting a bit more readable

The table looks very complicated and hard to read for people other than
skilled developers. So let's clean it up a bit. At first, change format
to ease reading elements in the table.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 116 +++++++++++++++++++++-------
1 files changed, 87 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 352d16a..eaf5a43 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -58,44 +58,102 @@ static struct severity {
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCACOD 0xffff

- BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
- BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
- BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
+ BITCLR(
+ MCI_STATUS_VAL,
+ NO, "Invalid"
+ ),
+ BITCLR(
+ MCI_STATUS_EN,
+ NO, "Not enabled"
+ ),
+ BITSET(
+ MCI_STATUS_PCC,
+ PANIC, "Processor context corrupt"
+ ),
/* When MCIP is not set something is very confused */
- MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+ MCGMASK(
+ MCG_STATUS_MCIP, 0,
+ PANIC, "MCIP not set in MCA handler"
+ ),
/* Neither return not error IP -- no chance to recover -> PANIC */
- MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
- "Neither restart nor error IP"),
- MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
- KERNEL),
- BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+ MCGMASK(
+ MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0,
+ PANIC, "Neither restart nor error IP"
+ ),
+ MCGMASK(
+ MCG_STATUS_RIPV, 0,
+ PANIC, "In kernel and no restart IP",
+ KERNEL
+ ),
+ BITCLR(
+ MCI_STATUS_UC,
+ KEEP, "Corrected error",
+ NOSER
+ ),

/* ignore OVER for UCNA */
- MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
- "Uncorrected no action required", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
- "Illegal combination (UCNA with AR=1)", SER),
- MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+ MASK(
+ MCI_UC_SAR, MCI_STATUS_UC,
+ KEEP, "Uncorrected no action required",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR,
+ PANIC, "Illegal combination (UCNA with AR=1)",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_S, 0,
+ KEEP, "Non signalled machine check",
+ SER
+ ),

/* AR add known MCACODs here */
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
- "Action required with lost events", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
- "Action required; unknown MCACOD", SER),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR,
+ PANIC, "Action required with lost events",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR,
+ PANIC, "Action required; unknown MCACOD",
+ SER
+ ),

/* known AO MCACODs: */
- MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
- "Action optional: memory scrubbing error", SER),
- MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
- "Action optional: last level cache writeback error", SER),
-
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
- "Action optional unknown MCACOD", SER),
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
- "Action optional with lost events", SER),
- BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
- BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
- BITSET(0, SOME, "No match") /* always matches. keep at end */
+ MASK(
+ MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0,
+ AO, "Action optional: memory scrubbing error",
+ SER
+ ),
+ MASK(
+ MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a,
+ AO, "Action optional: last level cache writeback error",
+ SER
+ ),
+
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S,
+ SOME, "Action optional unknown MCACOD",
+ SER
+ ),
+ MASK(
+ MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER,
+ SOME, "Action optional with lost events",
+ SER
+ ),
+ BITSET(
+ MCI_STATUS_UC|MCI_STATUS_OVER,
+ PANIC, "Overflowed uncorrected"
+ ),
+ BITSET(
+ MCI_STATUS_UC,
+ UC, "Uncorrected"
+ ),
+ BITSET(
+ 0,
+ SOME, "No match"
+ ) /* always matches. keep at end */
};

/*

2011-06-16 11:17:07

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce, severity: Cleanup severity table

Commit-ID: a17957cdec69acb9e26319618b95a810a936e637
Gitweb: http://git.kernel.org/tip/a17957cdec69acb9e26319618b95a810a936e637
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:53:35 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:09:42 +0200

x86, mce, severity: Cleanup severity table

The current format of an item in this table is:
condition(param, ..., level, message [, condition2 ...])

So we have to check both an item's head and tail to find the conditions
which match the item.

Format them in a more straight forward manner:
item(level, message, condition [, condition2 ...])

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 127 +++++++++++++----------------
1 files changed, 58 insertions(+), 69 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index eaf5a43..27e778e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -43,116 +43,105 @@ static struct severity {
unsigned char covered;
char *msg;
} severities[] = {
-#define KERNEL .context = IN_KERNEL
-#define USER .context = IN_USER
-#define SER .ser = SER_REQUIRED
-#define NOSER .ser = NO_SER
-#define SEV(s) .sev = MCE_ ## s ## _SEVERITY
-#define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
-#define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
-#define MCGMASK(x, res, s, m, r...) \
- { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
-#define MASK(x, y, s, m, r...) \
- { .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
+#define BITCLR(x) .mask = x, .result = 0
+#define BITSET(x) .mask = x, .result = x
+#define MCGMASK(x, y) .mcgmask = x, .mcgres = y
+#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
#define MCACOD 0xffff

- BITCLR(
- MCI_STATUS_VAL,
- NO, "Invalid"
+ MCESEV(
+ NO, "Invalid",
+ BITCLR(MCI_STATUS_VAL)
),
- BITCLR(
- MCI_STATUS_EN,
- NO, "Not enabled"
+ MCESEV(
+ NO, "Not enabled",
+ BITCLR(MCI_STATUS_EN)
),
- BITSET(
- MCI_STATUS_PCC,
- PANIC, "Processor context corrupt"
+ MCESEV(
+ PANIC, "Processor context corrupt",
+ BITSET(MCI_STATUS_PCC)
),
/* When MCIP is not set something is very confused */
- MCGMASK(
- MCG_STATUS_MCIP, 0,
- PANIC, "MCIP not set in MCA handler"
+ MCESEV(
+ PANIC, "MCIP not set in MCA handler",
+ MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
- MCGMASK(
- MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0,
- PANIC, "Neither restart nor error IP"
+ MCESEV(
+ PANIC, "Neither restart nor error IP",
+ MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
- MCGMASK(
- MCG_STATUS_RIPV, 0,
+ MCESEV(
PANIC, "In kernel and no restart IP",
- KERNEL
+ KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
- BITCLR(
- MCI_STATUS_UC,
+ MCESEV(
KEEP, "Corrected error",
- NOSER
+ NOSER, BITCLR(MCI_STATUS_UC)
),

/* ignore OVER for UCNA */
- MASK(
- MCI_UC_SAR, MCI_STATUS_UC,
+ MCESEV(
KEEP, "Uncorrected no action required",
- SER
+ SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR,
+ MCESEV(
PANIC, "Illegal combination (UCNA with AR=1)",
- SER
+ SER,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR)
),
- MASK(
- MCI_STATUS_S, 0,
+ MCESEV(
KEEP, "Non signalled machine check",
- SER
+ SER, MASK(MCI_STATUS_S, 0)
),

/* AR add known MCACODs here */
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR,
+ MCESEV(
PANIC, "Action required with lost events",
- SER
+ SER,
+ MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR,
+ MCESEV(
PANIC, "Action required; unknown MCACOD",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),

/* known AO MCACODs: */
- MASK(
- MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0,
+ MCESEV(
AO, "Action optional: memory scrubbing error",
- SER
+ SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0)
),
- MASK(
- MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a,
+ MCESEV(
AO, "Action optional: last level cache writeback error",
- SER
+ SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a)
),
-
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S,
+ MCESEV(
SOME, "Action optional unknown MCACOD",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
),
- MASK(
- MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER,
+ MCESEV(
SOME, "Action optional with lost events",
- SER
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER)
),
- BITSET(
- MCI_STATUS_UC|MCI_STATUS_OVER,
- PANIC, "Overflowed uncorrected"
+
+ MCESEV(
+ PANIC, "Overflowed uncorrected",
+ BITSET(MCI_STATUS_UC|MCI_STATUS_OVER)
),
- BITSET(
- MCI_STATUS_UC,
- UC, "Uncorrected"
+ MCESEV(
+ UC, "Uncorrected",
+ BITSET(MCI_STATUS_UC)
),
- BITSET(
- 0,
- SOME, "No match"
+ MCESEV(
+ SOME, "No match",
+ BITSET(0)
) /* always matches. keep at end */
};

2011-06-16 11:17:32

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce, severity: Clean up trivial coding style problems

Commit-ID: 7639bfc753f70321dbea83852e1cc47a45b681d7
Gitweb: http://git.kernel.org/tip/7639bfc753f70321dbea83852e1cc47a45b681d7
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:55:05 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:07 +0200

x86, mce, severity: Clean up trivial coding style problems

More specifically:

- sort bits in the macros
- use BITCLR/BITSET
- coordinate message pattern
- use m for struct mce
- cleanup for severities_debugfs_init()

No functional change.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 39 ++++++++++++++---------------
1 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 27e778e..7395d5f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -99,41 +99,40 @@ static struct severity {
),
MCESEV(
KEEP, "Non signalled machine check",
- SER, MASK(MCI_STATUS_S, 0)
+ SER, BITCLR(MCI_STATUS_S)
),

/* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
- SER,
- MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR)
+ SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
MCESEV(
- PANIC, "Action required; unknown MCACOD",
+ PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
),

/* known AO MCACODs: */
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
),
MCESEV(
- SOME, "Action optional unknown MCACOD",
+ SOME, "Action optional: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S)
),
MCESEV(
SOME, "Action optional with lost events",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_S)
),

MCESEV(
PANIC, "Overflowed uncorrected",
- BITSET(MCI_STATUS_UC|MCI_STATUS_OVER)
+ BITSET(MCI_STATUS_OVER|MCI_STATUS_UC)
),
MCESEV(
UC, "Uncorrected",
@@ -157,15 +156,15 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}

-int mce_severity(struct mce *a, int tolerant, char **msg)
+int mce_severity(struct mce *m, int tolerant, char **msg)
{
- enum context ctx = error_context(a);
+ enum context ctx = error_context(m);
struct severity *s;

for (s = severities;; s++) {
- if ((a->status & s->mask) != s->result)
+ if ((m->status & s->mask) != s->result)
continue;
- if ((a->mcgstatus & s->mcgmask) != s->mcgres)
+ if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
if (s->ser == SER_REQUIRED && !mce_ser)
continue;
@@ -242,15 +241,15 @@ static const struct file_operations severities_coverage_fops = {

static int __init severities_debugfs_init(void)
{
- struct dentry *dmce = NULL, *fseverities_coverage = NULL;
+ struct dentry *dmce, *fsev;

dmce = mce_get_debugfs_dir();
- if (dmce == NULL)
+ if (!dmce)
goto err_out;
- fseverities_coverage = debugfs_create_file("severities-coverage",
- 0444, dmce, NULL,
- &severities_coverage_fops);
- if (fseverities_coverage == NULL)
+
+ fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL,
+ &severities_coverage_fops);
+ if (!fsev)
goto err_out;

return 0;
@@ -259,4 +258,4 @@ err_out:
return -ENOMEM;
}
late_initcall(severities_debugfs_init);
-#endif
+#endif /* CONFIG_DEBUG_FS */

2011-06-16 11:17:59

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Replace MCE_SELF_VECTOR by irq_work

Commit-ID: b77e70bf3535e0bd5472e0681f41cce4ae0598bb
Gitweb: http://git.kernel.org/tip/b77e70bf3535e0bd5472e0681f41cce4ae0598bb
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:56:02 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:08 +0200

x86, mce: Replace MCE_SELF_VECTOR by irq_work

The MCE handler uses a special vector for self IPI to invoke
post-emergency processing in an interrupt context, e.g. call an
NMI-unsafe function, wakeup loggers, schedule time-consuming work for
recovery, etc.

This mechanism is now generalized by the following commit:

> e360adbe29241a0194e10e20595360dd7b98a2b3
> Author: Peter Zijlstra <[email protected]>
> Date: Thu Oct 14 14:01:34 2010 +0800
>
> irq_work: Add generic hardirq context callbacks
>
> Provide a mechanism that allows running code in IRQ context. It is
> most useful for NMI code that needs to interact with the rest of the
> system -- like wakeup a task to drain buffers.
:

So change to use provided generic mechanism.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/include/asm/entry_arch.h | 4 ---
arch/x86/include/asm/hw_irq.h | 1 -
arch/x86/include/asm/irq_vectors.h | 5 ----
arch/x86/kernel/cpu/mcheck/mce.c | 47 ++++-------------------------------
arch/x86/kernel/entry_64.S | 5 ----
arch/x86/kernel/irqinit.c | 3 --
6 files changed, 6 insertions(+), 59 deletions(-)

diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 1cd6d26..0baa628 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -53,8 +53,4 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
#endif

-#ifdef CONFIG_X86_MCE
-BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
-#endif
-
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index bb9efe8..13f5504 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -34,7 +34,6 @@ extern void irq_work_interrupt(void);
extern void spurious_interrupt(void);
extern void thermal_interrupt(void);
extern void reschedule_interrupt(void);
-extern void mce_self_interrupt(void);

extern void invalidate_interrupt(void);
extern void invalidate_interrupt0(void);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6e976ee..6665026 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,11 +109,6 @@

#define UV_BAU_MESSAGE 0xf5

-/*
- * Self IPI vector for machine checks
- */
-#define MCE_SELF_VECTOR 0xf4
-
/* Xen vector callback to receive events in a HVM domain */
#define XEN_HVM_EVTCHN_CALLBACK 0xf3

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ff1ae9b..e81d48b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -10,7 +10,6 @@
#include <linux/thread_info.h>
#include <linux/capability.h>
#include <linux/miscdevice.h>
-#include <linux/interrupt.h>
#include <linux/ratelimit.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
@@ -38,12 +37,9 @@
#include <linux/mm.h>
#include <linux/debugfs.h>
#include <linux/edac_mce.h>
+#include <linux/irq_work.h>

#include <asm/processor.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/idle.h>
-#include <asm/ipi.h>
#include <asm/mce.h>
#include <asm/msr.h>

@@ -461,22 +457,13 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}

-#ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Called after interrupts have been reenabled again
- * when a MCE happened during an interrupts off region
- * in the kernel.
- */
-asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
+DEFINE_PER_CPU(struct irq_work, mce_irq_work);
+
+static void mce_irq_work_cb(struct irq_work *entry)
{
- ack_APIC_irq();
- exit_idle();
- irq_enter();
mce_notify_irq();
mce_schedule_work();
- irq_exit();
}
-#endif

static void mce_report_event(struct pt_regs *regs)
{
@@ -492,29 +479,7 @@ static void mce_report_event(struct pt_regs *regs)
return;
}

-#ifdef CONFIG_X86_LOCAL_APIC
- /*
- * Without APIC do not notify. The event will be picked
- * up eventually.
- */
- if (!cpu_has_apic)
- return;
-
- /*
- * When interrupts are disabled we cannot use
- * kernel services safely. Trigger an self interrupt
- * through the APIC to instead do the notification
- * after interrupts are reenabled again.
- */
- apic->send_IPI_self(MCE_SELF_VECTOR);
-
- /*
- * Wait for idle afterwards again so that we don't leave the
- * APIC in a non idle state because the normal APIC writes
- * cannot exclude us.
- */
- apic_wait_icr_idle();
-#endif
+ irq_work_queue(&__get_cpu_var(mce_irq_work));
}

DEFINE_PER_CPU(unsigned, mce_poll_count);
@@ -1444,7 +1409,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
-
+ init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
}

/*
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0..9fa6546 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -991,11 +991,6 @@ apicinterrupt THRESHOLD_APIC_VECTOR \
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt

-#ifdef CONFIG_X86_MCE
-apicinterrupt MCE_SELF_VECTOR \
- mce_self_interrupt smp_mce_self_interrupt
-#endif
-
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index f470e4e..f09d4bb 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -272,9 +272,6 @@ static void __init apic_intr_init(void)
#ifdef CONFIG_X86_MCE_THRESHOLD
alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
#endif
-#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
- alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
-#endif

#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */

2011-06-16 11:18:27

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Replace MCM_ with MCI_MISC_

Commit-ID: 2b90e77eaee8809073db5cf43ac9795cc2054dc0
Gitweb: http://git.kernel.org/tip/2b90e77eaee8809073db5cf43ac9795cc2054dc0
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:56:56 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:10 +0200

x86, mce: Replace MCM_ with MCI_MISC_

Follow other MCi register defines. Plus define MCI_MISC_ADDR_LSB() and
MCI_MISC_ADDR_MODE().

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/include/asm/mce.h | 17 +++++++++++------
arch/x86/kernel/cpu/mcheck/mce.c | 4 ++--
2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 021979a..bbb328f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -8,6 +8,7 @@
* Machine Check support for x86
*/

+/* MCG_CAP register defines */
#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
@@ -17,10 +18,12 @@
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */

+/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */

+/* MCi_STATUS register defines */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
@@ -31,12 +34,14 @@
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */

-/* MISC register defines */
-#define MCM_ADDR_SEGOFF 0 /* segment offset */
-#define MCM_ADDR_LINEAR 1 /* linear address */
-#define MCM_ADDR_PHYS 2 /* physical address */
-#define MCM_ADDR_MEM 3 /* memory address */
-#define MCM_ADDR_GENERIC 7 /* generic */
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
+#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
+#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
+#define MCI_MISC_ADDR_PHYS 2 /* physical address */
+#define MCI_MISC_ADDR_MEM 3 /* memory address */
+#define MCI_MISC_ADDR_GENERIC 7 /* generic */

/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN (1ULL << 30)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e81d48b..e807508 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -844,9 +844,9 @@ static int mce_usable_address(struct mce *m)
{
if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
return 0;
- if ((m->misc & 0x3f) > PAGE_SHIFT)
+ if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
return 0;
- if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS)
+ if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
return 0;
return 1;
}

2011-06-16 11:18:55

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Introduce mce_gather_info()

Commit-ID: b8325c5b110d7ff460b79588e7e9afdcc73d5c3c
Gitweb: http://git.kernel.org/tip/b8325c5b110d7ff460b79588e7e9afdcc73d5c3c
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:57:46 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:10 +0200

x86, mce: Introduce mce_gather_info()

This patch introduces mce_gather_info() which is to be called at the
beginning of error handling and gathers minimum error information from
proper error registers (and saved registers).

As the result of mce_get_rip() is integrated, unnecessary zeroing
is removed. This also takes care of saving RIP which is required to
make some decision about error severity for SRAR errors, instead of
retrieving it later in the handler.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 50 ++++++++++++++++++++-----------------
1 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e807508..a182875 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -369,6 +369,31 @@ static void mce_wrmsrl(u32 msr, u64 v)
}

/*
+ * Collect all global (w.r.t. this processor) status about this machine
+ * check into our "mce" struct so that we can use it later to assess
+ * the severity of the problem as we read per-bank specific details.
+ */
+static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
+{
+ mce_setup(m);
+
+ m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
+ if (regs) {
+ /*
+ * Get the address of the instruction at the time of
+ * the machine check error.
+ */
+ if (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) {
+ m->ip = regs->ip;
+ m->cs = regs->cs;
+ }
+ /* Use accurate RIP reporting if available. */
+ if (rip_msr)
+ m->ip = mce_rdmsrl(rip_msr);
+ }
+}
+
+/*
* Simple lockless ring to communicate PFNs from the exception handler with the
* process context work function. This is vastly simplified because there's
* only a single reader and a single writer.
@@ -439,24 +464,6 @@ static void mce_schedule_work(void)
}
}

-/*
- * Get the address of the instruction at the time of the machine check
- * error.
- */
-static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
-{
-
- if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) {
- m->ip = regs->ip;
- m->cs = regs->cs;
- } else {
- m->ip = 0;
- m->cs = 0;
- }
- if (rip_msr)
- m->ip = mce_rdmsrl(rip_msr);
-}
-
DEFINE_PER_CPU(struct irq_work, mce_irq_work);

static void mce_irq_work_cb(struct irq_work *entry)
@@ -506,9 +513,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

percpu_inc(mce_poll_count);

- mce_setup(&m);
+ mce_gather_info(&m, NULL);

- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
for (i = 0; i < banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -907,9 +913,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (!banks)
goto out;

- mce_setup(&m);
+ mce_gather_info(&m, regs);

- m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
final = &__get_cpu_var(mces_seen);
*final = m;

@@ -993,7 +998,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);

- mce_get_rip(&m, regs);
mce_log(&m);

if (severity > worst) {

2011-06-16 11:19:19

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Check the result of ancient_init()

Commit-ID: 3a97fc34130326da87b20de5d0259c35406707ce
Gitweb: http://git.kernel.org/tip/3a97fc34130326da87b20de5d0259c35406707ce
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:58:35 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:12 +0200

x86, mce: Check the result of ancient_init()

Because "ancient CPUs" like p5 and winchip don't have X86_FEATURE_MCA
(I suppose so), mcheck_cpu_init() on such CPUs will return at check of
mce_available() after __mcheck_cpu_ancient_init().

It is hard to know this implicit behavior without knowing the CPUs
well. So make it clear that we leave mcheck_cpu_init() when the CPU is
initialized in __mcheck_cpu_ancient_init().

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++++++---
1 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a182875..2cc98d5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1332,18 +1332,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
return 0;
}

-static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
- return;
+ return 0;
+
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
intel_p5_mcheck_init(c);
+ return 1;
break;
case X86_VENDOR_CENTAUR:
winchip_mcheck_init(c);
+ return 1;
break;
}
+
+ return 0;
}

static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
@@ -1397,7 +1402,8 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
if (mce_disabled)
return;

- __mcheck_cpu_ancient_init(c);
+ if (__mcheck_cpu_ancient_init(c))
+ return;

if (!mce_available(c))
return;

2011-06-16 11:19:46

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Cleanup mce_create()/remove_device()

Commit-ID: f6783c4234e65bd6f85596d97745ccdbf041cb63
Gitweb: http://git.kernel.org/tip/f6783c4234e65bd6f85596d97745ccdbf041cb63
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 10:59:19 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:12 +0200

x86, mce: Cleanup mce_create()/remove_device()

Use temporary local variable sysdev to simplify the code. No change in
logic.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 27 ++++++++++++++-------------
1 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2cc98d5..f3f648c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1925,28 +1925,28 @@ static cpumask_var_t mce_dev_initialized;
/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
static __cpuinit int mce_create_device(unsigned int cpu)
{
+ struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
int err;
int i, j;

if (!mce_available(&boot_cpu_data))
return -EIO;

- memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject));
- per_cpu(mce_dev, cpu).id = cpu;
- per_cpu(mce_dev, cpu).cls = &mce_sysclass;
+ memset(&sysdev->kobj, 0, sizeof(struct kobject));
+ sysdev->id = cpu;
+ sysdev->cls = &mce_sysclass;

- err = sysdev_register(&per_cpu(mce_dev, cpu));
+ err = sysdev_register(sysdev);
if (err)
return err;

for (i = 0; mce_attrs[i]; i++) {
- err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ err = sysdev_create_file(sysdev, mce_attrs[i]);
if (err)
goto error;
}
for (j = 0; j < banks; j++) {
- err = sysdev_create_file(&per_cpu(mce_dev, cpu),
- &mce_banks[j].attr);
+ err = sysdev_create_file(sysdev, &mce_banks[j].attr);
if (err)
goto error2;
}
@@ -1955,30 +1955,31 @@ static __cpuinit int mce_create_device(unsigned int cpu)
return 0;
error2:
while (--j >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
+ sysdev_remove_file(sysdev, &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_attrs[i]);

- sysdev_unregister(&per_cpu(mce_dev, cpu));
+ sysdev_unregister(sysdev);

return err;
}

static __cpuinit void mce_remove_device(unsigned int cpu)
{
+ struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
int i;

if (!cpumask_test_cpu(cpu, mce_dev_initialized))
return;

for (i = 0; mce_attrs[i]; i++)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_attrs[i]);

for (i = 0; i < banks; i++)
- sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
+ sysdev_remove_file(sysdev, &mce_banks[i].attr);

- sysdev_unregister(&per_cpu(mce_dev, cpu));
+ sysdev_unregister(sysdev);
cpumask_clear_cpu(cpu, mce_dev_initialized);
}

2011-06-16 11:20:12

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Cleanup mce_read()

Commit-ID: 559faa6be143b8aa7a07b12f618d29fbc1c8eb0d
Gitweb: http://git.kernel.org/tip/559faa6be143b8aa7a07b12f618d29fbc1c8eb0d
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 11:00:08 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:13 +0200

x86, mce: Cleanup mce_read()

Use a temporary local variable m to simplify the code. No change in
logic.

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 23 +++++++++++------------
1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f3f648c..54360e8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1537,19 +1537,18 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
do {
for (i = prev; i < next; i++) {
unsigned long start = jiffies;
+ struct mce *m = &mcelog.entry[i];

- while (!mcelog.entry[i].finished) {
+ while (!m->finished) {
if (time_after_eq(jiffies, start + 2)) {
- memset(mcelog.entry + i, 0,
- sizeof(struct mce));
+ memset(m, 0, sizeof(*m));
goto timeout;
}
cpu_relax();
}
smp_rmb();
- err |= copy_to_user(buf, mcelog.entry + i,
- sizeof(struct mce));
- buf += sizeof(struct mce);
+ err |= copy_to_user(buf, m, sizeof(*m));
+ buf += sizeof(*m);
timeout:
;
}
@@ -1569,13 +1568,13 @@ timeout:
on_each_cpu(collect_tscs, cpu_tsc, 1);

for (i = next; i < MCE_LOG_LEN; i++) {
- if (mcelog.entry[i].finished &&
- mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
- err |= copy_to_user(buf, mcelog.entry+i,
- sizeof(struct mce));
+ struct mce *m = &mcelog.entry[i];
+
+ if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
+ err |= copy_to_user(buf, m, sizeof(*m));
smp_rmb();
- buf += sizeof(struct mce);
- memset(&mcelog.entry[i], 0, sizeof(struct mce));
+ buf += sizeof(*m);
+ memset(m, 0, sizeof(*m));
}
}

2011-06-16 11:20:36

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Use mce_chrdev_ prefix to group functions

Commit-ID: 93b62c3cf59d44850cbe9f04d58da08930e3fb0b
Gitweb: http://git.kernel.org/tip/93b62c3cf59d44850cbe9f04d58da08930e3fb0b
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 11:00:45 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:15 +0200

x86, mce: Use mce_chrdev_ prefix to group functions

There are many functions named mce_* so use a new prefix for the subset
of functions dealing with the character device /dev/mcelog.

This change doesn't impact the mce-inject module because the exported
symbol mce_chrdev_ops already has the prefix, therefore it is left
unchanged.

Before: After:
mce_wait mce_chrdev_wait
mce_state_lock mce_chrdev_state_lock
open_count mce_chrdev_open_count
open_exclu mce_chrdev_open_exclu
mce_open mce_chrdev_open
mce_release mce_chrdev_release
mce_read_mutex mce_chrdev_read_mutex
mce_read mce_chrdev_read
mce_poll mce_chrdev_poll
mce_ioctl mce_chrdev_ioctl
mce_log_device mce_chrdev_device

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce.c | 77 ++++++++++++++++++++------------------
1 files changed, 41 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 54360e8..9a73516 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -45,12 +45,12 @@

#include "mce-internal.h"

-static DEFINE_MUTEX(mce_read_mutex);
+static DEFINE_MUTEX(mce_chrdev_read_mutex);

#define rcu_dereference_check_mce(p) \
rcu_dereference_index_check((p), \
rcu_read_lock_sched_held() || \
- lockdep_is_held(&mce_read_mutex))
+ lockdep_is_held(&mce_chrdev_read_mutex))

#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -90,7 +90,8 @@ static unsigned long mce_need_notify;
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

-static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
+
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;

@@ -1159,7 +1160,8 @@ int mce_notify_irq(void)
clear_thread_flag(TIF_MCE_NOTIFY);

if (test_and_clear_bit(0, &mce_need_notify)) {
- wake_up_interruptible(&mce_wait);
+ /* wake processes polling /dev/mcelog */
+ wake_up_interruptible(&mce_chrdev_wait);

/*
* There is no risk of missing notifications because
@@ -1423,40 +1425,41 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
}

/*
- * Character device to read and clear the MCE log.
+ * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
*/

-static DEFINE_SPINLOCK(mce_state_lock);
-static int open_count; /* #times opened */
-static int open_exclu; /* already open exclusive? */
+static DEFINE_SPINLOCK(mce_chrdev_state_lock);
+static int mce_chrdev_open_count; /* #times opened */
+static int mce_chrdev_open_exclu; /* already open exclusive? */

-static int mce_open(struct inode *inode, struct file *file)
+static int mce_chrdev_open(struct inode *inode, struct file *file)
{
- spin_lock(&mce_state_lock);
+ spin_lock(&mce_chrdev_state_lock);

- if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
- spin_unlock(&mce_state_lock);
+ if (mce_chrdev_open_exclu ||
+ (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&mce_chrdev_state_lock);

return -EBUSY;
}

if (file->f_flags & O_EXCL)
- open_exclu = 1;
- open_count++;
+ mce_chrdev_open_exclu = 1;
+ mce_chrdev_open_count++;

- spin_unlock(&mce_state_lock);
+ spin_unlock(&mce_chrdev_state_lock);

return nonseekable_open(inode, file);
}

-static int mce_release(struct inode *inode, struct file *file)
+static int mce_chrdev_release(struct inode *inode, struct file *file)
{
- spin_lock(&mce_state_lock);
+ spin_lock(&mce_chrdev_state_lock);

- open_count--;
- open_exclu = 0;
+ mce_chrdev_open_count--;
+ mce_chrdev_open_exclu = 0;

- spin_unlock(&mce_state_lock);
+ spin_unlock(&mce_chrdev_state_lock);

return 0;
}
@@ -1505,8 +1508,8 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
return 0;
}

-static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
- loff_t *off)
+static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
+ size_t usize, loff_t *off)
{
char __user *buf = ubuf;
unsigned long *cpu_tsc;
@@ -1517,7 +1520,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
if (!cpu_tsc)
return -ENOMEM;

- mutex_lock(&mce_read_mutex);
+ mutex_lock(&mce_chrdev_read_mutex);

if (!mce_apei_read_done) {
err = __mce_read_apei(&buf, usize);
@@ -1582,15 +1585,15 @@ timeout:
err = -EFAULT;

out:
- mutex_unlock(&mce_read_mutex);
+ mutex_unlock(&mce_chrdev_read_mutex);
kfree(cpu_tsc);

return err ? err : buf - ubuf;
}

-static unsigned int mce_poll(struct file *file, poll_table *wait)
+static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
{
- poll_wait(file, &mce_wait, wait);
+ poll_wait(file, &mce_chrdev_wait, wait);
if (rcu_access_index(mcelog.next))
return POLLIN | POLLRDNORM;
if (!mce_apei_read_done && apei_check_mce())
@@ -1598,7 +1601,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
return 0;
}

-static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
+ unsigned long arg)
{
int __user *p = (int __user *)arg;

@@ -1626,16 +1630,16 @@ static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg)

/* Modified in mce-inject.c, so not static or const */
struct file_operations mce_chrdev_ops = {
- .open = mce_open,
- .release = mce_release,
- .read = mce_read,
- .poll = mce_poll,
- .unlocked_ioctl = mce_ioctl,
- .llseek = no_llseek,
+ .open = mce_chrdev_open,
+ .release = mce_chrdev_release,
+ .read = mce_chrdev_read,
+ .poll = mce_chrdev_poll,
+ .unlocked_ioctl = mce_chrdev_ioctl,
+ .llseek = no_llseek,
};
EXPORT_SYMBOL_GPL(mce_chrdev_ops);

-static struct miscdevice mce_log_device = {
+static struct miscdevice mce_chrdev_device = {
MISC_MCELOG_MINOR,
"mcelog",
&mce_chrdev_ops,
@@ -2107,11 +2111,12 @@ static __init int mcheck_init_device(void)

register_syscore_ops(&mce_syscore_ops);
register_hotcpu_notifier(&mce_cpu_notifier);
- misc_register(&mce_log_device);
+
+ /* register character device /dev/mcelog */
+ misc_register(&mce_chrdev_device);

return err;
}
-
device_initcall(mcheck_init_device);

/*

2011-06-16 11:20:59

by Hidetoshi Seto

[permalink] [raw]
Subject: [tip:ras/core] x86, mce: Use mce_sysdev_ prefix to group functions

Commit-ID: c7cece89f1b00b56276303942f96ec67cf206e1e
Gitweb: http://git.kernel.org/tip/c7cece89f1b00b56276303942f96ec67cf206e1e
Author: Hidetoshi Seto <[email protected]>
AuthorDate: Wed, 8 Jun 2011 11:02:03 +0900
Committer: Borislav Petkov <[email protected]>
CommitDate: Thu, 16 Jun 2011 12:10:16 +0200

x86, mce: Use mce_sysdev_ prefix to group functions

There are many functions named mce_* so use a new prefix for the subset
of functions related to sysfs support.

And since f3c6ea1b06c71b43f751b36bd99345369fe911af introduces
syscore_ops, use the prefix mce_syscore for some functions related to
power management which were in sysdev_class before.

Before: After:
mce_device mce_sysdev
mce_sysclass mce_sysdev_class
mce_attrs mce_sysdev_attrs
mce_dev_initialized mce_sysdev_initialized
mce_create_device mce_sysdev_create
mce_remove_device mce_sysdev_remove

mce_suspend mce_syscore_suspend
mce_shutdown mce_syscore_shutdown
mce_resume mce_syscore_resume

Signed-off-by: Hidetoshi Seto <[email protected]>
Acked-by: Tony Luck <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/include/asm/mce.h | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 62 ++++++++++++++++++----------------
arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++---
3 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index bbb328f..716b48a 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -149,7 +149,7 @@ static inline void enable_p5_mce(void) {}

void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, mce_dev);
+DECLARE_PER_CPU(struct sys_device, mce_sysdev);

/*
* Maximum banks number.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9a73516..08363b0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1697,7 +1697,7 @@ int __init mcheck_init(void)
}

/*
- * Sysfs support
+ * mce_syscore: PM support
*/

/*
@@ -1717,12 +1717,12 @@ static int mce_disable_error_reporting(void)
return 0;
}

-static int mce_suspend(void)
+static int mce_syscore_suspend(void)
{
return mce_disable_error_reporting();
}

-static void mce_shutdown(void)
+static void mce_syscore_shutdown(void)
{
mce_disable_error_reporting();
}
@@ -1732,18 +1732,22 @@ static void mce_shutdown(void)
* Only one CPU is active at this time, the others get re-added later using
* CPU hotplug:
*/
-static void mce_resume(void)
+static void mce_syscore_resume(void)
{
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
}

static struct syscore_ops mce_syscore_ops = {
- .suspend = mce_suspend,
- .shutdown = mce_shutdown,
- .resume = mce_resume,
+ .suspend = mce_syscore_suspend,
+ .shutdown = mce_syscore_shutdown,
+ .resume = mce_syscore_resume,
};

+/*
+ * mce_sysdev: Sysfs support
+ */
+
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1779,11 +1783,11 @@ static void mce_enable_ce(void *all)
__mcheck_cpu_init_timer();
}

-static struct sysdev_class mce_sysclass = {
+static struct sysdev_class mce_sysdev_class = {
.name = "machinecheck",
};

-DEFINE_PER_CPU(struct sys_device, mce_dev);
+DEFINE_PER_CPU(struct sys_device, mce_sysdev);

__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -1912,7 +1916,7 @@ static struct sysdev_ext_attribute attr_cmci_disabled = {
&mce_cmci_disabled
};

-static struct sysdev_attribute *mce_attrs[] = {
+static struct sysdev_attribute *mce_sysdev_attrs[] = {
&attr_tolerant.attr,
&attr_check_interval.attr,
&attr_trigger,
@@ -1923,12 +1927,12 @@ static struct sysdev_attribute *mce_attrs[] = {
NULL
};

-static cpumask_var_t mce_dev_initialized;
+static cpumask_var_t mce_sysdev_initialized;

/* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_create_device(unsigned int cpu)
+static __cpuinit int mce_sysdev_create(unsigned int cpu)
{
- struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
+ struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
int err;
int i, j;

@@ -1937,14 +1941,14 @@ static __cpuinit int mce_create_device(unsigned int cpu)

memset(&sysdev->kobj, 0, sizeof(struct kobject));
sysdev->id = cpu;
- sysdev->cls = &mce_sysclass;
+ sysdev->cls = &mce_sysdev_class;

err = sysdev_register(sysdev);
if (err)
return err;

- for (i = 0; mce_attrs[i]; i++) {
- err = sysdev_create_file(sysdev, mce_attrs[i]);
+ for (i = 0; mce_sysdev_attrs[i]; i++) {
+ err = sysdev_create_file(sysdev, mce_sysdev_attrs[i]);
if (err)
goto error;
}
@@ -1953,7 +1957,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (err)
goto error2;
}
- cpumask_set_cpu(cpu, mce_dev_initialized);
+ cpumask_set_cpu(cpu, mce_sysdev_initialized);

return 0;
error2:
@@ -1961,29 +1965,29 @@ error2:
sysdev_remove_file(sysdev, &mce_banks[j].attr);
error:
while (--i >= 0)
- sysdev_remove_file(sysdev, mce_attrs[i]);
+ sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);

sysdev_unregister(sysdev);

return err;
}

-static __cpuinit void mce_remove_device(unsigned int cpu)
+static __cpuinit void mce_sysdev_remove(unsigned int cpu)
{
- struct sys_device *sysdev = &per_cpu(mce_dev, cpu);
+ struct sys_device *sysdev = &per_cpu(mce_sysdev, cpu);
int i;

- if (!cpumask_test_cpu(cpu, mce_dev_initialized))
+ if (!cpumask_test_cpu(cpu, mce_sysdev_initialized))
return;

- for (i = 0; mce_attrs[i]; i++)
- sysdev_remove_file(sysdev, mce_attrs[i]);
+ for (i = 0; mce_sysdev_attrs[i]; i++)
+ sysdev_remove_file(sysdev, mce_sysdev_attrs[i]);

for (i = 0; i < banks; i++)
sysdev_remove_file(sysdev, &mce_banks[i].attr);

sysdev_unregister(sysdev);
- cpumask_clear_cpu(cpu, mce_dev_initialized);
+ cpumask_clear_cpu(cpu, mce_sysdev_initialized);
}

/* Make sure there are no machine checks on offlined CPUs. */
@@ -2033,7 +2037,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
+ mce_sysdev_create(cpu);
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
break;
@@ -2041,7 +2045,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DEAD_FROZEN:
if (threshold_cpu_callback)
threshold_cpu_callback(action, cpu);
- mce_remove_device(cpu);
+ mce_sysdev_remove(cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
@@ -2095,16 +2099,16 @@ static __init int mcheck_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;

- zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
+ zalloc_cpumask_var(&mce_sysdev_initialized, GFP_KERNEL);

mce_init_banks();

- err = sysdev_class_register(&mce_sysclass);
+ err = sysdev_class_register(&mce_sysdev_class);
if (err)
return err;

for_each_online_cpu(i) {
- err = mce_create_device(i);
+ err = mce_sysdev_create(i);
if (err)
return err;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bb0adad..f547421 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -548,7 +548,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (!b)
goto out;

- err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj,
+ err = sysfs_create_link(&per_cpu(mce_sysdev, cpu).kobj,
b->kobj, name);
if (err)
goto out;
@@ -571,7 +571,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}

- b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj);
+ b->kobj = kobject_create_and_add(name, &per_cpu(mce_sysdev, cpu).kobj);
if (!b->kobj)
goto out_free;

@@ -591,7 +591,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;

- err = sysfs_create_link(&per_cpu(mce_dev, i).kobj,
+ err = sysfs_create_link(&per_cpu(mce_sysdev, i).kobj,
b->kobj, name);
if (err)
goto out;
@@ -669,7 +669,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_sysdev, cpu).kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;

return;
@@ -681,7 +681,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;

- sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name);
+ sysfs_remove_link(&per_cpu(mce_sysdev, i).kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
}