2018-02-16 17:07:33

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH v2 1/4] x86/MCE/AMD: Redo function to get SMCA bank type

From: Yazen Ghannam <[email protected]>

Pass the bank number to smca_get_bank_type() since that's all we need.

Also, we should compare the bank number to the size of the smca_banks
array not the number of bank types. Bank types are reused for multiple
banks, so the number of types can be different from the number of banks
in a system. We could return an invalid bank type.

Use smca_get_bank_type() in get_name(), and change type of bank_type
variable to match return type of smca_get_bank_type().

Cc: <[email protected]> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <[email protected]> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
Link:
https://lkml.kernel.org/r/[email protected]

v1->v2:
* Check bank number against MAX_NR_BANKS

arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad242324..7fbb19cb1859 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);

-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;

- if (m->bank >= N_SMCA_BANK_TYPES)
+ if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;

- b = &smca_banks[m->bank];
+ b = &smca_banks[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;

@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
u8 xec = (m->status >> 16) & 0x1f;

if (mce_flags.smca)
- return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;

return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {

static const char *get_name(unsigned int bank, struct threshold_block *b)
{
- unsigned int bank_type;
+ enum smca_bank_types bank_type;

if (!mce_flags.smca) {
if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}

- if (!smca_banks[bank].hwid)
+ bank_type = smca_get_bank_type(bank);
+ if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;

- bank_type = smca_banks[bank].hwid->bank_type;
-
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
--
2.14.1



2018-02-16 15:22:07

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH v2 3/4] x86/MCE/AMD: Get address from already initialized block

From: Yazen Ghannam <[email protected]>

The block address is saved after the block is initialized when
threshold_init_device() is called.

Use the saved block address, if available, rather than trying to
rediscover it.

This will avoid a call trace, when resuming from suspend, due to the
rdmsr_safe_on_cpu() call in get_block_address(). The rdmsr_safe_on_cpu()
call issues an IPI but we're running with interrupts disabled.
This triggers:

WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291
smp_call_function_single+0xdc/0xe0

Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
Link:
https://lkml.kernel.org/r/[email protected]

v1->v2:
* Give more detail on call trace issue.

arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index d8ba9d0c3f01..12bc2863a4d6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
{
u32 addr = 0, offset = 0;

+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ return addr;
+
+ /* Get address from already initialized block. */
+ if (per_cpu(threshold_banks, cpu)) {
+ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+ if (bankp && bankp->blocks) {
+ struct threshold_block *blockp = &bankp->blocks[block];
+
+ if (blockp)
+ return blockp->address;
+ }
+ }
+
if (mce_flags.smca) {
if (smca_get_bank_type(bank) == SMCA_RESERVED)
return addr;
--
2.14.1


2018-02-16 17:07:52

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH v2 4/4] x86/MCE/AMD: Carve out SMCA get_block_address() code

From: Yazen Ghannam <[email protected]>

Carve out the SMCA code in get_block_address() into a separate helper
function.

No functional change.

Signed-off-by: Yazen Ghannam <[email protected]>
---
v1->v2:
* New in this series.

arch/x86/kernel/cpu/mcheck/mce_amd.c | 59 ++++++++++++++++++++----------------
1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 12bc2863a4d6..7c451850d8aa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -431,6 +431,37 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}

+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+ unsigned int block)
+{
+ u32 addr = 0;
+
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
+ if (!block) {
+ addr = MSR_AMD64_SMCA_MCx_MISC(bank);
+ } else {
+ /*
+ * For SMCA enabled processors, BLKPTR field of the
+ * first MISC register (MCx_MISC0) indicates presence of
+ * additional MISC register set (MISC1-4).
+ */
+ u32 low, high;
+
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ }
+ return addr;
+}
+
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
@@ -451,32 +482,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
}
}

- if (mce_flags.smca) {
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
- if (!block) {
- addr = MSR_AMD64_SMCA_MCx_MISC(bank);
- } else {
- /*
- * For SMCA enabled processors, BLKPTR field of the
- * first MISC register (MCx_MISC0) indicates presence of
- * additional MISC register set (MISC1-4).
- */
- u32 low, high;
-
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
-
- if (!(low & MCI_CONFIG_MCAX))
- return addr;
-
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
- }
- return addr;
- }
+ if (mce_flags.smca)
+ return smca_get_block_address(cpu, bank, block);

/* Fall back to method we used for older processors: */
switch (block) {
--
2.14.1


2018-02-16 17:08:56

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH v2 2/4] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type

From: Yazen Ghannam <[email protected]>

Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it.

This will cause a call trace, when resuming from suspend, due to
rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue
an IPI but we're running with interrupts disabled. This triggers:

WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291
smp_call_function_single+0xdc/0xe0

Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.

Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.

Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.

Don't try to find the block address on reserved banks.

Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
Link:
https://lkml.kernel.org/r/[email protected]

v1->v2:
* Give more detail on call trace issue.

arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++
drivers/edac/mce_amd.c | 11 +++++++----
3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..340070415c2c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -346,6 +346,7 @@ enum smca_bank_types {
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
+ SMCA_RESERVED, /* Reserved */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7fbb19cb1859..d8ba9d0c3f01 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */

+ /* Reserved type */
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
u32 addr = 0, offset = 0;

if (mce_flags.smca) {
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
if (!block) {
addr = MSR_AMD64_SMCA_MCx_MISC(bank);
} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671c7a38..2ab4d61ee47e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
static void decode_smca_error(struct mce *m)
{
struct smca_hwid *hwid;
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
const char *ip_name;
u8 xec = XEC(m->status, xec_mask);

if (m->bank >= ARRAY_SIZE(smca_banks))
return;

- if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
- pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
hwid = smca_banks[m->bank].hwid;
if (!hwid)
return;

bank_type = hwid->bank_type;
+
+ if (bank_type == SMCA_RESERVED) {
+ pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+ return;
+ }
+
ip_name = smca_get_long_name(bank_type);

pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
--
2.14.1


2018-02-17 10:31:46

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 4/4] x86/MCE/AMD: Carve out SMCA get_block_address() code

On Thu, Feb 15, 2018 at 03:09:43PM -0600, Yazen Ghannam wrote:
> From: Yazen Ghannam <[email protected]>
>
> Carve out the SMCA code in get_block_address() into a separate helper
> function.
>
> No functional change.
>
> Signed-off-by: Yazen Ghannam <[email protected]>
> ---
> v1->v2:
> * New in this series.
>
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 59 ++++++++++++++++++++----------------
> 1 file changed, 33 insertions(+), 26 deletions(-)

All look ok to me, this last one I massaged a bit to save an indentation
level, see below.

Running them a bit on the boxes here...

---
From: Yazen Ghannam <[email protected]>
Date: Thu, 15 Feb 2018 15:09:43 -0600
Subject: [PATCH] x86/MCE/AMD: Carve out SMCA get_block_address() code

Carve out the SMCA code in get_block_address() into a separate helper
function.

No functional change.

Signed-off-by: Yazen Ghannam <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Cc: x86-ml <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
[ Save an indentation level. ]
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 57 ++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 12bc2863a4d6..f7666eef4a87 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -431,6 +431,35 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}

+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+ unsigned int block)
+{
+ u32 low, high;
+ u32 addr = 0;
+
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ /*
+ * For SMCA enabled processors, BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+ */
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+
+ return addr;
+}
+
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
@@ -451,32 +480,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
}
}

- if (mce_flags.smca) {
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
- if (!block) {
- addr = MSR_AMD64_SMCA_MCx_MISC(bank);
- } else {
- /*
- * For SMCA enabled processors, BLKPTR field of the
- * first MISC register (MCx_MISC0) indicates presence of
- * additional MISC register set (MISC1-4).
- */
- u32 low, high;
-
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
-
- if (!(low & MCI_CONFIG_MCAX))
- return addr;
-
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
- }
- return addr;
- }
+ if (mce_flags.smca)
+ return smca_get_block_address(cpu, bank, block);

/* Fall back to method we used for older processors: */
switch (block) {
--
2.13.0

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Subject: [tip:ras/core] x86/mce/AMD: Carve out SMCA get_block_address() code

Commit-ID: 8a331f4a0863bea758561c921b94b4d28f7c4029
Gitweb: https://git.kernel.org/tip/8a331f4a0863bea758561c921b94b4d28f7c4029
Author: Yazen Ghannam <[email protected]>
AuthorDate: Wed, 21 Feb 2018 11:19:00 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Wed, 21 Feb 2018 17:00:55 +0100

x86/mce/AMD: Carve out SMCA get_block_address() code

Carve out the SMCA code in get_block_address() into a separate helper
function.

No functional change.

Signed-off-by: Yazen Ghannam <[email protected]>
[ Save an indentation level. ]
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 57 ++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 12bc286..f7666ee 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -431,6 +431,35 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}

+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+ unsigned int block)
+{
+ u32 low, high;
+ u32 addr = 0;
+
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ /*
+ * For SMCA enabled processors, BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
+ */
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+
+ return addr;
+}
+
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
@@ -451,32 +480,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
}
}

- if (mce_flags.smca) {
- if (smca_get_bank_type(bank) == SMCA_RESERVED)
- return addr;
-
- if (!block) {
- addr = MSR_AMD64_SMCA_MCx_MISC(bank);
- } else {
- /*
- * For SMCA enabled processors, BLKPTR field of the
- * first MISC register (MCx_MISC0) indicates presence of
- * additional MISC register set (MISC1-4).
- */
- u32 low, high;
-
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
-
- if (!(low & MCI_CONFIG_MCAX))
- return addr;
-
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
- }
- return addr;
- }
+ if (mce_flags.smca)
+ return smca_get_block_address(cpu, bank, block);

/* Fall back to method we used for older processors: */
switch (block) {