2018-02-01 18:49:25

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH 1/3] x86/MCE/AMD: Redo function to get SMCA bank type

From: Yazen Ghannam <[email protected]>

Pass the bank number to smca_get_bank_type() since that's all we need.

Also, we should compare the bank number to the size of the smca_banks
array, not the number of bank types. Bank types are reused for multiple
banks, so the number of types can be different from the number of banks
in a system. We could return an invalid bank type.

Use smca_get_bank_type() in get_name(), and change type of bank_type
variable to match return type of smca_get_bank_type().

Cc: <[email protected]> # 4.14.x: 11cf887728a3 x86/MCE/AMD: Define a function to get SMCA bank type
Cc: <[email protected]> # 4.14.x: c6708d50f166 x86/MCE: Report only DRAM ECC as memory errors on AMD systems
Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad242324..4e16afc0794d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);

-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;

- if (m->bank >= N_SMCA_BANK_TYPES)
+ if (bank >= ARRAY_SIZE(smca_banks))
return N_SMCA_BANK_TYPES;

- b = &smca_banks[m->bank];
+ b = &smca_banks[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;

@@ -760,7 +760,7 @@ bool amd_mce_is_memory_error(struct mce *m)
u8 xec = (m->status >> 16) & 0x1f;

if (mce_flags.smca)
- return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;

return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1063,7 @@ static struct kobj_type threshold_ktype = {

static const char *get_name(unsigned int bank, struct threshold_block *b)
{
- unsigned int bank_type;
+ enum smca_bank_types bank_type;

if (!mce_flags.smca) {
if (b && bank == 4)
@@ -1072,11 +1072,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}

- if (!smca_banks[bank].hwid)
+ bank_type = smca_get_bank_type(bank);
+ if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;

- bank_type = smca_banks[bank].hwid->bank_type;
-
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
--
2.14.1



2018-02-01 18:49:51

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

From: Yazen Ghannam <[email protected]>

The block address is saved after the block is initialized when
threshold_init_device() is called.

Use the saved block address, if available, rather than trying to
rediscover it.

We can avoid some *on_cpu() calls in the init path that will cause a
call trace when resuming from suspend.

Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index bf53b4549a17..8c4f8f30c779 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
{
u32 addr = 0, offset = 0;

+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ return addr;
+
+ /* Get address from already initialized block. */
+ if (per_cpu(threshold_banks, cpu)) {
+ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
+
+ if (bankp && bankp->blocks) {
+ struct threshold_block *blockp = &bankp->blocks[block];
+
+ if (blockp)
+ return blockp->address;
+ }
+ }
+
if (mce_flags.smca) {
if (smca_get_bank_type(bank) == SMCA_RESERVED)
return addr;
--
2.14.1


2018-02-01 18:51:00

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH 2/3] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type

From: Yazen Ghannam <[email protected]>

Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
bank 4 in the smca_banks array. This means that when we check if a bank
is initialized, like during boot or resume, we will see that bank 4 is
not initialized and try to initialize it. This may cause a call trace,
when resuming from suspend, due to *on_cpu() calls in the init path.

Reserved banks will be read-as-zero, so their MCA_IPID register will be
zero. So, like the smca_banks array, the threshold_banks array will not
have an entry for a reserved bank since all its MCA_MISC* registers will
be zero.

Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.

Use the "Reserved" type when checking if a bank is reserved. It's
possible that other bank numbers may be reserved on future systems.

Don't try to find the block address on reserved banks.

Cc: <[email protected]> # 4.14.x
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/include/asm/mce.h | 1 +
arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++
drivers/edac/mce_amd.c | 11 +++++++----
3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5ba658..340070415c2c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -346,6 +346,7 @@ enum smca_bank_types {
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
+ SMCA_RESERVED, /* Reserved */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 4e16afc0794d..bf53b4549a17 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */

+ /* Reserved type */
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
u32 addr = 0, offset = 0;

if (mce_flags.smca) {
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
if (!block) {
addr = MSR_AMD64_SMCA_MCx_MISC(bank);
} else {
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index a11a671c7a38..2ab4d61ee47e 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -854,21 +854,24 @@ static void decode_mc6_mce(struct mce *m)
static void decode_smca_error(struct mce *m)
{
struct smca_hwid *hwid;
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
const char *ip_name;
u8 xec = XEC(m->status, xec_mask);

if (m->bank >= ARRAY_SIZE(smca_banks))
return;

- if (x86_family(m->cpuid) >= 0x17 && m->bank == 4)
- pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
-
hwid = smca_banks[m->bank].hwid;
if (!hwid)
return;

bank_type = hwid->bank_type;
+
+ if (bank_type == SMCA_RESERVED) {
+ pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank);
+ return;
+ }
+
ip_name = smca_get_long_name(bank_type);

pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
--
2.14.1


2018-02-08 15:05:52

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86/MCE/AMD: Redo function to get SMCA bank type

On Thu, Feb 01, 2018 at 12:48:11PM -0600, Yazen Ghannam wrote:
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
> 1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index 0f32ad242324..4e16afc0794d 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -110,14 +110,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
> }
> EXPORT_SYMBOL_GPL(smca_get_long_name);
>
> -static enum smca_bank_types smca_get_bank_type(struct mce *m)
> +static enum smca_bank_types smca_get_bank_type(unsigned int bank)
> {
> struct smca_bank *b;
>
> - if (m->bank >= N_SMCA_BANK_TYPES)
> + if (bank >= ARRAY_SIZE(smca_banks))

MAX_NR_BANKS

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2018-02-08 15:17:07

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type

On Thu, Feb 01, 2018 at 12:48:12PM -0600, Yazen Ghannam wrote:
> From: Yazen Ghannam <[email protected]>
>
> Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
> bank 4 in the smca_banks array. This means that when we check if a bank
> is initialized, like during boot or resume, we will see that bank 4 is
> not initialized and try to initialize it. This may cause a call trace,
> when resuming from suspend, due to *on_cpu() calls in the init path.

Please be more specific: the rdmsr_*_on_cpu() calls issue an IPI but we're
running with interrupts disabled, which triggers:

WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0

> Reserved banks will be read-as-zero, so their MCA_IPID register will be
> zero. So, like the smca_banks array, the threshold_banks array will not
> have an entry for a reserved bank since all its MCA_MISC* registers will
> be zero.
>
> Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0.
>
> Use the "Reserved" type when checking if a bank is reserved. It's
> possible that other bank numbers may be reserved on future systems.
>
> Don't try to find the block address on reserved banks.
>
> Cc: <[email protected]> # 4.14.x
> Signed-off-by: Yazen Ghannam <[email protected]>
> ---
> arch/x86/include/asm/mce.h | 1 +
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 7 +++++++
> drivers/edac/mce_amd.c | 11 +++++++----
> 3 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 96ea4b5ba658..340070415c2c 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -346,6 +346,7 @@ enum smca_bank_types {
> SMCA_IF, /* Instruction Fetch */
> SMCA_L2_CACHE, /* L2 Cache */
> SMCA_DE, /* Decoder Unit */
> + SMCA_RESERVED, /* Reserved */
> SMCA_EX, /* Execution Unit */
> SMCA_FP, /* Floating Point */
> SMCA_L3_CACHE, /* L3 Cache */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index 4e16afc0794d..bf53b4549a17 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
> [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
> [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
> [SMCA_DE] = { "decode_unit", "Decode Unit" },
> + [SMCA_RESERVED] = { "reserved", "Reserved" },
> [SMCA_EX] = { "execution_unit", "Execution Unit" },
> [SMCA_FP] = { "floating_point", "Floating Point Unit" },
> [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
> @@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
> static struct smca_hwid smca_hwid_mcatypes[] = {
> /* { bank_type, hwid_mcatype, xec_bitmap } */
>
> + /* Reserved type */
> + { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
> +
> /* ZN Core (HWID=0xB0) MCA types */
> { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
> { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
> @@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
> u32 addr = 0, offset = 0;
>
> if (mce_flags.smca) {

As a last patch in the series: please carve the code in this
if-statement into a smca_get_block_address() helper. And it doesn't need
the stable tag as it is only a cleanup.

Thx.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2018-02-08 15:28:10

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On Thu, Feb 01, 2018 at 12:48:13PM -0600, Yazen Ghannam wrote:
> From: Yazen Ghannam <[email protected]>
>
> The block address is saved after the block is initialized when
> threshold_init_device() is called.
>
> Use the saved block address, if available, rather than trying to
> rediscover it.
>
> We can avoid some *on_cpu() calls in the init path that will cause a
> call trace when resuming from suspend.

Ditto.

--
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

2018-02-14 16:30:52

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH 2/3] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type

> -----Original Message-----
> From: [email protected] [mailto:linux-edac-
> [email protected]] On Behalf Of Borislav Petkov
> Sent: Thursday, February 8, 2018 10:15 AM
> To: Ghannam, Yazen <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH 2/3] x86/MCE/AMD, EDAC/mce_amd: Enumerate Reserved
> SMCA bank type
>
> On Thu, Feb 01, 2018 at 12:48:12PM -0600, Yazen Ghannam wrote:
> > From: Yazen Ghannam <[email protected]>
> >
> > Currently, bank 4 is reserved on Fam17h, so we chose not to initialize
> > bank 4 in the smca_banks array. This means that when we check if a bank
> > is initialized, like during boot or resume, we will see that bank 4 is
> > not initialized and try to initialize it. This may cause a call trace,
> > when resuming from suspend, due to *on_cpu() calls in the init path.
>
> Please be more specific: the rdmsr_*_on_cpu() calls issue an IPI but we're
> running with interrupts disabled, which triggers:
>
> WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291
> smp_call_function_single+0xdc/0xe0
>

Okay.

...
> > @@ -433,6 +437,9 @@ static u32 get_block_address(unsigned int cpu, u32
> current_addr, u32 low, u32 hi
> > u32 addr = 0, offset = 0;
> >
> > if (mce_flags.smca) {
>
> As a last patch in the series: please carve the code in this
> if-statement into a smca_get_block_address() helper. And it doesn't need
> the stable tag as it is only a cleanup.
>

Will do.

Thanks,
Yazen

2018-02-14 20:07:55

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH 1/3] x86/MCE/AMD: Redo function to get SMCA bank type

> -----Original Message-----
> From: Borislav Petkov [mailto:[email protected]]
> Sent: Thursday, February 8, 2018 10:05 AM
> To: Ghannam, Yazen <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH 1/3] x86/MCE/AMD: Redo function to get SMCA bank type
>
> On Thu, Feb 01, 2018 at 12:48:11PM -0600, Yazen Ghannam wrote:
> > arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
> > 1 file changed, 7 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > index 0f32ad242324..4e16afc0794d 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > @@ -110,14 +110,14 @@ const char *smca_get_long_name(enum
> smca_bank_types t)
> > }
> > EXPORT_SYMBOL_GPL(smca_get_long_name);
> >
> > -static enum smca_bank_types smca_get_bank_type(struct mce *m)
> > +static enum smca_bank_types smca_get_bank_type(unsigned int bank)
> > {
> > struct smca_bank *b;
> >
> > - if (m->bank >= N_SMCA_BANK_TYPES)
> > + if (bank >= ARRAY_SIZE(smca_banks))
>
> MAX_NR_BANKS
>

I know that we're declaring smca_banks[] to have MAX_NR_BANKS items. But
shouldn't we directly check that an index is within the bounds of the array?
We'll have a bug if we check against MAX_NR_BANKS and the definition of
smca_banks[] changes.

Thanks,
Yazen

2018-02-14 20:14:37

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/3] x86/MCE/AMD: Redo function to get SMCA bank type

On Wed, Feb 14, 2018 at 04:38:34PM +0000, Ghannam, Yazen wrote:
> I know that we're declaring smca_banks[] to have MAX_NR_BANKS items. But
> shouldn't we directly check that an index is within the bounds of the array?
> We'll have a bug if we check against MAX_NR_BANKS and the definition of
> smca_banks[] changes.

Then we don't need MAX_NR_BANKS. We either keep them in sync or kill it.

--
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-04-14 00:53:29

by Johannes Hirte

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On 2018 Feb 01, Yazen Ghannam wrote:
> From: Yazen Ghannam <[email protected]>
>
> The block address is saved after the block is initialized when
> threshold_init_device() is called.
>
> Use the saved block address, if available, rather than trying to
> rediscover it.
>
> We can avoid some *on_cpu() calls in the init path that will cause a
> call trace when resuming from suspend.
>
> Cc: <[email protected]> # 4.14.x
> Signed-off-by: Yazen Ghannam <[email protected]>
> ---
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
> 1 file changed, 15 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index bf53b4549a17..8c4f8f30c779 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
> {
> u32 addr = 0, offset = 0;
>
> + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> + return addr;
> +
> + /* Get address from already initialized block. */
> + if (per_cpu(threshold_banks, cpu)) {
> + struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
> +
> + if (bankp && bankp->blocks) {
> + struct threshold_block *blockp = &bankp->blocks[block];
> +
> + if (blockp)
> + return blockp->address;
> + }
> + }
> +
> if (mce_flags.smca) {
> if (smca_get_bank_type(bank) == SMCA_RESERVED)
> return addr;
> --
> 2.14.1

I have a KASAN: slab-out-of-bounds, and git bisect points me to this
change:

Apr 13 00:40:32 probook kernel: ==================================================================
Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: Read of size 4 at addr ffff8803f165ddf4 by task swapper/0/1
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.16.0-10757-g4ca8ba4ccff9 #532
Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645 G2/80FE, BIOS N77 Ver. 01.12 12/19/2017
Apr 13 00:40:32 probook kernel: Call Trace:
Apr 13 00:40:32 probook kernel: dump_stack+0x5b/0x8b
Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: print_address_description+0x65/0x270
Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: kasan_report+0x232/0x350
Apr 13 00:40:32 probook kernel: get_block_address.isra.3+0x1e9/0x520
Apr 13 00:40:32 probook kernel: ? kobject_init_and_add+0xde/0x130
Apr 13 00:40:32 probook kernel: ? get_name+0x390/0x390
Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x12c/0xc60
Apr 13 00:40:32 probook kernel: ? kobject_add_internal+0x800/0x800
Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x520/0x520
Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel: mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel: ? init_special_inode+0x1d0/0x230
Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel: ? mcheck_vendor_init_severity+0x43/0x43
Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel: ? trace_event_raw_event_initcall_finish+0x190/0x190
Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0xb/0x40
Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: Allocated by task 1:
Apr 13 00:40:32 probook kernel: kasan_kmalloc+0xa0/0xd0
Apr 13 00:40:32 probook kernel: kmem_cache_alloc_trace+0xf3/0x1f0
Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x1bc/0xc60
Apr 13 00:40:32 probook kernel: mce_threshold_create_device+0x35b/0x990
Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: Freed by task 0:
Apr 13 00:40:32 probook kernel: (stack is not available)
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at ffff8803f165dd80
which belongs to the cache kmalloc-128 of size 128
Apr 13 00:40:32 probook kernel: The buggy address is located 116 bytes inside of
128-byte region [ffff8803f165dd80, ffff8803f165de00)
Apr 13 00:40:32 probook kernel: The buggy address belongs to the page:
Apr 13 00:40:32 probook kernel: page:ffffea000fc59740 count:1 mapcount:0 mapping:0000000000000000 index:0x0
Apr 13 00:40:32 probook kernel: flags: 0x2000000000000100(slab)
Apr 13 00:40:32 probook kernel: raw: 2000000000000100 0000000000000000 0000000000000000 0000000180150015
Apr 13 00:40:32 probook kernel: raw: dead000000000100 dead000000000200 ffff8803f3403340 0000000000000000
Apr 13 00:40:32 probook kernel: page dumped because: kasan: bad access detected
Apr 13 00:40:32 probook kernel:
Apr 13 00:40:32 probook kernel: Memory state around the buggy address:
Apr 13 00:40:32 probook kernel: ffff8803f165dc80: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
Apr 13 00:40:32 probook kernel: ffff8803f165dd00: 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc
Apr 13 00:40:32 probook kernel: >ffff8803f165dd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc
Apr 13 00:40:32 probook kernel: ^
Apr 13 00:40:32 probook kernel: ffff8803f165de00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
Apr 13 00:40:32 probook kernel: ffff8803f165de80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
Apr 13 00:40:32 probook kernel: ==================================================================

--
Regards,
Johannes


2018-04-16 14:32:13

by Johannes Hirte

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On 2018 Apr 14, Johannes Hirte wrote:
> On 2018 Feb 01, Yazen Ghannam wrote:
> > From: Yazen Ghannam <[email protected]>
> >
> > The block address is saved after the block is initialized when
> > threshold_init_device() is called.
> >
> > Use the saved block address, if available, rather than trying to
> > rediscover it.
> >
> > We can avoid some *on_cpu() calls in the init path that will cause a
> > call trace when resuming from suspend.
> >
> > Cc: <[email protected]> # 4.14.x
> > Signed-off-by: Yazen Ghannam <[email protected]>
> > ---
> > arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
> > 1 file changed, 15 insertions(+)
> >
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > index bf53b4549a17..8c4f8f30c779 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
> > {
> > u32 addr = 0, offset = 0;
> >
> > + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> > + return addr;
> > +
> > + /* Get address from already initialized block. */
> > + if (per_cpu(threshold_banks, cpu)) {
> > + struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
> > +
> > + if (bankp && bankp->blocks) {
> > + struct threshold_block *blockp = &bankp->blocks[block];
> > +
> > + if (blockp)
> > + return blockp->address;
> > + }
> > + }
> > +
> > if (mce_flags.smca) {
> > if (smca_get_bank_type(bank) == SMCA_RESERVED)
> > return addr;
> > --
> > 2.14.1
>
> I have a KASAN: slab-out-of-bounds, and git bisect points me to this
> change:
>
> Apr 13 00:40:32 probook kernel: ==================================================================
> Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in get_block_address.isra.3+0x1e9/0x520
> Apr 13 00:40:32 probook kernel: Read of size 4 at addr ffff8803f165ddf4 by task swapper/0/1
> Apr 13 00:40:32 probook kernel:
> Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.16.0-10757-g4ca8ba4ccff9 #532
> Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645 G2/80FE, BIOS N77 Ver. 01.12 12/19/2017
> Apr 13 00:40:32 probook kernel: Call Trace:
> Apr 13 00:40:32 probook kernel: dump_stack+0x5b/0x8b
> Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> Apr 13 00:40:32 probook kernel: print_address_description+0x65/0x270
> Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> Apr 13 00:40:32 probook kernel: kasan_report+0x232/0x350
> Apr 13 00:40:32 probook kernel: get_block_address.isra.3+0x1e9/0x520
> Apr 13 00:40:32 probook kernel: ? kobject_init_and_add+0xde/0x130
> Apr 13 00:40:32 probook kernel: ? get_name+0x390/0x390
> Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x12c/0xc60
> Apr 13 00:40:32 probook kernel: ? kobject_add_internal+0x800/0x800
> Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x520/0x520
> Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> Apr 13 00:40:32 probook kernel: mce_threshold_create_device+0x35b/0x990
> Apr 13 00:40:32 probook kernel: ? init_special_inode+0x1d0/0x230
> Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> Apr 13 00:40:32 probook kernel: ? mcheck_vendor_init_severity+0x43/0x43
> Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> Apr 13 00:40:32 probook kernel: ? trace_event_raw_event_initcall_finish+0x190/0x190
> Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0xb/0x40
> Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> Apr 13 00:40:32 probook kernel:
> Apr 13 00:40:32 probook kernel: Allocated by task 1:
> Apr 13 00:40:32 probook kernel: kasan_kmalloc+0xa0/0xd0
> Apr 13 00:40:32 probook kernel: kmem_cache_alloc_trace+0xf3/0x1f0
> Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x1bc/0xc60
> Apr 13 00:40:32 probook kernel: mce_threshold_create_device+0x35b/0x990
> Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> Apr 13 00:40:32 probook kernel:
> Apr 13 00:40:32 probook kernel: Freed by task 0:
> Apr 13 00:40:32 probook kernel: (stack is not available)
> Apr 13 00:40:32 probook kernel:
> Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at ffff8803f165dd80
> which belongs to the cache kmalloc-128 of size 128
> Apr 13 00:40:32 probook kernel: The buggy address is located 116 bytes inside of
> 128-byte region [ffff8803f165dd80, ffff8803f165de00)
> Apr 13 00:40:32 probook kernel: The buggy address belongs to the page:
> Apr 13 00:40:32 probook kernel: page:ffffea000fc59740 count:1 mapcount:0 mapping:0000000000000000 index:0x0
> Apr 13 00:40:32 probook kernel: flags: 0x2000000000000100(slab)
> Apr 13 00:40:32 probook kernel: raw: 2000000000000100 0000000000000000 0000000000000000 0000000180150015
> Apr 13 00:40:32 probook kernel: raw: dead000000000100 dead000000000200 ffff8803f3403340 0000000000000000
> Apr 13 00:40:32 probook kernel: page dumped because: kasan: bad access detected
> Apr 13 00:40:32 probook kernel:
> Apr 13 00:40:32 probook kernel: Memory state around the buggy address:
> Apr 13 00:40:32 probook kernel: ffff8803f165dc80: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
> Apr 13 00:40:32 probook kernel: ffff8803f165dd00: 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc
> Apr 13 00:40:32 probook kernel: >ffff8803f165dd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc
> Apr 13 00:40:32 probook kernel: ^
> Apr 13 00:40:32 probook kernel: ffff8803f165de00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> Apr 13 00:40:32 probook kernel: ffff8803f165de80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> Apr 13 00:40:32 probook kernel: ==================================================================
>

Putting the whole caching part under the

if (mce_flags.smca) {

solved the issue on my Carrizo.


--
Regards,
Johannes


2018-04-17 13:33:26

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

> -----Original Message-----
> From: [email protected] <linux-edac-
> [email protected]> On Behalf Of Johannes Hirte
> Sent: Monday, April 16, 2018 7:56 AM
> To: Ghannam, Yazen <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized
> block
>
> On 2018 Apr 14, Johannes Hirte wrote:
> > On 2018 Feb 01, Yazen Ghannam wrote:
> > > From: Yazen Ghannam <[email protected]>
> > >
> > > The block address is saved after the block is initialized when
> > > threshold_init_device() is called.
> > >
> > > Use the saved block address, if available, rather than trying to
> > > rediscover it.
> > >
> > > We can avoid some *on_cpu() calls in the init path that will cause a
> > > call trace when resuming from suspend.
> > >
> > > Cc: <[email protected]> # 4.14.x
> > > Signed-off-by: Yazen Ghannam <[email protected]>
> > > ---
> > > arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
> > > 1 file changed, 15 insertions(+)
> > >
> > > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > index bf53b4549a17..8c4f8f30c779 100644
> > > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu,
> u32 current_addr, u32 low, u32 hi
> > > {
> > > u32 addr = 0, offset = 0;
> > >
> > > + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> > > + return addr;
> > > +
> > > + /* Get address from already initialized block. */
> > > + if (per_cpu(threshold_banks, cpu)) {
> > > + struct threshold_bank *bankp = per_cpu(threshold_banks,
> cpu)[bank];
> > > +
> > > + if (bankp && bankp->blocks) {
> > > + struct threshold_block *blockp = &bankp-
> >blocks[block];
> > > +
> > > + if (blockp)
> > > + return blockp->address;
> > > + }
> > > + }
> > > +
> > > if (mce_flags.smca) {
> > > if (smca_get_bank_type(bank) == SMCA_RESERVED)
> > > return addr;
> > > --
> > > 2.14.1
> >
> > I have a KASAN: slab-out-of-bounds, and git bisect points me to this
> > change:
> >
> > Apr 13 00:40:32 probook kernel:
> ================================================================
> ==
> > Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in
> get_block_address.isra.3+0x1e9/0x520
> > Apr 13 00:40:32 probook kernel: Read of size 4 at addr ffff8803f165ddf4 by
> task swapper/0/1
> > Apr 13 00:40:32 probook kernel:
> > Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not
> tainted 4.16.0-10757-g4ca8ba4ccff9 #532
> > Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645
> G2/80FE, BIOS N77 Ver. 01.12 12/19/2017
> > Apr 13 00:40:32 probook kernel: Call Trace:
> > Apr 13 00:40:32 probook kernel: dump_stack+0x5b/0x8b
> > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> > Apr 13 00:40:32 probook kernel: print_address_description+0x65/0x270
> > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> > Apr 13 00:40:32 probook kernel: kasan_report+0x232/0x350
> > Apr 13 00:40:32 probook kernel: get_block_address.isra.3+0x1e9/0x520
> > Apr 13 00:40:32 probook kernel: ? kobject_init_and_add+0xde/0x130
> > Apr 13 00:40:32 probook kernel: ? get_name+0x390/0x390
> > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> > Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> > Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x12c/0xc60
> > Apr 13 00:40:32 probook kernel: ? kobject_add_internal+0x800/0x800
> > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x520/0x520
> > Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> > Apr 13 00:40:32 probook kernel:
> mce_threshold_create_device+0x35b/0x990
> > Apr 13 00:40:32 probook kernel: ? init_special_inode+0x1d0/0x230
> > Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> > Apr 13 00:40:32 probook kernel: ?
> mcheck_vendor_init_severity+0x43/0x43
> > Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> > Apr 13 00:40:32 probook kernel: ?
> trace_event_raw_event_initcall_finish+0x190/0x190
> > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0xb/0x40
> > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> > Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> > Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> > Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> > Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> > Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> > Apr 13 00:40:32 probook kernel:
> > Apr 13 00:40:32 probook kernel: Allocated by task 1:
> > Apr 13 00:40:32 probook kernel: kasan_kmalloc+0xa0/0xd0
> > Apr 13 00:40:32 probook kernel: kmem_cache_alloc_trace+0xf3/0x1f0
> > Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x1bc/0xc60
> > Apr 13 00:40:32 probook kernel:
> mce_threshold_create_device+0x35b/0x990
> > Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> > Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> > Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> > Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> > Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> > Apr 13 00:40:32 probook kernel:
> > Apr 13 00:40:32 probook kernel: Freed by task 0:
> > Apr 13 00:40:32 probook kernel: (stack is not available)
> > Apr 13 00:40:32 probook kernel:
> > Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at
> ffff8803f165dd80
> > which belongs to the cache kmalloc-128 of size 128
> > Apr 13 00:40:32 probook kernel: The buggy address is located 116 bytes
> inside of
> > 128-byte region [ffff8803f165dd80, ffff8803f165de00)
> > Apr 13 00:40:32 probook kernel: The buggy address belongs to the page:
> > Apr 13 00:40:32 probook kernel: page:ffffea000fc59740 count:1
> mapcount:0 mapping:0000000000000000 index:0x0
> > Apr 13 00:40:32 probook kernel: flags: 0x2000000000000100(slab)
> > Apr 13 00:40:32 probook kernel: raw: 2000000000000100
> 0000000000000000 0000000000000000 0000000180150015
> > Apr 13 00:40:32 probook kernel: raw: dead000000000100
> dead000000000200 ffff8803f3403340 0000000000000000
> > Apr 13 00:40:32 probook kernel: page dumped because: kasan: bad access
> detected
> > Apr 13 00:40:32 probook kernel:
> > Apr 13 00:40:32 probook kernel: Memory state around the buggy address:
> > Apr 13 00:40:32 probook kernel: ffff8803f165dc80: fc fc fc fc fc fc fc fc 00 00
> 00 00 00 00 00 00
> > Apr 13 00:40:32 probook kernel: ffff8803f165dd00: 00 00 00 00 00 00 00 fc
> fc fc fc fc fc fc fc fc
> > Apr 13 00:40:32 probook kernel: >ffff8803f165dd80: 00 00 00 00 00 00 00
> 00 00 00 00 00 00 fc fc fc
> > Apr 13 00:40:32 probook kernel: ^
> > Apr 13 00:40:32 probook kernel: ffff8803f165de00: fc fc fc fc fc fc fc fc fc fc
> fc fc fc fc fc fc
> > Apr 13 00:40:32 probook kernel: ffff8803f165de80: fc fc fc fc fc fc fc fc fc fc
> fc fc fc fc fc fc
> > Apr 13 00:40:32 probook kernel:
> ================================================================
> ==
> >
>
> Putting the whole caching part under the
>
> if (mce_flags.smca) {
>
> solved the issue on my Carrizo.
>

Thanks for reporting this. I'm able to reproduce this on my Fam17h system. The
caching should still be the same on non-SMCA systems. Putting it all under the
SMCA flags effectively removes it on Carrizo.

Here is when get_block_address() is called:
1) Boot time MCE init. Called on each CPU. No caching.
2) Init of the MCE device. Called on a single CPU. Values are cached here.
3) CPU on/offlining which calls MCE init. Should use the cached values.

It seems to me that the KASAN bug is detected during #2 though it's not yet clear
to me what the issue is. I need to read up on KASAN and keep debugging.

Do any of the maintainers have any suggestions?

Thanks,
Yazen

2018-05-15 09:40:58

by Johannes Hirte

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On 2018 Apr 17, Ghannam, Yazen wrote:
> > -----Original Message-----
> > From: [email protected] <linux-edac-
> > [email protected]> On Behalf Of Johannes Hirte
> > Sent: Monday, April 16, 2018 7:56 AM
> > To: Ghannam, Yazen <[email protected]>
> > Cc: [email protected]; [email protected]; [email protected];
> > [email protected]; [email protected]
> > Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized
> > block
> >
> > On 2018 Apr 14, Johannes Hirte wrote:
> > > On 2018 Feb 01, Yazen Ghannam wrote:
> > > > From: Yazen Ghannam <[email protected]>
> > > >
> > > > The block address is saved after the block is initialized when
> > > > threshold_init_device() is called.
> > > >
> > > > Use the saved block address, if available, rather than trying to
> > > > rediscover it.
> > > >
> > > > We can avoid some *on_cpu() calls in the init path that will cause a
> > > > call trace when resuming from suspend.
> > > >
> > > > Cc: <[email protected]> # 4.14.x
> > > > Signed-off-by: Yazen Ghannam <[email protected]>
> > > > ---
> > > > arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++++++++++
> > > > 1 file changed, 15 insertions(+)
> > > >
> > > > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > > index bf53b4549a17..8c4f8f30c779 100644
> > > > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > > > @@ -436,6 +436,21 @@ static u32 get_block_address(unsigned int cpu,
> > u32 current_addr, u32 low, u32 hi
> > > > {
> > > > u32 addr = 0, offset = 0;
> > > >
> > > > + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
> > > > + return addr;
> > > > +
> > > > + /* Get address from already initialized block. */
> > > > + if (per_cpu(threshold_banks, cpu)) {
> > > > + struct threshold_bank *bankp = per_cpu(threshold_banks,
> > cpu)[bank];
> > > > +
> > > > + if (bankp && bankp->blocks) {
> > > > + struct threshold_block *blockp = &bankp-
> > >blocks[block];
> > > > +
> > > > + if (blockp)
> > > > + return blockp->address;
> > > > + }
> > > > + }
> > > > +
> > > > if (mce_flags.smca) {
> > > > if (smca_get_bank_type(bank) == SMCA_RESERVED)
> > > > return addr;
> > > > --
> > > > 2.14.1
> > >
> > > I have a KASAN: slab-out-of-bounds, and git bisect points me to this
> > > change:
> > >
> > > Apr 13 00:40:32 probook kernel:
> > ================================================================
> > ==
> > > Apr 13 00:40:32 probook kernel: BUG: KASAN: slab-out-of-bounds in
> > get_block_address.isra.3+0x1e9/0x520
> > > Apr 13 00:40:32 probook kernel: Read of size 4 at addr ffff8803f165ddf4 by
> > task swapper/0/1
> > > Apr 13 00:40:32 probook kernel:
> > > Apr 13 00:40:32 probook kernel: CPU: 1 PID: 1 Comm: swapper/0 Not
> > tainted 4.16.0-10757-g4ca8ba4ccff9 #532
> > > Apr 13 00:40:32 probook kernel: Hardware name: HP HP ProBook 645
> > G2/80FE, BIOS N77 Ver. 01.12 12/19/2017
> > > Apr 13 00:40:32 probook kernel: Call Trace:
> > > Apr 13 00:40:32 probook kernel: dump_stack+0x5b/0x8b
> > > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> > > Apr 13 00:40:32 probook kernel: print_address_description+0x65/0x270
> > > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x1e9/0x520
> > > Apr 13 00:40:32 probook kernel: kasan_report+0x232/0x350
> > > Apr 13 00:40:32 probook kernel: get_block_address.isra.3+0x1e9/0x520
> > > Apr 13 00:40:32 probook kernel: ? kobject_init_and_add+0xde/0x130
> > > Apr 13 00:40:32 probook kernel: ? get_name+0x390/0x390
> > > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> > > Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> > > Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x12c/0xc60
> > > Apr 13 00:40:32 probook kernel: ? kobject_add_internal+0x800/0x800
> > > Apr 13 00:40:32 probook kernel: ? get_block_address.isra.3+0x520/0x520
> > > Apr 13 00:40:32 probook kernel: ? kasan_kmalloc+0xa0/0xd0
> > > Apr 13 00:40:32 probook kernel:
> > mce_threshold_create_device+0x35b/0x990
> > > Apr 13 00:40:32 probook kernel: ? init_special_inode+0x1d0/0x230
> > > Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> > > Apr 13 00:40:32 probook kernel: ?
> > mcheck_vendor_init_severity+0x43/0x43
> > > Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> > > Apr 13 00:40:32 probook kernel: ?
> > trace_event_raw_event_initcall_finish+0x190/0x190
> > > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0xb/0x40
> > > Apr 13 00:40:32 probook kernel: ? kasan_unpoison_shadow+0x30/0x40
> > > Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> > > Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> > > Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> > > Apr 13 00:40:32 probook kernel: ? rest_init+0xf0/0xf0
> > > Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> > > Apr 13 00:40:32 probook kernel:
> > > Apr 13 00:40:32 probook kernel: Allocated by task 1:
> > > Apr 13 00:40:32 probook kernel: kasan_kmalloc+0xa0/0xd0
> > > Apr 13 00:40:32 probook kernel: kmem_cache_alloc_trace+0xf3/0x1f0
> > > Apr 13 00:40:32 probook kernel: allocate_threshold_blocks+0x1bc/0xc60
> > > Apr 13 00:40:32 probook kernel:
> > mce_threshold_create_device+0x35b/0x990
> > > Apr 13 00:40:32 probook kernel: threshold_init_device+0x98/0xa7
> > > Apr 13 00:40:32 probook kernel: do_one_initcall+0x76/0x30c
> > > Apr 13 00:40:32 probook kernel: kernel_init_freeable+0x3d6/0x471
> > > Apr 13 00:40:32 probook kernel: kernel_init+0xa/0x120
> > > Apr 13 00:40:32 probook kernel: ret_from_fork+0x22/0x40
> > > Apr 13 00:40:32 probook kernel:
> > > Apr 13 00:40:32 probook kernel: Freed by task 0:
> > > Apr 13 00:40:32 probook kernel: (stack is not available)
> > > Apr 13 00:40:32 probook kernel:
> > > Apr 13 00:40:32 probook kernel: The buggy address belongs to the object at
> > ffff8803f165dd80
> > > which belongs to the cache kmalloc-128 of size 128
> > > Apr 13 00:40:32 probook kernel: The buggy address is located 116 bytes
> > inside of
> > > 128-byte region [ffff8803f165dd80, ffff8803f165de00)
> > > Apr 13 00:40:32 probook kernel: The buggy address belongs to the page:
> > > Apr 13 00:40:32 probook kernel: page:ffffea000fc59740 count:1
> > mapcount:0 mapping:0000000000000000 index:0x0
> > > Apr 13 00:40:32 probook kernel: flags: 0x2000000000000100(slab)
> > > Apr 13 00:40:32 probook kernel: raw: 2000000000000100
> > 0000000000000000 0000000000000000 0000000180150015
> > > Apr 13 00:40:32 probook kernel: raw: dead000000000100
> > dead000000000200 ffff8803f3403340 0000000000000000
> > > Apr 13 00:40:32 probook kernel: page dumped because: kasan: bad access
> > detected
> > > Apr 13 00:40:32 probook kernel:
> > > Apr 13 00:40:32 probook kernel: Memory state around the buggy address:
> > > Apr 13 00:40:32 probook kernel: ffff8803f165dc80: fc fc fc fc fc fc fc fc 00 00
> > 00 00 00 00 00 00
> > > Apr 13 00:40:32 probook kernel: ffff8803f165dd00: 00 00 00 00 00 00 00 fc
> > fc fc fc fc fc fc fc fc
> > > Apr 13 00:40:32 probook kernel: >ffff8803f165dd80: 00 00 00 00 00 00 00
> > 00 00 00 00 00 00 fc fc fc
> > > Apr 13 00:40:32 probook kernel: ^
> > > Apr 13 00:40:32 probook kernel: ffff8803f165de00: fc fc fc fc fc fc fc fc fc fc
> > fc fc fc fc fc fc
> > > Apr 13 00:40:32 probook kernel: ffff8803f165de80: fc fc fc fc fc fc fc fc fc fc
> > fc fc fc fc fc fc
> > > Apr 13 00:40:32 probook kernel:
> > ================================================================
> > ==
> > >
> >
> > Putting the whole caching part under the
> >
> > if (mce_flags.smca) {
> >
> > solved the issue on my Carrizo.
> >
>
> Thanks for reporting this. I'm able to reproduce this on my Fam17h system. The
> caching should still be the same on non-SMCA systems. Putting it all under the
> SMCA flags effectively removes it on Carrizo.
>
> Here is when get_block_address() is called:
> 1) Boot time MCE init. Called on each CPU. No caching.
> 2) Init of the MCE device. Called on a single CPU. Values are cached here.
> 3) CPU on/offlining which calls MCE init. Should use the cached values.
>
> It seems to me that the KASAN bug is detected during #2 though it's not yet clear
> to me what the issue is. I need to read up on KASAN and keep debugging.

The out-of-bound access happens in get_block_address:

if (bankp && bankp->blocks) {
struct threshold_block *blockp = &bankp->blocks[block];

with block=1. This doesn't exist. I don't even find any array here.
There is a linked list, created in allocate_threshold_blocks. On my
system I get 17 lists with one element each.

--
Regards,
Johannes


2018-05-16 22:47:50

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On Tue, May 15, 2018 at 11:39:54AM +0200, Johannes Hirte wrote:
> The out-of-bound access happens in get_block_address:
>
> if (bankp && bankp->blocks) {
> struct threshold_block *blockp = &bankp->blocks[block];
>
> with block=1. This doesn't exist. I don't even find any array here.
> There is a linked list, created in allocate_threshold_blocks. On my
> system I get 17 lists with one element each.

Yes, what a mess this is. ;-\

There's no such thing as ->blocks[block] array. We assign simply the
threshold_block to it in allocate_threshold_blocks:

per_cpu(threshold_banks, cpu)[bank]->blocks = b;

And I can't say the design of this thing is really friendly but it is
still no excuse that I missed that during review. Grrr.

So, Yazen, what really needs to happen here is to iterate the
bank->blocks->miscj list to find the block you're looking for and return
its address, the opposite to this here:

if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
list_add(&b->miscj,
&per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
} else {
per_cpu(threshold_banks, cpu)[bank]->blocks = b;
}

and don't forget to look at ->blocks itself.

And then you need to make sure that searching for block addresses still
works when resuming from suspend so that you can avoid the RDMSR IPIs.

Ok?

--
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-05-17 06:51:58

by Johannes Hirte

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On 2018 Mai 17, Borislav Petkov wrote:
> On Tue, May 15, 2018 at 11:39:54AM +0200, Johannes Hirte wrote:
> > The out-of-bound access happens in get_block_address:
> >
> > if (bankp && bankp->blocks) {
> > struct threshold_block *blockp = &bankp->blocks[block];
> >
> > with block=1. This doesn't exist. I don't even find any array here.
> > There is a linked list, created in allocate_threshold_blocks. On my
> > system I get 17 lists with one element each.
>
> Yes, what a mess this is. ;-\
>
> There's no such thing as ->blocks[block] array. We assign simply the
> threshold_block to it in allocate_threshold_blocks:
>
> per_cpu(threshold_banks, cpu)[bank]->blocks = b;
>
> And I can't say the design of this thing is really friendly but it is
> still no excuse that I missed that during review. Grrr.
>
> So, Yazen, what really needs to happen here is to iterate the
> bank->blocks->miscj list to find the block you're looking for and return
> its address, the opposite to this here:
>
> if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
> list_add(&b->miscj,
> &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
> } else {
> per_cpu(threshold_banks, cpu)[bank]->blocks = b;
> }
>
> and don't forget to look at ->blocks itself.
>
> And then you need to make sure that searching for block addresses still
> works when resuming from suspend so that you can avoid the RDMSR IPIs.
>

Maybe I'm missing something, but those RDMSR IPIs don't happen on
pre-SMCA systems, right? So the caching should be avoided here, cause
the whole lookup looks more expensive to me than the simple switch-block
in get_block_address.

--
Regards,
Johannes


2018-05-17 10:43:29

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On Thu, May 17, 2018 at 08:49:31AM +0200, Johannes Hirte wrote:
> Maybe I'm missing something, but those RDMSR IPIs don't happen on
> pre-SMCA systems, right? So the caching should be avoided here, cause
> the whole lookup looks more expensive to me than the simple switch-block
> in get_block_address.

Yeah, and we should simply cache all those MSR values as I suggested then.

The patch at the end should fix your issue.

Yazen, so I'm working on the assumption that all addresses are the same
on every core - I dumped them and it looks like this:

128 bank: 00, block: 0 : 0xc0002003
128 bank: 00, block: 1 : 0x0
128 bank: 01, block: 0 : 0xc0002013
128 bank: 01, block: 1 : 0x0
128 bank: 02, block: 0 : 0xc0002023
128 bank: 02, block: 1 : 0x0
128 bank: 03, block: 0 : 0xc0002033
128 bank: 03, block: 1 : 0x0
128 bank: 04, block: 0 : 0x0
128 bank: 05, block: 0 : 0xc0002053
128 bank: 05, block: 1 : 0x0
128 bank: 06, block: 0 : 0xc0002063
128 bank: 06, block: 1 : 0x0
128 bank: 07, block: 0 : 0xc0002073
128 bank: 07, block: 1 : 0x0
128 bank: 08, block: 0 : 0xc0002083
128 bank: 08, block: 1 : 0x0
128 bank: 09, block: 0 : 0xc0002093
128 bank: 09, block: 1 : 0x0
128 bank: 10, block: 0 : 0xc00020a3
128 bank: 10, block: 1 : 0x0
128 bank: 11, block: 0 : 0xc00020b3
128 bank: 11, block: 1 : 0x0
128 bank: 12, block: 0 : 0xc00020c3
128 bank: 12, block: 1 : 0x0
128 bank: 13, block: 0 : 0xc00020d3
128 bank: 13, block: 1 : 0x0
128 bank: 14, block: 0 : 0xc00020e3
128 bank: 14, block: 1 : 0x0
128 bank: 15, block: 0 : 0xc00020f3
128 bank: 15, block: 1 : 0x0
128 bank: 16, block: 0 : 0xc0002103
128 bank: 16, block: 1 : 0x0
128 bank: 17, block: 0 : 0xc0002113
128 bank: 17, block: 1 : 0x0
128 bank: 18, block: 0 : 0xc0002123
128 bank: 18, block: 1 : 0x0
128 bank: 19, block: 0 : 0xc0002133
128 bank: 19, block: 1 : 0x0
128 bank: 20, block: 0 : 0xc0002143
128 bank: 20, block: 1 : 0x0
128 bank: 21, block: 0 : 0xc0002153
128 bank: 21, block: 1 : 0x0
128 bank: 22, block: 0 : 0xc0002163
128 bank: 22, block: 1 : 0x0

so you have 128 times the same address on a 128 core Zen box.

If that is correct, then we can use this nice simplification and cache
them globally and not per-CPU. Seems to work here. Scream if something's
amiss.

Thx.

---
From d8614e904d8f63b89d2d204d6fa247c4c416a1b6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@susede>
Date: Thu, 17 May 2018 10:46:26 +0200
Subject: [PATCH] array caching

Not-Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++--------------
1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..c8e038800591 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
[SMCA_SMU] = { "smu", "System Management Unit" },
};

+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+ [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);

+ /* Check our cache first: */
+ if (smca_bank_addrs[bank][block] != -1)
+ return smca_bank_addrs[bank][block];
+
/*
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ goto out;

if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ goto out;

if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

+out:
+ smca_bank_addrs[bank][block] = addr;
return addr;
}

@@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return addr;

- /* Get address from already initialized block. */
- if (per_cpu(threshold_banks, cpu)) {
- struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
- if (bankp && bankp->blocks) {
- struct threshold_block *blockp = &bankp->blocks[block];
-
- if (blockp)
- return blockp->address;
- }
- }
-
if (mce_flags.smca)
return smca_get_block_address(cpu, bank, block);

--
2.17.0.391.g1f1cddd558b5

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-05-17 13:05:23

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

> -----Original Message-----
> From: Borislav Petkov <[email protected]>
> Sent: Thursday, May 17, 2018 6:41 AM
> To: Johannes Hirte <[email protected]>
> Cc: Ghannam, Yazen <[email protected]>; linux-
> [email protected]; [email protected]; [email protected];
> [email protected]
> Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized
> block
>
> On Thu, May 17, 2018 at 08:49:31AM +0200, Johannes Hirte wrote:
> > Maybe I'm missing something, but those RDMSR IPIs don't happen on
> > pre-SMCA systems, right? So the caching should be avoided here, cause
> > the whole lookup looks more expensive to me than the simple switch-block
> > in get_block_address.
>
> Yeah, and we should simply cache all those MSR values as I suggested then.
>

Yes, you're right. I thought using the existing data structures would work, but it
seems I messed up the implementation.

> The patch at the end should fix your issue.
>
> Yazen, so I'm working on the assumption that all addresses are the same
> on every core - I dumped them and it looks like this:
>
> 128 bank: 00, block: 0 : 0xc0002003
> 128 bank: 00, block: 1 : 0x0
> 128 bank: 01, block: 0 : 0xc0002013
> 128 bank: 01, block: 1 : 0x0
> 128 bank: 02, block: 0 : 0xc0002023
> 128 bank: 02, block: 1 : 0x0
> 128 bank: 03, block: 0 : 0xc0002033
> 128 bank: 03, block: 1 : 0x0
> 128 bank: 04, block: 0 : 0x0
> 128 bank: 05, block: 0 : 0xc0002053
> 128 bank: 05, block: 1 : 0x0
> 128 bank: 06, block: 0 : 0xc0002063
> 128 bank: 06, block: 1 : 0x0
> 128 bank: 07, block: 0 : 0xc0002073
> 128 bank: 07, block: 1 : 0x0
> 128 bank: 08, block: 0 : 0xc0002083
> 128 bank: 08, block: 1 : 0x0
> 128 bank: 09, block: 0 : 0xc0002093
> 128 bank: 09, block: 1 : 0x0
> 128 bank: 10, block: 0 : 0xc00020a3
> 128 bank: 10, block: 1 : 0x0
> 128 bank: 11, block: 0 : 0xc00020b3
> 128 bank: 11, block: 1 : 0x0
> 128 bank: 12, block: 0 : 0xc00020c3
> 128 bank: 12, block: 1 : 0x0
> 128 bank: 13, block: 0 : 0xc00020d3
> 128 bank: 13, block: 1 : 0x0
> 128 bank: 14, block: 0 : 0xc00020e3
> 128 bank: 14, block: 1 : 0x0
> 128 bank: 15, block: 0 : 0xc00020f3
> 128 bank: 15, block: 1 : 0x0
> 128 bank: 16, block: 0 : 0xc0002103
> 128 bank: 16, block: 1 : 0x0

Banks 15 and 16 should have an address for block 1 also. Do you have PFEH
enabled on your system? That would cause MISC0 to be RAZ so we won't
get the MISC1 address. I'll try it myself also and let you know.

> 128 bank: 17, block: 0 : 0xc0002113
> 128 bank: 17, block: 1 : 0x0
> 128 bank: 18, block: 0 : 0xc0002123
> 128 bank: 18, block: 1 : 0x0
> 128 bank: 19, block: 0 : 0xc0002133
> 128 bank: 19, block: 1 : 0x0
> 128 bank: 20, block: 0 : 0xc0002143
> 128 bank: 20, block: 1 : 0x0
> 128 bank: 21, block: 0 : 0xc0002153
> 128 bank: 21, block: 1 : 0x0
> 128 bank: 22, block: 0 : 0xc0002163
> 128 bank: 22, block: 1 : 0x0
>
> so you have 128 times the same address on a 128 core Zen box.
>
> If that is correct, then we can use this nice simplification and cache
> them globally and not per-CPU. Seems to work here. Scream if something's
> amiss.
>

I think this good for now. We'll probably need to change it in the future, but
maybe we can clean up all the thresholding blocks code and make it simpler
when we do change it.

> Thx.
>
> ---
> From d8614e904d8f63b89d2d204d6fa247c4c416a1b6 Mon Sep 17 00:00:00
> 2001
> From: Borislav Petkov <bp@susede>
> Date: Thu, 17 May 2018 10:46:26 +0200
> Subject: [PATCH] array caching
>
> Not-Signed-off-by: Borislav Petkov <[email protected]>
> ---
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++--------------
> 1 file changed, 14 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index f7666eef4a87..c8e038800591 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
> [SMCA_SMU] = { "smu", "System Management Unit" },
> };
>
> +static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
> +{
> + [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
> +};
> +
> const char *smca_get_name(enum smca_bank_types t)
> {
> if (t >= N_SMCA_BANK_TYPES)
> @@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int
> cpu, unsigned int bank,
> if (!block)
> return MSR_AMD64_SMCA_MCx_MISC(bank);
>
> + /* Check our cache first: */
> + if (smca_bank_addrs[bank][block] != -1)
> + return smca_bank_addrs[bank][block];
> +

This hunk could go above the !block. Though maybe the macro is lighter than the
array lookup. It'll work either way, but I'm just thinking out loud.

> /*
> * For SMCA enabled processors, BLKPTR field of the first MISC register
> * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-
> 4).
> */
> if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank),
> &low, &high))
> - return addr;
> + goto out;
>
> if (!(low & MCI_CONFIG_MCAX))
> - return addr;
> + goto out;
>
> if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank),
> &low, &high) &&
> (low & MASK_BLKPTR_LO))
> - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
> + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
>
> +out:
> + smca_bank_addrs[bank][block] = addr;
> return addr;
> }
>

Since we're caching the values during init, we can drop all the *_on_cpu() calls.
What do you think?

Thanks,
Yazen

2018-05-17 13:45:22

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On Thu, May 17, 2018 at 01:04:19PM +0000, Ghannam, Yazen wrote:
> Yes, you're right. I thought using the existing data structures would work, but it
> seems I messed up the implementation.

Not only that - your idea wouldn't fly because the per-CPU stuff you
were using gets torn down when the CPU goes offline so you can't use
them on resume because they're not there yet.

> Banks 15 and 16 should have an address for block 1 also. Do you have PFEH
> enabled on your system? That would cause MISC0 to be RAZ so we won't
> get the MISC1 address. I'll try it myself also and let you know.

I check PFEH is enabled how?

> I think this good for now. We'll probably need to change it in the
> future, but maybe we can clean up all the thresholding blocks code and
> make it simpler when we do change it.

Ok.

> This hunk could go above the !block. Though maybe the macro is lighter than the
> array lookup. It'll work either way, but I'm just thinking out loud.

Yeah, it doesn't matter in that case.

> Since we're caching the values during init, we can drop all the
> *_on_cpu() calls. What do you think?

Well, if they're all the same on all CPUs, sure. That's your call.

--
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-05-17 14:06:24

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

> -----Original Message-----
> From: Borislav Petkov <[email protected]>
> Sent: Thursday, May 17, 2018 9:44 AM
> To: Ghannam, Yazen <[email protected]>
> Cc: Johannes Hirte <[email protected]>; linux-
> [email protected]; [email protected]; [email protected];
> [email protected]
> Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized
> block
>
> On Thu, May 17, 2018 at 01:04:19PM +0000, Ghannam, Yazen wrote:
...
>
> I check PFEH is enabled how?
>

If MISC0 is RAZ then you can assume PFEH is enabled. There should be a BIOS
option to disable it.

BTW, I just tried you patch with PFEH disabled and it seems to work fine.

...
> > Since we're caching the values during init, we can drop all the
> > *_on_cpu() calls. What do you think?
>
> Well, if they're all the same on all CPUs, sure. That's your call.
>

Let's drop them. We won't need them since we're caching the values during
init. And the init code is run on the target CPU.

We can just make smca_bank_addrs[][] into per_cpu when we need to support
different values on different CPUs.

Thanks,
Yazen


2018-05-17 18:31:44

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 1/2] x86/MCE/AMD: Cache SMCA MISC block addresses

From: Borislav Petkov <[email protected]>

... into a global, two-dimensional array and service subsequent reads
from that cache to avoid rdmsr_on_cpu() calls during CPU hotplug (IPIs
with IRQs disabled).

In addition, this fixes a KASAN slab-out-of-bounds read due to wrong
usage of the bank->blocks pointer.

Reported-by: Johannes Hirte <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Yazen Ghannam <[email protected]>
Fixes: 27bd59502702 ("x86/mce/AMD: Get address from already initialized block")
Link: http://lkml.kernel.org/r/20180414004230.GA2033@probook
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++--------------
1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..c8e038800591 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
[SMCA_SMU] = { "smu", "System Management Unit" },
};

+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+ [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);

+ /* Check our cache first: */
+ if (smca_bank_addrs[bank][block] != -1)
+ return smca_bank_addrs[bank][block];
+
/*
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ goto out;

if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ goto out;

if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

+out:
+ smca_bank_addrs[bank][block] = addr;
return addr;
}

@@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return addr;

- /* Get address from already initialized block. */
- if (per_cpu(threshold_banks, cpu)) {
- struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
- if (bankp && bankp->blocks) {
- struct threshold_block *blockp = &bankp->blocks[block];
-
- if (blockp)
- return blockp->address;
- }
- }
-
if (mce_flags.smca)
return smca_get_block_address(cpu, bank, block);

--
2.17.0.391.g1f1cddd558b5

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-05-17 18:32:34

by Borislav Petkov

[permalink] [raw]
Subject: [PATCH 2/2] x86/MCE/AMD: Read MCx_MISC block addresses on any CPU

From: Borislav Petkov <[email protected]>

We used rdmsr_safe_on_cpu() to make sure we're reading the proper CPU's
MISC block addresses. However, that caused trouble with CPU hotplug due
to the _on_cpu() helper issuing an IPI while IRQs are disabled.

But we don't have to do that: the block addresses are the same on any
CPU so we can read them on any CPU. (What practically happens is, we
read them on the BSP and cache them, and for later reads, we service
them from the cache).

Suggested-by: Yazen Ghannam <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c8e038800591..f591b01930db 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}

-static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
- unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block)
{
u32 low, high;
u32 addr = 0;
@@ -456,13 +455,13 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
goto out;

if (!(low & MCI_CONFIG_MCAX))
goto out;

- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

@@ -471,7 +470,7 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
return addr;
}

-static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -480,7 +479,7 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
return addr;

if (mce_flags.smca)
- return smca_get_block_address(cpu, bank, block);
+ return smca_get_block_address(bank, block);

/* Fall back to method we used for older processors: */
switch (block) {
@@ -558,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
smca_configure(bank, cpu);

for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(cpu, address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block);
if (!address)
break;

@@ -1175,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(cpu, address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block);
if (!address)
return 0;

--
2.17.0.391.g1f1cddd558b5

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

2018-05-17 19:30:29

by Johannes Hirte

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On 2018 Mai 17, Borislav Petkov wrote:
> On Thu, May 17, 2018 at 08:49:31AM +0200, Johannes Hirte wrote:
> > Maybe I'm missing something, but those RDMSR IPSs don't happen on
> > pre-SMCA systems, right? So the caching should be avoided here, cause
> > the whole lookup looks more expensive to me than the simple switch-block
> > in get_block_address.
>
> Yeah, and we should simply cache all those MSR values as I suggested then.
>
> The patch at the end should fix your issue.
>

Works as expected on my Carrizo.

--
Regards,
Johannes


2018-05-17 19:35:14

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 3/3] x86/MCE/AMD: Get address from already initialized block

On Thu, May 17, 2018 at 09:29:23PM +0200, Johannes Hirte wrote:
> Works as expected on my Carrizo.

Thanks, I'll add your Tested-by.

--
Regards/Gruss,
Boris.

SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--

Subject: [tip:ras/urgent] x86/MCE/AMD: Cache SMCA MISC block addresses

Commit-ID: 78ce241099bb363b19dbd0245442e66c8de8f567
Gitweb: https://git.kernel.org/tip/78ce241099bb363b19dbd0245442e66c8de8f567
Author: Borislav Petkov <[email protected]>
AuthorDate: Thu, 17 May 2018 10:46:26 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sat, 19 May 2018 15:19:30 +0200

x86/MCE/AMD: Cache SMCA MISC block addresses

... into a global, two-dimensional array and service subsequent reads from
that cache to avoid rdmsr_on_cpu() calls during CPU hotplug (IPIs with IRQs
disabled).

In addition, this fixes a KASAN slab-out-of-bounds read due to wrong usage
of the bank->blocks pointer.

Fixes: 27bd59502702 ("x86/mce/AMD: Get address from already initialized block")
Reported-by: Johannes Hirte <[email protected]>
Tested-by: Johannes Hirte <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Yazen Ghannam <[email protected]>
Link: http://lkml.kernel.org/r/20180414004230.GA2033@probook
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 29 ++++++++++++++---------------
1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f7666eef4a87..c8e038800591 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = {
[SMCA_SMU] = { "smu", "System Management Unit" },
};

+static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
+{
+ [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 }
+};
+
const char *smca_get_name(enum smca_bank_types t)
{
if (t >= N_SMCA_BANK_TYPES)
@@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
if (!block)
return MSR_AMD64_SMCA_MCx_MISC(bank);

+ /* Check our cache first: */
+ if (smca_bank_addrs[bank][block] != -1)
+ return smca_bank_addrs[bank][block];
+
/*
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ goto out;

if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ goto out;

if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
- return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);

+out:
+ smca_bank_addrs[bank][block] = addr;
return addr;
}

@@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return addr;

- /* Get address from already initialized block. */
- if (per_cpu(threshold_banks, cpu)) {
- struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
-
- if (bankp && bankp->blocks) {
- struct threshold_block *blockp = &bankp->blocks[block];
-
- if (blockp)
- return blockp->address;
- }
- }
-
if (mce_flags.smca)
return smca_get_block_address(cpu, bank, block);