2013-03-14 22:11:55

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v2 0/2] AMD MCE fixes


Boris,

Here is the updated patch for determining the number of register banks on
AMD, plus a patch removing the shared_bank array, as you suggested.

Offline/online testing didn't show any issues.



Boris Ostrovsky (2):
x86/mce: Replace shared_bank array with is_shared_bank() helper
x86/mce: Use MCG_CAP MSR to find out number of banks on AMD

arch/x86/kernel/cpu/mcheck/mce_amd.c | 38 ++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 15 deletions(-)

--
1.8.1.2


2013-03-14 22:11:44

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v2 1/2] x86/mce: Replace shared_bank array with is_shared_bank() helper

Use a helper function instead of an array to report whether a register
bank is shared. Currently only bank 4 (northbridge) is shared.

Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 1ac581f..654a155 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -58,11 +58,6 @@ static const char * const th_names[] = {
};

static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
-
-static unsigned char shared_bank[NR_BANKS] = {
- 0, 0, 0, 0, 1
-};
-
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
@@ -79,6 +74,12 @@ struct thresh_restart {
u16 old_limit;
};

+static inline bool is_shared_bank(int bank)
+{
+ /* Bank 4 is for northbridge reporting and is thus shared */
+ return (bank == 4);
+}
+
static const char * const bank4_names(struct threshold_block *b)
{
switch (b->address) {
@@ -575,7 +576,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
const char *name = th_names[bank];
int err = 0;

- if (shared_bank[bank]) {
+ if (is_shared_bank(bank)) {
nb = node_to_amd_nb(amd_get_nb_id(cpu));

/* threshold descriptor already initialized on this node? */
@@ -609,7 +610,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)

per_cpu(threshold_banks, cpu)[bank] = b;

- if (shared_bank[bank]) {
+ if (is_shared_bank(bank)) {
atomic_set(&b->cpus, 1);

/* nb is already initialized, see above */
@@ -691,7 +692,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (!b->blocks)
goto free_out;

- if (shared_bank[bank]) {
+ if (is_shared_bank(bank)) {
if (!atomic_dec_and_test(&b->cpus)) {
__threshold_remove_blocks(b);
per_cpu(threshold_banks, cpu)[bank] = NULL;
--
1.8.1.2

2013-03-14 22:11:43

by Boris Ostrovsky

[permalink] [raw]
Subject: [PATCH v2 2/2] x86/mce: Use MCG_CAP MSR to find out number of banks on AMD

Currently the number of error reporting register banks is hardcoded to
6 on AMD processors. This may break in virtualized scenarios when
a hypervisor prefers to report fewer banks than what the physical
HW provides.

Since the number of supported banks is reported in MSR_IA32_MCG_CAP[7:0],
that's what we should use.

Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/kernel/cpu/mcheck/mce_amd.c | 21 ++++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 654a155..13a22e2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -33,7 +33,6 @@
#include <asm/mce.h>
#include <asm/msr.h>

-#define NR_BANKS 6
#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
@@ -57,7 +56,7 @@ static const char * const th_names[] = {
"execution_unit",
};

-static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
+static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
@@ -215,7 +214,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block;
int offset = -1;

- for (bank = 0; bank < NR_BANKS; ++bank) {
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
if (block == 0)
address = MSR_IA32_MC0_MISC + bank * 4;
@@ -277,7 +276,7 @@ static void amd_threshold_interrupt(void)
mce_setup(&m);

/* assume first bank caused it */
- for (bank = 0; bank < NR_BANKS; ++bank) {
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
continue;
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -468,7 +467,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
u32 low, high;
int err;

- if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
return 0;

if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -636,9 +635,16 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
static __cpuinit int threshold_create_device(unsigned int cpu)
{
unsigned int bank;
+ struct threshold_bank **bp;
int err = 0;

- for (bank = 0; bank < NR_BANKS; ++bank) {
+ bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
+ GFP_KERNEL);
+ if (bp == NULL)
+ return -ENOMEM;
+ per_cpu(threshold_banks, cpu) = bp;
+
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
err = threshold_create_bank(cpu, bank);
@@ -720,11 +726,12 @@ static void threshold_remove_device(unsigned int cpu)
{
unsigned int bank;

- for (bank = 0; bank < NR_BANKS; ++bank) {
+ for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue;
threshold_remove_bank(cpu, bank);
}
+ kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
--
1.8.1.2

2013-03-14 22:20:10

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] x86/mce: Replace shared_bank array with is_shared_bank() helper

On Thu, Mar 14, 2013 at 05:10:40PM -0400, Boris Ostrovsky wrote:
> Use helper function instead of an array to report whether register
> bank is shared. Currently only bank 4 (northbridge) is shared.
>
> Signed-off-by: Boris Ostrovsky <[email protected]>
> ---
> arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 +++++++++--------
> 1 file changed, 9 insertions(+), 8 deletions(-)

<formletter>

This is not the correct way to submit patches for inclusion in the
stable kernel tree. Please read Documentation/stable_kernel_rules.txt
for how to do this properly.

</formletter>

2013-03-14 22:47:33

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] x86/mce: Replace shared_bank array with is_shared_bank() helper

On Thu, Mar 14, 2013 at 03:20:05PM -0700, Greg KH wrote:
> On Thu, Mar 14, 2013 at 05:10:40PM -0400, Boris Ostrovsky wrote:
> > Use helper function instead of an array to report whether register
> > bank is shared. Currently only bank 4 (northbridge) is shared.
> >
> > Signed-off-by: Boris Ostrovsky <[email protected]>
> > ---
> > arch/x86/kernel/cpu/mcheck/mce_amd.c | 17 +++++++++--------
> > 1 file changed, 9 insertions(+), 8 deletions(-)
>
> <formletter>
>
> This is not the correct way to submit patches for inclusion in the
> stable kernel tree. Please read Documentation/stable_kernel_rules.txt
> for how to do this properly.

Yeah,

stable@ was obviously wrongly CCed. This patch is clearly not stable
material.

Thanks.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2013-03-15 19:26:20

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] AMD MCE fixes

On Thu, Mar 14, 2013 at 05:10:39PM -0400, Boris Ostrovsky wrote:
>
> Boris,
>
> Here is the updated patch for determining number of register banks on
> AMD plus a patch removing shared_bank array, as you suggested.
>
> Offline/online testing didn't show any issues.
>
>
>
> Boris Ostrovsky (2):
> x86/mce: Replace shared_bank array with is_shared_bank() helper
> x86/mce: Use MCG_CAP MSR to find out number of banks on AMD

Both applied,

thanks.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--