2019-05-23 15:07:49

by Yazen Ghannam

[permalink] [raw]
Subject: [PATCH] x86/MCE: Statically allocate mce_banks_array

From: Yazen Ghannam <[email protected]>

The MCE control data is stored in an array of struct mce_banks. This
array has historically been shared by all CPUs and it was allocated
dynamically during the first CPU's init sequence.

However, starting with

5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")

the array was changed to become a per-CPU array. Each CPU would
dynamically allocate the array during its own init sequence.

This seems benign expect when "Lock Debugging" config options are
enabled in which case the following message appears.

BUG: sleeping function called from invalid context at mm/slab.h:418

The message appears during the secondary CPUs' init sequences. This seems
to be because these CPUs are in system_state=SYSTEM_SCHEDULING compared
to the primary CPU which is in system_state=SYSTEM_BOOTING.

Allocate the mce_banks_array statically so that this issue can be
avoided.

Also, remove the now unnecessary return values from
__mcheck_cpu_mce_banks_init() and __mcheck_cpu_cap_init().

Fixes: 5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")
Reported-by: kernel test robot <[email protected]>
Suggested-by: Borislav Petkov <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/kernel/cpu/mce/core.c | 39 ++++++++++++----------------------
1 file changed, 14 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 25e501a853cd..b8eebebbc2f8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -70,7 +70,7 @@ struct mce_bank {
u64 ctl; /* subevents to enable */
bool init; /* initialise bank? */
};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank *, mce_banks_array);
+static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank [MAX_NR_BANKS], mce_banks_array);

#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
@@ -690,7 +690,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
bool error_seen = false;
struct mce m;
int i;
@@ -1138,7 +1138,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;
int severity, i;

@@ -1480,16 +1480,12 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);

-static int __mcheck_cpu_mce_banks_init(void)
+static void __mcheck_cpu_mce_banks_init(void)
{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
u8 n_banks = this_cpu_read(mce_num_banks);
- struct mce_bank *mce_banks;
int i;

- mce_banks = kcalloc(n_banks, sizeof(struct mce_bank), GFP_KERNEL);
- if (!mce_banks)
- return -ENOMEM;
-
for (i = 0; i < n_banks; i++) {
struct mce_bank *b = &mce_banks[i];

@@ -1501,15 +1497,12 @@ static int __mcheck_cpu_mce_banks_init(void)
b->ctl = -1ULL;
b->init = 1;
}
-
- per_cpu(mce_banks_array, smp_processor_id()) = mce_banks;
- return 0;
}

/*
* Initialize Machine Checks for a CPU.
*/
-static int __mcheck_cpu_cap_init(void)
+static void __mcheck_cpu_cap_init(void)
{
u64 cap;
u8 b;
@@ -1526,11 +1519,7 @@ static int __mcheck_cpu_cap_init(void)

this_cpu_write(mce_num_banks, b);

- if (!this_cpu_read(mce_banks_array)) {
- int err = __mcheck_cpu_mce_banks_init();
- if (err)
- return err;
- }
+ __mcheck_cpu_mce_banks_init();

/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
@@ -1538,8 +1527,6 @@ static int __mcheck_cpu_cap_init(void)

if (cap & MCG_SER_P)
mca_cfg.ser = 1;
-
- return 0;
}

static void __mcheck_cpu_init_generic(void)
@@ -1566,7 +1553,7 @@ static void __mcheck_cpu_init_generic(void)

static void __mcheck_cpu_init_clear_banks(void)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
u64 msrval;
int i;

@@ -1617,7 +1604,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
struct mca_config *cfg = &mca_cfg;

if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
@@ -1854,7 +1841,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
if (!mce_available(c))
return;

- if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
+ __mcheck_cpu_cap_init();
+
+ if (__mcheck_cpu_apply_quirks(c) < 0) {
mca_cfg.disabled = 1;
return;
}
@@ -1988,7 +1977,7 @@ int __init mcheck_init(void)
*/
static void mce_disable_error_reporting(void)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;

for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
@@ -2332,7 +2321,7 @@ static void mce_disable_cpu(void)

static void mce_reenable_cpu(void)
{
- struct mce_bank *mce_banks = this_cpu_read(mce_banks_array);
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
int i;

if (!mce_available(raw_cpu_ptr(&cpu_info)))
--
2.17.1


2019-05-23 18:29:53

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/MCE: Statically allocate mce_banks_array

On Thu, May 23, 2019 at 03:03:55PM +0000, Ghannam, Yazen wrote:
> From: Yazen Ghannam <[email protected]>
>
> The MCE control data is stored in an array of struct mce_banks. This
> array has historically been shared by all CPUs and it was allocated
> dynamically during the first CPU's init sequence.
>
> However, starting with
>
> 5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")
>
> the array was changed to become a per-CPU array. Each CPU would
> dynamically allocate the array during its own init sequence.
>
> This seems benign expect when "Lock Debugging" config options are
> enabled in which case the following message appears.
>
> BUG: sleeping function called from invalid context at mm/slab.h:418
>
> The message appears during the secondary CPUs' init sequences. This seems
> to be because these CPUs are in system_state=SYSTEM_SCHEDULING compared
> to the primary CPU which is in system_state=SYSTEM_BOOTING.
>
> Allocate the mce_banks_array statically so that this issue can be
> avoided.
>
> Also, remove the now unnecessary return values from
> __mcheck_cpu_mce_banks_init() and __mcheck_cpu_cap_init().
>
> Fixes: 5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")
> Reported-by: kernel test robot <[email protected]>
> Suggested-by: Borislav Petkov <[email protected]>
> Signed-off-by: Yazen Ghannam <[email protected]>
> ---
> arch/x86/kernel/cpu/mce/core.c | 39 ++++++++++++----------------------
> 1 file changed, 14 insertions(+), 25 deletions(-)

Can you rediff this patch against tip/master please?

It fixes a patch which is already in -rc1 so it needs to go first, into
urgent, before your patchset.

Thx.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply. Srsly.

2019-05-23 19:40:26

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH] x86/MCE: Statically allocate mce_banks_array

On Thu, May 23, 2019 at 07:23:08PM +0000, Ghannam, Yazen wrote:
> Sure, but which patch are you referring to?
>
> This seems to fix a patch in the set in bp/rc0+3-ras.

Sorry, I got confused. So bp/rc0+3-ras is not cast in stone - feel free
to take it and merge this change into 5b0883f5c7be ("x86/MCE: Make
mce_banks a per-CPU array") and then fix up any potential conflicts
coming from the following patches. And then send the whole pile as a new
revision.

Thx.

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply. Srsly.

2019-05-23 19:40:36

by Yazen Ghannam

[permalink] [raw]
Subject: RE: [PATCH] x86/MCE: Statically allocate mce_banks_array

> -----Original Message-----
> From: Borislav Petkov <[email protected]>
> Sent: Thursday, May 23, 2019 3:28 PM
> To: Ghannam, Yazen <[email protected]>
> Cc: [email protected]; [email protected]; [email protected]; [email protected]
> Subject: Re: [PATCH] x86/MCE: Statically allocate mce_banks_array
>
>
> On Thu, May 23, 2019 at 03:03:55PM +0000, Ghannam, Yazen wrote:
> > From: Yazen Ghannam <[email protected]>
> >
> > The MCE control data is stored in an array of struct mce_banks. This
> > array has historically been shared by all CPUs and it was allocated
> > dynamically during the first CPU's init sequence.
> >
> > However, starting with
> >
> > 5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")
> >
> > the array was changed to become a per-CPU array. Each CPU would
> > dynamically allocate the array during its own init sequence.
> >
> > This seems benign expect when "Lock Debugging" config options are
> > enabled in which case the following message appears.
> >
> > BUG: sleeping function called from invalid context at mm/slab.h:418
> >
> > The message appears during the secondary CPUs' init sequences. This seems
> > to be because these CPUs are in system_state=SYSTEM_SCHEDULING compared
> > to the primary CPU which is in system_state=SYSTEM_BOOTING.
> >
> > Allocate the mce_banks_array statically so that this issue can be
> > avoided.
> >
> > Also, remove the now unnecessary return values from
> > __mcheck_cpu_mce_banks_init() and __mcheck_cpu_cap_init().
> >
> > Fixes: 5b0883f5c7be ("x86/MCE: Make mce_banks a per-CPU array")
> > Reported-by: kernel test robot <[email protected]>
> > Suggested-by: Borislav Petkov <[email protected]>
> > Signed-off-by: Yazen Ghannam <[email protected]>
> > ---
> > arch/x86/kernel/cpu/mce/core.c | 39 ++++++++++++----------------------
> > 1 file changed, 14 insertions(+), 25 deletions(-)
>
> Can you rediff this patch against tip/master please?
>
> It fixes a patch which is already in -rc1 so it needs to go first, into
> urgent, before your patchset.
>

Sure, but which patch are you referring to?

This seems to fix a patch in the set in bp/rc0+3-ras.

Thanks,
Yazen