2022-02-15 00:27:48

by Smita Koralahalli

[permalink] [raw]
Subject: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

According to Section 2.1.16.3 under HWCR[McStatusWrEn] in "PPR for AMD
Family 19h, Model 01h, Revision B1 Processors - 55898 Rev 0.35 - Feb 5,
2021", the status register may sometimes enforce write ignored behavior
independent of the value of HWCR[McStatusWrEn] depending on the platform
settings.

Hence, evaluate for writes ignored for MCA_STATUS before doing error
injection. If true, return the appropriate error code to userspace.

Make mca_msr_reg exportable in order to be accessible from mce-inject
module.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537
Signed-off-by: Smita Koralahalli <[email protected]>
Reviewed-by: Yazen Ghannam <[email protected]>
---
Link:
https://lkml.kernel.org/r/[email protected]

v2:
msr_ops -> mca_msr_reg().
simulation -> injection.
pr_info() -> pr_err().
Aligned on ",".
v3:
Removed "x86/mce: Use mca_msr_reg() in prepare_msrs()" patch.
and made changes on the existing MCx_{STATUS, ADDR, MISC} macros.
v4:
Simplified the code by just checking for writes ignored behavior in
MCA_STATUS register.
Introduced prepare_mca_status() and performed writes ignored checks
inside the function.
Rephrased error message.
---
arch/x86/kernel/cpu/mce/core.c | 1 +
arch/x86/kernel/cpu/mce/inject.c | 37 +++++++++++++++++++++++++++++---
2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4f1e825033ce..4ddad8082989 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -188,6 +188,7 @@ u32 mca_msr_reg(int bank, enum mca_msr reg)

return 0;
}
+EXPORT_SYMBOL_GPL(mca_msr_reg);

static void __print_mce(struct mce *m)
{
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ffb3233..43ba63b7dc73 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
__func__, PCI_FUNC(F3->devfn), NBCFG);
}

+struct mce_err_handler {
+ struct mce *mce;
+ int err;
+};
+
+static struct mce_err_handler mce_err;
+
+static bool prepare_mca_status(struct mce *m)
+{
+ u32 status_reg = mca_msr_reg(m->bank, MCA_STATUS);
+ u64 status_val = m->status;
+
+ wrmsrl(status_reg, status_val);
+ rdmsrl(status_reg, status_val);
+
+ return status_val;
+}
+
static void prepare_msrs(void *info)
{
- struct mce m = *(struct mce *)info;
+ struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
+ struct mce m = *i_mce_err->mce;
u8 b = m.bank;

+ if (!prepare_mca_status(&m)) {
+ pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
+ i_mce_err->err = -EINVAL;
+ return;
+ }
+
wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

if (boot_cpu_has(X86_FEATURE_SMCA)) {
@@ -501,6 +526,9 @@ static void do_inject(void)
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;

+ mce_err.mce = &i_mce;
+ mce_err.err = 0;
+
i_mce.tsc = rdtsc_ordered();

i_mce.status |= MCI_STATUS_VAL;
@@ -552,10 +580,13 @@ static void do_inject(void)

i_mce.mcgstatus = mcg_status;
i_mce.inject_flags = inj_type;
- smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
+ smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);

toggle_hw_mce_inject(cpu, false);

+ if (mce_err.err)
+ goto err;
+
switch (inj_type) {
case DFR_INT_INJ:
smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
@@ -624,7 +655,7 @@ static int inj_bank_set(void *data, u64 val)
/* Reset injection struct */
setup_inj_struct(&i_mce);

- return 0;
+ return mce_err.err;
}

MCE_INJECT_GET(bank);
--
2.17.1


2022-04-06 18:09:02

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On Mon, Feb 14, 2022 at 05:36:39PM -0600, Smita Koralahalli wrote:
> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
> index 5fbd7ffb3233..43ba63b7dc73 100644
> --- a/arch/x86/kernel/cpu/mce/inject.c
> +++ b/arch/x86/kernel/cpu/mce/inject.c
> @@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
> __func__, PCI_FUNC(F3->devfn), NBCFG);
> }
>
> +struct mce_err_handler {
> + struct mce *mce;
> + int err;
> +};

Well, we already have a struct mce i_mce which serves as a sort-of
global container for injection data which gets dumped into the hardware
upon injection time.

Now you're adding another struct which contains additional info and are
adding a pointer to that i_mce.

But this is not a clean design - when you do stuff like that you need to
unify all those global-info-containing structs and make the code dealing
with them straight-forward . I.e., if you don't look at the big picture
of a design, it soon grows into an unwieldy mess which some poor sod
would need to clean up afterwards and that poor sod is in most cases the
maintainer.

So I went and did that ontop of your patch, this is one possible way to
do it but it.

Here it is, a combined diff ontop of tip:ras/core. Please split it into
patches: first patch converts to the new descriptor and then a second
one adds the MCA_STATUS checking bits.

There are a couple of other changes in there, if they're not clear, feel
free to ask.

Thx.

---
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 43ba63b7dc73..0fd1eea2f754 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -33,10 +33,14 @@

#include "internal.h"

-/*
- * Collect all the MCi_XXX settings
- */
-static struct mce i_mce;
+static bool hw_injection_possible = true;
+
+/* Collect all the MCi_XXX settings */
+static struct inject_desc {
+ struct mce m;
+ int err;
+} inj_desc;
+
static struct dentry *dfs_inj;

#define MAX_FLAG_OPT_SIZE 4
@@ -110,9 +114,11 @@ static int inj_ipid_set(void *data, u64 val)

DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");

-static void setup_inj_struct(struct mce *m)
+static void setup_inj_struct(void)
{
- memset(m, 0, sizeof(struct mce));
+ struct mce *m = &inj_desc.m;
+
+ memset(&inj_desc, 0, sizeof(struct inject_desc));

m->cpuvendor = boot_cpu_data.x86_vendor;
m->time = ktime_get_real_seconds();
@@ -470,84 +476,78 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
__func__, PCI_FUNC(F3->devfn), NBCFG);
}

-struct mce_err_handler {
- struct mce *mce;
- int err;
-};
-
-static struct mce_err_handler mce_err;
-
-static bool prepare_mca_status(struct mce *m)
+static bool prepare_mca_status(void)
{
- u32 status_reg = mca_msr_reg(m->bank, MCA_STATUS);
- u64 status_val = m->status;
+ u32 status_reg = mca_msr_reg(inj_desc.m.bank, MCA_STATUS);
+ u64 status_val = inj_desc.m.status;

wrmsrl(status_reg, status_val);
rdmsrl(status_reg, status_val);

- return status_val;
+ return status_val == inj_desc.m.status;
}

-static void prepare_msrs(void *info)
+static void prepare_msrs(void *unused)
{
- struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
- struct mce m = *i_mce_err->mce;
- u8 b = m.bank;
+ struct mce *m = &inj_desc.m;
+ u8 b = inj_desc.m.bank;

- if (!prepare_mca_status(&m)) {
+ if (!prepare_mca_status()) {
pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
- i_mce_err->err = -EINVAL;
+ inj_desc.err = -EINVAL;
+ hw_injection_possible = false;
return;
}

- wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+ wrmsrl(MSR_IA32_MCG_STATUS, m->mcgstatus);

if (boot_cpu_has(X86_FEATURE_SMCA)) {
- if (m.inject_flags == DFR_INT_INJ) {
- wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
+ if (m->inject_flags == DFR_INT_INJ) {
+ wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m->status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m->addr);
} else {
- wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
- wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
+ wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m->status);
+ wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m->addr);
}

- wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
- wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
+ wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m->misc);
+ wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m->synd);
} else {
- wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
- wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
- wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
+ wrmsrl(MSR_IA32_MCx_STATUS(b), m->status);
+ wrmsrl(MSR_IA32_MCx_ADDR(b), m->addr);
+ wrmsrl(MSR_IA32_MCx_MISC(b), m->misc);
}
}

-static void do_inject(void)
+static int do_inject(void)
{
+ struct mce *m = &inj_desc.m;
+ unsigned int cpu = m->extcpu;
u64 mcg_status = 0;
- unsigned int cpu = i_mce.extcpu;
- u8 b = i_mce.bank;
+ u8 b = m->bank;

- mce_err.mce = &i_mce;
- mce_err.err = 0;
+ m->tsc = rdtsc_ordered();

- i_mce.tsc = rdtsc_ordered();
+ m->status |= MCI_STATUS_VAL;

- i_mce.status |= MCI_STATUS_VAL;
+ if (m->misc)
+ m->status |= MCI_STATUS_MISCV;

- if (i_mce.misc)
- i_mce.status |= MCI_STATUS_MISCV;
-
- if (i_mce.synd)
- i_mce.status |= MCI_STATUS_SYNDV;
+ if (m->synd)
+ m->status |= MCI_STATUS_SYNDV;

if (inj_type == SW_INJ) {
- mce_log(&i_mce);
- return;
+ mce_log(m);
+ return 0;
}

+ if (!hw_injection_possible)
+ return -EINVAL;
+
/* prep MCE global settings for the injection */
mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;

- if (!(i_mce.status & MCI_STATUS_PCC))
+ if (!(m->status & MCI_STATUS_PCC))
mcg_status |= MCG_STATUS_RIPV;

/*
@@ -556,8 +556,8 @@ static void do_inject(void)
* - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
*/
if (inj_type == DFR_INT_INJ) {
- i_mce.status |= MCI_STATUS_DEFERRED;
- i_mce.status &= ~MCI_STATUS_UC;
+ m->status |= MCI_STATUS_DEFERRED;
+ m->status &= ~MCI_STATUS_UC;
}

/*
@@ -578,13 +578,13 @@ static void do_inject(void)

toggle_hw_mce_inject(cpu, true);

- i_mce.mcgstatus = mcg_status;
- i_mce.inject_flags = inj_type;
- smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);
+ m->mcgstatus = mcg_status;
+ m->inject_flags = inj_type;
+ smp_call_function_single(cpu, prepare_msrs, NULL, 0);

toggle_hw_mce_inject(cpu, false);

- if (mce_err.err)
+ if (inj_desc.err)
goto err;

switch (inj_type) {
@@ -601,6 +601,7 @@ static void do_inject(void)
err:
cpus_read_unlock();

+ return inj_desc.err;
}

/*
@@ -611,6 +612,7 @@ static int inj_bank_set(void *data, u64 val)
{
struct mce *m = (struct mce *)data;
u8 n_banks;
+ int err;
u64 cap;

/* Get bank count on target CPU so we can handle non-uniform values. */
@@ -650,12 +652,12 @@ static int inj_bank_set(void *data, u64 val)
}

inject:
- do_inject();
+ err = do_inject();

/* Reset injection struct */
- setup_inj_struct(&i_mce);
+ setup_inj_struct();

- return mce_err.err;
+ return err;
}

MCE_INJECT_GET(bank);
@@ -745,7 +747,7 @@ static void __init debugfs_init(void)

for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
- &i_mce, dfs_fls[i].fops);
+ &inj_desc.m, dfs_fls[i].fops);
}

static int __init inject_init(void)
@@ -758,7 +760,7 @@ static int __init inject_init(void)
register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
mce_register_injector_chain(&inject_nb);

- setup_inj_struct(&i_mce);
+ setup_inj_struct();

pr_info("Machine check injector initialized\n");

diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 4ae0e603f7fa..7e03f5b7f6bd 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);

static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- if (mce_flags.smca) {
+ if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
switch (reg) {
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-04-14 04:43:01

by Smita Koralahalli

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On 4/6/2022 9:08 AM, Borislav Petkov wrote:
> On Mon, Feb 14, 2022 at 05:36:39PM -0600, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
>> index 5fbd7ffb3233..43ba63b7dc73 100644
>> --- a/arch/x86/kernel/cpu/mce/inject.c
>> +++ b/arch/x86/kernel/cpu/mce/inject.c
>> @@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
>> __func__, PCI_FUNC(F3->devfn), NBCFG);
>> }
>>
>> +struct mce_err_handler {
>> + struct mce *mce;
>> + int err;
>> +};
> Well, we already have a struct mce i_mce which serves as a sort-of
> global container for injection data which gets dumped into the hardware
> upon injection time.
>
> Now you're adding another struct which contains additional info and are
> adding a pointer to that i_mce.
>
> But this is not a clean design - when you do stuff like that you need to
> unify all those global-info-containing structs and make the code dealing
> with them straight-forward . I.e., if you don't look at the big picture
> of a design, it soon grows into an unwieldy mess which some poor sod
> would need to clean up afterwards and that poor sod is in most cases the
> maintainer.
>
> So I went and did that ontop of your patch, this is one possible way to
> do it but it.
>
> Here it is, a combined diff ontop of tip:ras/core. Please split it into
> patches: first patch converts to the new descriptor and then a second
> one adds the MCA_STATUS checking bits.
>
> There are a couple of other changes in there, if they're not clear, feel
> free to ask.
Okay, will make changes as mentioned. Thanks.
I understood most of it, just have one doubt below..
>
> Thx.
>
> ---
> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
> index 43ba63b7dc73..0fd1eea2f754 100644
> --- a/arch/x86/kernel/cpu/mce/inject.c
> +++ b/arch/x86/kernel/cpu/mce/inject.c
> @@ -33,10 +33,14 @@
>
> #include "internal.h"
>
> -/*
> - * Collect all the MCi_XXX settings
> - */
> -static struct mce i_mce;
> +static bool hw_injection_possible = true;
> +
> +/* Collect all the MCi_XXX settings */
> +static struct inject_desc {
> + struct mce m;
> + int err;
> +} inj_desc;
> +
> static struct dentry *dfs_inj;
>
> #define MAX_FLAG_OPT_SIZE 4
> @@ -110,9 +114,11 @@ static int inj_ipid_set(void *data, u64 val)
>
> DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
>
> -static void setup_inj_struct(struct mce *m)
> +static void setup_inj_struct(void)
> {
> - memset(m, 0, sizeof(struct mce));
> + struct mce *m = &inj_desc.m;
> +
> + memset(&inj_desc, 0, sizeof(struct inject_desc));
>
> m->cpuvendor = boot_cpu_data.x86_vendor;
> m->time = ktime_get_real_seconds();
> @@ -470,84 +476,78 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
> __func__, PCI_FUNC(F3->devfn), NBCFG);
> }
>
> -struct mce_err_handler {
> - struct mce *mce;
> - int err;
> -};
> -
> -static struct mce_err_handler mce_err;
> -
> -static bool prepare_mca_status(struct mce *m)
> +static bool prepare_mca_status(void)
> {
> - u32 status_reg = mca_msr_reg(m->bank, MCA_STATUS);
> - u64 status_val = m->status;
> + u32 status_reg = mca_msr_reg(inj_desc.m.bank, MCA_STATUS);
> + u64 status_val = inj_desc.m.status;
>
> wrmsrl(status_reg, status_val);
> rdmsrl(status_reg, status_val);
>
> - return status_val;
> + return status_val == inj_desc.m.status;
> }
>
> -static void prepare_msrs(void *info)
> +static void prepare_msrs(void *unused)
> {
> - struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
> - struct mce m = *i_mce_err->mce;
> - u8 b = m.bank;
> + struct mce *m = &inj_desc.m;
> + u8 b = inj_desc.m.bank;
>
> - if (!prepare_mca_status(&m)) {
> + if (!prepare_mca_status()) {
> pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
> - i_mce_err->err = -EINVAL;
> + inj_desc.err = -EINVAL;
> + hw_injection_possible = false;
> return;
> }
>
> - wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
> + wrmsrl(MSR_IA32_MCG_STATUS, m->mcgstatus);
>
> if (boot_cpu_has(X86_FEATURE_SMCA)) {
> - if (m.inject_flags == DFR_INT_INJ) {
> - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
> - wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
> + if (m->inject_flags == DFR_INT_INJ) {
> + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m->status);
> + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m->addr);
> } else {
> - wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
> - wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
> + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m->status);
> + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m->addr);
> }
>
> - wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
> - wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
> + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m->misc);
> + wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m->synd);
> } else {
> - wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
> - wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
> - wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
> + wrmsrl(MSR_IA32_MCx_STATUS(b), m->status);
> + wrmsrl(MSR_IA32_MCx_ADDR(b), m->addr);
> + wrmsrl(MSR_IA32_MCx_MISC(b), m->misc);
> }
> }
>
> -static void do_inject(void)
> +static int do_inject(void)
> {
> + struct mce *m = &inj_desc.m;
> + unsigned int cpu = m->extcpu;
> u64 mcg_status = 0;
> - unsigned int cpu = i_mce.extcpu;
> - u8 b = i_mce.bank;
> + u8 b = m->bank;
>
> - mce_err.mce = &i_mce;
> - mce_err.err = 0;
> + m->tsc = rdtsc_ordered();
>
> - i_mce.tsc = rdtsc_ordered();
> + m->status |= MCI_STATUS_VAL;
>
> - i_mce.status |= MCI_STATUS_VAL;
> + if (m->misc)
> + m->status |= MCI_STATUS_MISCV;
>
> - if (i_mce.misc)
> - i_mce.status |= MCI_STATUS_MISCV;
> -
> - if (i_mce.synd)
> - i_mce.status |= MCI_STATUS_SYNDV;
> + if (m->synd)
> + m->status |= MCI_STATUS_SYNDV;
>
> if (inj_type == SW_INJ) {
> - mce_log(&i_mce);
> - return;
> + mce_log(m);
> + return 0;
> }
>
> + if (!hw_injection_possible)
> + return -EINVAL;
> +
> /* prep MCE global settings for the injection */
> mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
>
> - if (!(i_mce.status & MCI_STATUS_PCC))
> + if (!(m->status & MCI_STATUS_PCC))
> mcg_status |= MCG_STATUS_RIPV;
>
> /*
> @@ -556,8 +556,8 @@ static void do_inject(void)
> * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
> */
> if (inj_type == DFR_INT_INJ) {
> - i_mce.status |= MCI_STATUS_DEFERRED;
> - i_mce.status &= ~MCI_STATUS_UC;
> + m->status |= MCI_STATUS_DEFERRED;
> + m->status &= ~MCI_STATUS_UC;
> }
>
> /*
> @@ -578,13 +578,13 @@ static void do_inject(void)
>
> toggle_hw_mce_inject(cpu, true);
>
> - i_mce.mcgstatus = mcg_status;
> - i_mce.inject_flags = inj_type;
> - smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);
> + m->mcgstatus = mcg_status;
> + m->inject_flags = inj_type;
> + smp_call_function_single(cpu, prepare_msrs, NULL, 0);
>
> toggle_hw_mce_inject(cpu, false);
>
> - if (mce_err.err)
> + if (inj_desc.err)
> goto err;
>
> switch (inj_type) {
> @@ -601,6 +601,7 @@ static void do_inject(void)
> err:
> cpus_read_unlock();
>
> + return inj_desc.err;
> }
>
> /*
> @@ -611,6 +612,7 @@ static int inj_bank_set(void *data, u64 val)
> {
> struct mce *m = (struct mce *)data;
> u8 n_banks;
> + int err;
> u64 cap;
>
> /* Get bank count on target CPU so we can handle non-uniform values. */
> @@ -650,12 +652,12 @@ static int inj_bank_set(void *data, u64 val)
> }
>
> inject:
> - do_inject();
> + err = do_inject();
>
> /* Reset injection struct */
> - setup_inj_struct(&i_mce);
> + setup_inj_struct();
>
> - return mce_err.err;
> + return err;
> }
>
> MCE_INJECT_GET(bank);
> @@ -745,7 +747,7 @@ static void __init debugfs_init(void)
>
> for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
> debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
> - &i_mce, dfs_fls[i].fops);
> + &inj_desc.m, dfs_fls[i].fops);
> }
>
> static int __init inject_init(void)
> @@ -758,7 +760,7 @@ static int __init inject_init(void)
> register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
> mce_register_injector_chain(&inject_nb);
>
> - setup_inj_struct(&i_mce);
> + setup_inj_struct();
>
> pr_info("Machine check injector initialized\n");
>
> diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> index 4ae0e603f7fa..7e03f5b7f6bd 100644
> --- a/arch/x86/kernel/cpu/mce/internal.h
> +++ b/arch/x86/kernel/cpu/mce/internal.h
> @@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
>
> static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
> {
> - if (mce_flags.smca) {
> + if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
Should this change be part of this patch series? Also, why mce_flags.smca
check won't work?

Thanks,
Smita
> switch (reg) {
> case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
> case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
>

2022-04-14 13:57:26

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On Wed, Apr 13, 2022 at 12:16:26PM -0700, Smita Koralahalli wrote:
> > diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> > index 4ae0e603f7fa..7e03f5b7f6bd 100644
> > --- a/arch/x86/kernel/cpu/mce/internal.h
> > +++ b/arch/x86/kernel/cpu/mce/internal.h
> > @@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
> > static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
> > {
> > - if (mce_flags.smca) {
> > + if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
> Should this change be part of this patch series? Also, why mce_flags.smca
> check won't work?

Because you'd need to export it to modules and mca_flags is not
something I'd want to export to anything.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-04-20 01:38:36

by Smita Koralahalli

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On 4/6/2022 9:08 AM, Borislav Petkov wrote:
> On Mon, Feb 14, 2022 at 05:36:39PM -0600, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
>> index 5fbd7ffb3233..43ba63b7dc73 100644
>> --- a/arch/x86/kernel/cpu/mce/inject.c
>> +++ b/arch/x86/kernel/cpu/mce/inject.c
>> @@ -470,11 +470,36 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
>> __func__, PCI_FUNC(F3->devfn), NBCFG);
>> }
>>
>> +struct mce_err_handler {
>> + struct mce *mce;
>> + int err;
>> +};
> Well, we already have a struct mce i_mce which serves as a sort-of
> global container for injection data which gets dumped into the hardware
> upon injection time.
>
> Now you're adding another struct which contains additional info and are
> adding a pointer to that i_mce.
>
> But this is not a clean design - when you do stuff like that you need to
> unify all those global-info-containing structs and make the code dealing
> with them straight-forward . I.e., if you don't look at the big picture
> of a design, it soon grows into an unwieldy mess which some poor sod
> would need to clean up afterwards and that poor sod is in most cases the
> maintainer.
>
> So I went and did that ontop of your patch, this is one possible way to
> do it but it.
>
> Here it is, a combined diff ontop of tip:ras/core. Please split it into
> patches: first patch converts to the new descriptor and then a second
> one adds the MCA_STATUS checking bits.
>
> There are a couple of other changes in there, if they're not clear, feel
> free to ask.
>
> Thx.
>
> ---
> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
> index 43ba63b7dc73..0fd1eea2f754 100644
> --- a/arch/x86/kernel/cpu/mce/inject.c
> +++ b/arch/x86/kernel/cpu/mce/inject.c
> @@ -33,10 +33,14 @@
>
> #include "internal.h"
>
> -/*
> - * Collect all the MCi_XXX settings
> - */
> -static struct mce i_mce;
> +static bool hw_injection_possible = true;
> +
> +/* Collect all the MCi_XXX settings */
> +static struct inject_desc {
> + struct mce m;
> + int err;
> +} inj_desc;
> +
> static struct dentry *dfs_inj;
>
> #define MAX_FLAG_OPT_SIZE 4
> @@ -110,9 +114,11 @@ static int inj_ipid_set(void *data, u64 val)
>
> DEFINE_SIMPLE_ATTRIBUTE(ipid_fops, inj_ipid_get, inj_ipid_set, "%llx\n");
>
> -static void setup_inj_struct(struct mce *m)
> +static void setup_inj_struct(void)
> {
> - memset(m, 0, sizeof(struct mce));
> + struct mce *m = &inj_desc.m;
> +
> + memset(&inj_desc, 0, sizeof(struct inject_desc));
>
> m->cpuvendor = boot_cpu_data.x86_vendor;
> m->time = ktime_get_real_seconds();
> @@ -470,84 +476,78 @@ static void toggle_nb_mca_mst_cpu(u16 nid)
> __func__, PCI_FUNC(F3->devfn), NBCFG);
> }
>
> -struct mce_err_handler {
> - struct mce *mce;
> - int err;
> -};
> -
> -static struct mce_err_handler mce_err;
> -
> -static bool prepare_mca_status(struct mce *m)
> +static bool prepare_mca_status(void)
> {
> - u32 status_reg = mca_msr_reg(m->bank, MCA_STATUS);
> - u64 status_val = m->status;
> + u32 status_reg = mca_msr_reg(inj_desc.m.bank, MCA_STATUS);
> + u64 status_val = inj_desc.m.status;
>
> wrmsrl(status_reg, status_val);
> rdmsrl(status_reg, status_val);
>
> - return status_val;
> + return status_val == inj_desc.m.status;
> }
>
> -static void prepare_msrs(void *info)
> +static void prepare_msrs(void *unused)
> {
> - struct mce_err_handler *i_mce_err = ((struct mce_err_handler *)info);
> - struct mce m = *i_mce_err->mce;
> - u8 b = m.bank;
> + struct mce *m = &inj_desc.m;
> + u8 b = inj_desc.m.bank;
>
> - if (!prepare_mca_status(&m)) {
> + if (!prepare_mca_status()) {
> pr_err("Platform does not allow error injection, try using APEI EINJ instead.\n");
> - i_mce_err->err = -EINVAL;
> + inj_desc.err = -EINVAL;
> + hw_injection_possible = false;
> return;
> }
>
> - wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
> + wrmsrl(MSR_IA32_MCG_STATUS, m->mcgstatus);
>
> if (boot_cpu_has(X86_FEATURE_SMCA)) {
> - if (m.inject_flags == DFR_INT_INJ) {
> - wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
> - wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
> + if (m->inject_flags == DFR_INT_INJ) {
> + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m->status);
> + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m->addr);
> } else {
> - wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
> - wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
> + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m->status);
> + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m->addr);
> }
>
> - wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
> - wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
> + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m->misc);
> + wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m->synd);
> } else {
> - wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
> - wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
> - wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
> + wrmsrl(MSR_IA32_MCx_STATUS(b), m->status);
> + wrmsrl(MSR_IA32_MCx_ADDR(b), m->addr);
> + wrmsrl(MSR_IA32_MCx_MISC(b), m->misc);
> }
> }
>
> -static void do_inject(void)
> +static int do_inject(void)
> {
> + struct mce *m = &inj_desc.m;
> + unsigned int cpu = m->extcpu;
> u64 mcg_status = 0;
> - unsigned int cpu = i_mce.extcpu;
> - u8 b = i_mce.bank;
> + u8 b = m->bank;
>
> - mce_err.mce = &i_mce;
> - mce_err.err = 0;
> + m->tsc = rdtsc_ordered();
>
> - i_mce.tsc = rdtsc_ordered();
> + m->status |= MCI_STATUS_VAL;
>
> - i_mce.status |= MCI_STATUS_VAL;
> + if (m->misc)
> + m->status |= MCI_STATUS_MISCV;
>
> - if (i_mce.misc)
> - i_mce.status |= MCI_STATUS_MISCV;
> -
> - if (i_mce.synd)
> - i_mce.status |= MCI_STATUS_SYNDV;
> + if (m->synd)
> + m->status |= MCI_STATUS_SYNDV;
>
> if (inj_type == SW_INJ) {
> - mce_log(&i_mce);
> - return;
> + mce_log(m);
> + return 0;
> }
>
> + if (!hw_injection_possible)
> + return -EINVAL;

Why are we checking this here? This flag (hw_injection_possible)
is set to false inside prepare_msrs() called from
smp_call_function_single().
Should this check be done after the call to smp_call_function_single()?
Also, we already have inj_desc.err which returns error code to userspace
when WRIG in status registers. Why is this flag needed?

Thanks,
Smita

> +
> /* prep MCE global settings for the injection */
> mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
>
> - if (!(i_mce.status & MCI_STATUS_PCC))
> + if (!(m->status & MCI_STATUS_PCC))
> mcg_status |= MCG_STATUS_RIPV;
>
> /*
> @@ -556,8 +556,8 @@ static void do_inject(void)
> * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
> */
> if (inj_type == DFR_INT_INJ) {
> - i_mce.status |= MCI_STATUS_DEFERRED;
> - i_mce.status &= ~MCI_STATUS_UC;
> + m->status |= MCI_STATUS_DEFERRED;
> + m->status &= ~MCI_STATUS_UC;
> }
>
> /*
> @@ -578,13 +578,13 @@ static void do_inject(void)
>
> toggle_hw_mce_inject(cpu, true);
>
> - i_mce.mcgstatus = mcg_status;
> - i_mce.inject_flags = inj_type;
> - smp_call_function_single(cpu, prepare_msrs, &mce_err, 0);
> + m->mcgstatus = mcg_status;
> + m->inject_flags = inj_type;
> + smp_call_function_single(cpu, prepare_msrs, NULL, 0);
>
> toggle_hw_mce_inject(cpu, false);
>
> - if (mce_err.err)
> + if (inj_desc.err)
> goto err;
>
> switch (inj_type) {
> @@ -601,6 +601,7 @@ static void do_inject(void)
> err:
> cpus_read_unlock();
>
> + return inj_desc.err;
> }
>
> /*
> @@ -611,6 +612,7 @@ static int inj_bank_set(void *data, u64 val)
> {
> struct mce *m = (struct mce *)data;
> u8 n_banks;
> + int err;
> u64 cap;
>
> /* Get bank count on target CPU so we can handle non-uniform values. */
> @@ -650,12 +652,12 @@ static int inj_bank_set(void *data, u64 val)
> }
>
> inject:
> - do_inject();
> + err = do_inject();
>
> /* Reset injection struct */
> - setup_inj_struct(&i_mce);
> + setup_inj_struct();
>
> - return mce_err.err;
> + return err;
> }
>
> MCE_INJECT_GET(bank);
> @@ -745,7 +747,7 @@ static void __init debugfs_init(void)
>
> for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
> debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj,
> - &i_mce, dfs_fls[i].fops);
> + &inj_desc.m, dfs_fls[i].fops);
> }
>
> static int __init inject_init(void)
> @@ -758,7 +760,7 @@ static int __init inject_init(void)
> register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
> mce_register_injector_chain(&inject_nb);
>
> - setup_inj_struct(&i_mce);
> + setup_inj_struct();
>
> pr_info("Machine check injector initialized\n");
>
> diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> index 4ae0e603f7fa..7e03f5b7f6bd 100644
> --- a/arch/x86/kernel/cpu/mce/internal.h
> +++ b/arch/x86/kernel/cpu/mce/internal.h
> @@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);
>
> static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
> {
> - if (mce_flags.smca) {
> + if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
> switch (reg) {
> case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
> case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);
>

2022-04-22 13:54:40

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On Mon, Apr 18, 2022 at 08:24:35PM -0700, Smita Koralahalli wrote:
> Why are we checking this here? This flag (hw_injection_possible)
> is set to false inside prepare_msrs() called from
> smp_call_function_single().
> Should this check be done after the call to smp_call_function_single()?

Why would you do that then?

Is any of the code after

if (inj_type == SW_INJ) {
mce_log(m);
return 0;
}

worth running if hardware injection is not possible?

> Also, we already have inj_desc.err which returns error code to userspace
> when WRIG in status registers. Why is this flag needed?

To not do unnecessary work when you *know* hardware injection won't
work.

:-)

Btw, please trim your mails when you reply, just like I did.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-04-22 22:10:06

by Smita Koralahalli

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On 4/20/2022 2:17 AM, Borislav Petkov wrote:
> On Mon, Apr 18, 2022 at 08:24:35PM -0700, Smita Koralahalli wrote:
>> Why are we checking this here? This flag (hw_injection_possible)
>> is set to false inside prepare_msrs() called from
>> smp_call_function_single().
>> Should this check be done after the call to smp_call_function_single()?
> Why would you do that then?
>
> Is any of the code after
>
> if (inj_type == SW_INJ) {
> mce_log(m);
> return 0;
> }
>
> worth running if hardware injection is not possible?

Okay I got the gist of this now. This will be mainly useful for subsequent
hardware error injections.

Also, should we move this slightly before? In inj_bank_set() after we check
for sw injection and before reading IPID value?

>
>> Also, we already have inj_desc.err which returns error code to userspace
>> when WRIG in status registers. Why is this flag needed?
> To not do unnecessary work when you *know* hardware injection won't
> work.
>
> :-)

Okay.

>
> Btw, please trim your mails when you reply, just like I did.

I'm sorry. Noted for my next replies!

Thanks,
Smita
>
> Thx.
>

2022-04-25 08:33:17

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On Thu, Apr 21, 2022 at 12:10:07PM -0700, Smita Koralahalli wrote:
> Also, should we move this slightly before? In inj_bank_set() after we check
> for sw injection and before reading IPID value?

If anything, the proper place for this would be to do the check in
flags_write() where you set the injection type and bail out if one of
the !sw types is chosen.

However, you must do the prepare_mca_status() dance first in order to do
the check.

Which means, you'd have to poke at the STATUS MSR of some bank and
carefully restore it to its original value so that you leave no changes
after the check. And I thought about it but it sounded kinda yucky, thus
the setting of hw_injection_possible at injection time.

That doesn't mean you can't check that variable in flags_write() *after*
the first injection has happened and it has been set properly, but the
first injection needs to get attempted first.

At least this is my idea, maybe you have a better one...

Btw, we'd need some error messaging when the hw injection fails:

---
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 0fd1eea2f754..5ea1d603b124 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -345,6 +345,9 @@ static int __set_inj(const char *buf)

for (i = 0; i < N_INJ_TYPES; i++) {
if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ if (i > SW_INJ && !hw_injection_possible)
+ continue;
+
inj_type = i;
return 0;
}
@@ -382,7 +385,11 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,

err = __set_inj(__buf);
if (err) {
- pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
+ pr_err("%s: Invalid flags value%s: %s\n", __func__,
+ (!hw_injection_possible
+ ? " (SW-only injection possible on this platform)"
+ : ""),
+ __buf);
return err;
}

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-05-03 03:29:09

by Smita Koralahalli

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On 4/24/2022 3:32 PM, Borislav Petkov wrote:
> On Thu, Apr 21, 2022 at 12:10:07PM -0700, Smita Koralahalli wrote:
>> Also, should we move this slightly before? In inj_bank_set() after we check
>> for sw injection and before reading IPID value?
> If anything, the proper place for this would be to do the check in
> flags_write() where you set the injection type and bail out if one of
> the !sw types is chosen.
>
> However, you must do the prepare_mca_status() dance first in order to do
> the check.
>
> Which means, you'd have to poke at the STATUS MSR of some bank and
> carefully restore it to its original value so that you leave no changes
> after the check. And I thought about it but it sounded kinda yucky, thus
> the setting of hw_injection_possible at injection time.
>
> That doesn't mean you can't check that variable in flags_write() *after*
> the first injection has happened and it has been set properly, but the
> first injection needs to get attempted first.
>
> At least this is my idea, maybe you have a better one...

I'm bit more inclined towards your previous approach of
hw_injection_possible
check in do_inject(). This seems better than doing it in flags_write().

Suppose for example, the user just writes different bank numbers for
subsequent
error injections and does not modify any other fields including flags,
then this
method might not be helpful. So I think keeping it in inj_bank_set() or
do_inject() is more reasonable.

I was kind of figuring out the best location to place the check inside
inj_bank_set()
and doing it before IPID check seemed the better one. But it doesn't make
much of a difference though.. So just keeping it where you suggested before.

Thanks,
Smita
>
> Btw, we'd need some error messaging when the hw injection fails:
>
> ---
> diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
> index 0fd1eea2f754..5ea1d603b124 100644
> --- a/arch/x86/kernel/cpu/mce/inject.c
> +++ b/arch/x86/kernel/cpu/mce/inject.c
> @@ -345,6 +345,9 @@ static int __set_inj(const char *buf)
>
> for (i = 0; i < N_INJ_TYPES; i++) {
> if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
> + if (i > SW_INJ && !hw_injection_possible)
> + continue;
> +
> inj_type = i;
> return 0;
> }
> @@ -382,7 +385,11 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf,
>
> err = __set_inj(__buf);
> if (err) {
> - pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
> + pr_err("%s: Invalid flags value%s: %s\n", __func__,
> + (!hw_injection_possible
> + ? " (SW-only injection possible on this platform)"
> + : ""),
> + __buf);
> return err;
> }
>


2022-05-04 08:51:38

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On Mon, May 02, 2022 at 08:28:47PM -0700, Smita Koralahalli wrote:
> I'm bit more inclined towards your previous approach of
> hw_injection_possible
> check in do_inject(). This seems better than doing it in flags_write().

If you don't do it in flags_write() then the user would do

echo "hw" > flags

the command will succeed and the user will think that hw injection is
possible and then wonder why it fails later.

I even actually think that in the first run, when hw_injection_possible
is not determined yet, you should try to poke at MCi_STATUS of some
non-reserved bank - and we enumerate which those are at boot in
__mcheck_cpu_check_banks(), so you can pick a random, non-RAZ bank, save
its MCi_STATUS, try to write it and if it succeeds, restore it.

This way you'll determine whether hw injection is possible, store it
in the static hw_injection_possible and then query only that variable.
I.e., you'll have to poke that MCi_STATUS only once on driver init. And
this way it'll be the most optimal, methinks.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-05-07 02:53:26

by Smita Koralahalli

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] x86/mce: Check for writes ignored in MCA_STATUS register

On 5/3/2022 2:14 PM, Borislav Petkov wrote:
> On Mon, May 02, 2022 at 08:28:47PM -0700, Smita Koralahalli wrote:
>> I'm bit more inclined towards your previous approach of
>> hw_injection_possible
>> check in do_inject(). This seems better than doing it in flags_write().
> If you don't do it in flags_write() then the user would do
>
> echo "hw" > flags
>
> the command will succeed and the user will think that hw injection is
> possible and then wonder why it fails later.
That's right!
>
> I even actually think that in the first run, when hw_injection_possible
> is not determined yet, you should try to poke at MCi_STATUS of some
> non-reserved bank - and we enumerate which those are at boot in
> __mcheck_cpu_check_banks(), so you can pick a random, non-RAZ bank, save
> its MCi_STATUS, try to write it and if it succeeds, restore it.
>
> This way you'll determine whether hw injection is possible, store it
> in the static hw_injection_possible and then query only that variable.
> I.e., you'll have to poke that MCi_STATUS only once on driver init.

Okay I agree too. I will work on this.

Thanks,
Smita
> And
> this way it'll be the most optimal, methinks.
>
> Thx.
>


Subject: [tip: ras/core] x86/mce: Check whether writes to MCA_STATUS are getting ignored

The following commit has been merged into the ras/core branch of tip:

Commit-ID: 891e465a1bd8798d5f97c3afb99393f123817fef
Gitweb: https://git.kernel.org/tip/891e465a1bd8798d5f97c3afb99393f123817fef
Author: Smita Koralahalli <[email protected]>
AuthorDate: Mon, 27 Jun 2022 20:56:46
Committer: Borislav Petkov <[email protected]>
CommitterDate: Tue, 28 Jun 2022 12:08:10 +02:00

x86/mce: Check whether writes to MCA_STATUS are getting ignored

The platform can sometimes - depending on its settings - cause writes
to MCA_STATUS MSRs to get ignored, regardless of HWCR[McStatusWrEn]'s
value.

For further info see

PPR for AMD Family 19h, Model 01h, Revision B1 Processors, doc ID 55898

at https://bugzilla.kernel.org/show_bug.cgi?id=206537.

Therefore, probe for ignored writes to MCA_STATUS to determine if hardware
error injection is at all possible.

[ bp: Heavily massage commit message and patch. ]

Signed-off-by: Smita Koralahalli <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/kernel/cpu/mce/inject.c | 47 +++++++++++++++++++++++++++++-
arch/x86/kernel/cpu/mce/internal.h | 2 +-
2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 5fbd7ff..12cf2e7 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -33,6 +33,8 @@

#include "internal.h"

+static bool hw_injection_possible = true;
+
/*
* Collect all the MCi_XXX settings
*/
@@ -339,6 +341,8 @@ static int __set_inj(const char *buf)

for (i = 0; i < N_INJ_TYPES; i++) {
if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
+ if (i > SW_INJ && !hw_injection_possible)
+ continue;
inj_type = i;
return 0;
}
@@ -717,11 +721,54 @@ static void __init debugfs_init(void)
&i_mce, dfs_fls[i].fops);
}

+static void check_hw_inj_possible(void)
+{
+ int cpu;
+ u8 bank;
+
+ /*
+ * This behavior exists only on SMCA systems though its not directly
+ * related to SMCA.
+ */
+ if (!cpu_feature_enabled(X86_FEATURE_SMCA))
+ return;
+
+ cpu = get_cpu();
+
+ for (bank = 0; bank < MAX_NR_BANKS; ++bank) {
+ u64 status = MCI_STATUS_VAL, ipid;
+
+ /* Check whether bank is populated */
+ rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), ipid);
+ if (!ipid)
+ continue;
+
+ toggle_hw_mce_inject(cpu, true);
+
+ wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
+ rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
+
+ if (!status) {
+ hw_injection_possible = false;
+ pr_warn("Platform does not allow *hardware* error injection."
+ "Try using APEI EINJ instead.\n");
+ }
+
+ toggle_hw_mce_inject(cpu, false);
+
+ break;
+ }
+
+ put_cpu();
+}
+
static int __init inject_init(void)
{
if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
return -ENOMEM;

+ check_hw_inj_possible();
+
debugfs_init();

register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 4ae0e60..7e03f5b 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -211,7 +211,7 @@ noinstr u64 mce_rdmsrl(u32 msr);

static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
{
- if (mce_flags.smca) {
+ if (cpu_feature_enabled(X86_FEATURE_SMCA)) {
switch (reg) {
case MCA_CTL: return MSR_AMD64_SMCA_MCx_CTL(bank);
case MCA_ADDR: return MSR_AMD64_SMCA_MCx_ADDR(bank);