Hi all,
This set is the next revision of the following patches:
https://lkml.kernel.org/r/[email protected]
https://lkml.kernel.org/r/[email protected]
Patch 1 does the function renaming that Boris suggested.
Patches 2 and 3 are mostly unchanged except for rebasing on patch 1.
Thanks,
Yazen
Yazen Ghannam (3):
x86/mce: Rename mce_setup() to mce_prep_record()
x86/mce: Define mce_prep_record() helpers for common and per-CPU
fields
x86/mce: Use mce_prep_record() helpers for
apei_smca_report_x86_error()
arch/x86/include/asm/mce.h | 2 +-
arch/x86/kernel/cpu/mce/amd.c | 2 +-
arch/x86/kernel/cpu/mce/apei.c | 19 +++++++--------
arch/x86/kernel/cpu/mce/core.c | 38 ++++++++++++++++++++----------
arch/x86/kernel/cpu/mce/internal.h | 2 ++
5 files changed, 37 insertions(+), 26 deletions(-)
base-commit: 108c6494bdf1dfeaefc0a506e2f471aa92fafdd6
--
2.34.1
There is no MCE "setup" done in mce_setup(). Rather this function
initializes and prepares an MCE record.
Rename the function to highlight what it does.
No functional change is intended.
Suggested-by: Borislav Petkov <[email protected]>
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/include/asm/mce.h | 2 +-
arch/x86/kernel/cpu/mce/amd.c | 2 +-
arch/x86/kernel/cpu/mce/apei.c | 4 ++--
arch/x86/kernel/cpu/mce/core.c | 6 +++---
4 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index de3118305838..c3722b84d9cf 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -219,7 +219,7 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-void mce_setup(struct mce *m);
+void mce_prep_record(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 9a0133ef7e20..14bf8c232e45 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -780,7 +780,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
{
struct mce m;
- mce_setup(&m);
+ mce_prep_record(&m);
m.status = status;
m.misc = misc;
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 7f7309ff67d0..8f509c8a4e98 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -44,7 +44,7 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
else
lsb = PAGE_SHIFT;
- mce_setup(&m);
+ mce_prep_record(&m);
m.bank = -1;
/* Fake a memory read error with unknown channel */
m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
@@ -97,7 +97,7 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (ctx_info->reg_arr_size < 48)
return -EINVAL;
- mce_setup(&m);
+ mce_prep_record(&m);
m.extcpu = -1;
m.socketid = -1;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5cc557cfc37..9dfa80084054 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -118,7 +118,7 @@ static struct irq_work mce_irq_work;
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */
-void mce_setup(struct mce *m)
+void mce_prep_record(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
@@ -436,11 +436,11 @@ static noinstr void mce_wrmsrl(u32 msr, u64 v)
static noinstr void mce_gather_info(struct mce *m, struct pt_regs *regs)
{
/*
- * Enable instrumentation around mce_setup() which calls external
+ * Enable instrumentation around mce_prep_record() which calls external
* facilities.
*/
instrumentation_begin();
- mce_setup(m);
+ mce_prep_record(m);
instrumentation_end();
m->mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
--
2.34.1
Generally, MCA information for an error is gathered on the CPU that
reported the error. In this case, CPU-specific information from the
running CPU will be correct.
However, this will be incorrect if the MCA information is gathered while
running on a CPU that didn't report the error. One example is creating
an MCA record using mce_prep_record() for errors reported from ACPI.
Split mce_prep_record() so that there is a helper function to gather
common, i.e. not CPU-specific, information and another helper for
CPU-specific information.
Leave mce_prep_record() defined as-is for the common case when running
on the reporting CPU.
Get MCG_CAP in the global helper even though the register is per-CPU.
This value is not already cached per-CPU like other values. And it does
not assist with any per-CPU decoding or handling.
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/kernel/cpu/mce/core.c | 34 ++++++++++++++++++++----------
arch/x86/kernel/cpu/mce/internal.h | 2 ++
2 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 9dfa80084054..dacca82d45de 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -117,20 +117,32 @@ static struct irq_work mce_irq_work;
*/
BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
-/* Do initial initialization of a struct mce */
-void mce_prep_record(struct mce *m)
+void mce_prep_record_common(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
- m->cpu = m->extcpu = smp_processor_id();
+
+ m->cpuid = cpuid_eax(1);
+ m->cpuvendor = boot_cpu_data.x86_vendor;
+ m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
/* need the internal __ version to avoid deadlocks */
- m->time = __ktime_get_real_seconds();
- m->cpuvendor = boot_cpu_data.x86_vendor;
- m->cpuid = cpuid_eax(1);
- m->socketid = cpu_data(m->extcpu).topo.pkg_id;
- m->apicid = cpu_data(m->extcpu).topo.initial_apicid;
- m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP);
- m->ppin = cpu_data(m->extcpu).ppin;
- m->microcode = boot_cpu_data.microcode;
+ m->time = __ktime_get_real_seconds();
+}
+
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m)
+{
+ m->cpu = cpu;
+ m->extcpu = cpu;
+ m->apicid = cpu_data(m->extcpu).topo.initial_apicid;
+ m->microcode = cpu_data(m->extcpu).microcode;
+ m->ppin = cpu_data(m->extcpu).ppin;
+ m->socketid = cpu_data(m->extcpu).topo.pkg_id;
+}
+
+/* Do initial initialization of a struct mce */
+void mce_prep_record(struct mce *m)
+{
+ mce_prep_record_common(m);
+ mce_prep_record_per_cpu(smp_processor_id(), m);
}
DEFINE_PER_CPU(struct mce, injectm);
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 01f8f03969e6..43c7f3b71df5 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -261,6 +261,8 @@ enum mca_msr {
/* Decide whether to add MCE record to MCE event pool or filter it out. */
extern bool filter_mce(struct mce *m);
+void mce_prep_record_common(struct mce *m);
+void mce_prep_record_per_cpu(unsigned int cpu, struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
--
2.34.1
Current AMD systems can report MCA errors using the ACPI Boot Error
Record Table (BERT). The BERT entries for MCA errors will be an x86
Common Platform Error Record (CPER) with an MSR register context that
matches the MCAX/SMCA register space.
However, the BERT will not necessarily be processed on the CPU that
reported the MCA errors. Therefore, the correct CPU number needs to be
determined and the information saved in struct mce.
The CPU number is determined by searching all possible CPUs for a Local
APIC ID matching the value in the x86 CPER.
Use the newly defined mce_prep_record_*() helpers to get the correct
data.
Signed-off-by: Yazen Ghannam <[email protected]>
---
arch/x86/kernel/cpu/mce/apei.c | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 8f509c8a4e98..0cbadfaf2400 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -97,20 +97,17 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
if (ctx_info->reg_arr_size < 48)
return -EINVAL;
- mce_prep_record(&m);
-
- m.extcpu = -1;
- m.socketid = -1;
-
for_each_possible_cpu(cpu) {
- if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
- m.extcpu = cpu;
- m.socketid = cpu_data(m.extcpu).topo.pkg_id;
+ if (cpu_data(cpu).topo.initial_apicid == lapic_id)
break;
- }
}
- m.apicid = lapic_id;
+ if (!cpu_possible(cpu))
+ return -EINVAL;
+
+ mce_prep_record_common(&m);
+ mce_prep_record_per_cpu(cpu, &m);
+
m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
m.status = *i_mce;
m.addr = *(i_mce + 1);
--
2.34.1
On Tue, May 21, 2024 at 07:54:34AM -0500, Yazen Ghannam wrote:
> Current AMD systems can report MCA errors using the ACPI Boot Error
> Record Table (BERT). The BERT entries for MCA errors will be an x86
> Common Platform Error Record (CPER) with an MSR register context that
> matches the MCAX/SMCA register space.
>
> However, the BERT will not necessarily be processed on the CPU that
> reported the MCA errors. Therefore, the correct CPU number needs to be
> determined and the information saved in struct mce.
>
> The CPU number is determined by searching all possible CPUs for a Local
> APIC ID matching the value in the x86 CPER.
You're explaining the code again. :)
> for_each_possible_cpu(cpu) {
> - if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
> - m.extcpu = cpu;
> - m.socketid = cpu_data(m.extcpu).topo.pkg_id;
> + if (cpu_data(cpu).topo.initial_apicid == lapic_id)
> break;
> - }
> }
>
> - m.apicid = lapic_id;
> + if (!cpu_possible(cpu))
> + return -EINVAL;
What's that test for? You just iterated over the possible CPUs using
"cpu" as the iterator there...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On 5/29/24 1:28 PM, Borislav Petkov wrote:
> On Tue, May 21, 2024 at 07:54:34AM -0500, Yazen Ghannam wrote:
>> Current AMD systems can report MCA errors using the ACPI Boot Error
>> Record Table (BERT). The BERT entries for MCA errors will be an x86
>> Common Platform Error Record (CPER) with an MSR register context that
>> matches the MCAX/SMCA register space.
>>
>> However, the BERT will not necessarily be processed on the CPU that
>> reported the MCA errors. Therefore, the correct CPU number needs to be
>> determined and the information saved in struct mce.
>>
>> The CPU number is determined by searching all possible CPUs for a Local
>> APIC ID matching the value in the x86 CPER.
>
> You're explaining the code again. :)
>
One day I'll break this habit. Thanks again for the reminder. :)
>> for_each_possible_cpu(cpu) {
>> - if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
>> - m.extcpu = cpu;
>> - m.socketid = cpu_data(m.extcpu).topo.pkg_id;
>> + if (cpu_data(cpu).topo.initial_apicid == lapic_id)
>> break;
>> - }
>> }
>>
>> - m.apicid = lapic_id;
>> + if (!cpu_possible(cpu))
>> + return -EINVAL;
>
> What's that test for? You just iterated over the possible CPUs using
> "cpu" as the iterator there...
>
This is to catch the case where there was no break from the loop.
If there is no match during the iteration, then "cpu" will be equal to
nr_cpu_ids.
I wanted to use a helper that goes with the iterator rather than
checking against nr_cpu_ids directly.
Thanks,
Yazen
On Mon, Jun 03, 2024 at 10:34:10AM -0400, Yazen Ghannam wrote:
> One day I'll break this habit. Thanks again for the reminder. :)
Sure, np. :-)
> >> for_each_possible_cpu(cpu) {
> >> - if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
> >> - m.extcpu = cpu;
> >> - m.socketid = cpu_data(m.extcpu).topo.pkg_id;
> >> + if (cpu_data(cpu).topo.initial_apicid == lapic_id)
> >> break;
> >> - }
> >> }
> >>
> >> - m.apicid = lapic_id;
> >> + if (!cpu_possible(cpu))
> >> + return -EINVAL;
> >
> > What's that test for? You just iterated over the possible CPUs using
> > "cpu" as the iterator there...
> >
>
> This is to catch the case where there was no break from the loop.
Whether the CPU is possible != whether there was an apicid match.
Here's how you do that and I'd let you figure out why yours doesn't
always work:
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 0cbadfaf2400..3885fe05f01e 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
{
const u64 *i_mce = ((const u64 *) (ctx_info + 1));
+ bool apicid_found = false;
unsigned int cpu;
struct mce m;
@@ -98,11 +99,13 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
return -EINVAL;
for_each_possible_cpu(cpu) {
- if (cpu_data(cpu).topo.initial_apicid == lapic_id)
+ if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
+ apicid_found = true;
break;
+ }
}
- if (!cpu_possible(cpu))
+ if (!apicid_found)
return -EINVAL;
mce_prep_record_common(&m);
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Jun 03, 2024 at 06:55:30PM +0200, Borislav Petkov wrote:
> On Mon, Jun 03, 2024 at 10:34:10AM -0400, Yazen Ghannam wrote:
[...]
> > This is to catch the case where there was no break from the loop.
>
> If the CPU is possible != whether there was a apicid match.
>
> Here's how you do that and I'd let you figure out why yours doesn't
> always work:
>
I don't see why it won't work. If there is no break, then the iterator
ends by setting the variable past the last valid value.
For example, I ran this on a system with 512 CPUs:
unsigned int cpu;
/* Loops over CPUs 0-511. */
for_each_possible_cpu(cpu)
pr_info("loop: cpu=%d\n", cpu);
/* CPU is now set to 512. */
pr_info("final: cpu=%d\n", cpu);
/* CPU 512 is not possible. */
pr_info("CPU %d is %s possible\n", cpu, cpu_possible(cpu) ? "" : "not");
But...I like your suggestion as it is much more explicit. And I might be
missing something. :/
> diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
> index 0cbadfaf2400..3885fe05f01e 100644
> --- a/arch/x86/kernel/cpu/mce/apei.c
> +++ b/arch/x86/kernel/cpu/mce/apei.c
> @@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
> int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
> {
> const u64 *i_mce = ((const u64 *) (ctx_info + 1));
> + bool apicid_found = false;
> unsigned int cpu;
> struct mce m;
>
> @@ -98,11 +99,13 @@ int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
> return -EINVAL;
>
> for_each_possible_cpu(cpu) {
> - if (cpu_data(cpu).topo.initial_apicid == lapic_id)
> + if (cpu_data(cpu).topo.initial_apicid == lapic_id) {
> + apicid_found = true;
> break;
> + }
> }
>
> - if (!cpu_possible(cpu))
> + if (!apicid_found)
> return -EINVAL;
>
> mce_prep_record_common(&m);
>
>
Would you like me to send another revision with this change? Do you have
any other comments?
Thanks,
Yazen
On Fri, Jun 14, 2024 at 05:47:36PM -0400, Yazen Ghannam wrote:
> I don't see why it won't work. If there is no break, then the iterator
> ends by setting the variable past the last valid value.
>
> For example, I ran this on a system with 512 CPUs:
>
> unsigned int cpu;
>
> /* Loops over CPUs 0-511. */
> for_each_possible_cpu(cpu)
> pr_info("loop: cpu=%d\n", cpu);
>
> /* CPU is now set to 512. */
> pr_info("final: cpu=%d\n", cpu);
>
> /* CPU 512 is not possible. */
> pr_info("CPU %d is %s possible\n", cpu, cpu_possible(cpu) ? "" : "not");
>
> But...I like your suggestion as it is much more explicit. And I might be
> missing something. :/
I can think of at least three:
* CPU topology and the initial_apicid sometimes can get programmed wrong by the
  FW. Nothing new.
* nr_cpus= - you can enable fewer CPUs than are physically present, so an MCE
  on a CPU which is not enabled by Linux will result in -EINVAL
* possible_cpus= - pretty much the same thing
But I haven't actually tried them - am just looking at the code.
And yes, with the apicid_found boolean it is perfectly clear what's going on.
And looking at
convert_apicid_to_cpu()
which already does that loop, we probably should talk to tglx whether we can
simply export that helper.
And better yet if he's done some more helpful caching of the reverse mapping:
apicid to CPU number. As part of the topology rewrite. Because then we don't
need the loop at all.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette