On Tue, Sep 10, 2019 at 08:20:07AM +0000, Tony W Wang-oc wrote:
> Zhaoxin newer CPUs support LMCE that compatible with Intel's
> "Machine-Check Architecture", so add support for Zhaoxin LMCE
> in mce/core.c.
>
> Signed-off-by: Tony W Wang-oc <[email protected]>
> ---
> v1->v2:
> - Fix redefinition of "mce_zhaoxin_feature_clear"
>
> arch/x86/include/asm/mce.h | 2 ++
> arch/x86/kernel/cpu/mce/core.c | 25 +++++++++++++++++++++++--
> 2 files changed, 25 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 0986a11..01840ec 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -352,8 +352,10 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am
>
> #ifdef CONFIG_CPU_SUP_ZHAOXIN
> void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c);
> +void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c);
> #else
> static inline void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { }
> +static inline void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) { }
> #endif
>
> #endif /* _ASM_X86_MCE_H */
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 8a36833..595d3af7ac 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1129,6 +1129,17 @@ static bool __mc_check_crashing_cpu(int cpu)
> u64 mcgstatus;
>
> mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
> +
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
> + if (mcgstatus & MCG_STATUS_LMCES) {
> + return false;
> + } else {
> + if (mcgstatus & MCG_STATUS_RIPV)
> + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
> + return true;
> + }
> + }
Simplify:
if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
if (mcgstatus & MCG_STATUS_LMCES)
return false;
}
<--- Now here, on your CPUs which don't set MCG_STATUS_LMCES,
it will fallback to clearing the status register. I.e., what you do in the else
clause.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Sep 10, 2019, Borislav Petkov wrote:
>On Tue, Sep 10, 2019 at 08:20:07AM +0000, Tony W Wang-oc wrote:
>> Zhaoxin newer CPUs support LMCE that compatible with Intel's
>> "Machine-Check Architecture", so add support for Zhaoxin LMCE
>> in mce/core.c.
>>
>> Signed-off-by: Tony W Wang-oc <[email protected]>
>> ---
>> v1->v2:
>> - Fix redefinition of "mce_zhaoxin_feature_clear"
>>
>> arch/x86/include/asm/mce.h | 2 ++
>> arch/x86/kernel/cpu/mce/core.c | 25 +++++++++++++++++++++++--
>> 2 files changed, 25 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
>> index 0986a11..01840ec 100644
>> --- a/arch/x86/include/asm/mce.h
>> +++ b/arch/x86/include/asm/mce.h
>> @@ -352,8 +352,10 @@ static inline void mce_hygon_feature_init(struct
>cpuinfo_x86 *c) { return mce_am
>>
>> #ifdef CONFIG_CPU_SUP_ZHAOXIN
>> void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c);
>> +void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c);
>> #else
>> static inline void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c) { }
>> +static inline void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c) { }
>> #endif
>>
>> #endif /* _ASM_X86_MCE_H */
>> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
>> index 8a36833..595d3af7ac 100644
>> --- a/arch/x86/kernel/cpu/mce/core.c
>> +++ b/arch/x86/kernel/cpu/mce/core.c
>> @@ -1129,6 +1129,17 @@ static bool __mc_check_crashing_cpu(int cpu)
>> u64 mcgstatus;
>>
>> mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
>> +
>> + if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
>> + if (mcgstatus & MCG_STATUS_LMCES) {
>> + return false;
>> + } else {
>> + if (mcgstatus & MCG_STATUS_RIPV)
>> + mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
>> + return true;
>> + }
>> + }
>
>Simplify:
>
> if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN) {
> if (mcgstatus & MCG_STATUS_LMCES)
> return false;
> }
>
> <--- Now here, on your CPUs which don't set MCG_STATUS_LMCES,
>it will fallback to clearing the status register. I.e., what you do in the else
>clause.
>
On Zhaoxin CPUs that don't set MCG_STATUS_LMCES, to avoid a rendezvous timeout
if this CPU is offline or crashing_cpu is set, we want to return true regardless
of whether MCG_STATUS_RIPV is set.
Without my else clause, the original code returns true only when
MCG_STATUS_RIPV is set.
For better readability, I will add a comment and change the coding style in v3.
Sincerely
TonyWWang-oc