Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S940551AbcKWP6b (ORCPT ); Wed, 23 Nov 2016 10:58:31 -0500 Received: from terminus.zytor.com ([198.137.202.10]:58998 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S934495AbcKWP62 (ORCPT ); Wed, 23 Nov 2016 10:58:28 -0500 Date: Wed, 23 Nov 2016 07:58:12 -0800 From: tip-bot for Tony Luck Message-ID: Cc: hpa@zytor.com, linux-edac@vger.kernel.org, bp@suse.de, tony.luck@intel.com, mingo@kernel.org, linux-kernel@vger.kernel.org, tglx@linutronix.de, x86@kernel.org, ashok.raj@intel.com Reply-To: bp@suse.de, tony.luck@intel.com, hpa@zytor.com, linux-edac@vger.kernel.org, x86@kernel.org, ashok.raj@intel.com, mingo@kernel.org, linux-kernel@vger.kernel.org, tglx@linutronix.de In-Reply-To: <20161123114855.njguoaygp3qnbkia@pd.tnic> References: <20161123114855.njguoaygp3qnbkia@pd.tnic> To: linux-tip-commits@vger.kernel.org Subject: [tip:ras/core] x86/mce: Include the PPIN in MCE records when available Git-Commit-ID: 3f5a7896a5096fd50030a04d4c3f28a7441e30a5 X-Mailer: tip-git-log-daemon Robot-ID: Robot-Unsubscribe: Contact to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5565 Lines: 158 Commit-ID: 3f5a7896a5096fd50030a04d4c3f28a7441e30a5 Gitweb: http://git.kernel.org/tip/3f5a7896a5096fd50030a04d4c3f28a7441e30a5 Author: Tony Luck AuthorDate: Fri, 18 Nov 2016 09:48:36 -0800 Committer: Thomas Gleixner CommitDate: Wed, 23 Nov 2016 16:51:52 +0100 x86/mce: Include the PPIN in MCE records when available Intel Xeons from Ivy Bridge onwards support a processor identification number set in the factory. To the user this is a handy unique number to identify a particular CPU. Intel can decode this to the fab/production run to track errors. On systems that have it, include it in the machine check record. I'm told that this would be helpful for users that run large data centers with multi-socket servers to keep track of which CPUs are seeing errors. Boris: * Add some clarifying comments and spacing. * Mask out [63:2] in the disabled-but-not-locked case * Call the MSR variable "val" for more readability. Signed-off-by: Tony Luck Cc: Ashok Raj Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20161123114855.njguoaygp3qnbkia@pd.tnic Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 ++++ arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 37 ++++++++++++++++++++++++++++++++++ 5 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a396292..d625b65 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -193,6 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 78f3760..710273c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,10 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 69a6e07..eb6247a 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -28,6 +28,7 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index aab96f8..a3cb27a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -135,6 +136,9 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + + if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) + rdmsrl(MSR_PPIN, m->ppin); } DEFINE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index be0b2fa..190b3e6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -464,11 +466,46 @@ static void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } +static void intel_ppin_init(struct cpuinfo_x86 *c) +{ + unsigned long long val; + + /* + * Even if testing the presence of the MSR would be enough, we don't + * want to risk the situation where other models reuse this MSR for + * other purposes. + */ + switch (c->x86_model) { + case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + if (rdmsrl_safe(MSR_PPIN_CTL, &val)) + return; + + if ((val & 3UL) == 1UL) { + /* PPIN available but disabled: */ + return; + } + + /* If PPIN is disabled, but not locked, try to enable: */ + if (!(val & 3UL)) { + wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); + rdmsrl_safe(MSR_PPIN_CTL, &val); + } + + if ((val & 3UL) == 2UL) + set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); + } +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); + intel_ppin_init(c); } void mce_intel_feature_clear(struct cpuinfo_x86 *c)