Currently, x86_spec_ctrl_base is read at boot time, and speculative bits
are set if the corresponding configs are enabled; for example, the
SPEC_CTRL_IBRS bit is set in the MSR if CONFIG_CPU_IBRS_ENTRY is
configured. These MSR bits are not cleared if the mitigations are
disabled.
This is a problem when kexec-ing into a kernel that has the mitigation
disabled from a kernel that has the mitigation enabled. In this case,
the MSR bits are carried forward and not cleared at boot of the new
kernel. This can cause performance degradation that is hard to
diagnose.
This problem does not happen if the machine is (hard) rebooted, because
the bits are cleared by default at reset.
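For illustration, the stale bits can be observed from userspace in the
kexec-ed kernel through the msr driver. A minimal sketch (assumes the
msr module is loaded and root privileges; MSR_IA32_SPEC_CTRL is 0x48):

/* Illustrative only: read MSR_IA32_SPEC_CTRL (0x48) on CPU 0 via
 * /dev/cpu/0/msr to check for bits left over from the previous kernel.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* The msr driver uses the file offset as the MSR index. */
	if (pread(fd, &val, sizeof(val), 0x48) != sizeof(val)) {
		perror("pread");
		return 1;
	}
	printf("SPEC_CTRL: %#llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}

A non-zero value after booting with mitigations=off indicates the
carried-over state described above.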
This patch also defines a SPEC_CTRL_MASK macro, so these bits can
easily be tracked and cleared if some new mitigation shows up
eventually.
Suggested-by: Pawan Gupta <[email protected]>
Signed-off-by: Breno Leitao <[email protected]>
---
arch/x86/include/asm/msr-index.h | 3 +++
arch/x86/kernel/cpu/bugs.c | 9 ++++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4a2af82553e4..704f49580ee1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -54,6 +54,9 @@
#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
+ | SPEC_CTRL_RRSBA_DIS_S)
+
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..88957da1029b 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -137,8 +137,15 @@ void __init check_bugs(void)
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ /*
+ * Previously running software may have some controls turned ON.
+ * Clear them and let kernel decide which controls to use.
+ */
+ x86_spec_ctrl_base &= ~SPEC_CTRL_MASK;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ }
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
--
2.30.2
On Thu, Nov 24, 2022 at 02:46:50AM -0800, Breno Leitao wrote:
> Currently, x86_spec_ctrl_base is read at boot time, and speculative bits
> are set if the corresponding configs are enabled; for example, the
> SPEC_CTRL_IBRS bit is set in the MSR if CONFIG_CPU_IBRS_ENTRY is
> configured. These MSR bits are not cleared if the mitigations are
> disabled.
>
> This is a problem when kexec-ing into a kernel that has the mitigation
> disabled from a kernel that has the mitigation enabled. In this case,
> the MSR bits are carried forward and not cleared at boot of the new
> kernel. This can cause performance degradation that is hard to
> diagnose.
>
> This problem does not happen if the machine is (hard) rebooted, because
> the bits are cleared by default at reset.
>
> This patch also defines a SPEC_CTRL_MASK macro, so these bits can
> easily be tracked and cleared if some new mitigation shows up
> eventually.
Just remove that sentence - the macro's function is kinda obvious from
the diff itself.
> Suggested-by: Pawan Gupta <[email protected]>
> Signed-off-by: Breno Leitao <[email protected]>
> ---
> arch/x86/include/asm/msr-index.h | 3 +++
> arch/x86/kernel/cpu/bugs.c | 9 ++++++++-
> 2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 4a2af82553e4..704f49580ee1 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -54,6 +54,9 @@
> #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
> #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
>
> +#define SPEC_CTRL_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
> + | SPEC_CTRL_RRSBA_DIS_S)
Call that SPEC_CTRL_MITIGATIONS_MASK or so to denote what it is - a
mask of the SPEC_CTRL bits which the kernel toggles when controlling
mitigations.
A comment above it wouldn't hurt either.
> +
> #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
> #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
>
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 3e3230cccaa7..88957da1029b 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -137,8 +137,15 @@ void __init check_bugs(void)
> * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
> * init code as it is not enumerated and depends on the family.
> */
> - if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
> + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
> rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
> + /*
> + * Previously running software may have some controls turned ON.
"Previously running software, like kexec for example, ..."
> + * Clear them and let kernel decide which controls to use.
"Clear them and let the mitigations setup below set them based on configuration."
> + */
> + x86_spec_ctrl_base &= ~SPEC_CTRL_MASK;
> + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
So this WRMSR will happen on the BSP only but the SPEC_CTRL MSR is
per-CPU. As is x86_spec_ctrl_current which tracks it.
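For reference, that tracking looks roughly like this (paraphrased, not
the exact upstream code):

/* Paraphrased sketch: each CPU caches the last value it wrote to the
 * MSR, so a WRMSR on the BSP alone updates neither the other CPUs'
 * MSRs nor their cached copies.
 */
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);

void write_spec_ctrl_current(u64 val, bool force)
{
	if (this_cpu_read(x86_spec_ctrl_current) == val)
		return;

	this_cpu_write(x86_spec_ctrl_current, val);

	/* With KERNEL_IBRS the MSR is written on return-to-user anyway,
	 * so a non-forced update can be deferred until then.
	 */
	if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
		wrmsrl(MSR_IA32_SPEC_CTRL, val);
}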
So I'd say you don't need that WRMSR here - the proper value will get
replicated eventually everywhere...
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Nov 28, 2022 at 01:42:26AM +0100, Borislav Petkov wrote:
>On Thu, Nov 24, 2022 at 02:46:50AM -0800, Breno Leitao wrote:
>[...]
>> + x86_spec_ctrl_base &= ~SPEC_CTRL_MASK;
>> + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
>
>So this WRMSR will happen on the BSP only but the SPEC_CTRL MSR is
>per-CPU. As is x86_spec_ctrl_current which tracks it.
>
>So I'd say you don't need that WRMSR here - the proper value will get
>replicated eventually everywhere...
This patch is particularly for the case when the user intends to turn
off the mitigations, e.g. with mitigations=off. In that case we need the
WRMSR, because mitigation selection will simply return without writing
to the MSR on the BSP.
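Roughly, that is because the selection code bails out before the MSR is
ever written when mitigations are off; a paraphrased sketch of the
relevant path in bugs.c (not verbatim):

/* Paraphrased sketch: with mitigations=off the parsed command is
 * SPECTRE_V2_CMD_NONE and the function returns before
 * write_spec_ctrl_current() is ever reached, so whatever value the
 * previous kernel left in the MSR stays there on the BSP.
 */
static void __init spectre_v2_select_mitigation(void)
{
	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();

	if (cmd == SPECTRE_V2_CMD_NONE)
		return;				/* stale MSR bits survive */

	/* ... a mitigation was chosen ... */
	x86_spec_ctrl_base |= SPEC_CTRL_IBRS;	/* e.g. in-kernel IBRS */
	write_spec_ctrl_current(x86_spec_ctrl_base, true);
}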
As part of AP init, x86_spec_ctrl_setup_ap() writes to the MSR even
when the mitigation is turned off, so the APs should have been fine, but
I think there is a subtle bug there as well. Consider the call below:
void x86_spec_ctrl_setup_ap(void)
{
write_spec_ctrl_current(x86_spec_ctrl_base, true);
When x86_spec_ctrl_base is 0, the MSR won't be written because of a
check in write_spec_ctrl_current() that skips the write when the new
value (0) is the same as x86_spec_ctrl_current (initialized to 0).
The change below should fix the problem with the APs:
---
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..cfc2ed2661fc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -66,7 +66,7 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
*/
void write_spec_ctrl_current(u64 val, bool force)
{
- if (this_cpu_read(x86_spec_ctrl_current) == val)
+ if (!force && this_cpu_read(x86_spec_ctrl_current) == val)
return;
this_cpu_write(x86_spec_ctrl_current, val);
On Mon, Nov 28, 2022 at 02:03:58PM -0800, Pawan Gupta wrote:
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 3e3230cccaa7..cfc2ed2661fc 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -66,7 +66,7 @@ static DEFINE_MUTEX(spec_ctrl_mutex);
> */
> void write_spec_ctrl_current(u64 val, bool force)
> {
> - if (this_cpu_read(x86_spec_ctrl_current) == val)
> + if (!force && this_cpu_read(x86_spec_ctrl_current) == val)
> return;
> this_cpu_write(x86_spec_ctrl_current, val);
Still looks hacky to me.
I think it would be a lot cleaner if MSR_IA32_SPEC_CTRL gets cleaned of
the speculation bits in init_speculation_control() which gets run on
*every* CPU.
So by the time check_bugs() gets to setup stuff, the MSR will be ready
to go regardless.
I.e., something like this (not supposed to work - just to show what I
mean):
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 73cc546e024d..367732c92942 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -993,9 +993,19 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
* Intel CPUs, for finer-grained selection of what's available.
*/
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
+ u64 msr;
+
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_IBPB);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+
+ /*
+ * Clear speculation control settings from a previous kernel
+ * run, i.e., kexec.
+ */
+ rdmsrl(MSR_IA32_SPEC_CTRL, msr);
+ if (msr & SPEC_CTRL_MASK)
+		wrmsrl(MSR_IA32_SPEC_CTRL, msr & ~SPEC_CTRL_MASK);
}
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Nov 28, 2022 at 11:40:19PM +0100, Borislav Petkov wrote:
>[...]
>I think it would be a lot cleaner if MSR_IA32_SPEC_CTRL gets cleaned of
>the speculation bits in init_speculation_control() which gets run on
>*every* CPU.
>
>So by the time check_bugs() gets to setup stuff, the MSR will be ready
>to go regardless.
Yes, that's a cleaner approach, except that a late microcode load will
ruin the MSR:

microcode_reload_late()
  microcode_check()
    get_cpu_cap()
      init_speculation_control()

i.e. the clearing would run again after the kernel has already set its
chosen mitigation bits, wiping them out.
On Mon, Nov 28, 2022 at 03:02:21PM -0800, Pawan Gupta wrote:
>[...]
>Yes, that's a cleaner approach, except that a late microcode load will
>ruin the MSR:
The root of the original problem is that x86_spec_ctrl_current is not
the current value of the MSR at bootup.
How about we update x86_spec_ctrl_current before any writes to the MSR:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3e3230cccaa7..68ed52394fd9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -137,8 +137,18 @@ void __init check_bugs(void)
* have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
* init code as it is not enumerated and depends on the family.
*/
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ /*
+ * Previously running software, like kexec for example, may
+ * have some controls turned ON.
+ * Clear them and let the mitigations setup below set them
+ * based on configuration.
+ */
+ this_cpu_write(x86_spec_ctrl_current, x86_spec_ctrl_base);
+ x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
@@ -2047,8 +2057,13 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
void x86_spec_ctrl_setup_ap(void)
{
- if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+ u64 msr;
+
+ rdmsrl(MSR_IA32_SPEC_CTRL, msr);
+ this_cpu_write(x86_spec_ctrl_current, msr);
write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
On Mon, Nov 28, 2022 at 03:02:19PM -0800, Pawan Gupta wrote:
> Yes, that's a cleaner approach, except that a late microcode load will
> ruin the MSR:
>
> microcode_reload_late()
>   microcode_check()
>     get_cpu_cap()
>       init_speculation_control()
Microcode late loading ruins a lot of crap already anyway.
Then I guess we'll have to look for a better place for this on the init
path - I don't want any more hackery in the hw vuln nightmare code.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette