2017-11-16 02:51:24

by H. Peter Anvin

[permalink] [raw]
Subject: Re: x86: CR4 update when IRQs are enabled

On November 15, 2017 3:31:50 PM PST, Nadav Amit <[email protected]> wrote:
>Ping?
>
>Nadav Amit <[email protected]> wrote:
>
>> CC’ing more people, and adding a patch to clarify...
>>
>> Nadav Amit <[email protected]> wrote:
>>
>>> I am puzzled by the comment in tlb_state.cr4 , which says:
>>>
>>> /*
>>> * Access to this CR4 shadow and to H/W CR4 is protected by
>>> * disabling interrupts when modifying either one.
>>> */
>>>
>>> This does not seem to be true and adding a warning to CR4 writes
>when
>>> !irqs_disabled() immediately fires in identify_cpu() and
>>> __mcheck_cpu_init_generic(). While these two are called during boot,
>I think
>>> there other CR4 changes with enabled IRQs, for example, PR_SET_TSC.
>>>
>>> So my question(s): Is the comment correct? Is the current behavior
>correct?
>>
>> So here is what I have in mind. I am not sure whether
>CONFIG_DEBUG_PREEMPT is
>> the right #ifdef. Let me know what you think.
>>
>> -- >8 --
>>
>> Subject: [PATCH] x86: disable IRQs before changing CR4
>>
>> CR4 changes need to be performed while IRQs are disabled in order to
>> update the CR4 shadow and the actual register atomically.
>>
>> Signed-off-by: Nadav Amit <[email protected]>
>> ---
>> arch/x86/include/asm/tlbflush.h | 18 ++++++++++++------
>> arch/x86/kernel/cpu/common.c | 13 ++++++++++++-
>> arch/x86/kernel/cpu/mcheck/mce.c | 3 +++
>> arch/x86/kernel/cpu/mcheck/p5.c | 4 ++++
>> arch/x86/kernel/cpu/mcheck/winchip.c | 3 +++
>> arch/x86/kernel/process.c | 14 ++++++++++++--
>> 6 files changed, 46 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/tlbflush.h
>b/arch/x86/include/asm/tlbflush.h
>> index 50ea3482e1d1..bc70dd1cc7c6 100644
>> --- a/arch/x86/include/asm/tlbflush.h
>> +++ b/arch/x86/include/asm/tlbflush.h
>> @@ -89,6 +89,15 @@ static inline void cr4_init_shadow(void)
>> this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
>> }
>>
>> +static inline void update_cr4(unsigned long cr4)
>> +{
>> +#ifdef CONFIG_DEBUG_PREEMPT
>> + WARN_ON_ONCE(!irqs_disabled());
>> +#endif
>> + this_cpu_write(cpu_tlbstate.cr4, cr4);
>> + __write_cr4(cr4);
>> +}
>> +
>> /* Set in this cpu's CR4. */
>> static inline void cr4_set_bits(unsigned long mask)
>> {
>> @@ -97,8 +106,7 @@ static inline void cr4_set_bits(unsigned long
>mask)
>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>> if ((cr4 | mask) != cr4) {
>> cr4 |= mask;
>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>> - __write_cr4(cr4);
>> + update_cr4(cr4);
>> }
>> }
>>
>> @@ -110,8 +118,7 @@ static inline void cr4_clear_bits(unsigned long
>mask)
>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>> if ((cr4 & ~mask) != cr4) {
>> cr4 &= ~mask;
>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>> - __write_cr4(cr4);
>> + update_cr4(cr4);
>> }
>> }
>>
>> @@ -121,8 +128,7 @@ static inline void cr4_toggle_bits(unsigned long
>mask)
>>
>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>> cr4 ^= mask;
>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>> - __write_cr4(cr4);
>> + update_cr4(cr4);
>> }
>>
>> /* Read the CR4 shadow. */
>> diff --git a/arch/x86/kernel/cpu/common.c
>b/arch/x86/kernel/cpu/common.c
>> index c8b39870f33e..82e6b41fd5e9 100644
>> --- a/arch/x86/kernel/cpu/common.c
>> +++ b/arch/x86/kernel/cpu/common.c
>> @@ -318,6 +318,8 @@ static bool pku_disabled;
>>
>> static __always_inline void setup_pku(struct cpuinfo_x86 *c)
>> {
>> + unsigned long flags;
>> +
>> /* check the boot processor, plus compile options for PKU: */
>> if (!cpu_feature_enabled(X86_FEATURE_PKU))
>> return;
>> @@ -327,7 +329,10 @@ static __always_inline void setup_pku(struct
>cpuinfo_x86 *c)
>> if (pku_disabled)
>> return;
>>
>> + local_irq_save(flags);
>> cr4_set_bits(X86_CR4_PKE);
>> + local_irq_restore(flags);
>> +
>> /*
>> * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
>> * cpuid bit to be set. We need to ensure that we
>> @@ -1069,6 +1074,7 @@ static void validate_apic_and_package_id(struct
>cpuinfo_x86 *c)
>> */
>> static void identify_cpu(struct cpuinfo_x86 *c)
>> {
>> + unsigned long flags;
>> int i;
>>
>> c->loops_per_jiffy = loops_per_jiffy;
>> @@ -1121,9 +1127,14 @@ static void identify_cpu(struct cpuinfo_x86
>*c)
>> /* Disable the PN if appropriate */
>> squash_the_stupid_serial_number(c);
>>
>> - /* Set up SMEP/SMAP */
>> + /*
>> + * Set up SMEP/SMAP. Disable interrupts to prevent triggering a
>warning
>> + * as CR4 changes must be done with disabled interrupts.
>> + */
>> + local_irq_save(flags);
>> setup_smep(c);
>> setup_smap(c);
>> + local_irq_restore(flags);
>>
>> /*
>> * The vendor-specific functions might have changed features.
>> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c
>b/arch/x86/kernel/cpu/mcheck/mce.c
>> index 3b413065c613..497c07e33c25 100644
>> --- a/arch/x86/kernel/cpu/mcheck/mce.c
>> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
>> @@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
>> {
>> enum mcp_flags m_fl = 0;
>> mce_banks_t all_banks;
>> + unsigned long flags;
>> u64 cap;
>>
>> if (!mca_cfg.bootlog)
>> @@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
>> bitmap_fill(all_banks, MAX_NR_BANKS);
>> machine_check_poll(MCP_UC | m_fl, &all_banks);
>>
>> + local_irq_save(flags);
>> cr4_set_bits(X86_CR4_MCE);
>> + local_irq_restore(flags);
>>
>> rdmsrl(MSR_IA32_MCG_CAP, cap);
>> if (cap & MCG_CTL_P)
>> diff --git a/arch/x86/kernel/cpu/mcheck/p5.c
>b/arch/x86/kernel/cpu/mcheck/p5.c
>> index 2a0717bf8033..d5d4963415e9 100644
>> --- a/arch/x86/kernel/cpu/mcheck/p5.c
>> +++ b/arch/x86/kernel/cpu/mcheck/p5.c
>> @@ -42,6 +42,7 @@ static void pentium_machine_check(struct pt_regs
>*regs, long error_code)
>> /* Set up machine check reporting for processors with Intel style
>MCE: */
>> void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>> {
>> + unsigned long flags;
>> u32 l, h;
>>
>> /* Default P5 to off as its often misconnected: */
>> @@ -62,7 +63,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>> pr_info("Intel old style machine check architecture supported.\n");
>>
>> /* Enable MCE: */
>> + local_irq_save(flags);
>> cr4_set_bits(X86_CR4_MCE);
>> + local_irq_restore(flags);
>> +
>> pr_info("Intel old style machine check reporting enabled on
>CPU#%d.\n",
>> smp_processor_id());
>> }
>> diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c
>b/arch/x86/kernel/cpu/mcheck/winchip.c
>> index c6a722e1d011..6dd985e3849d 100644
>> --- a/arch/x86/kernel/cpu/mcheck/winchip.c
>> +++ b/arch/x86/kernel/cpu/mcheck/winchip.c
>> @@ -26,6 +26,7 @@ static void winchip_machine_check(struct pt_regs
>*regs, long error_code)
>> /* Set up machine check reporting on the Winchip C6 series */
>> void winchip_mcheck_init(struct cpuinfo_x86 *c)
>> {
>> + unsigned long flags;
>> u32 lo, hi;
>>
>> machine_check_vector = winchip_machine_check;
>> @@ -37,7 +38,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
>> lo &= ~(1<<4); /* Enable MCE */
>> wrmsr(MSR_IDT_FCR1, lo, hi);
>>
>> + local_irq_save(flags);
>> cr4_set_bits(X86_CR4_MCE);
>> + local_irq_restore(flags);
>>
>> pr_info("Winchip machine check reporting enabled on CPU#0.\n");
>> }
>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>> index 3ca198080ea9..09e44e784745 100644
>> --- a/arch/x86/kernel/process.c
>> +++ b/arch/x86/kernel/process.c
>> @@ -127,25 +127,35 @@ void flush_thread(void)
>>
>> void disable_TSC(void)
>> {
>> + unsigned long flags;
>> +
>> preempt_disable();
>> - if (!test_and_set_thread_flag(TIF_NOTSC))
>> + if (!test_and_set_thread_flag(TIF_NOTSC)) {
>> /*
>> * Must flip the CPU state synchronously with
>> * TIF_NOTSC in the current running context.
>> */
>> + local_irq_save(flags);
>> cr4_set_bits(X86_CR4_TSD);
>> + local_irq_restore(flags);
>> + }
>> preempt_enable();
>> }
>>
>> static void enable_TSC(void)
>> {
>> + unsigned long flags;
>> +
>> preempt_disable();
>> - if (test_and_clear_thread_flag(TIF_NOTSC))
>> + if (test_and_clear_thread_flag(TIF_NOTSC)) {
>> /*
>> * Must flip the CPU state synchronously with
>> * TIF_NOTSC in the current running context.
>> */
>> + local_irq_save(flags);
>> cr4_clear_bits(X86_CR4_TSD);
>> + local_irq_restore(flags);
>> + }
>> preempt_enable();
>> }

This is wrong on at least two levels colon first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not at all a requirement but rather that as long as the hardware register is written second, I think we should always be safe.
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.

From 1584183050742675141@xxx Thu Nov 16 01:11:27 +0000 2017
X-GM-THRID: 1583536401885830151
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread


2017-11-16 01:11:27

by Nadav Amit

[permalink] [raw]
Subject: Re: x86: CR4 update when IRQs are enabled

[email protected] wrote:

> On November 15, 2017 3:31:50 PM PST, Nadav Amit <[email protected]> wrote:
>> Ping?
>>
>> Nadav Amit <[email protected]> wrote:
>>
>>> CC’ing more people, and adding a patch to clarify...
>>>
>>> Nadav Amit <[email protected]> wrote:
>>>
>>>> I am puzzled by the comment in tlb_state.cr4 , which says:
>>>>
>>>> /*
>>>> * Access to this CR4 shadow and to H/W CR4 is protected by
>>>> * disabling interrupts when modifying either one.
>>>> */
>>>>
>>>> This does not seem to be true and adding a warning to CR4 writes
>> when
>>>> !irqs_disabled() immediately fires in identify_cpu() and
>>>> __mcheck_cpu_init_generic(). While these two are called during boot,
>> I think
>>>> there other CR4 changes with enabled IRQs, for example, PR_SET_TSC.
>>>>
>>>> So my question(s): Is the comment correct? Is the current behavior
>> correct?
>>> So here is what I have in mind. I am not sure whether
>> CONFIG_DEBUG_PREEMPT is
>>> the right #ifdef. Let me know what you think.
>>>
>>> -- >8 --
>>>
>>> Subject: [PATCH] x86: disable IRQs before changing CR4
>>>
>>> CR4 changes need to be performed while IRQs are disabled in order to
>>> update the CR4 shadow and the actual register atomically.
>>>
>>> Signed-off-by: Nadav Amit <[email protected]>
>>> ---
>>> arch/x86/include/asm/tlbflush.h | 18 ++++++++++++------
>>> arch/x86/kernel/cpu/common.c | 13 ++++++++++++-
>>> arch/x86/kernel/cpu/mcheck/mce.c | 3 +++
>>> arch/x86/kernel/cpu/mcheck/p5.c | 4 ++++
>>> arch/x86/kernel/cpu/mcheck/winchip.c | 3 +++
>>> arch/x86/kernel/process.c | 14 ++++++++++++--
>>> 6 files changed, 46 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/tlbflush.h
>> b/arch/x86/include/asm/tlbflush.h
>>> index 50ea3482e1d1..bc70dd1cc7c6 100644
>>> --- a/arch/x86/include/asm/tlbflush.h
>>> +++ b/arch/x86/include/asm/tlbflush.h
>>> @@ -89,6 +89,15 @@ static inline void cr4_init_shadow(void)
>>> this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
>>> }
>>>
>>> +static inline void update_cr4(unsigned long cr4)
>>> +{
>>> +#ifdef CONFIG_DEBUG_PREEMPT
>>> + WARN_ON_ONCE(!irqs_disabled());
>>> +#endif
>>> + this_cpu_write(cpu_tlbstate.cr4, cr4);
>>> + __write_cr4(cr4);
>>> +}
>>> +
>>> /* Set in this cpu's CR4. */
>>> static inline void cr4_set_bits(unsigned long mask)
>>> {
>>> @@ -97,8 +106,7 @@ static inline void cr4_set_bits(unsigned long
>> mask)
>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>> if ((cr4 | mask) != cr4) {
>>> cr4 |= mask;
>>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>>> - __write_cr4(cr4);
>>> + update_cr4(cr4);
>>> }
>>> }
>>>
>>> @@ -110,8 +118,7 @@ static inline void cr4_clear_bits(unsigned long
>> mask)
>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>> if ((cr4 & ~mask) != cr4) {
>>> cr4 &= ~mask;
>>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>>> - __write_cr4(cr4);
>>> + update_cr4(cr4);
>>> }
>>> }
>>>
>>> @@ -121,8 +128,7 @@ static inline void cr4_toggle_bits(unsigned long
>> mask)
>>> cr4 = this_cpu_read(cpu_tlbstate.cr4);
>>> cr4 ^= mask;
>>> - this_cpu_write(cpu_tlbstate.cr4, cr4);
>>> - __write_cr4(cr4);
>>> + update_cr4(cr4);
>>> }
>>>
>>> /* Read the CR4 shadow. */
>>> diff --git a/arch/x86/kernel/cpu/common.c
>> b/arch/x86/kernel/cpu/common.c
>>> index c8b39870f33e..82e6b41fd5e9 100644
>>> --- a/arch/x86/kernel/cpu/common.c
>>> +++ b/arch/x86/kernel/cpu/common.c
>>> @@ -318,6 +318,8 @@ static bool pku_disabled;
>>>
>>> static __always_inline void setup_pku(struct cpuinfo_x86 *c)
>>> {
>>> + unsigned long flags;
>>> +
>>> /* check the boot processor, plus compile options for PKU: */
>>> if (!cpu_feature_enabled(X86_FEATURE_PKU))
>>> return;
>>> @@ -327,7 +329,10 @@ static __always_inline void setup_pku(struct
>> cpuinfo_x86 *c)
>>> if (pku_disabled)
>>> return;
>>>
>>> + local_irq_save(flags);
>>> cr4_set_bits(X86_CR4_PKE);
>>> + local_irq_restore(flags);
>>> +
>>> /*
>>> * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
>>> * cpuid bit to be set. We need to ensure that we
>>> @@ -1069,6 +1074,7 @@ static void validate_apic_and_package_id(struct
>> cpuinfo_x86 *c)
>>> */
>>> static void identify_cpu(struct cpuinfo_x86 *c)
>>> {
>>> + unsigned long flags;
>>> int i;
>>>
>>> c->loops_per_jiffy = loops_per_jiffy;
>>> @@ -1121,9 +1127,14 @@ static void identify_cpu(struct cpuinfo_x86
>> *c)
>>> /* Disable the PN if appropriate */
>>> squash_the_stupid_serial_number(c);
>>>
>>> - /* Set up SMEP/SMAP */
>>> + /*
>>> + * Set up SMEP/SMAP. Disable interrupts to prevent triggering a
>> warning
>>> + * as CR4 changes must be done with disabled interrupts.
>>> + */
>>> + local_irq_save(flags);
>>> setup_smep(c);
>>> setup_smap(c);
>>> + local_irq_restore(flags);
>>>
>>> /*
>>> * The vendor-specific functions might have changed features.
>>> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c
>> b/arch/x86/kernel/cpu/mcheck/mce.c
>>> index 3b413065c613..497c07e33c25 100644
>>> --- a/arch/x86/kernel/cpu/mcheck/mce.c
>>> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
>>> @@ -1508,6 +1508,7 @@ static void __mcheck_cpu_init_generic(void)
>>> {
>>> enum mcp_flags m_fl = 0;
>>> mce_banks_t all_banks;
>>> + unsigned long flags;
>>> u64 cap;
>>>
>>> if (!mca_cfg.bootlog)
>>> @@ -1519,7 +1520,9 @@ static void __mcheck_cpu_init_generic(void)
>>> bitmap_fill(all_banks, MAX_NR_BANKS);
>>> machine_check_poll(MCP_UC | m_fl, &all_banks);
>>>
>>> + local_irq_save(flags);
>>> cr4_set_bits(X86_CR4_MCE);
>>> + local_irq_restore(flags);
>>>
>>> rdmsrl(MSR_IA32_MCG_CAP, cap);
>>> if (cap & MCG_CTL_P)
>>> diff --git a/arch/x86/kernel/cpu/mcheck/p5.c
>> b/arch/x86/kernel/cpu/mcheck/p5.c
>>> index 2a0717bf8033..d5d4963415e9 100644
>>> --- a/arch/x86/kernel/cpu/mcheck/p5.c
>>> +++ b/arch/x86/kernel/cpu/mcheck/p5.c
>>> @@ -42,6 +42,7 @@ static void pentium_machine_check(struct pt_regs
>> *regs, long error_code)
>>> /* Set up machine check reporting for processors with Intel style
>> MCE: */
>>> void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>>> {
>>> + unsigned long flags;
>>> u32 l, h;
>>>
>>> /* Default P5 to off as its often misconnected: */
>>> @@ -62,7 +63,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
>>> pr_info("Intel old style machine check architecture supported.\n");
>>>
>>> /* Enable MCE: */
>>> + local_irq_save(flags);
>>> cr4_set_bits(X86_CR4_MCE);
>>> + local_irq_restore(flags);
>>> +
>>> pr_info("Intel old style machine check reporting enabled on
>> CPU#%d.\n",
>>> smp_processor_id());
>>> }
>>> diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c
>> b/arch/x86/kernel/cpu/mcheck/winchip.c
>>> index c6a722e1d011..6dd985e3849d 100644
>>> --- a/arch/x86/kernel/cpu/mcheck/winchip.c
>>> +++ b/arch/x86/kernel/cpu/mcheck/winchip.c
>>> @@ -26,6 +26,7 @@ static void winchip_machine_check(struct pt_regs
>> *regs, long error_code)
>>> /* Set up machine check reporting on the Winchip C6 series */
>>> void winchip_mcheck_init(struct cpuinfo_x86 *c)
>>> {
>>> + unsigned long flags;
>>> u32 lo, hi;
>>>
>>> machine_check_vector = winchip_machine_check;
>>> @@ -37,7 +38,9 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
>>> lo &= ~(1<<4); /* Enable MCE */
>>> wrmsr(MSR_IDT_FCR1, lo, hi);
>>>
>>> + local_irq_save(flags);
>>> cr4_set_bits(X86_CR4_MCE);
>>> + local_irq_restore(flags);
>>>
>>> pr_info("Winchip machine check reporting enabled on CPU#0.\n");
>>> }
>>> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
>>> index 3ca198080ea9..09e44e784745 100644
>>> --- a/arch/x86/kernel/process.c
>>> +++ b/arch/x86/kernel/process.c
>>> @@ -127,25 +127,35 @@ void flush_thread(void)
>>>
>>> void disable_TSC(void)
>>> {
>>> + unsigned long flags;
>>> +
>>> preempt_disable();
>>> - if (!test_and_set_thread_flag(TIF_NOTSC))
>>> + if (!test_and_set_thread_flag(TIF_NOTSC)) {
>>> /*
>>> * Must flip the CPU state synchronously with
>>> * TIF_NOTSC in the current running context.
>>> */
>>> + local_irq_save(flags);
>>> cr4_set_bits(X86_CR4_TSD);
>>> + local_irq_restore(flags);
>>> + }
>>> preempt_enable();
>>> }
>>>
>>> static void enable_TSC(void)
>>> {
>>> + unsigned long flags;
>>> +
>>> preempt_disable();
>>> - if (test_and_clear_thread_flag(TIF_NOTSC))
>>> + if (test_and_clear_thread_flag(TIF_NOTSC)) {
>>> /*
>>> * Must flip the CPU state synchronously with
>>> * TIF_NOTSC in the current running context.
>>> */
>>> + local_irq_save(flags);
>>> cr4_clear_bits(X86_CR4_TSD);
>>> + local_irq_restore(flags);
>>> + }
>>> preempt_enable();
>>> }
>
> This is wrong on at least two levels colon first of all, this should not be wrapped around the abstracted operations but put inside them if relevant. Second, I suspect that this is not at all a requirement but rather that as long as the hardware register is written second, I think we should always be safe.

Thanks for your reply.

Can you please explain your suspicion? Here is a simple execution that may
fail (assume cr4 is initially zero):

CPU0
====
cr4_set_bits(1)
=> cpu_tlbstate.cr4 = 1
=> interrupt
cr4_set_bits(2)
cpu_tlbstate.cr4 = 3
__write_cr4(3)
=> __write_cr4(1) [ and should have been 3 ]

Am I missing anything?

Now, it may be a theoretical issue right now, since I did not find any
change of CR4 bits that happens in an interrupt handler.

Anyhow, if you want me to submit a fix, please let me know what other levels
of wrongness you had in mind.

Regards,
Nadav


From 1584182499407506098@xxx Thu Nov 16 01:02:42 +0000 2017
X-GM-THRID: 1583536401885830151
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread