2021-10-01 16:07:53

by Joerg Roedel

Subject: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

From: Joerg Roedel <[email protected]>

The AP bringup code uses the trampoline_pgd page-table, which
establishes global mappings in the user range of the address space.
Flush the global TLB entries after the identity mappings are removed
so no stale entries remain in the TLB.

Signed-off-by: Joerg Roedel <[email protected]>
---
arch/x86/kernel/head64.c | 15 +++++++++++++++
arch/x86/kernel/head_64.S | 19 ++++++++++++++++++-
2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de01903c3735..cae21afe0922 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,6 +457,19 @@ static void __init copy_bootdata(char *real_mode_data)
sme_unmap_bootdata(real_mode_data);
}

+/*
+ * The __flush_tlb_all() function uses all kinds of state which is not
+ * initialized that early and can not be used here. So the helper below is used
+ * to flush global TLB entries.
+ */
+static void __init early_flush_tlb_global(void)
+{
+ unsigned long cr4 = native_read_cr4();
+
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
+
asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
/*
@@ -478,6 +491,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
/* Kill off the identity-map trampoline */
reset_early_page_tables();

+ early_flush_tlb_global();
+
clear_bss();

clear_page(init_top_pgt);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..bd4b6ebafdc3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -166,9 +166,26 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
call sev_verify_cbit
popq %rsi

- /* Switch to new page-table */
+ /*
+ * Switch to new page-table
+ *
+ * For the boot CPU this switches to early_top_pgt which still has the
+ * identity mappings present. The secondary CPUs will switch to the
+ * init_top_pgt here, away from the trampoline_pgd and unmapping the
+ * identity mapped ranges.
+ *
+ * Do a global TLB flush after the CR3 switch to make sure the TLB
+ * entries from the identity mapping are flushed.
+ */
movq %rax, %cr3

+ /* Flush global TLB entries - only needed for secondary CPUs */
+ movq %cr4, %rcx
+ movq %rcx, %rax
+ xorq $X86_CR4_PGE, %rcx
+ movq %rcx, %cr4
+ movq %rax, %cr4
+
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
ANNOTATE_RETPOLINE_SAFE
--
2.33.0


2021-10-26 12:58:08

by Borislav Petkov

Subject: Re: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

On Fri, Oct 01, 2021 at 05:48:15PM +0200, Joerg Roedel wrote:
> +/*
> + * The __flush_tlb_all() function uses all kinds of state which is not
> + * initialized that early and can not be used here. So the helper below is used
> + * to flush global TLB entries.
> + */
> +static void __init early_flush_tlb_global(void)
> +{
> + unsigned long cr4 = native_read_cr4();
> +
> + native_write_cr4(cr4 ^ X86_CR4_PGE);
> + native_write_cr4(cr4);
> +}

Please make sure now and in the future to avoid such duplication - see the
diff on top at the end for a possible way to do this.

> asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
> {
> /*
> @@ -478,6 +491,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
> /* Kill off the identity-map trampoline */
> reset_early_page_tables();
>
> + early_flush_tlb_global();
> +
> clear_bss();
>
> clear_page(init_top_pgt);
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index d8b3ebd2bb85..bd4b6ebafdc3 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -166,9 +166,26 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> call sev_verify_cbit
> popq %rsi
>
> - /* Switch to new page-table */
> + /*
> + * Switch to new page-table
> + *
> + * For the boot CPU this switches to early_top_pgt which still has the
> + * identity mappings present. The secondary CPUs will switch to the
> + * init_top_pgt here, away from the trampoline_pgd and unmapping the

"... unmap the... "

> + * identity mapped ranges.
> + *
> + * Do a global TLB flush after the CR3 switch to make sure the TLB
> + * entries from the identity mapping are flushed.

Put this comment...

> + */
> movq %rax, %cr3
>
> + /* Flush global TLB entries - only needed for secondary CPUs */

... here instead of this one.

> + movq %cr4, %rcx
> + movq %rcx, %rax
> + xorq $X86_CR4_PGE, %rcx
> + movq %rcx, %cr4
> + movq %rax, %cr4
> +
> /* Ensure I am executing from virtual addresses */
> movq $1f, %rax
> ANNOTATE_RETPOLINE_SAFE
> --

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index b587a9ee9cb2..98fa0a114074 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -261,4 +261,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

#endif /* !MODULE */

+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+ native_write_cr4(cr4 ^ X86_CR4_PGE);
+ native_write_cr4(cr4);
+}
#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0a93b24d7604..75acb6027a87 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -462,19 +462,6 @@ static void __init copy_bootdata(char *real_mode_data)
sme_unmap_bootdata(real_mode_data);
}

-/*
- * The __flush_tlb_all() function uses all kinds of state which is not
- * initialized that early and can not be used here. So the helper below is used
- * to flush global TLB entries.
- */
-static void __init early_flush_tlb_global(void)
-{
- unsigned long cr4 = native_read_cr4();
-
- native_write_cr4(cr4 ^ X86_CR4_PGE);
- native_write_cr4(cr4);
-}
-
asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
{
/*
@@ -496,7 +483,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
/* Kill off the identity-map trampoline */
reset_early_page_tables();

- early_flush_tlb_global();
+ __native_tlb_flush_global(native_read_cr4());

clear_bss();

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 59ba2968af1b..1e6513f95133 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1148,7 +1148,7 @@ void flush_tlb_one_user(unsigned long addr)
*/
STATIC_NOPV void native_flush_tlb_global(void)
{
- unsigned long cr4, flags;
+ unsigned long flags;

if (static_cpu_has(X86_FEATURE_INVPCID)) {
/*
@@ -1168,11 +1168,7 @@ STATIC_NOPV void native_flush_tlb_global(void)
*/
raw_local_irq_save(flags);

- cr4 = this_cpu_read(cpu_tlbstate.cr4);
- /* toggle PGE */
- native_write_cr4(cr4 ^ X86_CR4_PGE);
- /* write old PGE again and flush TLBs */
- native_write_cr4(cr4);
+ __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));

raw_local_irq_restore(flags);
}

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-10-26 16:38:45

by Borislav Petkov

Subject: Re: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

On Tue, Oct 26, 2021 at 11:55:44AM +0200, Borislav Petkov wrote:
> > + movq %cr4, %rcx
> > + movq %rcx, %rax
> > + xorq $X86_CR4_PGE, %rcx
> > + movq %rcx, %cr4
> > + movq %rax, %cr4

Also, I'm wondering if you could compact this even more by defining a
function toggling the PGE bit and calling it from everywhere, even from
asm.
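
Just as an illustration of the idea - not a real patch, the function name is
made up, and it leaves open whether the early asm already runs from the
kernel's virtual mappings at the point where it would call it - something
along these lines:

	/*
	 * Hypothetical sketch only: a non-inline PGE-toggle helper which C
	 * code and (late enough) asm code could both reach with a plain
	 * call instruction.
	 */
	void flush_tlb_global_early(void)
	{
		unsigned long cr4 = native_read_cr4();

		/* Toggling CR4.PGE flushes all TLB entries, including global ones */
		native_write_cr4(cr4 ^ X86_CR4_PGE);

		/* Restore the original CR4 value */
		native_write_cr4(cr4);
	}

Whether something like that can actually be reached from secondary_startup_64
before the switch to virtual addresses is of course the tricky part.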

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-12-02 12:50:20

by Joerg Roedel

Subject: Re: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

On Tue, Oct 26, 2021 at 02:58:44PM +0200, Borislav Petkov wrote:
> On Tue, Oct 26, 2021 at 11:55:44AM +0200, Borislav Petkov wrote:
> > > + movq %cr4, %rcx
> > > + movq %rcx, %rax
> > > + xorq $X86_CR4_PGE, %rcx
> > > + movq %rcx, %cr4
> > > + movq %rax, %cr4
>
> Also, I'm wondering if you could compact this even more by defining a
> function toggling the PGE bit and calling it from everywhere, even from
> asm.

Yeah, that would make sense, but it is probably worth its own patch-set.
Unifying this across arch/x86/ needs to touch a couple more places and
needs special care so that the function is safe to call from early asm.

Regards,

--
Jörg Rödel
[email protected]

SUSE Software Solutions Germany GmbH
Maxfeldstr. 5
90409 Nürnberg
Germany

(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


2021-12-02 18:19:06

by Borislav Petkov

Subject: Re: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

On Thu, Dec 02, 2021 at 01:50:05PM +0100, Joerg Roedel wrote:
> Yeah, that would make sense, but it is probably worth its own patch-set.
> Unifying this across arch/x86/ needs to touch a couple more places and
> needs special care so that the function is safe to call from early asm.

I'd gladly review a preparatory patchset doing that. The usual strategy
is: cleanup and refactoring first, new features later.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-12-02 21:17:43

by Joerg Roedel

Subject: Re: [PATCH v3 2/4] x86/mm/64: Flush global TLB on boot and AP bringup

On Thu, Dec 02, 2021 at 07:19:07PM +0100, Borislav Petkov wrote:
> I'd gladly review a preparatory patchset doing that. The usual strategy
> is: cleanup and refactoring first, new features later.

Is that also true for fixes? Because this patch-set actually tries to
fix an issue present in the current code rather than adding any new feature.

Thanks,

--
Jörg Rödel
[email protected]

SUSE Software Solutions Germany GmbH
Maxfeldstr. 5
90409 Nürnberg
Germany

(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev