2023-05-08 20:08:18

by Thomas Gleixner

Subject: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

From: Thomas Gleixner <[email protected]>

Make the primary thread tracking CPU mask based in preparation for simpler
handling of parallel bootup.

Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Michael Kelley <[email protected]>


---
arch/x86/include/asm/apic.h | 2 --
arch/x86/include/asm/topology.h | 19 +++++++++++++++----
arch/x86/kernel/apic/apic.c | 20 +++++++++-----------
arch/x86/kernel/smpboot.c | 12 +++---------
4 files changed, 27 insertions(+), 26 deletions(-)
---

--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -506,10 +506,8 @@ extern int default_check_phys_apicid_pre
#endif /* CONFIG_X86_LOCAL_APIC */

#ifdef CONFIG_SMP
-bool apic_id_is_primary_thread(unsigned int id);
void apic_smt_update(void);
#else
-static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
static inline void apic_smt_update(void) { }
#endif

--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -31,9 +31,9 @@
* CONFIG_NUMA.
*/
#include <linux/numa.h>
+#include <linux/cpumask.h>

#ifdef CONFIG_NUMA
-#include <linux/cpumask.h>

#include <asm/mpspec.h>
#include <asm/percpu.h>
@@ -139,9 +139,20 @@ static inline int topology_max_smt_threa
int topology_update_package_map(unsigned int apicid, unsigned int cpu);
int topology_update_die_map(unsigned int dieid, unsigned int cpu);
int topology_phys_to_logical_pkg(unsigned int pkg);
-bool topology_is_primary_thread(unsigned int cpu);
bool topology_smt_supported(void);
-#else
+
+extern struct cpumask __cpu_primary_thread_mask;
+#define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
+
+/**
+ * topology_is_primary_thread - Check whether CPU is the primary SMT thread
+ * @cpu: CPU to check
+ */
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
+}
+#else /* CONFIG_SMP */
#define topology_max_packages() (1)
static inline int
topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; }
@@ -152,7 +163,7 @@ static inline int topology_max_die_per_p
static inline int topology_max_smt_threads(void) { return 1; }
static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline bool topology_smt_supported(void) { return false; }
-#endif
+#endif /* !CONFIG_SMP */

static inline void arch_fix_phys_package_id(int num, u32 slot)
{
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2386,20 +2386,16 @@ bool arch_match_cpu_phys_id(int cpu, u64
}

#ifdef CONFIG_SMP
-/**
- * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @apicid: APIC ID to check
- */
-bool apic_id_is_primary_thread(unsigned int apicid)
+static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
- u32 mask;
-
- if (smp_num_siblings == 1)
- return true;
/* Isolate the SMT bit(s) in the APICID and check for 0 */
- mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
- return !(apicid & mask);
+ u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
+
+ if (smp_num_siblings == 1 || !(apicid & mask))
+ cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
+#else
+static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif

/*
@@ -2544,6 +2540,8 @@ int generic_processor_info(int apicid, i
set_cpu_present(cpu, true);
num_processors++;

+ cpu_mark_primary_thread(cpu, apicid);
+
return cpu;
}

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,6 +102,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map);
DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

+/* CPUs which are the primary SMT threads */
+struct cpumask __cpu_primary_thread_mask __read_mostly;
+
/* Representing CPUs for which sibling maps can be computed */
static cpumask_var_t cpu_sibling_setup_mask;

@@ -283,15 +286,6 @@ static void notrace start_secondary(void
}

/**
- * topology_is_primary_thread - Check whether CPU is the primary SMT thread
- * @cpu: CPU to check
- */
-bool topology_is_primary_thread(unsigned int cpu)
-{
- return apic_id_is_primary_thread(per_cpu(x86_cpu_to_apicid, cpu));
-}
-
-/**
* topology_smt_supported - Check whether SMT is supported by the CPUs
*/
bool topology_smt_supported(void)
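
As a worked example of the SMT mask arithmetic in cpu_mark_primary_thread()
above (a minimal sketch for illustration, not part of the patch):

    /*
     * smp_num_siblings == 2: fls(2) == 2, so mask == (1U << 1) - 1 == 0x1.
     * APIC IDs with bit 0 clear (0, 2, 4, ...) are primary threads; the odd
     * APIC IDs are their SMT siblings.
     *
     * smp_num_siblings == 4: fls(4) == 3, so mask == 0x3 and only APIC IDs
     * 0, 4, 8, ... end up in __cpu_primary_thread_mask.
     */
    u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
    bool primary = (smp_num_siblings == 1) || !(apicid & mask);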



2023-05-24 21:02:18

by Kirill A. Shutemov

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Mon, May 08, 2023 at 09:44:17PM +0200, Thomas Gleixner wrote:
> From: Thomas Gleixner <[email protected]>
>
> Make the primary thread tracking CPU mask based in preparation for simpler
> handling of parallel bootup.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Tested-by: Michael Kelley <[email protected]>
>
>
> ---
> arch/x86/include/asm/apic.h | 2 --
> arch/x86/include/asm/topology.h | 19 +++++++++++++++----
> arch/x86/kernel/apic/apic.c | 20 +++++++++-----------
> arch/x86/kernel/smpboot.c | 12 +++---------
> 4 files changed, 27 insertions(+), 26 deletions(-)
> ---
>

...

> @@ -2386,20 +2386,16 @@ bool arch_match_cpu_phys_id(int cpu, u64
> }
>
> #ifdef CONFIG_SMP
> -/**
> - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
> - * @apicid: APIC ID to check
> - */
> -bool apic_id_is_primary_thread(unsigned int apicid)
> +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
> {
> - u32 mask;
> -
> - if (smp_num_siblings == 1)
> - return true;
> /* Isolate the SMT bit(s) in the APICID and check for 0 */
> - mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
> - return !(apicid & mask);
> + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
> +
> + if (smp_num_siblings == 1 || !(apicid & mask))
> + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
> }
> +#else
> +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
> #endif
>
> /*

This patch causes boot regression on TDX guest. The guest crashes on SMP
bring up.

The change makes use of smp_num_siblings earlier than before: the mask gets
constructed in the acpi_boot_init() codepath. Later on smp_num_siblings gets
updated in detect_ht().

In my setup with 16 vCPUs, smp_num_siblings is 16 before detect_ht() and
set to 1 in detect_ht().

--
Kiryl Shutsemau / Kirill A. Shutemov
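
A minimal sketch of the arithmetic behind the report above, assuming the 16
vCPUs have consecutive APIC IDs 0..15:

    /*
     * With the premature smp_num_siblings == 16:
     *   fls(16) == 5, so mask == (1U << 4) - 1 == 0xf
     * Only APIC ID 0 has bits 0-3 clear, so only CPU0 lands in
     * __cpu_primary_thread_mask.
     *
     * With the corrected smp_num_siblings == 1 (set later in detect_ht()),
     * every CPU would be marked as a primary thread.
     */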

2023-05-26 10:40:25

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Wed, May 24 2023 at 23:48, Kirill A. Shutemov wrote:
> On Mon, May 08, 2023 at 09:44:17PM +0200, Thomas Gleixner wrote:
>> #ifdef CONFIG_SMP
>> -/**
>> - * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
>> - * @apicid: APIC ID to check
>> - */
>> -bool apic_id_is_primary_thread(unsigned int apicid)
>> +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
>> {
>> - u32 mask;
>> -
>> - if (smp_num_siblings == 1)
>> - return true;
>> /* Isolate the SMT bit(s) in the APICID and check for 0 */
>> - mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
>> - return !(apicid & mask);
>> + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1;
>> +
>> + if (smp_num_siblings == 1 || !(apicid & mask))
>> + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
>> }
>> +#else
>> +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
>> #endif
>>
>> /*
>
> This patch causes boot regression on TDX guest. The guest crashes on SMP
> bring up.

I rather call it a security feature: It makes TDX unbreakably secure.

> The change makes use of smp_num_siblings earlier than before: the mask gets
> constructed in the acpi_boot_init() codepath. Later on smp_num_siblings gets
> updated in detect_ht().
>
> In my setup with 16 vCPUs, smp_num_siblings is 16 before detect_ht() and
> set to 1 in detect_ht().

early_init_intel(c)
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);

acpi_boot_init()
....

identify_boot_cpu(c)
detect_ht(c);

Aaargh. That whole CPU identification code is a complete horrorshow.

I'll have a look....



2023-05-27 13:59:35

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Fri, May 26 2023 at 12:14, Thomas Gleixner wrote:
> On Wed, May 24 2023 at 23:48, Kirill A. Shutemov wrote:
>> This patch causes boot regression on TDX guest. The guest crashes on SMP
>> bring up.

The below should fix that. Sigh...

Thanks,

tglx
----
Subject: x86/smp: Initialize cpu_primary_thread_mask late
From: Thomas Gleixner <[email protected]>
Date: Fri, 26 May 2023 21:38:47 +0200

Marking primary threads in the cpumask during early boot is only correct in
certain configurations, but broken e.g. for the legacy hyperthreading
detection.

This is due to the complete mess in the CPUID evaluation code which
initializes smp_num_siblings only half during early init and fixes it up
later when identify_boot_cpu() is invoked.

So using smp_num_siblings before identify_boot_cpu() leads to incorrect
results.

Fixing the early CPU init code to provide the proper data is a larger scale
surgery as the code has dependencies on data structures which are not
initialized during early boot.

Move the initialization of cpu_primary_thread_mask which depends on
smp_num_siblings being correct to an early initcall so that it is set up
correctly before SMP bringup.

Fixes: f54d4434c281 ("x86/apic: Provide cpu_primary_thread mask")
Reported-by: "Kirill A. Shutemov" <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/kernel/apic/apic.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2398,6 +2398,21 @@ static void cpu_mark_primary_thread(unsi
if (smp_num_siblings == 1 || !(apicid & mask))
cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
+
+/*
+ * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid
+ * during early boot. Initialize the primary thread mask before SMP
+ * bringup.
+ */
+static int __init smp_init_primary_thread_mask(void)
+{
+ unsigned int cpu;
+
+ for (cpu = 0; cpu < nr_logical_cpuids; cpu++)
+ cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]);
+ return 0;
+}
+early_initcall(smp_init_primary_thread_mask);
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
@@ -2544,7 +2559,8 @@ int generic_processor_info(int apicid, i
set_cpu_present(cpu, true);
num_processors++;

- cpu_mark_primary_thread(cpu, apicid);
+ if (system_state != SYSTEM_BOOTING)
+ cpu_mark_primary_thread(cpu, apicid);

return cpu;
}
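
For context on why an early_initcall() is late enough here yet still ahead of
the secondary CPU bringup, a simplified sketch of the ordering in init/main.c
(general kernel knowledge, not part of the patch):

    /*
     * kernel_init_freeable()
     *   smp_prepare_cpus(setup_max_cpus);
     *   ...
     *   do_pre_smp_initcalls();  // runs early_initcall()s, i.e.
     *                            // smp_init_primary_thread_mask(), after
     *                            // identify_boot_cpu() has fixed up
     *                            // smp_num_siblings
     *   smp_init();              // secondary CPU bringup
     */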

2023-05-29 02:45:39

by Kirill A. Shutemov

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Sat, May 27, 2023 at 03:40:02PM +0200, Thomas Gleixner wrote:
> On Fri, May 26 2023 at 12:14, Thomas Gleixner wrote:
> > On Wed, May 24 2023 at 23:48, Kirill A. Shutemov wrote:
> >> This patch causes boot regression on TDX guest. The guest crashes on SMP
> >> bring up.
>
> The below should fix that. Sigh...

Okay, this fixes the boot for TDX guest:

Tested-by: Kirill A. Shutemov <[email protected]>

But it gets broken again on "x86/smpboot: Implement a bit spinlock to
protect the realmode stack" with

[ 0.554079] .... node #0, CPUs: #1 #2
[ 0.738071] Callback from call_rcu_tasks() invoked.
[ 10.562065] CPU2 failed to report alive state
[ 10.566337] #3
[ 20.570066] CPU3 failed to report alive state
[ 20.574268] #4
...

Notably CPU1 is missing from the "failed to report" list. So CPU1 takes the
lock fine, but seemingly never unlocks it.

Maybe trampoline_lock(%rip) in head_64.S somehow is not the same as
&tr_lock in trampoline_64.S. I donno.

I haven't found the root cause yet. But bypassing the locking in
LOAD_REALMODE_ESP makes the issue go away.

I will look more into it.

--
Kiryl Shutsemau / Kirill A. Shutemov

Subject: [tip: smp/core] x86/smp: Initialize cpu_primary_thread_mask late

The following commit has been merged into the smp/core branch of tip:

Commit-ID: 5da80b28bf25c3458c7beb23794ff53622ce7eb4
Gitweb: https://git.kernel.org/tip/5da80b28bf25c3458c7beb23794ff53622ce7eb4
Author: Thomas Gleixner <[email protected]>
AuthorDate: Fri, 26 May 2023 21:38:47 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 29 May 2023 21:31:23 +02:00

x86/smp: Initialize cpu_primary_thread_mask late

Marking primary threads in the cpumask during early boot is only correct in
certain configurations, but broken e.g. for the legacy hyperthreading
detection.

This is due to the complete mess in the CPUID evaluation code which
initializes smp_num_siblings only half during early init and fixes it up
later when identify_boot_cpu() is invoked.

So using smp_num_siblings before identify_boot_cpu() leads to incorrect
results.

Fixing the early CPU init code to provide the proper data is a larger scale
surgery as the code has dependencies on data structures which are not
initialized during early boot.

Move the initialization of cpu_primary_thread_mask which depends on
smp_num_siblings being correct to an early initcall so that it is set up
correctly before SMP bringup.

Fixes: f54d4434c281 ("x86/apic: Provide cpu_primary_thread mask")
Reported-by: "Kirill A. Shutemov" <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Kirill A. Shutemov <[email protected]>
Link: https://lore.kernel.org/r/87sfbhlwp9.ffs@tglx

---
arch/x86/kernel/apic/apic.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 209c505..af49e24 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2398,6 +2398,21 @@ static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
if (smp_num_siblings == 1 || !(apicid & mask))
cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
+
+/*
+ * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid
+ * during early boot. Initialize the primary thread mask before SMP
+ * bringup.
+ */
+static int __init smp_init_primary_thread_mask(void)
+{
+ unsigned int cpu;
+
+ for (cpu = 0; cpu < nr_logical_cpuids; cpu++)
+ cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]);
+ return 0;
+}
+early_initcall(smp_init_primary_thread_mask);
#else
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
@@ -2544,7 +2559,8 @@ int generic_processor_info(int apicid, int version)
set_cpu_present(cpu, true);
num_processors++;

- cpu_mark_primary_thread(cpu, apicid);
+ if (system_state != SYSTEM_BOOTING)
+ cpu_mark_primary_thread(cpu, apicid);

return cpu;
}

2023-05-29 19:49:16

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Mon, May 29 2023 at 05:39, Kirill A. Shutemov wrote:
> On Sat, May 27, 2023 at 03:40:02PM +0200, Thomas Gleixner wrote:
> But it gets broken again on "x86/smpboot: Implement a bit spinlock to
> protect the realmode stack" with
>
> [ 0.554079] .... node #0, CPUs: #1 #2
> [ 0.738071] Callback from call_rcu_tasks() invoked.
> [ 10.562065] CPU2 failed to report alive state
> [ 10.566337] #3
> [ 20.570066] CPU3 failed to report alive state
> [ 20.574268] #4
> ...
>
> Notably CPU1 is missing from the "failed to report" list. So CPU1 takes the
> lock fine, but seemingly never unlocks it.
>
> Maybe trampoline_lock(%rip) in head_64.S somehow is not the same as
> &tr_lock in trampoline_64.S. I donno.

It's definitely the same in the regular startup (16bit mode), but TDX
starts up via:

trampoline_start64
trampoline_compat
LOAD_REALMODE_ESP <- lock

That place cannot work with that LOAD_REALMODE_ESP macro. The untested
below should cure it.

Thanks,

tglx
---
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,12 +37,12 @@
.text
.code16

-.macro LOAD_REALMODE_ESP
+.macro LOAD_REALMODE_ESP lock:req
/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm\@:
- lock btsl $0, tr_lock
+ lock btsl $0, \lock
jnc 2f
pause
jmp .Llock_rm\@
@@ -63,7 +63,7 @@ SYM_CODE_START(trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOAD_REALMODE_ESP tr_lock

call verify_cpu # Verify the cpu supports long mode
testl %eax, %eax # Check for return code
@@ -106,7 +106,7 @@ SYM_CODE_START(sev_es_trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOAD_REALMODE_ESP tr_lock

jmp .Lswitch_to_protected
SYM_CODE_END(sev_es_trampoline_start)
@@ -189,7 +189,7 @@ SYM_CODE_START(pa_trampoline_compat)
* In compatibility mode. Prep ESP and DX for startup_32, then disable
* paging and complete the switch to legacy 32-bit mode.
*/
- LOAD_REALMODE_ESP
+ LOAD_REALMODE_ESP pa_tr_lock
movw $__KERNEL_DS, %dx

movl $(CR0_STATE & ~X86_CR0_PG), %eax


2023-05-29 20:40:44

by Kirill A. Shutemov

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Mon, May 29, 2023 at 09:27:13PM +0200, Thomas Gleixner wrote:
> On Mon, May 29 2023 at 05:39, Kirill A. Shutemov wrote:
> > On Sat, May 27, 2023 at 03:40:02PM +0200, Thomas Gleixner wrote:
> > But it gets broken again on "x86/smpboot: Implement a bit spinlock to
> > protect the realmode stack" with
> >
> > [ 0.554079] .... node #0, CPUs: #1 #2
> > [ 0.738071] Callback from call_rcu_tasks() invoked.
> > [ 10.562065] CPU2 failed to report alive state
> > [ 10.566337] #3
> > [ 20.570066] CPU3 failed to report alive state
> > [ 20.574268] #4
> > ...
> >
> > Notably CPU1 is missing from the "failed to report" list. So CPU1 takes the
> > lock fine, but seemingly never unlocks it.
> >
> > Maybe trampoline_lock(%rip) in head_64.S somehow is not the same as
> > &tr_lock in trampoline_64.S. I donno.
>
> It's definitely the same in the regular startup (16bit mode), but TDX
> starts up via:
>
> trampoline_start64
> trampoline_compat
> LOAD_REALMODE_ESP <- lock
>
> That place cannot work with that LOAD_REALMODE_ESP macro. The untested
> below should cure it.

Yep, works for me.

Aaand the next patch that breaks TDX boot is... <drum roll>

x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it

Disabling parallel bringup helps. I didn't look closer yet. If you have
an idea let me know.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 01:50:58

by Kirill A. Shutemov

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Mon, May 29, 2023 at 11:31:29PM +0300, Kirill A. Shutemov wrote:
> On Mon, May 29, 2023 at 09:27:13PM +0200, Thomas Gleixner wrote:
> > On Mon, May 29 2023 at 05:39, Kirill A. Shutemov wrote:
> > > On Sat, May 27, 2023 at 03:40:02PM +0200, Thomas Gleixner wrote:
> > > But it gets broken again on "x86/smpboot: Implement a bit spinlock to
> > > protect the realmode stack" with
> > >
> > > [ 0.554079] .... node #0, CPUs: #1 #2
> > > [ 0.738071] Callback from call_rcu_tasks() invoked.
> > > [ 10.562065] CPU2 failed to report alive state
> > > [ 10.566337] #3
> > > [ 20.570066] CPU3 failed to report alive state
> > > [ 20.574268] #4
> > > ...
> > >
> > > Notably CPU1 is missing from the "failed to report" list. So CPU1 takes the
> > > lock fine, but seemingly never unlocks it.
> > >
> > > Maybe trampoline_lock(%rip) in head_64.S somehow is not the same as
> > > &tr_lock in trampoline_64.S. I donno.
> >
> > It's definitely the same in the regular startup (16bit mode), but TDX
> > starts up via:
> >
> > trampoline_start64
> > trampoline_compat
> > LOAD_REALMODE_ESP <- lock
> >
> > That place cannot work with that LOAD_REALMODE_ESP macro. The untested
> > below should cure it.
>
> Yep, works for me.
>
> Aaand the next patch that breaks TDX boot is... <drum roll>
>
> x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
>
> Disabling parallel bringup helps. I didn't look closer yet. If you have
> an idea let me know.

Okay, it crashes around .Lread_apicid due to touching MSRs that trigger #VE.

Looks like the patch had no intention to enable parallel bringup on TDX.

+ * Intel-TDX has a secure RDMSR hypercall, but that needs to be
+ * implemented seperately in the low level startup ASM code.

But CC_ATTR_GUEST_STATE_ENCRYPT that used to filter it out is
a SEV-ES-specific thingy and doesn't cover TDX. I don't think we have an
attribute that fits nicely here.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 09:34:47

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Mon, May 29 2023 at 23:31, Kirill A. Shutemov wrote:
> Aaand the next patch that breaks TDX boot is... <drum roll>
>
> x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it
>
> Disabling parallel bringup helps. I didn't look closer yet. If you have
> an idea let me know.

So how does TDX end up with actual parallel bringup?

if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
pr_info("Parallel CPU startup disabled due to guest state encryption\n");
return false;
}

It should take that path, no?

Thanks,

tglx


2023-05-30 09:39:54

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Tue, May 30 2023 at 03:54, Kirill A. Shutemov wrote:
> On Mon, May 29, 2023 at 11:31:29PM +0300, Kirill A. Shutemov wrote:
>> Disabling parallel bringup helps. I didn't look closer yet. If you have
>> an idea let me know.
>
> Okay, it crashes around .Lread_apicid due to touching MSRs that trigger #VE.
>
> Looks like the patch had no intention to enable parallel bringup on TDX.
>
> + * Intel-TDX has a secure RDMSR hypercall, but that needs to be
> + * implemented seperately in the low level startup ASM code.
>
> But CC_ATTR_GUEST_STATE_ENCRYPT that used to filter it out is
> a SEV-ES-specific thingy and doesn't cover TDX. I don't think we have an
> attribute that fits nicely here.

Bah. That sucks.

2023-05-30 10:42:59

by Thomas Gleixner

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Tue, May 30 2023 at 11:26, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 03:54, Kirill A. Shutemov wrote:
>> On Mon, May 29, 2023 at 11:31:29PM +0300, Kirill A. Shutemov wrote:
>>> Disabling parallel bringup helps. I didn't look closer yet. If you have
>>> an idea let me know.
>>
>> Okay, it crashes around .Lread_apicid due to touching MSRs that trigger #VE.
>>
>> Looks like the patch had no intention to enable parallel bringup on TDX.
>>
>> + * Intel-TDX has a secure RDMSR hypercall, but that needs to be
>> + * implemented seperately in the low level startup ASM code.
>>
>> But CC_ATTR_GUEST_STATE_ENCRYPT that used to filter it out is
> >> a SEV-ES-specific thingy and doesn't cover TDX. I don't think we have an
>> attribute that fits nicely here.
>
> Bah. That sucks.

Can we have something consistent in this CC space or does everything need to
be extra magic per CC variant?

2023-05-30 11:11:42

by Thomas Gleixner

Subject: [patch] x86/realmode: Make stack lock work in trampoline_compat()

The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to
work when invoked from the 64bit trampoline entry point:

trampoline_start64
trampoline_compat
LOAD_REALMODE_ESP <- lock

Accessing tr_lock is only possible from 16bit mode. For the compat entry
point this needs to be pa_tr_lock so that the required relocation entry is
generated. Otherwise it locks the non-relocated address which, aside from
being wrong, is never cleared in secondary_startup_64(), causing all but the
first CPU to get stuck on the lock.

Make the macro take an argument lock_pa which defaults to 0 and rename it
to LOCK_AND_LOAD_REALMODE_ESP to make it clear what this is about.

Fixes: f6f1ae9128d2 ("x86/smpboot: Implement a bit spinlock to protect the realmode stack")
Reported-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/realmode/rm/trampoline_64.S | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,12 +37,16 @@
.text
.code16

-.macro LOAD_REALMODE_ESP
+.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0
/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm\@:
+ .if \lock_pa
+ lock btsl $0, pa_tr_lock
+ .else
lock btsl $0, tr_lock
+ .endif
jnc 2f
pause
jmp .Llock_rm\@
@@ -63,7 +67,7 @@ SYM_CODE_START(trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP

call verify_cpu # Verify the cpu supports long mode
testl %eax, %eax # Check for return code
@@ -106,7 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP

jmp .Lswitch_to_protected
SYM_CODE_END(sev_es_trampoline_start)
@@ -189,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat)
* In compatibility mode. Prep ESP and DX for startup_32, then disable
* paging and complete the switch to legacy 32-bit mode.
*/
- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP lock_pa=1
movw $__KERNEL_DS, %dx

movl $(CR0_STATE & ~X86_CR0_PG), %eax

2023-05-30 11:44:37

by Kirill A. Shutemov

Subject: Re: [patch] x86/realmode: Make stack lock work in trampoline_compat()

On Tue, May 30, 2023 at 12:46:22PM +0200, Thomas Gleixner wrote:
> The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to
> work when invoked from the 64bit trampoline entry point:
>
> trampoline_start64
> trampoline_compat
> LOAD_REALMODE_ESP <- lock
>
> Accessing tr_lock is only possible from 16bit mode. For the compat entry
> point this needs to be pa_tr_lock so that the required relocation entry is
> generated. Otherwise it locks the non-relocated address which, aside from
> being wrong, is never cleared in secondary_startup_64(), causing all but the
> first CPU to get stuck on the lock.
>
> Make the macro take an argument lock_pa which defaults to 0 and rename it
> to LOCK_AND_LOAD_REALMODE_ESP to make it clear what this is about.
>
> Fixes: f6f1ae9128d2 ("x86/smpboot: Implement a bit spinlock to protect the realmode stack")
> Reported-by: Kirill A. Shutemov <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>

Tested-by: Kirill A. Shutemov <[email protected]>

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 11:59:51

by Kirill A. Shutemov

Subject: Re: [patch v3 31/36] x86/apic: Provide cpu_primary_thread mask

On Tue, May 30, 2023 at 12:34:45PM +0200, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 11:26, Thomas Gleixner wrote:
> > On Tue, May 30 2023 at 03:54, Kirill A. Shutemov wrote:
> >> On Mon, May 29, 2023 at 11:31:29PM +0300, Kirill A. Shutemov wrote:
> >>> Disabling parallel bringup helps. I didn't look closer yet. If you have
> >>> an idea let me know.
> >>
> >> Okay, it crashes around .Lread_apicid due to touching MSRs that trigger #VE.
> >>
> >> Looks like the patch had no intention to enable parallel bringup on TDX.
> >>
> >> + * Intel-TDX has a secure RDMSR hypercall, but that needs to be
> >> + * implemented seperately in the low level startup ASM code.
> >>
> >> But CC_ATTR_GUEST_STATE_ENCRYPT that used to filter it out is
> >> a SEV-ES-specific thingy and doesn't cover TDX. I don't think we have an
> >> attribute that fits nicely here.
> >
> > Bah. That sucks.
>
> Can we have something consistent in this CC space or does everything need to
> be extra magic per CC variant?

IIUC, CC_ATTR_GUEST_MEM_ENCRYPT should cover all AMD SEV flavours and
Intel TDX. But the name is confusing in this context: memory encryption
has nothing to do with the APIC.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 12:15:12

by Thomas Gleixner

Subject: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

The decision to allow parallel bringup of secondary CPUs checks
CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
parallel bootup because accessing the local APIC is intercepted and raises
a #VC or #VE, which cannot be handled at that point.

The check works correctly, but only for AMD encrypted guests. TDX does not
set that flag.

Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
definitely works for both AMD and Intel.

Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it")
Reported-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/kernel/smpboot.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1282,7 +1282,7 @@ bool __init arch_cpuhp_init_parallel_bri
* Intel-TDX has a secure RDMSR hypercall, but that needs to be
* implemented seperately in the low level startup ASM code.
*/
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
+ if (cc_get_vendor() != CC_VENDOR_NONE) {
pr_info("Parallel CPU startup disabled due to guest state encryption\n");
return false;
}

Subject: [tip: smp/core] x86/realmode: Make stack lock work in trampoline_compat()

The following commit has been merged into the smp/core branch of tip:

Commit-ID: 33e20b07bec4991c169e3c6ff28c2126583724fc
Gitweb: https://git.kernel.org/tip/33e20b07bec4991c169e3c6ff28c2126583724fc
Author: Thomas Gleixner <[email protected]>
AuthorDate: Tue, 30 May 2023 12:46:22 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 30 May 2023 14:11:47 +02:00

x86/realmode: Make stack lock work in trampoline_compat()

The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to
work when invoked from the 64bit trampoline entry point:

trampoline_start64
trampoline_compat
LOAD_REALMODE_ESP <- lock

Accessing tr_lock is only possible from 16bit mode. For the compat entry
point this needs to be pa_tr_lock so that the required relocation entry is
generated. Otherwise it locks the non-relocated address which, aside from
being wrong, is never cleared in secondary_startup_64(), causing all but the
first CPU to get stuck on the lock.

Make the macro take an argument lock_pa which defaults to 0 and rename it
to LOCK_AND_LOAD_REALMODE_ESP to make it clear what this is about.

Fixes: f6f1ae9128d2 ("x86/smpboot: Implement a bit spinlock to protect the realmode stack")
Reported-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Kirill A. Shutemov <[email protected]>
Link: https://lore.kernel.org/r/87h6rujdvl.ffs@tglx
---
arch/x86/realmode/rm/trampoline_64.S | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 4822ad2..c9f76fa 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -37,12 +37,16 @@
.text
.code16

-.macro LOAD_REALMODE_ESP
+.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0
/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm\@:
+ .if \lock_pa
+ lock btsl $0, pa_tr_lock
+ .else
lock btsl $0, tr_lock
+ .endif
jnc 2f
pause
jmp .Llock_rm\@
@@ -63,7 +67,7 @@ SYM_CODE_START(trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP

call verify_cpu # Verify the cpu supports long mode
testl %eax, %eax # Check for return code
@@ -106,7 +110,7 @@ SYM_CODE_START(sev_es_trampoline_start)
mov %ax, %es
mov %ax, %ss

- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP

jmp .Lswitch_to_protected
SYM_CODE_END(sev_es_trampoline_start)
@@ -189,7 +193,7 @@ SYM_CODE_START(pa_trampoline_compat)
* In compatibility mode. Prep ESP and DX for startup_32, then disable
* paging and complete the switch to legacy 32-bit mode.
*/
- LOAD_REALMODE_ESP
+ LOCK_AND_LOAD_REALMODE_ESP lock_pa=1
movw $__KERNEL_DS, %dx

movl $(CR0_STATE & ~X86_CR0_PG), %eax

2023-05-30 12:32:28

by Kirill A. Shutemov

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
> The decision to allow parallel bringup of secondary CPUs checks
> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> parallel bootup because accessing the local APIC is intercepted and raises
> a #VC or #VE, which cannot be handled at that point.
>
> The check works correctly, but only for AMD encrypted guests. TDX does not
> set that flag.
>
> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
> definitely works for both AMD and Intel.

It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
we want it.

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 16:24:37

by Thomas Gleixner

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
> On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
>> The decision to allow parallel bringup of secondary CPUs checks
>> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
>> parallel bootup because accessing the local APIC is intercepted and raises
>> a #VC or #VE, which cannot be handled at that point.
>>
>> The check works correctly, but only for AMD encrypted guests. TDX does not
>> set that flag.
>>
>> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
>> definitely works for both AMD and Intel.
>
> It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
> report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
> we want it.

Right. Did not think about that.

But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
SEV-ES traps RDMSR if I'm understanding that maze correctly.

Thanks,

tglx

2023-05-30 17:12:03

by Kirill A. Shutemov

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30, 2023 at 06:00:46PM +0200, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
> > On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
> >> The decision to allow parallel bringup of secondary CPUs checks
> >> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> >> parallel bootup because accessing the local APIC is intercepted and raises
> >> a #VC or #VE, which cannot be handled at that point.
> >>
> >> The check works correctly, but only for AMD encrypted guests. TDX does not
> >> set that flag.
> >>
> >> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
> >> definitely works for both AMD and Intel.
> >
> > It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
> > report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
> > we want it.
>
> Right. Did not think about that.
>
> But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
> SEV-ES traps RDMSR if I'm understanding that maze correctly.

I don't know the difference between SEV flavours that well.

I see that on SEV-SNP, access to the x2APIC MSR range (MSR 0x800-0x8FF) is
intercepted regardless of whether the MSR_AMD64_SNP_ALT_INJ feature is
present. But I'm not sure what the state is on SEV or SEV-ES.

Tom?

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-30 17:14:09

by Sean Christopherson

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30, 2023, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
> > On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
> >> The decision to allow parallel bringup of secondary CPUs checks
> >> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> >> parallel bootup because accessing the local APIC is intercepted and raises
> >> a #VC or #VE, which cannot be handled at that point.
> >>
> >> The check works correctly, but only for AMD encrypted guests. TDX does not
> >> set that flag.
> >>
> >> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
> >> definitely works for both AMD and Intel.
> >
> > It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
> > report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
> > we want it.
>
> Right. Did not think about that.
>
> But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
> SEV-ES traps RDMSR if I'm understanding that maze correctly.

Ya, regular SEV doesn't encrypt register state.

2023-05-30 17:35:54

by Sean Christopherson

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30, 2023, Kirill A. Shutemov wrote:
> On Tue, May 30, 2023 at 06:00:46PM +0200, Thomas Gleixner wrote:
> > On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
> > > On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
> > >> The decision to allow parallel bringup of secondary CPUs checks
> > >> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> > >> parallel bootup because accessing the local APIC is intercepted and raises
> > >> a #VC or #VE, which cannot be handled at that point.
> > >>
> > >> The check works correctly, but only for AMD encrypted guests. TDX does not
> > >> set that flag.
> > >>
> > >> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
> > >> definitely works for both AMD and Intel.
> > >
> > > It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
> > > report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
> > > we want it.
> >
> > Right. Did not think about that.
> >
> > But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
> > SEV-ES traps RDMSR if I'm understanding that maze correctly.
>
> I don't know the difference between SEV flavours that well.
>
> I see that on SEV-SNP, access to the x2APIC MSR range (MSR 0x800-0x8FF) is
> intercepted regardless of whether the MSR_AMD64_SNP_ALT_INJ feature is
> present. But I'm not sure what the state is on SEV or SEV-ES.

With SEV-ES, if the hypervisor intercepts an MSR access, the VM-Exit is instead
morphed to a #VC (except for EFER). The guest needs to do an explicit VMGEXIT
(i.e. a hypercall) to explicitly request MSR emulation (this *can* be done in the
#VC handler, but the guest can also do VMGEXIT directly, e.g. in lieu of a RDMSR).

With regular SEV, VM-Exits aren't reflected into the guest. Register state isn't
encrypted so the hypervisor can emulate MSR accesses (and other instructions)
without needing an explicit hypercall from the guest.
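
A minimal sketch of the two flows described above, in pseudo-C comments (the
actual GHCB field names and exit codes are omitted; this is not the kernel's
real #VC handler):

    /*
     * SEV-ES guest, intercepted RDMSR:
     *   rdmsr                 -> #VC raised inside the guest
     *   #VC handler (or the guest directly, via VMGEXIT):
     *     write the MSR number into the shared GHCB page
     *     VMGEXIT             -> exit to the hypervisor
     *     hypervisor emulates the access and puts the result in the GHCB
     *     guest copies the result back into its registers
     *
     * Plain SEV guest, intercepted RDMSR:
     *   rdmsr                 -> ordinary VM-Exit
     *   hypervisor reads/writes the unencrypted register state directly;
     *   no guest-side handler or hypercall is involved
     */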

2023-05-30 20:06:02

by Thomas Gleixner

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30 2023 at 09:56, Sean Christopherson wrote:
> On Tue, May 30, 2023, Thomas Gleixner wrote:
>> On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
>> > On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
>> >> The decision to allow parallel bringup of secondary CPUs checks
>> >> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
>> >> parallel bootup because accessing the local APIC is intercepted and raises
>> >> a #VC or #VE, which cannot be handled at that point.
>> >>
>> >> The check works correctly, but only for AMD encrypted guests. TDX does not
>> >> set that flag.
>> >>
>> >> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
>> >> definitely works for both AMD and Intel.
>> >
>> > It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
>> > report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
>> > we want it.
>>
>> Right. Did not think about that.
>>
>> But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
>> SEV-ES traps RDMSR if I'm understanding that maze correctly.
>
> Ya, regular SEV doesn't encrypt register state.

That aside. From a semantical POV making this decision about parallel
bootup based on some magic CC encryption attribute is questionable.

I'm tending to just do the below and make this CC agnostic (except that
I couldn't find the right spot for SEV-ES to clear that flag.)

Thanks,

tglx
---
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -871,5 +871,7 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;

+ x86_cpuinit.parallel_bringup = false;
+
pr_info("Guest detected\n");
}
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -2,6 +2,7 @@
#ifndef _ASM_X86_PLATFORM_H
#define _ASM_X86_PLATFORM_H

+#include <linux/bits.h>
#include <asm/bootparam.h>

struct ghcb;
@@ -177,11 +178,14 @@ struct x86_init_ops {
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
+ * @parallel_bringup: Parallel bringup control
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
};

struct timespec64;
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1287,6 +1287,11 @@ bool __init arch_cpuhp_init_parallel_bri
return false;
}

+ if (!x86_cpuinit.parallel_bringup) {
+ pr_info("Parallel CPU startup disabled by the platform\n");
+ return false;
+ }
+
smpboot_control = STARTUP_READ_APICID;
pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control);
return true;
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata
struct x86_cpuinit_ops x86_cpuinit = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
+ .parallel_bringup = true,
};

static void default_nmi_init(void) { };

2023-05-30 20:09:39

by Tom Lendacky

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On 5/30/23 14:51, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 09:56, Sean Christopherson wrote:
>> On Tue, May 30, 2023, Thomas Gleixner wrote:
>>> On Tue, May 30 2023 at 15:29, Kirill A. Shutemov wrote:
>>>> On Tue, May 30, 2023 at 02:09:17PM +0200, Thomas Gleixner wrote:
>>>>> The decision to allow parallel bringup of secondary CPUs checks
>>>>> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
>>>>> parallel bootup because accessing the local APIC is intercepted and raises
>>>>> a #VC or #VE, which cannot be handled at that point.
>>>>>
>>>>> The check works correctly, but only for AMD encrypted guests. TDX does not
>>>>> set that flag.
>>>>>
>>>>> Check for cc_vendor != CC_VENDOR_NONE instead. That might be overbroad, but
>>>>> definitely works for both AMD and Intel.
>>>>
>>>> It boots fine with TDX, but I think it is wrong. cc_get_vendor() will
>>>> report CC_VENDOR_AMD even on bare metal if SME is enabled. I don't think
>>>> we want it.
>>>
>>> Right. Did not think about that.
>>>
>>> But the same way is CC_ATTR_GUEST_MEM_ENCRYPT overbroad for AMD. Only
>>> SEV-ES traps RDMSR if I'm understanding that maze correctly.
>>
>> Ya, regular SEV doesn't encrypt register state.
>
> That aside. From a semantical POV making this decision about parallel
> bootup based on some magic CC encryption attribute is questionable.
>
> I'm tending to just do the below and make this CC agnostic (except that
> I couldn't find the right spot for SEV-ES to clear that flag.)

Maybe in sme_sev_setup_real_mode() in arch/x86/realmode/init.c? You could
clear the flag within the CC_ATTR_GUEST_STATE_ENCRYPT check.

Thanks,
Tom

>
> Thanks,
>
> tglx
> ---
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -871,5 +871,7 @@ void __init tdx_early_init(void)
> x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
> x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
>
> + x86_cpuinit.parallel_bringup = false;
> +
> pr_info("Guest detected\n");
> }
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -2,6 +2,7 @@
> #ifndef _ASM_X86_PLATFORM_H
> #define _ASM_X86_PLATFORM_H
>
> +#include <linux/bits.h>
> #include <asm/bootparam.h>
>
> struct ghcb;
> @@ -177,11 +178,14 @@ struct x86_init_ops {
> * struct x86_cpuinit_ops - platform specific cpu hotplug setups
> * @setup_percpu_clockev: set up the per cpu clock event device
> * @early_percpu_clock_init: early init of the per cpu clock event device
> + * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
> + * @parallel_bringup: Parallel bringup control
> */
> struct x86_cpuinit_ops {
> void (*setup_percpu_clockev)(void);
> void (*early_percpu_clock_init)(void);
> void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
> + bool parallel_bringup;
> };
>
> struct timespec64;
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1287,6 +1287,11 @@ bool __init arch_cpuhp_init_parallel_bri
> return false;
> }
>
> + if (!x86_cpuinit.parallel_bringup) {
> + pr_info("Parallel CPU startup disabled by the platform\n");
> + return false;
> + }
> +
> smpboot_control = STARTUP_READ_APICID;
> pr_debug("Parallel CPU startup enabled: 0x%08x\n", smpboot_control);
> return true;
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata
> struct x86_cpuinit_ops x86_cpuinit = {
> .early_percpu_clock_init = x86_init_noop,
> .setup_percpu_clockev = setup_secondary_APIC_clock,
> + .parallel_bringup = true,
> };
>
> static void default_nmi_init(void) { };

2023-05-30 20:52:36

by Thomas Gleixner

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On Tue, May 30 2023 at 15:03, Tom Lendacky wrote:
> On 5/30/23 14:51, Thomas Gleixner wrote:
>> That aside. From a semantical POV making this decision about parallel
>> bootup based on some magic CC encryption attribute is questionable.
>>
>> I'm tending to just do the below and make this CC agnostic (except that
>> I couldn't find the right spot for SEV-ES to clear that flag.)
>
> Maybe in sme_sev_setup_real_mode() in arch/x86/realmode/init.c? You could
> clear the flag within the CC_ATTR_GUEST_STATE_ENCRYPT check.

Eeew.

Can we please have an AMD SEV-ES init specific place and not hijack some
random code which has to check CC_ATTR_GUEST_STATE_ENCRYPT?

Thanks,

tglx

2023-05-30 21:18:16

by Tom Lendacky

Subject: Re: [patch] x86/smpboot: Disable parallel bootup if cc_vendor != NONE

On 5/30/23 15:39, Thomas Gleixner wrote:
> On Tue, May 30 2023 at 15:03, Tom Lendacky wrote:
>> On 5/30/23 14:51, Thomas Gleixner wrote:
>>> That aside. From a semantical POV making this decision about parallel
>>> bootup based on some magic CC encryption attribute is questionable.
>>>
>>> I'm tending to just do the below and make this CC agnostic (except that
>>> I couldn't find the right spot for SEV-ES to clear that flag.)
>>
>> Maybe in sme_sev_setup_real_mode() in arch/x86/realmode/init.c? You could
>> clear the flag within the CC_ATTR_GUEST_STATE_ENCRYPT check.
>
> Eeew.
>
> Can we please have an AMD SEV-ES init specific place and not hijack some
> random code which has to check CC_ATTR_GUEST_STATE_ENCRYPT?

As long as it's not too early, you could try sme_early_init() in
arch/x86/mm/mem_encrypt_amd.c. Add a check for sev_status &
MSR_AMD64_SEV_ES_ENABLED and clear the flag.

Thanks,
Tom

>
> Thanks,
>
> tglx

2023-05-31 08:24:47

by Thomas Gleixner

Subject: [patch] x86/smpboot: Fix the parallel bringup decision

The decision to allow parallel bringup of secondary CPUs checks
CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
parallel bootup because accessing the local APIC is intercepted and raises
a #VC or #VE, which cannot be handled at that point.

The check works correctly, but only for AMD encrypted guests. TDX does not
set that flag.

As there is no real connection between CC attributes and the inability to
support parallel bringup, replace this with a generic control flag in
x86_cpuinit and let SEV-ES and TDX init code disable it.

Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it")
Reported-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/coco/tdx/tdx.c | 11 +++++++++++
arch/x86/include/asm/x86_init.h | 3 +++
arch/x86/kernel/smpboot.c | 19 ++-----------------
arch/x86/kernel/x86_init.c | 1 +
arch/x86/mm/mem_encrypt_amd.c | 15 +++++++++++++++
5 files changed, 32 insertions(+), 17 deletions(-)

--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -871,5 +871,16 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;

+ /*
+ * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+ * bringup low level code. That raises #VE which cannot be handled
+ * there.
+ *
+ * Intel-TDX has a secure RDMSR hypercall, but that needs to be
+ * implemented seperately in the low level startup ASM code.
+ * Until that is in place, disable parallel bringup for TDX.
+ */
+ x86_cpuinit.parallel_bringup = false;
+
pr_info("Guest detected\n");
}
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -177,11 +177,14 @@ struct x86_init_ops {
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
+ * @parallel_bringup: Parallel bringup control
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
};

struct timespec64;
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1267,23 +1267,8 @@ void __init smp_prepare_cpus_common(void
/* Establish whether parallel bringup can be supported. */
bool __init arch_cpuhp_init_parallel_bringup(void)
{
- /*
- * Encrypted guests require special handling. They enforce X2APIC
- * mode but the RDMSR to read the APIC ID is intercepted and raises
- * #VC or #VE which cannot be handled in the early startup code.
- *
- * AMD-SEV does not provide a RDMSR GHCB protocol so the early
- * startup code cannot directly communicate with the secure
- * firmware. The alternative solution to retrieve the APIC ID via
- * CPUID(0xb), which is covered by the GHCB protocol, is not viable
- * either because there is no enforcement of the CPUID(0xb)
- * provided "initial" APIC ID to be the same as the real APIC ID.
- *
- * Intel-TDX has a secure RDMSR hypercall, but that needs to be
- * implemented seperately in the low level startup ASM code.
- */
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
- pr_info("Parallel CPU startup disabled due to guest state encryption\n");
+ if (!x86_cpuinit.parallel_bringup) {
+ pr_info("Parallel CPU startup disabled by the platform\n");
return false;
}

--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata
struct x86_cpuinit_ops x86_cpuinit = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
+ .parallel_bringup = true,
};

static void default_nmi_init(void) { };
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -501,6 +501,21 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+
+ /*
+ * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
+ * parallel bringup low level code. That raises #VC which cannot be
+ * handled there.
+ * It does not provide a RDMSR GHCB protocol so the early startup
+ * code cannot directly communicate with the secure firmware. The
+ * alternative solution to retrieve the APIC ID via CPUID(0xb),
+ * which is covered by the GHCB protocol, is not viable either
+ * because there is no enforcement of the CPUID(0xb) provided
+ * "initial" APIC ID to be the same as the real APIC ID.
+ * Disable parallel bootup.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ x86_cpuinit.parallel_bringup = false;
}

void __init mem_encrypt_free_decrypted_mem(void)
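
With this flag in place, opting out of parallel bringup becomes a one-line
platform decision. A hypothetical example for some other guest platform's
early init code (the function name below is made up for illustration):

    void __init hypothetical_guest_early_init(void)
    {
    	/*
    	 * This platform cannot read the APIC ID from the low level
    	 * startup code either, so fall back to serialized bringup.
    	 */
    	x86_cpuinit.parallel_bringup = false;
    }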

2023-05-31 11:34:48

by Kirill A. Shutemov

Subject: Re: [patch] x86/smpboot: Fix the parallel bringup decision

On Wed, May 31, 2023 at 09:44:26AM +0200, Thomas Gleixner wrote:
> The decision to allow parallel bringup of secondary CPUs checks
> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> parallel bootup because accessing the local APIC is intercepted and raises
> a #VC or #VE, which cannot be handled at that point.
>
> The check works correctly, but only for AMD encrypted guests. TDX does not
> set that flag.
>
> As there is no real connection between CC attributes and the inability to
> support parallel bringup, replace this with a generic control flag in
> x86_cpuinit and let SEV-ES and TDX init code disable it.
>
> Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it")
> Reported-by: Kirill A. Shutemov <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>

Tested-by: Kirill A. Shutemov <[email protected]>

--
Kiryl Shutsemau / Kirill A. Shutemov

2023-05-31 14:49:30

by Tom Lendacky

Subject: Re: [patch] x86/smpboot: Fix the parallel bringup decision

On 5/31/23 02:44, Thomas Gleixner wrote:
> The decision to allow parallel bringup of secondary CPUs checks
> CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
> parallel bootup because accessing the local APIC is intercepted and raises
> a #VC or #VE, which cannot be handled at that point.
>
> The check works correctly, but only for AMD encrypted guests. TDX does not
> set that flag.
>
> As there is no real connection between CC attributes and the inability to
> support parallel bringup, replace this with a generic control flag in
> x86_cpuinit and let SEV-ES and TDX init code disable it.
>
> Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it")
> Reported-by: Kirill A. Shutemov <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>

Still works for SEV-ES/SEV-SNP with parallel boot properly disabled.

Tested-by: Tom Lendacky <[email protected]>

> ---
> arch/x86/coco/tdx/tdx.c | 11 +++++++++++
> arch/x86/include/asm/x86_init.h | 3 +++
> arch/x86/kernel/smpboot.c | 19 ++-----------------
> arch/x86/kernel/x86_init.c | 1 +
> arch/x86/mm/mem_encrypt_amd.c | 15 +++++++++++++++
> 5 files changed, 32 insertions(+), 17 deletions(-)
>
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -871,5 +871,16 @@ void __init tdx_early_init(void)
> x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
> x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;
>
> + /*
> + * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
> + * bringup low level code. That raises #VE which cannot be handled
> + * there.
> + *
> + * Intel-TDX has a secure RDMSR hypercall, but that needs to be
> + * implemented seperately in the low level startup ASM code.
> + * Until that is in place, disable parallel bringup for TDX.
> + */
> + x86_cpuinit.parallel_bringup = false;
> +
> pr_info("Guest detected\n");
> }
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -177,11 +177,14 @@ struct x86_init_ops {
> * struct x86_cpuinit_ops - platform specific cpu hotplug setups
> * @setup_percpu_clockev: set up the per cpu clock event device
> * @early_percpu_clock_init: early init of the per cpu clock event device
> + * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
> + * @parallel_bringup: Parallel bringup control
> */
> struct x86_cpuinit_ops {
> void (*setup_percpu_clockev)(void);
> void (*early_percpu_clock_init)(void);
> void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
> + bool parallel_bringup;
> };
>
> struct timespec64;
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1267,23 +1267,8 @@ void __init smp_prepare_cpus_common(void
> /* Establish whether parallel bringup can be supported. */
> bool __init arch_cpuhp_init_parallel_bringup(void)
> {
> - /*
> - * Encrypted guests require special handling. They enforce X2APIC
> - * mode but the RDMSR to read the APIC ID is intercepted and raises
> - * #VC or #VE which cannot be handled in the early startup code.
> - *
> - * AMD-SEV does not provide a RDMSR GHCB protocol so the early
> - * startup code cannot directly communicate with the secure
> - * firmware. The alternative solution to retrieve the APIC ID via
> - * CPUID(0xb), which is covered by the GHCB protocol, is not viable
> - * either because there is no enforcement of the CPUID(0xb)
> - * provided "initial" APIC ID to be the same as the real APIC ID.
> - *
> - * Intel-TDX has a secure RDMSR hypercall, but that needs to be
> - * implemented separately in the low level startup ASM code.
> - */
> - if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
> - pr_info("Parallel CPU startup disabled due to guest state encryption\n");
> + if (!x86_cpuinit.parallel_bringup) {
> + pr_info("Parallel CPU startup disabled by the platform\n");
> return false;
> }
>
> --- a/arch/x86/kernel/x86_init.c
> +++ b/arch/x86/kernel/x86_init.c
> @@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata
> struct x86_cpuinit_ops x86_cpuinit = {
> .early_percpu_clock_init = x86_init_noop,
> .setup_percpu_clockev = setup_secondary_APIC_clock,
> + .parallel_bringup = true,
> };
>
> static void default_nmi_init(void) { };
> --- a/arch/x86/mm/mem_encrypt_amd.c
> +++ b/arch/x86/mm/mem_encrypt_amd.c
> @@ -501,6 +501,21 @@ void __init sme_early_init(void)
> x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
> x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
> x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
> +
> + /*
> + * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
> + * parallel bringup low level code. That raises #VC which cannot be
> + * handled there.
> + * It does not provide a RDMSR GHCB protocol so the early startup
> + * code cannot directly communicate with the secure firmware. The
> + * alternative solution to retrieve the APIC ID via CPUID(0xb),
> + * which is covered by the GHCB protocol, is not viable either
> + * because there is no enforcement of the CPUID(0xb) provided
> + * "initial" APIC ID to be the same as the real APIC ID.
> + * Disable parallel bootup.
> + */
> + if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
> + x86_cpuinit.parallel_bringup = false;
> }
>
> void __init mem_encrypt_free_decrypted_mem(void)

Subject: [tip: smp/core] x86/smpboot: Fix the parallel bringup decision

The following commit has been merged into the smp/core branch of tip:

Commit-ID: ff3cfcb0d46adc541283a507560f88b7d7114dbe
Gitweb: https://git.kernel.org/tip/ff3cfcb0d46adc541283a507560f88b7d7114dbe
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 31 May 2023 09:44:26 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Wed, 31 May 2023 16:49:34 +02:00

x86/smpboot: Fix the parallel bringup decision

The decision to allow parallel bringup of secondary CPUs checks
CC_ATTR_GUEST_STATE_ENCRYPT to detect encrypted guests. Those cannot use
parallel bootup because accessing the local APIC is intercepted and raises
a #VC or #VE, which cannot be handled at that point.

The check works correctly, but only for AMD encrypted guests. TDX does not
set that flag.

As there is no real connection between CC attributes and the inability to
support parallel bringup, replace this with a generic control flag in
x86_cpuinit and let SEV-ES and TDX init code disable it.

Fixes: 0c7ffa32dbd6 ("x86/smpboot/64: Implement arch_cpuhp_init_parallel_bringup() and enable it")
Reported-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Kirill A. Shutemov <[email protected]>
Link: https://lore.kernel.org/r/87ilc9gd2d.ffs@tglx

---
arch/x86/coco/tdx/tdx.c | 11 +++++++++++
arch/x86/include/asm/x86_init.h | 3 +++
arch/x86/kernel/smpboot.c | 19 ++-----------------
arch/x86/kernel/x86_init.c | 1 +
arch/x86/mm/mem_encrypt_amd.c | 15 +++++++++++++++
5 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index e146b59..27ce10c 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -871,5 +871,16 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;

+ /*
+ * TDX intercepts the RDMSR to read the X2APIC ID in the parallel
+ * bringup low level code. That raises #VE which cannot be handled
+ * there.
+ *
+ * Intel-TDX has a secure RDMSR hypercall, but that needs to be
+ * implemented separately in the low level startup ASM code.
+ * Until that is in place, disable parallel bringup for TDX.
+ */
+ x86_cpuinit.parallel_bringup = false;
+
pr_info("Guest detected\n");
}
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 88085f3..0bf4d73 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -177,11 +177,14 @@ struct x86_init_ops {
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
* @early_percpu_clock_init: early init of the per cpu clock event device
+ * @fixup_cpu_id: fixup function for cpuinfo_x86::phys_proc_id
+ * @parallel_bringup: Parallel bringup control
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
+ bool parallel_bringup;
};

struct timespec64;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 660709e..aaa876c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1267,23 +1267,8 @@ void __init smp_prepare_cpus_common(void)
/* Establish whether parallel bringup can be supported. */
bool __init arch_cpuhp_init_parallel_bringup(void)
{
- /*
- * Encrypted guests require special handling. They enforce X2APIC
- * mode but the RDMSR to read the APIC ID is intercepted and raises
- * #VC or #VE which cannot be handled in the early startup code.
- *
- * AMD-SEV does not provide a RDMSR GHCB protocol so the early
- * startup code cannot directly communicate with the secure
- * firmware. The alternative solution to retrieve the APIC ID via
- * CPUID(0xb), which is covered by the GHCB protocol, is not viable
- * either because there is no enforcement of the CPUID(0xb)
- * provided "initial" APIC ID to be the same as the real APIC ID.
- *
- * Intel-TDX has a secure RDMSR hypercall, but that needs to be
- * implemented separately in the low level startup ASM code.
- */
- if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
- pr_info("Parallel CPU startup disabled due to guest state encryption\n");
+ if (!x86_cpuinit.parallel_bringup) {
+ pr_info("Parallel CPU startup disabled by the platform\n");
return false;
}

diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index d82f4fa..1da4baa 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -126,6 +126,7 @@ struct x86_init_ops x86_init __initdata = {
struct x86_cpuinit_ops x86_cpuinit = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
+ .parallel_bringup = true,
};

static void default_nmi_init(void) { };
diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c
index e0b51c0..4855e5f 100644
--- a/arch/x86/mm/mem_encrypt_amd.c
+++ b/arch/x86/mm/mem_encrypt_amd.c
@@ -501,6 +501,21 @@ void __init sme_early_init(void)
x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
+
+ /*
+ * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
+ * parallel bringup low level code. That raises #VC which cannot be
+ * handled there.
+ * It does not provide a RDMSR GHCB protocol so the early startup
+ * code cannot directly communicate with the secure firmware. The
+ * alternative solution to retrieve the APIC ID via CPUID(0xb),
+ * which is covered by the GHCB protocol, is not viable either
+ * because there is no enforcement of the CPUID(0xb) provided
+ * "initial" APIC ID to be the same as the real APIC ID.
+ * Disable parallel bootup.
+ */
+ if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
+ x86_cpuinit.parallel_bringup = false;
}

void __init mem_encrypt_free_decrypted_mem(void)

2023-06-08 23:59:58

by Yunhong Jiang

[permalink] [raw]
Subject: Re: [patch] x86/realmode: Make stack lock work in trampoline_compat()

On Tue, May 30, 2023 at 12:46:22PM +0200, Thomas Gleixner wrote:
> The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to
> work when invoked from the 64bit trampoline entry point:
>
> trampoline_start64
> trampoline_compat
> LOAD_REALMODE_ESP <- lock
One possibly dumb question, and I hope to get some hints. The LOAD_REALMODE_ESP
macro is defined under the .code16 directive but is also used by a 32-bit mode
caller. Is that OK because the instructions there will be the same for both
16-bit and 32-bit? I checked
https://ftp.gnu.org/old-gnu/Manuals/gas-2.9.1/html_chapter/as_16.html#SEC205 and
didn't find much information there.


2023-06-09 00:42:42

by Yunhong Jiang

[permalink] [raw]
Subject: Re: [patch] x86/realmode: Make stack lock work in trampoline_compat()

On Fri, Jun 09, 2023 at 12:57:46AM +0100, Andrew Cooper wrote:
> On 09/06/2023 12:34 am, Yunhong Jiang wrote:
> > On Tue, May 30, 2023 at 12:46:22PM +0200, Thomas Gleixner wrote:
> >> The stack locking and stack assignment macro LOAD_REALMODE_ESP fails to
> >> work when invoked from the 64bit trampoline entry point:
> >>
> >> trampoline_start64
> >> trampoline_compat
> >> LOAD_REALMODE_ESP <- lock
> > One possibly dumb question and hope get some hints.
>
> There's a phrase.  "The only dumb question is the one not asked".
>
> If you have this question, there's an excellent chance that someone else
> reading this thread has the same question.
>
> > The LOAD_REALMODE_ESP is
> > defined under .code16 directive and will be used by 32-bit mode caller also. Is
> > it ok because the instructions there will be same for both 16-bit and 32-bit? I
> > checked
> > https://ftp.gnu.org/old-gnu/Manuals/gas-2.9.1/html_chapter/as_16.html#SEC205 and
> > don't find much information there.
>
> The position of the LOAD_REALMODE_ESP .macro itself doesn't matter. 
> It's just some text which gets pasted elsewhere.  Imagine it just the
> same as running the C preprocessor on a file before compiling it.
>
> As you note, some expansions of the macro are in .code16, and some are
> not.  This does result in different bytes being emitted.  The default
> operand size flips between .code16 and .code32, so there will be some
> 0x66 prefixes in one mode, and not in others.
>
> The important point is the l suffix on btsl, which forces it to be long
> (32bit) irrespective of the default operand size.
>
> So yes, it will work, but that's because gas is handling the differing
> encodings automatically based on the default operand size where the
> LOAD_REALMODE_ESP gets expanded.
>
> Hope this helps,
Thank you for the explanation; it's quite clear now.
>
> ~Andrew
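
To make the operand-size point concrete, here is a minimal, untested gas
sketch. The macro name TRY_LOCK and the label lock_word are invented for
illustration (this is not the real LOAD_REALMODE_ESP or tr_lock); it only
mirrors the shape of the locking sequence. The 'l' suffix pins the operand
to 32 bits in both expansions, so gas adds a 0x66 operand-size prefix under
.code16 and omits it under .code32:

        .macro TRY_LOCK
1:
        lock btsl       $0, lock_word   # 'l' forces a 32-bit operand
        jnc             2f              # CF=0: bit was clear, lock taken
        pause
        jmp             1b              # CF=1: someone else holds it, spin
2:
        .endm

        .data
lock_word:
        .long   0

        .text
        .code16
use16:
        TRY_LOCK                        # expansion carries a 0x66 prefix here

        .code32
use32:
        TRY_LOCK                        # same source text, no prefix needed

Assembling this and disassembling the two expansions shows the differing
byte sequences described above, while both still operate on a 32-bit word.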

2023-06-10 20:11:17

by David Laight

[permalink] [raw]
Subject: RE: [patch] x86/realmode: Make stack lock work in trampoline_compat()

From: Andrew Cooper
> Sent: 09 June 2023 00:58
>
...
> The important point is the l suffix on btsl, which forces it to be long
> (32bit) irrespective of the default operand size.

Does that matter at all?
The 'bit' opcodes (I'm sure 'bts' is 'bit test and set') take
a bit offset from the base address.
This accesses the same bit regardless of the operand size.

The one real issue is that a byte operand will only lock the one byte.
This might be problematic if non-bit locked accesses are also used,
although that would be a rather obscure use.
(This may be one of them...)

The only other problem is that btsl always does a locked 32bit
access. If the base address is misaligned this is a misaligned
locked access - problematic if it crosses a cache line boundary.

David
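
A tiny untested gas sketch of those two points (the label and layout below
are made up for illustration, and note that bts only has 16/32/64-bit
operand forms): a small immediate bit offset such as $0 lands on the same
bit whichever width is used, while the operand size decides how wide the
locked read-modify-write is, which is what turns a misaligned btsl into a
potential cache-line-crossing (split) lock:

        .data
        .balign 64
        .skip   62                      # place the flag 2 bytes before the
                                        # end of the 64-byte line
flag_misaligned:
        .long   0                       # this dword straddles the boundary

        .text
        .code32
demo:
        lock btsw       $0, flag_misaligned     # 16-bit locked RMW, sets bit 0
        lock btsl       $0, flag_misaligned     # 32-bit locked RMW, same bit 0,
                                                # but the access now crosses the
                                                # cache line (split lock)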
