2023-02-22 22:13:48

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 0/6] x86-64: Remove global variables from boot

This is on top of the parallel boot v10 series.

Remove the global variables initial_gs, initial_stack, and
early_gdt_descr from the 64-bit boot code. The stack, GDT, and GSBASE
can be determined from the CPU number.

Brian Gerst (6):
x86/smpboot: Use CPU number instead of APIC ID for single CPU startup
x86/smpboot: Use current_task to get idle thread
x86/smpboot: Remove initial_stack on 64-bit
x86/smpbppt: Remove early_gdt_descr on 64-bit
x86/smpboot: Remove initial_gs
x86/smpboot: Simplify boot CPU setup

arch/x86/include/asm/processor.h | 6 +-
arch/x86/include/asm/realmode.h | 1 -
arch/x86/include/asm/smp.h | 1 -
arch/x86/kernel/acpi/sleep.c | 5 +-
arch/x86/kernel/asm-offsets.c | 1 +
arch/x86/kernel/head_64.S | 108 ++++++++++++-------------------
arch/x86/kernel/smpboot.c | 6 +-
arch/x86/xen/xen-head.S | 2 +-
kernel/smpboot.c | 2 +-
9 files changed, 53 insertions(+), 79 deletions(-)

--
2.39.2



2023-02-22 22:13:51

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 2/6] x86/smpboot: Use current_task to get idle thread

The idle_threads array is not populated during early boot. Use
current_task instead, which is initialized to init_task for the boot
CPU.

Also simplify start_cpu0(). Since the boot CPU never really goes
offline, the GSBASE is still set up and can be used for per-cpu
accesses.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/kernel/asm-offsets.c | 1 +
arch/x86/kernel/head_64.S | 7 ++-----
kernel/smpboot.c | 2 +-
3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 8650f29387e0..445bce086717 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -114,6 +114,7 @@ static void __used common(void)
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+ OFFSET(X86_current_task, pcpu_hot, current_task);
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c1253aa737ca..c32e5b06a9ce 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -315,7 +315,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movq %rcx, early_gdt_descr_base(%rip)

/* Find the idle task stack */
- movq idle_threads(%rbx), %rcx
+ movq pcpu_hot + X86_current_task(%rbx), %rcx
movq TASK_threadsp(%rcx), %rcx
movq %rcx, initial_stack(%rip)
#endif /* CONFIG_SMP */
@@ -460,12 +460,9 @@ SYM_CODE_END(secondary_startup_64)
SYM_CODE_START(start_cpu0)
ANNOTATE_NOENDBR
UNWIND_HINT_EMPTY
- /* Load the per-cpu base for CPU#0 */
- leaq __per_cpu_offset(%rip), %rbx
- movq (%rbx), %rbx

/* Find the idle task stack */
- movq idle_threads(%rbx), %rcx
+ movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
movq TASK_threadsp(%rcx), %rsp

jmp .Ljump_to_C_code
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index a18a21dff9bc..2c7396da470c 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -25,7 +25,7 @@
* For the hotplug case we keep the task structs around and reuse
* them.
*/
-DEFINE_PER_CPU(struct task_struct *, idle_threads);
+static DEFINE_PER_CPU(struct task_struct *, idle_threads);

struct task_struct *idle_thread_get(unsigned int cpu)
{
--
2.39.2


2023-02-22 22:13:55

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 4/6] x86/smpbppt: Remove early_gdt_descr on 64-bit

Build the GDT descriptor on the stack instead.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/kernel/acpi/sleep.c | 2 --
arch/x86/kernel/head_64.S | 19 +++++++------------
2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 008fda8b1982..6538ddb55f28 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -114,8 +114,6 @@ int x86_acpi_suspend_lowlevel(void)
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
- early_gdt_descr.address =
- (unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
/* Force the startup into boot mode */
saved_smpboot_ctrl = xchg(&smpboot_control, 0);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index f7905ba4b992..0dd57d573a0e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -245,7 +245,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
#ifdef CONFIG_SMP
/*
* Is this the boot CPU coming up? If so everything is available
- * in initial_gs and early_gdt_descr.
+ * in initial_gs.
*/
movl smpboot_control(%rip), %edx
testl $STARTUP_SECONDARY, %edx
@@ -313,11 +313,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
/* Save it for GS BASE setup */
movq %rbx, initial_gs(%rip)

- /* Calculate the GDT address */
- movq $gdt_page, %rcx
- addq %rbx, %rcx
- movq %rcx, early_gdt_descr_base(%rip)
-
movq %rbx, %rdx
#else
xorl %edx, %edx
@@ -339,7 +334,12 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
- lgdt early_gdt_descr(%rip)
+ subq $16, %rsp
+ movw $(GDT_SIZE-1), (%rsp)
+ leaq gdt_page(%rdx), %rax
+ movq %rax, 2(%rsp)
+ lgdt (%rsp)
+ addq $16, %rsp

/* set up data segments */
xorl %eax,%eax
@@ -754,11 +754,6 @@ SYM_DATA_END(level1_fixmap_pgt)

.data
.align 16
-
-SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
-SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
-
- .align 16
SYM_DATA(smpboot_control, .long 0)

.align 16
--
2.39.2


2023-02-22 22:13:58

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 3/6] x86/smpboot: Remove initial_stack on 64-bit

Load RSP from current_task->thread.sp instead.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/processor.h | 6 +++++-
arch/x86/kernel/acpi/sleep.c | 2 +-
arch/x86/kernel/head_64.S | 35 ++++++++++++++++++--------------
arch/x86/xen/xen-head.S | 2 +-
4 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 8d73004e4cac..7f64b69c2b0e 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x)
#define KSTK_ESP(task) (task_pt_regs(task)->sp)

#else
-#define INIT_THREAD { }
+extern unsigned long __end_init_task[];
+
+#define INIT_THREAD { \
+ .sp = (unsigned long)&__end_init_task - PTREGS_SIZE, \
+}

extern unsigned long KSTK_ESP(struct task_struct *task);

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 47e75c056cb5..008fda8b1982 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -113,7 +113,7 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
- initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
+ current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c32e5b06a9ce..f7905ba4b992 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -62,8 +62,8 @@ SYM_CODE_START_NOALIGN(startup_64)
* tables and then reload them.
*/

- /* Set up the stack for verify_cpu(), similar to initial_stack below */
- leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
+ /* Set up the stack for verify_cpu() */
+ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp

leaq _text(%rip), %rdi

@@ -245,11 +245,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
#ifdef CONFIG_SMP
/*
* Is this the boot CPU coming up? If so everything is available
- * in initial_gs, initial_stack and early_gdt_descr.
+ * in initial_gs and early_gdt_descr.
*/
movl smpboot_control(%rip), %edx
testl $STARTUP_SECONDARY, %edx
- jz .Lsetup_cpu
+ jz .Linit_cpu0_data

/*
* For parallel boot, the APIC ID is retrieved from CPUID, and then
@@ -302,6 +302,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
hlt
jmp 1b

+.Linit_cpu0_data:
+ movq __per_cpu_offset(%rip), %rdx
+ jmp .Lsetup_cpu
+
.Linit_cpu_data:
/* Get the per cpu offset for the given CPU# which is in ECX */
leaq __per_cpu_offset(%rip), %rbx
@@ -314,13 +318,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
addq %rbx, %rcx
movq %rcx, early_gdt_descr_base(%rip)

- /* Find the idle task stack */
- movq pcpu_hot + X86_current_task(%rbx), %rcx
- movq TASK_threadsp(%rcx), %rcx
- movq %rcx, initial_stack(%rip)
+ movq %rbx, %rdx
+#else
+ xorl %edx, %edx
#endif /* CONFIG_SMP */

.Lsetup_cpu:
+ /*
+ * Setup a boot time stack - Any secondary CPU will have lost its stack
+ * by now because the cr3-switch above unmaps the real-mode stack
+ *
+ * RDX contains the per-cpu offset
+ */
+ movq pcpu_hot + X86_current_task(%rdx), %rax
+ movq TASK_threadsp(%rax), %rsp
+
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
@@ -355,12 +367,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movl initial_gs+4(%rip),%edx
wrmsr

- /*
- * Setup a boot time stack - Any secondary CPU will have lost its stack
- * by now because the cr3-switch above unmaps the real-mode stack
- */
- movq initial_stack(%rip), %rsp
-
/* Drop the realmode protection. For the boot CPU the pointer is NULL! */
movq trampoline_lock(%rip), %rax
testq %rax, %rax
@@ -517,7 +523,6 @@ SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
* reliably detect the end of the stack.
*/
-SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
SYM_DATA(trampoline_lock, .quad 0);
__FINITDATA

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index e36ea4268bd2..91f7a53519a7 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
ANNOTATE_NOENDBR
cld

- mov initial_stack(%rip), %rsp
+ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp

/* Set up %gs.
*
--
2.39.2


2023-02-22 22:14:02

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 6/6] x86/smpboot: Simplify boot CPU setup

Now that the per-cpu GSBASE, stack, and GDT descriptor can be derived
dynammically by CPU number, the boot CPU can use a fixed CPU number and
take the same path as secondary CPUs.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/smp.h | 1 -
arch/x86/kernel/head_64.S | 25 +++++++------------------
arch/x86/kernel/smpboot.c | 6 +++---
3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index b4b29e052b6e..32c9157238c0 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -202,7 +202,6 @@ extern unsigned int smpboot_control;
#endif /* !__ASSEMBLY__ */

/* Control bits for startup_64 */
-#define STARTUP_SECONDARY 0x80000000
#define STARTUP_APICID_CPUID_0B 0x40000000
#define STARTUP_APICID_CPUID_01 0x20000000

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9ed87ba0609f..949c13b26811 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -235,28 +235,22 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
ANNOTATE_NOENDBR // above

#ifdef CONFIG_SMP
- /* Is this the boot CPU coming up? */
- movl smpboot_control(%rip), %edx
- testl $STARTUP_SECONDARY, %edx
- jz .Linit_cpu0_data
-
/*
* For parallel boot, the APIC ID is retrieved from CPUID, and then
* used to look up the CPU number. For booting a single CPU, the
* CPU number is encoded in smpboot_control.
*
- * Bit 31 STARTUP_SECONDARY flag (checked above)
* Bit 30 STARTUP_APICID_CPUID_0B flag (use CPUID 0x0b)
* Bit 29 STARTUP_APICID_CPUID_01 flag (use CPUID 0x01)
* Bit 0-24 CPU# if STARTUP_APICID_CPUID_xx flags are not set
*/
- testl $STARTUP_APICID_CPUID_0B, %edx
+ movl smpboot_control(%rip), %ecx
+ testl $STARTUP_APICID_CPUID_0B, %ecx
jnz .Luse_cpuid_0b
- testl $STARTUP_APICID_CPUID_01, %edx
+ testl $STARTUP_APICID_CPUID_01, %ecx
jnz .Luse_cpuid_01
- andl $0x0FFFFFFF, %edx
- movl %edx, %ecx
- jmp .Linit_cpu_data
+ andl $0x0FFFFFFF, %ecx
+ jmp .Lsetup_cpu

.Luse_cpuid_01:
mov $0x01, %eax
@@ -277,7 +271,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)

.Lfind_cpunr:
cmpl (%rbx,%rcx,4), %edx
- jz .Linit_cpu_data
+ jz .Lsetup_cpu
inc %ecx
cmpl nr_cpu_ids(%rip), %ecx
jb .Lfind_cpunr
@@ -291,18 +285,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
hlt
jmp 1b

-.Linit_cpu0_data:
- movq __per_cpu_offset(%rip), %rdx
- jmp .Lsetup_cpu
-
-.Linit_cpu_data:
+.Lsetup_cpu:
/* Get the per cpu offset for the given CPU# which is in ECX */
movq __per_cpu_offset(,%rcx,8), %rdx
#else
xorl %edx, %edx
#endif /* CONFIG_SMP */

-.Lsetup_cpu:
/*
* Setup a boot time stack - Any secondary CPU will have lost its stack
* by now because the cr3-switch above unmaps the real-mode stack
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1a2843c2841..c159a5c2df9f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1140,7 +1140,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_stack = idle->thread.sp;
} else if (!do_parallel_bringup) {
- smpboot_control = STARTUP_SECONDARY | cpu;
+ smpboot_control = cpu;
}

/* Enable the espfix hack for this CPU */
@@ -1580,7 +1580,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
*/
if (eax) {
pr_debug("Using CPUID 0xb for parallel CPU startup\n");
- smpboot_control = STARTUP_SECONDARY | STARTUP_APICID_CPUID_0B;
+ smpboot_control = STARTUP_APICID_CPUID_0B;
} else {
pr_info("Disabling parallel bringup because CPUID 0xb looks untrustworthy\n");
do_parallel_bringup = false;
@@ -1588,7 +1588,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
} else if (do_parallel_bringup) {
/* Without X2APIC, what's in CPUID 0x01 should suffice. */
pr_debug("Using CPUID 0x1 for parallel CPU startup\n");
- smpboot_control = STARTUP_SECONDARY | STARTUP_APICID_CPUID_01;
+ smpboot_control = STARTUP_APICID_CPUID_01;
}

if (do_parallel_bringup) {
--
2.39.2


2023-02-22 22:14:04

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 5/6] x86/smpboot: Remove initial_gs

Use the percpu offset directly to set GSBASE.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/realmode.h | 1 -
arch/x86/kernel/acpi/sleep.c | 1 -
arch/x86/kernel/head_64.S | 34 ++++++++++-----------------------
3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index f0357cfe2fb0..87e5482acd0d 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -60,7 +60,6 @@ extern struct real_mode_header *real_mode_header;
extern unsigned char real_mode_blob_end[];

extern unsigned long initial_code;
-extern unsigned long initial_gs;
extern unsigned long initial_stack;
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern unsigned long initial_vc_handler;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 6538ddb55f28..214dd4a79860 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -114,7 +114,6 @@ int x86_acpi_suspend_lowlevel(void)
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
- initial_gs = per_cpu_offset(smp_processor_id());
/* Force the startup into boot mode */
saved_smpboot_ctrl = xchg(&smpboot_control, 0);
#endif
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0dd57d573a0e..9ed87ba0609f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -67,18 +67,10 @@ SYM_CODE_START_NOALIGN(startup_64)

leaq _text(%rip), %rdi

- /*
- * initial_gs points to initial fixed_percpu_data struct with storage for
- * the stack protector canary. Global pointer fixups are needed at this
- * stage, so apply them as is done in fixup_pointer(), and initialize %gs
- * such that the canary can be accessed at %gs:40 for subsequent C calls.
- */
+ /* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
- movq initial_gs(%rip), %rax
- movq $_text, %rdx
- subq %rdx, %rax
- addq %rdi, %rax
- movq %rax, %rdx
+ leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+ movl %edx, %eax
shrq $32, %rdx
wrmsr

@@ -243,10 +235,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
ANNOTATE_NOENDBR // above

#ifdef CONFIG_SMP
- /*
- * Is this the boot CPU coming up? If so everything is available
- * in initial_gs.
- */
+ /* Is this the boot CPU coming up? */
movl smpboot_control(%rip), %edx
testl $STARTUP_SECONDARY, %edx
jz .Linit_cpu0_data
@@ -308,12 +297,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)

.Linit_cpu_data:
/* Get the per cpu offset for the given CPU# which is in ECX */
- leaq __per_cpu_offset(%rip), %rbx
- movq (%rbx,%rcx,8), %rbx
- /* Save it for GS BASE setup */
- movq %rbx, initial_gs(%rip)
-
- movq %rbx, %rdx
+ movq __per_cpu_offset(,%rcx,8), %rdx
#else
xorl %edx, %edx
#endif /* CONFIG_SMP */
@@ -363,8 +347,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movl initial_gs(%rip),%eax
- movl initial_gs+4(%rip),%edx
+#ifndef CONFIG_SMP
+ leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#endif
+ movl %edx, %eax
+ shrq $32, %rdx
wrmsr

/* Drop the realmode protection. For the boot CPU the pointer is NULL! */
@@ -514,7 +501,6 @@ SYM_CODE_END(vc_boot_ghcb)
__REFDATA
.balign 8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
-SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
--
2.39.2


2023-02-23 06:50:35

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH 4/6] x86/smpbppt: Remove early_gdt_descr on 64-bit

On February 22, 2023 2:12:59 PM PST, Brian Gerst <[email protected]> wrote:
>Build the GDT descriptor on the stack instead.
>
>Signed-off-by: Brian Gerst <[email protected]>
>---
> arch/x86/kernel/acpi/sleep.c | 2 --
> arch/x86/kernel/head_64.S | 19 +++++++------------
> 2 files changed, 7 insertions(+), 14 deletions(-)
>
>diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
>index 008fda8b1982..6538ddb55f28 100644
>--- a/arch/x86/kernel/acpi/sleep.c
>+++ b/arch/x86/kernel/acpi/sleep.c
>@@ -114,8 +114,6 @@ int x86_acpi_suspend_lowlevel(void)
> #else /* CONFIG_64BIT */
> #ifdef CONFIG_SMP
> current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
>- early_gdt_descr.address =
>- (unsigned long)get_cpu_gdt_rw(smp_processor_id());
> initial_gs = per_cpu_offset(smp_processor_id());
> /* Force the startup into boot mode */
> saved_smpboot_ctrl = xchg(&smpboot_control, 0);
>diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
>index f7905ba4b992..0dd57d573a0e 100644
>--- a/arch/x86/kernel/head_64.S
>+++ b/arch/x86/kernel/head_64.S
>@@ -245,7 +245,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> #ifdef CONFIG_SMP
> /*
> * Is this the boot CPU coming up? If so everything is available
>- * in initial_gs and early_gdt_descr.
>+ * in initial_gs.
> */
> movl smpboot_control(%rip), %edx
> testl $STARTUP_SECONDARY, %edx
>@@ -313,11 +313,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> /* Save it for GS BASE setup */
> movq %rbx, initial_gs(%rip)
>
>- /* Calculate the GDT address */
>- movq $gdt_page, %rcx
>- addq %rbx, %rcx
>- movq %rcx, early_gdt_descr_base(%rip)
>-
> movq %rbx, %rdx
> #else
> xorl %edx, %edx
>@@ -339,7 +334,12 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> * addresses where we're currently running on. We have to do that here
> * because in 32bit we couldn't load a 64bit linear address.
> */
>- lgdt early_gdt_descr(%rip)
>+ subq $16, %rsp
>+ movw $(GDT_SIZE-1), (%rsp)
>+ leaq gdt_page(%rdx), %rax
>+ movq %rax, 2(%rsp)
>+ lgdt (%rsp)
>+ addq $16, %rsp
>
> /* set up data segments */
> xorl %eax,%eax
>@@ -754,11 +754,6 @@ SYM_DATA_END(level1_fixmap_pgt)
>
> .data
> .align 16
>-
>-SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
>-SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
>-
>- .align 16
> SYM_DATA(smpboot_control, .long 0)
>
> .align 16

This is known to break at least some hypervisors, probably old by now, but...

2023-02-23 08:06:38

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/smpboot: Remove initial_stack on 64-bit

On Wed, 2023-02-22 at 17:12 -0500, Brian Gerst wrote:
> @@ -245,11 +245,11 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>  #ifdef CONFIG_SMP
>         /*
>          * Is this the boot CPU coming up? If so everything is available
> -        * in initial_gs, initial_stack and early_gdt_descr.
> +        * in initial_gs and early_gdt_descr.
>          */
>         movl    smpboot_control(%rip), %edx
>         testl   $STARTUP_SECONDARY, %edx
> -       jz      .Lsetup_cpu
> +       jz      .Linit_cpu0_data
>  
>         /*
>          * For parallel boot, the APIC ID is retrieved from CPUID, and then
> @@ -302,6 +302,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>         hlt
>         jmp     1b
>  
> +.Linit_cpu0_data:
> +       movq    __per_cpu_offset(%rip), %rdx
> +       jmp     .Lsetup_cpu
> +

Aren't all CPUs taking this .Linit_cpu0_data path for non-parallel
startup, not just cpu0? I think you want something more like

.Linit_cpuN_data:
orl $0x0fffffff, %edx
leaq __per_cpu_offset(%rip), %rbx
movq (%rbx,%rdx,8), %rdx
jmp .Lsetup_cpu


Attachments:
smime.p7s (5.83 kB)

2023-02-23 08:27:16

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 3/6] x86/smpboot: Remove initial_stack on 64-bit

On Thu, 2023-02-23 at 08:05 +0000, David Woodhouse wrote:
>
>
> Aren't all CPUs taking this .Linit_cpu0_data path for non-parallel
> startup, not just cpu0? I think you want something more like
>
> .Linit_cpuN_data:
> orl $0x0fffffff, %edx
> leaq __per_cpu_offset(%rip), %rbx
> movq (%rbx,%rdx,8), %rdx
> jmp .Lsetup_cpu
>  

Er, nope. Forget that. More coffee...


Attachments:
smime.p7s (5.83 kB)

2023-02-23 12:11:50

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 4/6] x86/smpbppt: Remove early_gdt_descr on 64-bit

On Thu, Feb 23, 2023 at 1:50 AM H. Peter Anvin <[email protected]> wrote:
>
> On February 22, 2023 2:12:59 PM PST, Brian Gerst <[email protected]> wrote:
> >Build the GDT descriptor on the stack instead.
> >
> >Signed-off-by: Brian Gerst <[email protected]>
> >---
> > arch/x86/kernel/acpi/sleep.c | 2 --
> > arch/x86/kernel/head_64.S | 19 +++++++------------
> > 2 files changed, 7 insertions(+), 14 deletions(-)
> >
> >diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
> >index 008fda8b1982..6538ddb55f28 100644
> >--- a/arch/x86/kernel/acpi/sleep.c
> >+++ b/arch/x86/kernel/acpi/sleep.c
> >@@ -114,8 +114,6 @@ int x86_acpi_suspend_lowlevel(void)
> > #else /* CONFIG_64BIT */
> > #ifdef CONFIG_SMP
> > current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
> >- early_gdt_descr.address =
> >- (unsigned long)get_cpu_gdt_rw(smp_processor_id());
> > initial_gs = per_cpu_offset(smp_processor_id());
> > /* Force the startup into boot mode */
> > saved_smpboot_ctrl = xchg(&smpboot_control, 0);
> >diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> >index f7905ba4b992..0dd57d573a0e 100644
> >--- a/arch/x86/kernel/head_64.S
> >+++ b/arch/x86/kernel/head_64.S
> >@@ -245,7 +245,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> > #ifdef CONFIG_SMP
> > /*
> > * Is this the boot CPU coming up? If so everything is available
> >- * in initial_gs and early_gdt_descr.
> >+ * in initial_gs.
> > */
> > movl smpboot_control(%rip), %edx
> > testl $STARTUP_SECONDARY, %edx
> >@@ -313,11 +313,6 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> > /* Save it for GS BASE setup */
> > movq %rbx, initial_gs(%rip)
> >
> >- /* Calculate the GDT address */
> >- movq $gdt_page, %rcx
> >- addq %rbx, %rcx
> >- movq %rcx, early_gdt_descr_base(%rip)
> >-
> > movq %rbx, %rdx
> > #else
> > xorl %edx, %edx
> >@@ -339,7 +334,12 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> > * addresses where we're currently running on. We have to do that here
> > * because in 32bit we couldn't load a 64bit linear address.
> > */
> >- lgdt early_gdt_descr(%rip)
> >+ subq $16, %rsp
> >+ movw $(GDT_SIZE-1), (%rsp)
> >+ leaq gdt_page(%rdx), %rax
> >+ movq %rax, 2(%rsp)
> >+ lgdt (%rsp)
> >+ addq $16, %rsp
> >
> > /* set up data segments */
> > xorl %eax,%eax
> >@@ -754,11 +754,6 @@ SYM_DATA_END(level1_fixmap_pgt)
> >
> > .data
> > .align 16
> >-
> >-SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
> >-SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
> >-
> >- .align 16
> > SYM_DATA(smpboot_control, .long 0)
> >
> > .align 16
>
> This is known to break at least some hypervisors, probably old by now, but...

I remember some ancient versions of Xen stored the address of the
descriptor, instead of the contents. But looking at load_direct_gdt()
and load_fixmap_gdt() which both load a descriptor on the stack, I
don't think that is still a concern.

--
Brian Gerst

2023-02-23 12:37:26

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 6/6] x86/smpboot: Simplify boot CPU setup

On Wed, 2023-02-22 at 17:13 -0500, Brian Gerst wrote:
>
>  /* Control bits for startup_64 */
> -#define STARTUP_SECONDARY      0x80000000
>  #define STARTUP_APICID_CPUID_0B        0x40000000
>  #define STARTUP_APICID_CPUID_01        0x20000000

Might as well change CPUID_01 to 0x80000000 while you're at it?


Attachments:
smime.p7s (5.83 kB)

2023-02-23 13:46:16

by David Woodhouse

[permalink] [raw]
Subject: Re: [PATCH 0/6] x86-64: Remove global variables from boot

On Wed, 2023-02-22 at 17:12 -0500, Brian Gerst wrote:
> This is on top of the parallel boot v10 series.
>
> Remove the global variables initial_gs, initial_stack, and
> early_gdt_descr from the 64-bit boot code.  The stack, GDT, and GSBASE
> can be determined from the CPU number.
>
> Brian Gerst (6):
>   x86/smpboot: Use CPU number instead of APIC ID for single CPU startup
>   x86/smpboot: Use current_task to get idle thread


I think those first two should be folded into the 'x86/smpboot: Support
parallel startup of secondary CPUs' patch rather than follow-on
patches?

>   x86/smpboot: Remove initial_stack on 64-bit
>   x86/smpbppt: Remove early_gdt_descr on 64-bit
>   x86/smpboot: Remove initial_gs
>   x86/smpboot: Simplify boot CPU setup

Those four probably make sense to come separately. For each of them,

Reviewed-by: David Woodhouse <[email protected]>

I've pulled in the v10 series from Usama, squashed the first two as I
suggested, added the last four on top to do some testing:
https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-part1

Rather than overthinking the SoB chain, I've left Usama's signoff as
the last on the original series, and on the assumption that Usama will
continue posting, I'll give them the choice of my R-b or S-o-B on what
are now the final four.


Attachments:
smime.p7s (5.83 kB)

2023-02-23 14:24:59

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 0/6] x86-64: Remove global variables from boot

On Thu, Feb 23, 2023 at 8:44 AM David Woodhouse <[email protected]> wrote:
>
> On Wed, 2023-02-22 at 17:12 -0500, Brian Gerst wrote:
> > This is on top of the parallel boot v10 series.
> >
> > Remove the global variables initial_gs, initial_stack, and
> > early_gdt_descr from the 64-bit boot code. The stack, GDT, and GSBASE
> > can be determined from the CPU number.
> >
> > Brian Gerst (6):
> > x86/smpboot: Use CPU number instead of APIC ID for single CPU startup
> > x86/smpboot: Use current_task to get idle thread
>
>
> I think those first two should be folded into the 'x86/smpboot: Support
> parallel startup of secondary CPUs' patch rather than follow-on
> patches?

Yes, that makes sense.

> > x86/smpboot: Remove initial_stack on 64-bit
> > x86/smpbppt: Remove early_gdt_descr on 64-bit
> > x86/smpboot: Remove initial_gs
> > x86/smpboot: Simplify boot CPU setup
>
> Those four probably make sense to come separately. For each of them,
>
> Reviewed-by: David Woodhouse <[email protected]>
>
> I've pulled in the v10 series from Usama, squashed the first two as I
> suggested, added the last four on top to do some testing:
> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-part1

Looks good. I noticed a typo in the commit log of the last patch
(dynammically -> dynamically). Can you fix that or should I resend?

Thanks

--
Brian Gerst

2023-02-23 19:17:35

by Usama Arif

[permalink] [raw]
Subject: Re: [External] Re: [PATCH 0/6] x86-64: Remove global variables from boot



On 23/02/2023 14:24, Brian Gerst wrote:
> On Thu, Feb 23, 2023 at 8:44 AM David Woodhouse <[email protected]> wrote:
>>
>> On Wed, 2023-02-22 at 17:12 -0500, Brian Gerst wrote:
>>> This is on top of the parallel boot v10 series.
>>>
>>> Remove the global variables initial_gs, initial_stack, and
>>> early_gdt_descr from the 64-bit boot code. The stack, GDT, and GSBASE
>>> can be determined from the CPU number.
>>>
>>> Brian Gerst (6):
>>> x86/smpboot: Use CPU number instead of APIC ID for single CPU startup
>>> x86/smpboot: Use current_task to get idle thread
>>
>>
>> I think those first two should be folded into the 'x86/smpboot: Support
>> parallel startup of secondary CPUs' patch rather than follow-on
>> patches?
>
> Yes, that makes sense.
>
>>> x86/smpboot: Remove initial_stack on 64-bit
>>> x86/smpbppt: Remove early_gdt_descr on 64-bit
>>> x86/smpboot: Remove initial_gs
>>> x86/smpboot: Simplify boot CPU setup
>>
>> Those four probably make sense to come separately. For each of them,
>>
>> Reviewed-by: David Woodhouse <[email protected]>
>>
>> I've pulled in the v10 series from Usama, squashed the first two as I
>> suggested, added the last four on top to do some testing:
>> https://git.infradead.org/users/dwmw2/linux.git/shortlog/refs/heads/parallel-6.2-rc8-part1
>
> Looks good. I noticed a typo in the commit log of the last patch
> (dynammically -> dynamically). Can you fix that or should I resend?
>
> Thanks
>
> --
> Brian Gerst

I have tested the branch on top of v6.2 release and sent it as v11. Have
also fixed the typo.

Thanks,
Usama