2024-04-25 12:09:25

by Ard Biesheuvel

[permalink] [raw]
Subject: [PATCH v2 3/4] x86/boot/64: Determine VA/PA offset before entering C code

From: Ard Biesheuvel <[email protected]>

Implicit absolute symbol references (e.g., taking the address of a
global variable) must be avoided in the C code that runs from the
early 1:1 mapping of the kernel, given that this is a practice that
violates assumptions on the part of the toolchain. I.e., absolute and
RIP-relative references are expected to produce the same result, and so
the compiler is free to choose either. However, the code currently
assumes that RIP-relative references are never emitted here.

So an explicit virtual-to-physical offset will be used to derive the
kernel virtual addresses of _text and _end, rather than simply taking
their addresses and relying on such implicit absolute symbol
references.

Currently, phys_base is used for this purpose, which is derived from the
kernel virtual address of _text, and this would lead to a circular
dependency. So instead, derive this virtual-to-physical offset in asm
code, using the kernel VA of common_startup_64 (which we already keep in
a global variable for other reasons), and pass it to the C startup code.

Signed-off-by: Ard Biesheuvel <[email protected]>
---
arch/x86/include/asm/setup.h | 2 +-
arch/x86/kernel/head64.c | 8 +++++---
arch/x86/kernel/head_64.S | 9 ++++++++-
3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index e61e68d71cba..aca18be5a228 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -47,7 +47,7 @@ extern unsigned long saved_video_mode;

extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
+extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a817ed0724d1..81696a4967e6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -138,12 +138,14 @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdv
* doesn't have to generate PC-relative relocations when accessing globals from
* that function. Clang actually does not generate them, which leads to
* boot-time crashes. To work around this problem, every global pointer must
- * be accessed using RIP_REL_REF().
+ * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
+ * by subtracting p2v_offset from the RIP-relative address.
*/
-unsigned long __head __startup_64(unsigned long physaddr,
+unsigned long __head __startup_64(unsigned long p2v_offset,
struct boot_params *bp)
{
pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
+ unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
unsigned long pgtable_flags;
unsigned long load_delta;
pgdval_t *pgd;
@@ -163,7 +165,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Compute the delta between the address I am compiled to run at
* and the address I am actually running at.
*/
- load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+ load_delta = __START_KERNEL_map + p2v_offset;
RIP_REL_REF(phys_base) = load_delta;

/* Is the address not 2M aligned? */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8198fbd70e5..cb7efb3628ef 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -100,13 +100,20 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Sanitize CPU configuration */
call verify_cpu

+ /*
+ * Use the 1:1 physical and kernel virtual addresses of
+ * common_startup_64 to determine the physical-to-virtual offset, and
+ * pass it as the first argument to __startup_64().
+ */
+ leaq common_startup_64(%rip), %rdi
+ subq 0f(%rip), %rdi
+
/*
* Perform pagetable fixups. Additionally, if SME is active, encrypt
* the kernel and retrieve the modifier (SME encryption mask if SME
* is active) to be added to the initial pgdir entry that will be
* programmed into CR3.
*/
- leaq _text(%rip), %rdi
movq %r15, %rsi
call __startup_64

--
2.44.0.769.g3c40516874-goog