2023-10-23 21:18:07

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 0/9] x86-64: Stack protector and percpu improvements

Currently, x86-64 uses an unusual per-cpu layout, where the percpu section
is linked at absolute address 0. The reason behind this is that older GCC
versions placed the stack protector (if enabled) at a fixed offset from the
GS segment base. Since the GS segment is also used for percpu variables,
this forced the current layout.

GCC since version 8.1 supports a configurable location for the stack
protector value, which allows removal of the restriction on how the percpu
section is linked. This allows the percpu section to be linked
normally, like most other architectures. In turn, this allows removal
of code that was needed to support the zero-based percpu section.

P.S.: I posted this series back in 2021, and at the time it appeared that
GCC 8.1 was too recent (released May 2, 2018) to be a requirement to keep
stack protector support. Two years later, support for the new compiler
options should be much more widespread.

Brian Gerst (9):
x86/stackprotector/32: Remove stack protector test script
x86/boot: Disable stack protector for early boot code
x86/stackprotector/64: Convert stack protector to normal percpu
variable
x86/percpu/64: Remove fixed_percpu_data
x86/percpu/64: Use relative percpu offsets
x86/boot/64: Remove inverse relocations
x86/percpu/64: Remove INIT_PER_CPU macros
percpu: Remove PER_CPU_FIRST_SECTION
kallsyms: Remove KALLSYMS_ABSOLUTE_PERCPU

arch/x86/Kconfig | 7 +-
arch/x86/Makefile | 19 +--
arch/x86/boot/compressed/misc.c | 14 +--
arch/x86/entry/entry_64.S | 2 +-
arch/x86/include/asm/percpu.h | 22 ----
arch/x86/include/asm/processor.h | 28 +----
arch/x86/include/asm/stackprotector.h | 37 ++----
arch/x86/kernel/Makefile | 2 +
arch/x86/kernel/asm-offsets_64.c | 6 -
arch/x86/kernel/cpu/common.c | 8 +-
arch/x86/kernel/head_64.S | 10 +-
arch/x86/kernel/irq_64.c | 1 -
arch/x86/kernel/setup_percpu.c | 12 +-
arch/x86/kernel/vmlinux.lds.S | 35 ------
arch/x86/tools/relocs.c | 136 +---------------------
arch/x86/xen/xen-head.S | 6 +-
include/asm-generic/vmlinux.lds.h | 1 -
include/linux/percpu-defs.h | 12 --
init/Kconfig | 11 +-
kernel/kallsyms.c | 12 +-
scripts/gcc-x86_32-has-stack-protector.sh | 8 --
scripts/gcc-x86_64-has-stack-protector.sh | 4 -
scripts/kallsyms.c | 80 +++----------
scripts/link-vmlinux.sh | 4 -
24 files changed, 60 insertions(+), 417 deletions(-)
delete mode 100755 scripts/gcc-x86_32-has-stack-protector.sh
delete mode 100755 scripts/gcc-x86_64-has-stack-protector.sh

--
2.41.0


2023-10-23 21:18:20

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 4/9] x86/percpu/64: Remove fixed_percpu_data

Now that the stack protector canary value is a normal percpu variable,
fixed_percpu_data is unused and can be removed.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/processor.h | 13 +++++--------
arch/x86/kernel/cpu/common.c | 4 ----
arch/x86/kernel/head_64.S | 12 ++++++------
arch/x86/kernel/vmlinux.lds.S | 6 ------
arch/x86/tools/relocs.c | 1 -
arch/x86/xen/xen-head.S | 12 ++++++++----
6 files changed, 19 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 04371f60e3c6..48c31b8e3e72 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -393,16 +393,13 @@ struct irq_stack {
} __aligned(IRQ_STACK_SIZE);

#ifdef CONFIG_X86_64
-struct fixed_percpu_data {
- char gs_base[40];
-};
-
-DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
-DECLARE_INIT_PER_CPU(fixed_percpu_data);
-
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
}

extern asmlinkage void entry_SYSCALL32_ignore(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f9c8bd27b642..a44fd3ad460e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2051,10 +2051,6 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
EXPORT_PER_CPU_SYMBOL(pcpu_hot);

#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
- fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
-
static void wrmsrl_cstar(unsigned long val)
{
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3dcabbc49149..f2453eb38417 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -72,9 +72,14 @@ SYM_CODE_START_NOALIGN(startup_64)

/* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
- leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
+#ifdef CONFIG_SMP
+ leaq __per_cpu_load(%rip), %rdx
movl %edx, %eax
shrq $32, %rdx
+#else
+ xorl %eax, %eax
+ xorl %edx, %edx
+#endif
wrmsr

call startup_64_setup_env
@@ -345,15 +350,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)

/* Set up %gs.
*
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
-#ifndef CONFIG_SMP
- leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
-#endif
movl %edx, %eax
shrq $32, %rdx
wrmsr
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 54a5596adaa6..c87dc8de2084 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -509,14 +509,8 @@ SECTIONS
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(fixed_percpu_data);
INIT_PER_CPU(irq_stack_backing_store);

-#ifdef CONFIG_SMP
-. = ASSERT((fixed_percpu_data == 0),
- "fixed_percpu_data is not at start of per-cpu area");
-#endif
-
#ifdef CONFIG_CPU_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
#endif
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index d30949e25ebd..3ccd9d4fcf9c 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -811,7 +811,6 @@ static void percpu_init(void)
* __per_cpu_load
*
* The "gold" linker incorrectly associates:
- * init_per_cpu__fixed_percpu_data
* init_per_cpu__gdt_page
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index a0ea285878db..9ce0d9d268bb 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -53,14 +53,18 @@ SYM_CODE_START(startup_xen)

/* Set up %gs.
*
- * The base of %gs always points to fixed_percpu_data. If the
- * stack protector canary is enabled, it is located at %gs:40.
* Note that, on SMP, the boot cpu uses init data section until
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
- cdq
+#ifdef CONFIG_SMP
+ leaq __per_cpu_load(%rip), %rdx
+ movl %edx, %eax
+ shrq $32, %rdx
+#else
+ xorl %eax, %eax
+ xorl %edx, %edx
+#endif
wrmsr

mov %rsi, %rdi
--
2.41.0

2023-10-23 21:18:36

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 1/9] x86/stackprotector/32: Remove stack protector test script

Test for compiler support directly in Kconfig.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/Kconfig | 2 +-
scripts/gcc-x86_32-has-stack-protector.sh | 8 --------
2 files changed, 1 insertion(+), 9 deletions(-)
delete mode 100755 scripts/gcc-x86_32-has-stack-protector.sh

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ef081aa12ac..039872be1630 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -411,7 +411,7 @@ config PGTABLE_LEVELS
config CC_HAS_SANE_STACKPROTECTOR
bool
default $(success,$(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) $(CLANG_FLAGS)) if 64BIT
- default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC) $(CLANG_FLAGS))
+ default $(cc-option,-mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard)
help
We have to make sure stack protector is unconditionally disabled if
the compiler produces broken code or if it does not let us control
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
deleted file mode 100755
index 825c75c5b715..000000000000
--- a/scripts/gcc-x86_32-has-stack-protector.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# This requires GCC 8.1 or better. Specifically, we require
-# -mstack-protector-guard-reg, added by
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81708
-
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m32 -O0 -fstack-protector -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - -o - 2> /dev/null | grep -q "%fs"
--
2.41.0

2023-10-23 21:18:42

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 6/9] x86/boot/64: Remove inverse relocations

Now that the percpu section is not at a fixed virtual address, inverse
relocations, which were needed to offset the effects of relocation on
RIP-relative percpu references, are no longer needed.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/boot/compressed/misc.c | 14 +---
arch/x86/tools/relocs.c | 126 +-------------------------------
2 files changed, 2 insertions(+), 138 deletions(-)

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b99e08e6815b..2de345a236c0 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -221,7 +221,7 @@ static void handle_relocations(void *output, unsigned long output_len,

/*
* Process relocations: 32 bit relocations first then 64 bit after.
- * Three sets of binary relocations are added to the end of the kernel
+ * Two sets of binary relocations are added to the end of the kernel
* before compression. Each relocation table entry is the kernel
* address of the location which needs to be updated stored as a
* 32-bit value which is sign extended to 64 bits.
@@ -231,8 +231,6 @@ static void handle_relocations(void *output, unsigned long output_len,
* kernel bits...
* 0 - zero terminator for 64 bit relocations
* 64 bit relocation repeated
- * 0 - zero terminator for inverse 32 bit relocations
- * 32 bit inverse relocation repeated
* 0 - zero terminator for 32 bit relocations
* 32 bit relocation repeated
*
@@ -249,16 +247,6 @@ static void handle_relocations(void *output, unsigned long output_len,
*(uint32_t *)ptr += delta;
}
#ifdef CONFIG_X86_64
- while (*--reloc) {
- long extended = *reloc;
- extended += map;
-
- ptr = (unsigned long)extended;
- if (ptr < min_addr || ptr > max_addr)
- error("inverse 32-bit relocation outside of kernel!\n");
-
- *(int32_t *)ptr -= delta;
- }
for (reloc--; *reloc; reloc--) {
long extended = *reloc;
extended += map;
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 01efbfdd3eb3..7feb63179b62 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -28,7 +28,6 @@ struct relocs {
static struct relocs relocs16;
static struct relocs relocs32;
#if ELF_BITS == 64
-static struct relocs relocs32neg;
static struct relocs relocs64;
#define FMT PRIu64
#else
@@ -84,7 +83,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__initramfs_start|"
"(jiffies|jiffies_64)|"
#if ELF_BITS == 64
- "__per_cpu_load|"
"init_per_cpu__.*|"
"__end_rodata_hpage_align|"
#endif
@@ -281,33 +279,6 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
return name;
}

-static Elf_Sym *sym_lookup(const char *symname)
-{
- int i;
- for (i = 0; i < shnum; i++) {
- struct section *sec = &secs[i];
- long nsyms;
- char *strtab;
- Elf_Sym *symtab;
- Elf_Sym *sym;
-
- if (sec->shdr.sh_type != SHT_SYMTAB)
- continue;
-
- nsyms = sec->shdr.sh_size/sizeof(Elf_Sym);
- symtab = sec->symtab;
- strtab = sec->link->strtab;
-
- for (sym = symtab; --nsyms >= 0; sym++) {
- if (!sym->st_name)
- continue;
- if (strcmp(symname, strtab + sym->st_name) == 0)
- return sym;
- }
- }
- return 0;
-}
-
#if BYTE_ORDER == LITTLE_ENDIAN
#define le16_to_cpu(val) (val)
#define le32_to_cpu(val) (val)
@@ -750,75 +721,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
}
}

-/*
- * The .data..percpu section is a special case for x86_64 SMP kernels.
- * It is used to initialize the actual per_cpu areas and to provide
- * definitions for the per_cpu variables that correspond to their offsets
- * within the percpu area. Since the values of all of the symbols need
- * to be offsets from the start of the per_cpu area the virtual address
- * (sh_addr) of .data..percpu is 0 in SMP kernels.
- *
- * This means that:
- *
- * Relocations that reference symbols in the per_cpu area do not
- * need further relocation (since the value is an offset relative
- * to the start of the per_cpu area that does not change).
- *
- * Relocations that apply to the per_cpu area need to have their
- * offset adjusted by by the value of __per_cpu_load to make them
- * point to the correct place in the loaded image (because the
- * virtual address of .data..percpu is 0).
- *
- * For non SMP kernels .data..percpu is linked as part of the normal
- * kernel data and does not require special treatment.
- *
- */
-static int per_cpu_shndx = -1;
-static Elf_Addr per_cpu_load_addr;
-
-static void percpu_init(void)
-{
- int i;
- for (i = 0; i < shnum; i++) {
- ElfW(Sym) *sym;
- if (strcmp(sec_name(i), ".data..percpu"))
- continue;
-
- if (secs[i].shdr.sh_addr != 0) /* non SMP kernel */
- return;
-
- sym = sym_lookup("__per_cpu_load");
- if (!sym)
- die("can't find __per_cpu_load\n");
-
- per_cpu_shndx = i;
- per_cpu_load_addr = sym->st_value;
- return;
- }
-}
-
#if ELF_BITS == 64

-/*
- * Check to see if a symbol lies in the .data..percpu section.
- *
- * The linker incorrectly associates some symbols with the
- * .data..percpu section so we also need to check the symbol
- * name to make sure that we classify the symbol correctly.
- *
- * The GNU linker incorrectly associates:
- * __init_begin
- * __per_cpu_load
- *
- * The "gold" linker incorrectly associates:
- * init_per_cpu__gdt_page
- */
-static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
-{
- return 0;
-}
-
-
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
{
@@ -829,12 +733,6 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
if (sym->st_shndx == SHN_UNDEF)
return 0;

- /*
- * Adjust the offset if this reloc applies to the percpu section.
- */
- if (sec->shdr.sh_info == per_cpu_shndx)
- offset += per_cpu_load_addr;
-
switch (r_type) {
case R_X86_64_NONE:
/* NONE can be ignored. */
@@ -843,33 +741,21 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
case R_X86_64_PC32:
case R_X86_64_PLT32:
/*
- * PC relative relocations don't need to be adjusted unless
- * referencing a percpu symbol.
+ * PC relative relocations don't need to be adjusted.
*
* NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
- if (is_percpu_sym(sym, symname))
- add_reloc(&relocs32neg, offset);
break;

case R_X86_64_PC64:
/*
* Only used by jump labels
*/
- if (is_percpu_sym(sym, symname))
- die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n",
- symname);
break;

case R_X86_64_32:
case R_X86_64_32S:
case R_X86_64_64:
- /*
- * References to the percpu area don't need to be adjusted.
- */
- if (is_percpu_sym(sym, symname))
- break;
-
if (shn_abs) {
/*
* Whitelisted absolute symbols do not require
@@ -1083,7 +969,6 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Order the relocations for more efficient processing */
sort_relocs(&relocs32);
#if ELF_BITS == 64
- sort_relocs(&relocs32neg);
sort_relocs(&relocs64);
#else
sort_relocs(&relocs16);
@@ -1115,13 +1000,6 @@ static void emit_relocs(int as_text, int use_real_mode)
/* Now print each relocation */
for (i = 0; i < relocs64.count; i++)
write_reloc(relocs64.offset[i], stdout);
-
- /* Print a stop */
- write_reloc(0, stdout);
-
- /* Now print each inverse 32-bit relocation */
- for (i = 0; i < relocs32neg.count; i++)
- write_reloc(relocs32neg.offset[i], stdout);
#endif

/* Print a stop */
@@ -1172,8 +1050,6 @@ void process(FILE *fp, int use_real_mode, int as_text,
read_strtabs(fp);
read_symtabs(fp);
read_relocs(fp);
- if (ELF_BITS == 64)
- percpu_init();
if (show_absolute_syms) {
print_absolute_symbols();
return;
--
2.41.0

2023-10-23 21:18:47

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 9/9] kallsyms: Remove KALLSYMS_ABSOLUTE_PERCPU

x86-64 was the only user.

Signed-off-by: Brian Gerst <[email protected]>
---
init/Kconfig | 11 +-----
kernel/kallsyms.c | 12 ++-----
scripts/kallsyms.c | 80 ++++++++---------------------------------
scripts/link-vmlinux.sh | 4 ---
4 files changed, 18 insertions(+), 89 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 1af31b23e376..4d91c5632aaf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1715,11 +1715,6 @@ config KALLSYMS_ALL

Say N unless you really need all symbols, or kernel live patching.

-config KALLSYMS_ABSOLUTE_PERCPU
- bool
- depends on KALLSYMS
- default n
-
config KALLSYMS_BASE_RELATIVE
bool
depends on KALLSYMS
@@ -1727,11 +1722,7 @@ config KALLSYMS_BASE_RELATIVE
help
Instead of emitting them as absolute values in the native word size,
emit the symbol references in the kallsyms table as 32-bit entries,
- each containing a relative value in the range [base, base + U32_MAX]
- or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either
- an absolute value in the range [0, S32_MAX] or a relative value in the
- range [base, base + S32_MAX], where base is the lowest relative symbol
- address encountered in the image.
+ each containing a relative value in the range [base, base + U32_MAX].

On 64-bit builds, this reduces the size of the address table by 50%,
but more importantly, it results in entries whose values are build
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 18edd57b5fe8..f4e8e531052a 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -151,16 +151,8 @@ unsigned long kallsyms_sym_address(int idx)
if (!IS_ENABLED(CONFIG_KALLSYMS_BASE_RELATIVE))
return kallsyms_addresses[idx];

- /* values are unsigned offsets if --absolute-percpu is not in effect */
- if (!IS_ENABLED(CONFIG_KALLSYMS_ABSOLUTE_PERCPU))
- return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
-
- /* ...otherwise, positive offsets are absolute values */
- if (kallsyms_offsets[idx] >= 0)
- return kallsyms_offsets[idx];
-
- /* ...and negative offsets are relative to kallsyms_relative_base - 1 */
- return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
+ /* values are unsigned offsets */
+ return kallsyms_relative_base + (u32)kallsyms_offsets[idx];
}

static void cleanup_symbol_name(char *s)
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 653b92f6d4c8..501f978abf4b 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -5,8 +5,8 @@
* This software may be used and distributed according to the terms
* of the GNU General Public License, incorporated herein by reference.
*
- * Usage: kallsyms [--all-symbols] [--absolute-percpu]
- * [--base-relative] [--lto-clang] in.map > out.S
+ * Usage: kallsyms [--all-symbols] [--base-relative] [--lto-clang]
+ * in.map > out.S
*
* Table compression uses all the unused char codes on the symbols and
* maps these to the most used substrings (tokens). For instance, it might
@@ -37,7 +37,6 @@ struct sym_entry {
unsigned int len;
unsigned int seq;
unsigned int start_pos;
- unsigned int percpu_absolute;
unsigned char sym[];
};

@@ -55,14 +54,9 @@ static struct addr_range text_ranges[] = {
#define text_range_text (&text_ranges[0])
#define text_range_inittext (&text_ranges[1])

-static struct addr_range percpu_range = {
- "__per_cpu_start", "__per_cpu_end", -1ULL, 0
-};
-
static struct sym_entry **table;
static unsigned int table_size, table_cnt;
static int all_symbols;
-static int absolute_percpu;
static int base_relative;
static int lto_clang;

@@ -75,7 +69,7 @@ static unsigned char best_table_len[256];

static void usage(void)
{
- fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
+ fprintf(stderr, "Usage: kallsyms [--all-symbols] "
"[--base-relative] [--lto-clang] in.map > out.S\n");
exit(1);
}
@@ -167,7 +161,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
return NULL;

check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
- check_symbol_range(name, addr, &percpu_range, 1);

/* include the type field in the symbol name, so that it gets
* compressed together */
@@ -183,7 +176,6 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
sym->len = len;
sym->sym[0] = type;
strcpy(sym_name(sym), name);
- sym->percpu_absolute = 0;

return sym;
}
@@ -334,11 +326,6 @@ static int expand_symbol(const unsigned char *data, int len, char *result)
return total;
}

-static int symbol_absolute(const struct sym_entry *s)
-{
- return s->percpu_absolute;
-}
-
static void cleanup_symbol_name(char *s)
{
char *p;
@@ -499,30 +486,17 @@ static void write_src(void)
*/

long long offset;
- int overflow;
-
- if (!absolute_percpu) {
- offset = table[i]->addr - relative_base;
- overflow = (offset < 0 || offset > UINT_MAX);
- } else if (symbol_absolute(table[i])) {
- offset = table[i]->addr;
- overflow = (offset < 0 || offset > INT_MAX);
- } else {
- offset = relative_base - table[i]->addr - 1;
- overflow = (offset < INT_MIN || offset >= 0);
- }
- if (overflow) {
+
+ offset = table[i]->addr - relative_base;
+ if (offset < 0 || offset > UINT_MAX) {
fprintf(stderr, "kallsyms failure: "
- "%s symbol value %#llx out of range in relative mode\n",
- symbol_absolute(table[i]) ? "absolute" : "relative",
+ "symbol value %#llx out of range in relative mode\n",
table[i]->addr);
exit(EXIT_FAILURE);
}
printf("\t.long\t%#x /* %s */\n", (int)offset, table[i]->sym);
- } else if (!symbol_absolute(table[i])) {
- output_address(table[i]->addr);
} else {
- printf("\tPTR\t%#llx\n", table[i]->addr);
+ output_address(table[i]->addr);
}
}
printf("\n");
@@ -775,36 +749,15 @@ static void sort_symbols(void)
qsort(table, table_cnt, sizeof(table[0]), compare_symbols);
}

-static void make_percpus_absolute(void)
-{
- unsigned int i;
-
- for (i = 0; i < table_cnt; i++)
- if (symbol_in_range(table[i], &percpu_range, 1)) {
- /*
- * Keep the 'A' override for percpu symbols to
- * ensure consistent behavior compared to older
- * versions of this tool.
- */
- table[i]->sym[0] = 'A';
- table[i]->percpu_absolute = 1;
- }
-}
-
-/* find the minimum non-absolute symbol address */
+/* find the minimum symbol address */
static void record_relative_base(void)
{
- unsigned int i;
-
- for (i = 0; i < table_cnt; i++)
- if (!symbol_absolute(table[i])) {
- /*
- * The table is sorted by address.
- * Take the first non-absolute symbol value.
- */
- relative_base = table[i]->addr;
- return;
- }
+ /*
+ * The table is sorted by address.
+ * Take the first symbol value.
+ */
+ if (table_cnt)
+ relative_base = table[0]->addr;
}

int main(int argc, char **argv)
@@ -812,7 +765,6 @@ int main(int argc, char **argv)
while (1) {
static const struct option long_options[] = {
{"all-symbols", no_argument, &all_symbols, 1},
- {"absolute-percpu", no_argument, &absolute_percpu, 1},
{"base-relative", no_argument, &base_relative, 1},
{"lto-clang", no_argument, &lto_clang, 1},
{},
@@ -831,8 +783,6 @@ int main(int argc, char **argv)

read_map(argv[optind]);
shrink_table();
- if (absolute_percpu)
- make_percpus_absolute();
sort_symbols();
if (base_relative)
record_relative_base();
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a432b171be82..d25b6d5de45e 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -148,10 +148,6 @@ kallsyms()
kallsymopt="${kallsymopt} --all-symbols"
fi

- if is_enabled CONFIG_KALLSYMS_ABSOLUTE_PERCPU; then
- kallsymopt="${kallsymopt} --absolute-percpu"
- fi
-
if is_enabled CONFIG_KALLSYMS_BASE_RELATIVE; then
kallsymopt="${kallsymopt} --base-relative"
fi
--
2.41.0

2023-10-23 21:18:48

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 7/9] x86/percpu/64: Remove INIT_PER_CPU macros

The load and link addresses of percpu variables are now the same, so
these macros are no longer necessary.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/include/asm/percpu.h | 22 ----------------------
arch/x86/kernel/irq_64.c | 1 -
arch/x86/kernel/vmlinux.lds.S | 7 -------
arch/x86/tools/relocs.c | 1 -
4 files changed, 31 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index bbcc1ca737f0..11f50ab32d0e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -20,12 +20,6 @@

#define PER_CPU_VAR(var) __percpu(var)__percpu_rel

-#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
-#else
-#define INIT_PER_CPU_VAR(var) var
-#endif
-
#else /* ...!ASSEMBLY */

#include <linux/kernel.h>
@@ -96,22 +90,6 @@
#define __percpu_arg(x) __percpu_prefix "%" #x
#define __force_percpu_arg(x) __force_percpu_prefix "%" #x

-/*
- * Initialized pointers to per-cpu variables needed for the boot
- * processor need to use these macros to get the proper address
- * offset from __per_cpu_load on SMP.
- *
- * There also must be an entry in vmlinux_64.lds.S
- */
-#define DECLARE_INIT_PER_CPU(var) \
- extern typeof(var) init_per_cpu_var(var)
-
-#ifdef CONFIG_X86_64_SMP
-#define init_per_cpu_var(var) init_per_cpu__##var
-#else
-#define init_per_cpu_var(var) var
-#endif
-
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */

diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index fe0c859873d1..30424f9876bc 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,7 +26,6 @@
#include <asm/apic.h>

DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_backing_store);

#ifdef CONFIG_VMAP_STACK
/*
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8d8eb4d9ff9d..089d164164e8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -481,13 +481,6 @@ SECTIONS
"kernel image bigger than KERNEL_IMAGE_SIZE");

#ifdef CONFIG_X86_64
-/*
- * Per-cpu symbols which need to be offset from __per_cpu_load
- * for the boot processor.
- */
-#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
-INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_backing_store);

#ifdef CONFIG_CPU_UNRET_ENTRY
. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 7feb63179b62..931d90aa814c 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -83,7 +83,6 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
"__initramfs_start|"
"(jiffies|jiffies_64)|"
#if ELF_BITS == 64
- "init_per_cpu__.*|"
"__end_rodata_hpage_align|"
#endif
"__vvar_page|"
--
2.41.0

2023-10-23 21:18:53

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 8/9] percpu: Remove PER_CPU_FIRST_SECTION

x86-64 was the only user.

Signed-off-by: Brian Gerst <[email protected]>
---
include/asm-generic/vmlinux.lds.h | 1 -
include/linux/percpu-defs.h | 12 ------------
2 files changed, 13 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 67d8dd2f1bde..23d8acc72760 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -1032,7 +1032,6 @@
*/
#define PERCPU_INPUT(cacheline) \
__per_cpu_start = .; \
- *(.data..percpu..first) \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index ec3573119923..b9ddee91e6c7 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -26,13 +26,11 @@
#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned"
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
#endif
-#define PER_CPU_FIRST_SECTION "..first"

#else

#define PER_CPU_SHARED_ALIGNED_SECTION ""
#define PER_CPU_ALIGNED_SECTION "..shared_aligned"
-#define PER_CPU_FIRST_SECTION ""

#endif

@@ -114,16 +112,6 @@
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")

-/*
- * Declaration/definition used for per-CPU variables that must come first in
- * the set of variables.
- */
-#define DECLARE_PER_CPU_FIRST(type, name) \
- DECLARE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
-
-#define DEFINE_PER_CPU_FIRST(type, name) \
- DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
-
/*
* Declaration/definition used for per-CPU variables that must be cacheline
* aligned under SMP conditions so that, whilst a particular instance of the
--
2.41.0

2023-10-23 21:19:16

by Brian Gerst

[permalink] [raw]
Subject: [PATCH 5/9] x86/percpu/64: Use relative percpu offsets

The percpu section is currently linked at virtual address 0, because
older compilers hardcoded the stack protector canary value at a fixed
offset from the start of the GS segment. Now that the canary is a
normal percpu variable, the percpu section can be linked normally.
This means that x86-64 will calculate percpu offsets like most other
architectures, as the delta between the initial percpu address and the
dynamically allocated memory.

Signed-off-by: Brian Gerst <[email protected]>
---
arch/x86/kernel/head_64.S | 6 ------
arch/x86/kernel/setup_percpu.c | 12 ++----------
arch/x86/kernel/vmlinux.lds.S | 24 +-----------------------
arch/x86/tools/relocs.c | 10 +++-------
arch/x86/xen/xen-head.S | 6 ------
init/Kconfig | 2 +-
6 files changed, 7 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index f2453eb38417..b35f74e58dd7 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -72,14 +72,8 @@ SYM_CODE_START_NOALIGN(startup_64)

/* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
-#ifdef CONFIG_SMP
- leaq __per_cpu_load(%rip), %rdx
- movl %edx, %eax
- shrq $32, %rdx
-#else
xorl %eax, %eax
xorl %edx, %edx
-#endif
wrmsr

call startup_64_setup_env
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 2c97bf7b56ae..8707dd07b9ce 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,18 +23,10 @@
#include <asm/cpumask.h>
#include <asm/cpu.h>

-#ifdef CONFIG_X86_64
-#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
-#else
-#define BOOT_PERCPU_OFFSET 0
-#endif
-
-DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
EXPORT_PER_CPU_SYMBOL(this_cpu_off);

-unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
- [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
-};
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
EXPORT_SYMBOL(__per_cpu_offset);

/*
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c87dc8de2084..8d8eb4d9ff9d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -102,12 +102,6 @@ jiffies = jiffies_64;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_SMP
- percpu PT_LOAD FLAGS(6); /* RW_ */
-#endif
- init PT_LOAD FLAGS(7); /* RWE */
-#endif
note PT_NOTE FLAGS(0); /* ___ */
}

@@ -223,21 +217,7 @@ SECTIONS
__init_begin = .; /* paired with __init_end */
}

-#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
- /*
- * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
- * output PHDR, so the next output section - .init.text - should
- * start another segment - init.
- */
- PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
- ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
- "per-CPU data too large - increase CONFIG_PHYSICAL_START")
-#endif
-
INIT_TEXT_SECTION(PAGE_SIZE)
-#ifdef CONFIG_X86_64
- :init
-#endif

/*
* Section for code used exclusively before alternatives are run. All
@@ -367,9 +347,7 @@ SECTIONS
EXIT_DATA
}

-#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
-#endif

. = ALIGN(PAGE_SIZE);

@@ -507,7 +485,7 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
-#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
+#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_backing_store);

diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 3ccd9d4fcf9c..01efbfdd3eb3 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -815,12 +815,7 @@ static void percpu_init(void)
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{
- int shndx = sym_index(sym);
-
- return (shndx == per_cpu_shndx) &&
- strcmp(symname, "__init_begin") &&
- strcmp(symname, "__per_cpu_load") &&
- strncmp(symname, "init_per_cpu_", 13);
+ return 0;
}


@@ -1043,7 +1038,8 @@ static int cmp_relocs(const void *va, const void *vb)

static void sort_relocs(struct relocs *r)
{
- qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
+ if (r->count)
+ qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
}

static int write32(uint32_t v, FILE *f)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 9ce0d9d268bb..c1d9c92b417a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -57,14 +57,8 @@ SYM_CODE_START(startup_xen)
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
-#ifdef CONFIG_SMP
- leaq __per_cpu_load(%rip), %rdx
- movl %edx, %eax
- shrq $32, %rdx
-#else
xorl %eax, %eax
xorl %edx, %edx
-#endif
wrmsr

mov %rsi, %rdi
diff --git a/init/Kconfig b/init/Kconfig
index 6d35728b94b2..1af31b23e376 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1718,7 +1718,7 @@ config KALLSYMS_ALL
config KALLSYMS_ABSOLUTE_PERCPU
bool
depends on KALLSYMS
- default X86_64 && SMP
+ default n

config KALLSYMS_BASE_RELATIVE
bool
--
2.41.0

2023-10-24 12:33:40

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 4/9] x86/percpu/64: Remove fixed_percpu_data

On Mon, Oct 23, 2023 at 11:17 PM Brian Gerst <[email protected]> wrote:
>
> Now that the stack protector canary value is a normal percpu variable,
> fixed_percpu_data is unused and can be removed.
>
> Signed-off-by: Brian Gerst <[email protected]>
> ---
> arch/x86/include/asm/processor.h | 13 +++++--------
> arch/x86/kernel/cpu/common.c | 4 ----
> arch/x86/kernel/head_64.S | 12 ++++++------
> arch/x86/kernel/vmlinux.lds.S | 6 ------
> arch/x86/tools/relocs.c | 1 -
> arch/x86/xen/xen-head.S | 12 ++++++++----
> 6 files changed, 19 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 04371f60e3c6..48c31b8e3e72 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -393,16 +393,13 @@ struct irq_stack {
> } __aligned(IRQ_STACK_SIZE);
>
> #ifdef CONFIG_X86_64
> -struct fixed_percpu_data {
> - char gs_base[40];
> -};
> -
> -DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
> -DECLARE_INIT_PER_CPU(fixed_percpu_data);
> -
> static inline unsigned long cpu_kernelmode_gs_base(int cpu)
> {
> - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
> +#ifdef CONFIG_SMP
> + return per_cpu_offset(cpu);
> +#else
> + return 0;
> +#endif
> }
>
> extern asmlinkage void entry_SYSCALL32_ignore(void);
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index f9c8bd27b642..a44fd3ad460e 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2051,10 +2051,6 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
> EXPORT_PER_CPU_SYMBOL(pcpu_hot);
>
> #ifdef CONFIG_X86_64
> -DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
> - fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
> -EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
> -
> static void wrmsrl_cstar(unsigned long val)
> {
> /*
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 3dcabbc49149..f2453eb38417 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -72,9 +72,14 @@ SYM_CODE_START_NOALIGN(startup_64)
>
> /* Setup GSBASE to allow stack canary access for C code */
> movl $MSR_GS_BASE, %ecx
> - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> +#ifdef CONFIG_SMP
> + leaq __per_cpu_load(%rip), %rdx
> movl %edx, %eax
> shrq $32, %rdx
> +#else
> + xorl %eax, %eax
> + xorl %edx, %edx
> +#endif
> wrmsr
>
> call startup_64_setup_env
> @@ -345,15 +350,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
>
> /* Set up %gs.
> *
> - * The base of %gs always points to fixed_percpu_data. If the
> - * stack protector canary is enabled, it is located at %gs:40.
> * Note that, on SMP, the boot cpu uses init data section until
> * the per cpu areas are set up.
> */
> movl $MSR_GS_BASE,%ecx
> -#ifndef CONFIG_SMP
> - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> -#endif
> movl %edx, %eax
> shrq $32, %rdx
> wrmsr
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index 54a5596adaa6..c87dc8de2084 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -509,14 +509,8 @@ SECTIONS
> */
> #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
> INIT_PER_CPU(gdt_page);
> -INIT_PER_CPU(fixed_percpu_data);
> INIT_PER_CPU(irq_stack_backing_store);
>
> -#ifdef CONFIG_SMP
> -. = ASSERT((fixed_percpu_data == 0),
> - "fixed_percpu_data is not at start of per-cpu area");
> -#endif
> -
> #ifdef CONFIG_CPU_UNRET_ENTRY
> . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
> #endif
> diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
> index d30949e25ebd..3ccd9d4fcf9c 100644
> --- a/arch/x86/tools/relocs.c
> +++ b/arch/x86/tools/relocs.c
> @@ -811,7 +811,6 @@ static void percpu_init(void)
> * __per_cpu_load
> *
> * The "gold" linker incorrectly associates:
> - * init_per_cpu__fixed_percpu_data
> * init_per_cpu__gdt_page
> */
> static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index a0ea285878db..9ce0d9d268bb 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -53,14 +53,18 @@ SYM_CODE_START(startup_xen)
>
> /* Set up %gs.
> *
> - * The base of %gs always points to fixed_percpu_data. If the
> - * stack protector canary is enabled, it is located at %gs:40.
> * Note that, on SMP, the boot cpu uses init data section until
> * the per cpu areas are set up.
> */
> movl $MSR_GS_BASE,%ecx
> - movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
> - cdq
> +#ifdef CONFIG_SMP
> + leaq __per_cpu_load(%rip), %rdx
> + movl %edx, %eax
> + shrq $32, %rdx
> +#else
> + xorl %eax, %eax
> + xorl %edx, %edx
> +#endif
> wrmsr
>
> mov %rsi, %rdi

Please note that there is another access to $MSR_GS_BASE in
arch/x86/platform/pvh/head.S, around line 98. Should this be fixed,
too?

Uros.

2023-10-24 12:44:21

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 5/9] x86/percpu/64: Use relative percpu offsets

On Mon, Oct 23, 2023 at 11:17 PM Brian Gerst <[email protected]> wrote:
>
> The percpu section is currently linked at virtual address 0, because
> older compilers hardcoded the stack protector canary value at a fixed
> offset from the start of the GS segment. Now that the canary is a
> normal percpu variable, the percpu section can be linked normally.
> This means that x86-64 will calculate percpu offsets like most other
> architectures, as the delta between the initial percpu address and the
> dynamically allocated memory.
>
> Signed-off-by: Brian Gerst <[email protected]>
> ---
> arch/x86/kernel/head_64.S | 6 ------
> arch/x86/kernel/setup_percpu.c | 12 ++----------
> arch/x86/kernel/vmlinux.lds.S | 24 +-----------------------
> arch/x86/tools/relocs.c | 10 +++-------
> arch/x86/xen/xen-head.S | 6 ------
> init/Kconfig | 2 +-
> 6 files changed, 7 insertions(+), 53 deletions(-)
>
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index f2453eb38417..b35f74e58dd7 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -72,14 +72,8 @@ SYM_CODE_START_NOALIGN(startup_64)
>
> /* Setup GSBASE to allow stack canary access for C code */
> movl $MSR_GS_BASE, %ecx
> -#ifdef CONFIG_SMP
> - leaq __per_cpu_load(%rip), %rdx
> - movl %edx, %eax
> - shrq $32, %rdx
> -#else
> xorl %eax, %eax
> xorl %edx, %edx
> -#endif
> wrmsr
>
> call startup_64_setup_env
> diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
> index 2c97bf7b56ae..8707dd07b9ce 100644
> --- a/arch/x86/kernel/setup_percpu.c
> +++ b/arch/x86/kernel/setup_percpu.c
> @@ -23,18 +23,10 @@
> #include <asm/cpumask.h>
> #include <asm/cpu.h>
>
> -#ifdef CONFIG_X86_64
> -#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
> -#else
> -#define BOOT_PERCPU_OFFSET 0
> -#endif
> -
> -DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
> EXPORT_PER_CPU_SYMBOL(this_cpu_off);
>
> -unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
> - [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
> -};
> +unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init;
> EXPORT_SYMBOL(__per_cpu_offset);
>
> /*
> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> index c87dc8de2084..8d8eb4d9ff9d 100644
> --- a/arch/x86/kernel/vmlinux.lds.S
> +++ b/arch/x86/kernel/vmlinux.lds.S
> @@ -102,12 +102,6 @@ jiffies = jiffies_64;
> PHDRS {
> text PT_LOAD FLAGS(5); /* R_E */
> data PT_LOAD FLAGS(6); /* RW_ */
> -#ifdef CONFIG_X86_64
> -#ifdef CONFIG_SMP
> - percpu PT_LOAD FLAGS(6); /* RW_ */
> -#endif
> - init PT_LOAD FLAGS(7); /* RWE */
> -#endif
> note PT_NOTE FLAGS(0); /* ___ */
> }
>
> @@ -223,21 +217,7 @@ SECTIONS
> __init_begin = .; /* paired with __init_end */
> }
>
> -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
> - /*
> - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
> - * output PHDR, so the next output section - .init.text - should
> - * start another segment - init.
> - */
> - PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
> - ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
> - "per-CPU data too large - increase CONFIG_PHYSICAL_START")
> -#endif
> -
> INIT_TEXT_SECTION(PAGE_SIZE)
> -#ifdef CONFIG_X86_64
> - :init
> -#endif
>
> /*
> * Section for code used exclusively before alternatives are run. All
> @@ -367,9 +347,7 @@ SECTIONS
> EXIT_DATA
> }
>
> -#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
> PERCPU_SECTION(INTERNODE_CACHE_BYTES)
> -#endif
>
> . = ALIGN(PAGE_SIZE);
>
> @@ -507,7 +485,7 @@ SECTIONS
> * Per-cpu symbols which need to be offset from __per_cpu_load
> * for the boot processor.
> */
> -#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
> +#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x)
> INIT_PER_CPU(gdt_page);
> INIT_PER_CPU(irq_stack_backing_store);
>
> diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
> index 3ccd9d4fcf9c..01efbfdd3eb3 100644
> --- a/arch/x86/tools/relocs.c
> +++ b/arch/x86/tools/relocs.c
> @@ -815,12 +815,7 @@ static void percpu_init(void)
> */
> static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
> {
> - int shndx = sym_index(sym);
> -
> - return (shndx == per_cpu_shndx) &&
> - strcmp(symname, "__init_begin") &&
> - strcmp(symname, "__per_cpu_load") &&
> - strncmp(symname, "init_per_cpu_", 13);
> + return 0;
> }
>
>
> @@ -1043,7 +1038,8 @@ static int cmp_relocs(const void *va, const void *vb)
>
> static void sort_relocs(struct relocs *r)
> {
> - qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
> + if (r->count)
> + qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
> }
>
> static int write32(uint32_t v, FILE *f)
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 9ce0d9d268bb..c1d9c92b417a 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -57,14 +57,8 @@ SYM_CODE_START(startup_xen)
> * the per cpu areas are set up.
> */
> movl $MSR_GS_BASE,%ecx
> -#ifdef CONFIG_SMP
> - leaq __per_cpu_load(%rip), %rdx
> - movl %edx, %eax
> - shrq $32, %rdx
> -#else
> xorl %eax, %eax
> xorl %edx, %edx
> -#endif
> wrmsr
>
> mov %rsi, %rdi

Again, please note that there is another access to $MSR_GS_BASE in
arch/x86/platform/pvh/head.S that should be reviewed.

Uros.

> diff --git a/init/Kconfig b/init/Kconfig
> index 6d35728b94b2..1af31b23e376 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1718,7 +1718,7 @@ config KALLSYMS_ALL
> config KALLSYMS_ABSOLUTE_PERCPU
> bool
> depends on KALLSYMS
> - default X86_64 && SMP
> + default n
>
> config KALLSYMS_BASE_RELATIVE
> bool
> --
> 2.41.0
>

2023-10-24 14:12:29

by Brian Gerst

[permalink] [raw]
Subject: Re: [PATCH 4/9] x86/percpu/64: Remove fixed_percpu_data

On Tue, Oct 24, 2023 at 8:33 AM Uros Bizjak <[email protected]> wrote:
>
> On Mon, Oct 23, 2023 at 11:17 PM Brian Gerst <[email protected]> wrote:
> >
> > Now that the stack protector canary value is a normal percpu variable,
> > fixed_percpu_data is unused and can be removed.
> >
> > Signed-off-by: Brian Gerst <[email protected]>
> > ---
> > arch/x86/include/asm/processor.h | 13 +++++--------
> > arch/x86/kernel/cpu/common.c | 4 ----
> > arch/x86/kernel/head_64.S | 12 ++++++------
> > arch/x86/kernel/vmlinux.lds.S | 6 ------
> > arch/x86/tools/relocs.c | 1 -
> > arch/x86/xen/xen-head.S | 12 ++++++++----
> > 6 files changed, 19 insertions(+), 29 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> > index 04371f60e3c6..48c31b8e3e72 100644
> > --- a/arch/x86/include/asm/processor.h
> > +++ b/arch/x86/include/asm/processor.h
> > @@ -393,16 +393,13 @@ struct irq_stack {
> > } __aligned(IRQ_STACK_SIZE);
> >
> > #ifdef CONFIG_X86_64
> > -struct fixed_percpu_data {
> > - char gs_base[40];
> > -};
> > -
> > -DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
> > -DECLARE_INIT_PER_CPU(fixed_percpu_data);
> > -
> > static inline unsigned long cpu_kernelmode_gs_base(int cpu)
> > {
> > - return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
> > +#ifdef CONFIG_SMP
> > + return per_cpu_offset(cpu);
> > +#else
> > + return 0;
> > +#endif
> > }
> >
> > extern asmlinkage void entry_SYSCALL32_ignore(void);
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index f9c8bd27b642..a44fd3ad460e 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -2051,10 +2051,6 @@ DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
> > EXPORT_PER_CPU_SYMBOL(pcpu_hot);
> >
> > #ifdef CONFIG_X86_64
> > -DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
> > - fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
> > -EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
> > -
> > static void wrmsrl_cstar(unsigned long val)
> > {
> > /*
> > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > index 3dcabbc49149..f2453eb38417 100644
> > --- a/arch/x86/kernel/head_64.S
> > +++ b/arch/x86/kernel/head_64.S
> > @@ -72,9 +72,14 @@ SYM_CODE_START_NOALIGN(startup_64)
> >
> > /* Setup GSBASE to allow stack canary access for C code */
> > movl $MSR_GS_BASE, %ecx
> > - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> > +#ifdef CONFIG_SMP
> > + leaq __per_cpu_load(%rip), %rdx
> > movl %edx, %eax
> > shrq $32, %rdx
> > +#else
> > + xorl %eax, %eax
> > + xorl %edx, %edx
> > +#endif
> > wrmsr
> >
> > call startup_64_setup_env
> > @@ -345,15 +350,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
> >
> > /* Set up %gs.
> > *
> > - * The base of %gs always points to fixed_percpu_data. If the
> > - * stack protector canary is enabled, it is located at %gs:40.
> > * Note that, on SMP, the boot cpu uses init data section until
> > * the per cpu areas are set up.
> > */
> > movl $MSR_GS_BASE,%ecx
> > -#ifndef CONFIG_SMP
> > - leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
> > -#endif
> > movl %edx, %eax
> > shrq $32, %rdx
> > wrmsr
> > diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
> > index 54a5596adaa6..c87dc8de2084 100644
> > --- a/arch/x86/kernel/vmlinux.lds.S
> > +++ b/arch/x86/kernel/vmlinux.lds.S
> > @@ -509,14 +509,8 @@ SECTIONS
> > */
> > #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
> > INIT_PER_CPU(gdt_page);
> > -INIT_PER_CPU(fixed_percpu_data);
> > INIT_PER_CPU(irq_stack_backing_store);
> >
> > -#ifdef CONFIG_SMP
> > -. = ASSERT((fixed_percpu_data == 0),
> > - "fixed_percpu_data is not at start of per-cpu area");
> > -#endif
> > -
> > #ifdef CONFIG_CPU_UNRET_ENTRY
> > . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
> > #endif
> > diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
> > index d30949e25ebd..3ccd9d4fcf9c 100644
> > --- a/arch/x86/tools/relocs.c
> > +++ b/arch/x86/tools/relocs.c
> > @@ -811,7 +811,6 @@ static void percpu_init(void)
> > * __per_cpu_load
> > *
> > * The "gold" linker incorrectly associates:
> > - * init_per_cpu__fixed_percpu_data
> > * init_per_cpu__gdt_page
> > */
> > static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
> > diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> > index a0ea285878db..9ce0d9d268bb 100644
> > --- a/arch/x86/xen/xen-head.S
> > +++ b/arch/x86/xen/xen-head.S
> > @@ -53,14 +53,18 @@ SYM_CODE_START(startup_xen)
> >
> > /* Set up %gs.
> > *
> > - * The base of %gs always points to fixed_percpu_data. If the
> > - * stack protector canary is enabled, it is located at %gs:40.
> > * Note that, on SMP, the boot cpu uses init data section until
> > * the per cpu areas are set up.
> > */
> > movl $MSR_GS_BASE,%ecx
> > - movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
> > - cdq
> > +#ifdef CONFIG_SMP
> > + leaq __per_cpu_load(%rip), %rdx
> > + movl %edx, %eax
> > + shrq $32, %rdx
> > +#else
> > + xorl %eax, %eax
> > + xorl %edx, %edx
> > +#endif
> > wrmsr
> >
> > mov %rsi, %rdi
>
> Please note that there is another access to $MSR_GS_BASE in
> arch/x86/platform/pvh/head.S, around line 98. Should this be fixed,
> too?

Thanks for pointing that out. I missed that access because it doesn't
reference fixed_percpu_data. It is fixed for v2.

Brian Gerst