2024-04-22 08:48:01

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 01/15] arm64: module: remove unneeded call to kasan_alloc_module_shadow()

From: "Mike Rapoport (IBM)" <[email protected]>

Since commit f6f37d9320a1 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS
modes") KASAN_VMALLOC is always enabled when KASAN is on. This means
that allocations in module_alloc() will be tracked by KASAN protection
for vmalloc() and that kasan_alloc_module_shadow() will be always an
empty inline and there is no point in calling it.

Drop meaningless call to kasan_alloc_module_shadow() from
module_alloc().

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
arch/arm64/kernel/module.c | 5 -----
1 file changed, 5 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..e92da4da1b2a 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -141,11 +141,6 @@ void *module_alloc(unsigned long size)
__func__);
}

- if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
- vfree(p);
- return NULL;
- }
-
/* Memory is intended to be executable, reset the pointer tag. */
return kasan_reset_tag(p);
}
--
2.43.0



2024-04-22 08:48:23

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 02/15] mips: module: rename MODULE_START to MODULES_VADDR

From: "Mike Rapoport (IBM)" <[email protected]>

and MODULE_END to MODULES_END to match other architectures that define
custom address space for modules.

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
arch/mips/include/asm/pgtable-64.h | 4 ++--
arch/mips/kernel/module.c | 4 ++--
arch/mips/mm/fault.c | 4 ++--
3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 20ca48c1b606..c0109aff223b 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -147,8 +147,8 @@
#if defined(CONFIG_MODULES) && defined(KBUILD_64BIT_SYM32) && \
VMALLOC_START != CKSSEG
/* Load modules into 32bit-compatible segment. */
-#define MODULE_START CKSSEG
-#define MODULE_END (FIXADDR_START-2*PAGE_SIZE)
+#define MODULES_VADDR CKSSEG
+#define MODULES_END (FIXADDR_START-2*PAGE_SIZE)
#endif

#define pte_ERROR(e) \
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 7b2fbaa9cac5..9a6c96014904 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -31,10 +31,10 @@ struct mips_hi16 {
static LIST_HEAD(dbe_list);
static DEFINE_SPINLOCK(dbe_lock);

-#ifdef MODULE_START
+#ifdef MODULES_VADDR
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index aaa9a242ebba..37fedeaca2e9 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -83,8 +83,8 @@ static void __do_page_fault(struct pt_regs *regs, unsigned long write,

if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
goto VMALLOC_FAULT_TARGET;
-#ifdef MODULE_START
- if (unlikely(address >= MODULE_START && address < MODULE_END))
+#ifdef MODULES_VADDR
+ if (unlikely(address >= MODULES_VADDR && address < MODULES_END))
goto VMALLOC_FAULT_TARGET;
#endif

--
2.43.0


2024-04-22 08:49:13

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 03/15] nios2: define virtual address space for modules

From: "Mike Rapoport (IBM)" <[email protected]>

nios2 uses kmalloc() to implement module_alloc() because CALL26/PCREL26
cannot reach all of vmalloc address space.

Define module space as 32MiB below the kernel base and switch nios2 to
use vmalloc for module allocations.

Suggested-by: Thomas Gleixner <[email protected]>
Acked-by: Dinh Nguyen <[email protected]>
Acked-by: Song Liu <[email protected]>
Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
arch/nios2/include/asm/pgtable.h | 5 ++++-
arch/nios2/kernel/module.c | 19 ++++---------------
2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index d052dfcbe8d3..eab87c6beacb 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -25,7 +25,10 @@
#include <asm-generic/pgtable-nopmd.h>

#define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE
-#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)
+#define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M - 1)
+
+#define MODULES_VADDR (CONFIG_NIOS2_KERNEL_REGION_BASE - SZ_32M)
+#define MODULES_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1)

struct mm_struct;

diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 76e0a42d6e36..9c97b7513853 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -21,23 +21,12 @@

#include <asm/cacheflush.h>

-/*
- * Modules should NOT be allocated with kmalloc for (obvious) reasons.
- * But we do it for now to avoid relocation issues. CALL26/PCREL26 cannot reach
- * from 0x80000000 (vmalloc area) to 0xc00000000 (kernel) (kmalloc returns
- * addresses in 0xc0000000)
- */
void *module_alloc(unsigned long size)
{
- if (size == 0)
- return NULL;
- return kmalloc(size, GFP_KERNEL);
-}
-
-/* Free memory returned from module_alloc */
-void module_memfree(void *module_region)
-{
- kfree(module_region);
+ return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
}

int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
--
2.43.0


2024-04-22 08:49:16

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 04/15] sparc: simplify module_alloc()

From: "Mike Rapoport (IBM)" <[email protected]>

Define MODULES_VADDR and MODULES_END as VMALLOC_START and VMALLOC_END
for 32-bit and reduce module_alloc() to

__vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, ...)

as with the new defines the allocations becames identical for both 32
and 64 bits.

While on it, drop unsed include of <linux/jump_label.h>

Suggested-by: Sam Ravnborg <[email protected]>
Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
arch/sparc/include/asm/pgtable_32.h | 2 ++
arch/sparc/kernel/module.c | 25 +------------------------
2 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 9e85d57ac3f2..62bcafe38b1f 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -432,6 +432,8 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,

#define VMALLOC_START _AC(0xfe600000,UL)
#define VMALLOC_END _AC(0xffc00000,UL)
+#define MODULES_VADDR VMALLOC_START
+#define MODULES_END VMALLOC_END

/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index 66c45a2764bc..d37adb2a0b54 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -21,35 +21,12 @@

#include "entry.h"

-#ifdef CONFIG_SPARC64
-
-#include <linux/jump_label.h>
-
-static void *module_map(unsigned long size)
+void *module_alloc(unsigned long size)
{
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
-#else
-static void *module_map(unsigned long size)
-{
- return vmalloc(size);
-}
-#endif /* CONFIG_SPARC64 */
-
-void *module_alloc(unsigned long size)
-{
- void *ret;
-
- ret = module_map(size);
- if (ret)
- memset(ret, 0, size);
-
- return ret;
-}

/* Make generic code ignore STT_REGISTER dummy undefined symbols. */
int module_frob_arch_sections(Elf_Ehdr *hdr,
--
2.43.0


2024-04-22 08:50:19

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 05/15] module: make module_memory_{alloc,free} more self-contained

From: "Mike Rapoport (IBM)" <[email protected]>

Move the logic related to the memory allocation and freeing into
module_memory_alloc() and module_memory_free().

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
kernel/module/main.c | 64 +++++++++++++++++++++++++++-----------------
1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/kernel/module/main.c b/kernel/module/main.c
index e1e8a7a9d6c1..5b82b069e0d3 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type)
mod_mem_type_is_core_data(type);
}

-static void *module_memory_alloc(unsigned int size, enum mod_mem_type type)
+static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
{
+ unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+ void *ptr;
+
+ mod->mem[type].size = size;
+
if (mod_mem_use_vmalloc(type))
- return vzalloc(size);
- return module_alloc(size);
+ ptr = vmalloc(size);
+ else
+ ptr = module_alloc(size);
+
+ if (!ptr)
+ return -ENOMEM;
+
+ /*
+ * The pointer to these blocks of memory are stored on the module
+ * structure and we keep that around so long as the module is
+ * around. We only free that memory when we unload the module.
+ * Just mark them as not being a leak then. The .init* ELF
+ * sections *do* get freed after boot so we *could* treat them
+ * slightly differently with kmemleak_ignore() and only grey
+ * them out as they work as typical memory allocations which
+ * *do* eventually get freed, but let's just keep things simple
+ * and avoid *any* false positives.
+ */
+ kmemleak_not_leak(ptr);
+
+ memset(ptr, 0, size);
+ mod->mem[type].base = ptr;
+
+ return 0;
}

-static void module_memory_free(void *ptr, enum mod_mem_type type)
+static void module_memory_free(struct module *mod, enum mod_mem_type type)
{
+ void *ptr = mod->mem[type].base;
+
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
@@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod)
/* Free lock-classes; relies on the preceding sync_rcu(). */
lockdep_free_key_range(mod_mem->base, mod_mem->size);
if (mod_mem->size)
- module_memory_free(mod_mem->base, type);
+ module_memory_free(mod, type);
}

/* MOD_DATA hosts mod, so free it at last */
lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size);
- module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA);
+ module_memory_free(mod, MOD_DATA);
}

/* Free a module, remove from lists, etc. */
@@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, struct load_info *info)
static int move_module(struct module *mod, struct load_info *info)
{
int i;
- void *ptr;
enum mod_mem_type t = 0;
int ret = -ENOMEM;

@@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct load_info *info)
mod->mem[type].base = NULL;
continue;
}
- mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size);
- ptr = module_memory_alloc(mod->mem[type].size, type);
- /*
- * The pointer to these blocks of memory are stored on the module
- * structure and we keep that around so long as the module is
- * around. We only free that memory when we unload the module.
- * Just mark them as not being a leak then. The .init* ELF
- * sections *do* get freed after boot so we *could* treat them
- * slightly differently with kmemleak_ignore() and only grey
- * them out as they work as typical memory allocations which
- * *do* eventually get freed, but let's just keep things simple
- * and avoid *any* false positives.
- */
- kmemleak_not_leak(ptr);
- if (!ptr) {
+
+ ret = module_memory_alloc(mod, type);
+ if (ret) {
t = type;
goto out_enomem;
}
- memset(ptr, 0, mod->mem[type].size);
- mod->mem[type].base = ptr;
}

/* Transfer each section which specifies SHF_ALLOC */
@@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct load_info *info)
return 0;
out_enomem:
for (t--; t >= 0; t--)
- module_memory_free(mod->mem[t].base, t);
+ module_memory_free(mod, t);
return ret;
}

--
2.43.0


2024-04-22 08:51:25

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 07/15] mm/execmem, arch: convert simple overrides of module_alloc to execmem

From: "Mike Rapoport (IBM)" <[email protected]>

Several architectures override module_alloc() only to define address
range for code allocations different than VMALLOC address space.

Provide a generic implementation in execmem that uses the parameters for
address space ranges, required alignment and page protections provided
by architectures.

The architectures must fill execmem_info structure and implement
execmem_arch_setup() that returns a pointer to that structure. This way the
execmem initialization won't be called from every architecture, but rather
from a central place, namely a core_initcall() in execmem.

The execmem provides execmem_alloc() API that wraps __vmalloc_node_range()
with the parameters defined by the architectures. If an architecture does
not implement execmem_arch_setup(), execmem_alloc() will fall back to
module_alloc().

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
---
arch/loongarch/kernel/module.c | 19 +++++++--
arch/mips/kernel/module.c | 20 ++++++++--
arch/nios2/kernel/module.c | 21 +++++++---
arch/parisc/kernel/module.c | 24 +++++++----
arch/riscv/kernel/module.c | 24 +++++++----
arch/sparc/kernel/module.c | 20 ++++++++--
include/linux/execmem.h | 41 +++++++++++++++++++
mm/execmem.c | 73 ++++++++++++++++++++++++++++++++--
8 files changed, 208 insertions(+), 34 deletions(-)

diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index c7d0338d12c1..ca6dd7ea1610 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,6 +18,7 @@
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/inst.h>
#include <asm/unwind.h>
@@ -490,10 +491,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}

-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0));
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}

static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c
index 9a6c96014904..59225a3cf918 100644
--- a/arch/mips/kernel/module.c
+++ b/arch/mips/kernel/module.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/jump_label.h>
+#include <linux/execmem.h>
#include <asm/jump_label.h>

struct mips_hi16 {
@@ -32,11 +33,22 @@ static LIST_HEAD(dbe_list);
static DEFINE_SPINLOCK(dbe_lock);

#ifdef MODULES_VADDR
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}
#endif

diff --git a/arch/nios2/kernel/module.c b/arch/nios2/kernel/module.c
index 9c97b7513853..0d1ee86631fc 100644
--- a/arch/nios2/kernel/module.c
+++ b/arch/nios2/kernel/module.c
@@ -18,15 +18,26 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/kernel.h>
+#include <linux/execmem.h>

#include <asm/cacheflush.h>

-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC,
- VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL_EXEC,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}

int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index d214bbe3c2af..bdfa85e10c1b 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -49,6 +49,7 @@
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/execmem.h>

#include <asm/unwind.h>
#include <asm/sections.h>
@@ -173,15 +174,22 @@ static inline int reassemble_22(int as22)
((as22 & 0x0003ff) << 3));
}

-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- /* using RWX means less protection for modules, but it's
- * easier than trying to map the text, data, init_text and
- * init_data correctly */
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL,
- PAGE_KERNEL_RWX, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_RWX,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}

#ifndef CONFIG_64BIT
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 5e5a82644451..182904127ba0 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -14,6 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/pgtable.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/sections.h>

@@ -906,13 +907,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
}

#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-void *module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, MODULES_VADDR,
- MODULES_END, GFP_KERNEL,
- PAGE_KERNEL, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE,
- __builtin_return_address(0));
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}
#endif

diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
index d37adb2a0b54..8b7ee45defc3 100644
--- a/arch/sparc/kernel/module.c
+++ b/arch/sparc/kernel/module.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/mm.h>
+#include <linux/execmem.h>

#include <asm/processor.h>
#include <asm/spitfire.h>
@@ -21,11 +22,22 @@

#include "entry.h"

-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}

/* Make generic code ignore STT_REGISTER dummy undefined symbols. */
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index 8eebc8ef66e7..ad5d99bb2871 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -33,6 +33,47 @@ enum execmem_type {
EXECMEM_TYPE_MAX,
};

+/**
+ * struct execmem_range - definition of an address space suitable for code and
+ * related data allocations
+ * @start: address space start
+ * @end: address space end (inclusive)
+ * @pgprot: permissions for memory in this address space
+ * @alignment: alignment required for text allocations
+ */
+struct execmem_range {
+ unsigned long start;
+ unsigned long end;
+ pgprot_t pgprot;
+ unsigned int alignment;
+};
+
+/**
+ * struct execmem_info - architecture parameters for code allocations
+ * @ranges: array of parameter sets defining architecture specific
+ * parameters for executable memory allocations. The ranges that are not
+ * explicitly initialized by an architecture use parameters defined for
+ * @EXECMEM_DEFAULT.
+ */
+struct execmem_info {
+ struct execmem_range ranges[EXECMEM_TYPE_MAX];
+};
+
+/**
+ * execmem_arch_setup - define parameters for allocations of executable memory
+ *
+ * A hook for architectures to define parameters for allocations of
+ * executable memory. These parameters should be filled into the
+ * @execmem_info structure.
+ *
+ * For architectures that do not implement this method a default set of
+ * parameters will be used
+ *
+ * Return: a structure defining architecture parameters and restrictions
+ * for allocations of executable memory
+ */
+struct execmem_info *execmem_arch_setup(void);
+
/**
* execmem_alloc - allocate executable memory
* @type: type of the allocation
diff --git a/mm/execmem.c b/mm/execmem.c
index 480adc69b20d..d062bc21c6b2 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -11,14 +11,30 @@
#include <linux/execmem.h>
#include <linux/moduleloader.h>

-static void *__execmem_alloc(size_t size)
+static struct execmem_info *execmem_info __ro_after_init;
+
+static void *__execmem_alloc(struct execmem_range *range, size_t size)
{
- return module_alloc(size);
+ unsigned long start = range->start;
+ unsigned long end = range->end;
+ unsigned int align = range->alignment;
+ pgprot_t pgprot = range->pgprot;
+
+ return __vmalloc_node_range(size, align, start, end,
+ GFP_KERNEL, pgprot, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
}

void *execmem_alloc(enum execmem_type type, size_t size)
{
- return __execmem_alloc(size);
+ struct execmem_range *range;
+
+ if (!execmem_info)
+ return module_alloc(size);
+
+ range = &execmem_info->ranges[type];
+
+ return __execmem_alloc(range, size);
}

void execmem_free(void *ptr)
@@ -30,3 +46,54 @@ void execmem_free(void *ptr)
WARN_ON(in_interrupt());
vfree(ptr);
}
+
+static bool execmem_validate(struct execmem_info *info)
+{
+ struct execmem_range *r = &info->ranges[EXECMEM_DEFAULT];
+
+ if (!r->alignment || !r->start || !r->end || !pgprot_val(r->pgprot)) {
+ pr_crit("Invalid parameters for execmem allocator, module loading will fail");
+ return false;
+ }
+
+ return true;
+}
+
+static void execmem_init_missing(struct execmem_info *info)
+{
+ struct execmem_range *default_range = &info->ranges[EXECMEM_DEFAULT];
+
+ for (int i = EXECMEM_DEFAULT + 1; i < EXECMEM_TYPE_MAX; i++) {
+ struct execmem_range *r = &info->ranges[i];
+
+ if (!r->start) {
+ r->pgprot = default_range->pgprot;
+ r->alignment = default_range->alignment;
+ r->start = default_range->start;
+ r->end = default_range->end;
+ }
+ }
+}
+
+struct execmem_info * __weak execmem_arch_setup(void)
+{
+ return NULL;
+}
+
+static int __init execmem_init(void)
+{
+ struct execmem_info *info = execmem_arch_setup();
+
+ if (!info)
+ return 0;
+
+ if (!execmem_validate(info))
+ return -EINVAL;
+
+ execmem_init_missing(info);
+
+ execmem_info = info;
+
+ return 0;
+}
+core_initcall(execmem_init);
--
2.43.0


2024-04-22 08:51:42

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 06/15] mm: introduce execmem_alloc() and execmem_free()

From: "Mike Rapoport (IBM)" <[email protected]>

module_alloc() is used everywhere as a mean to allocate memory for code.

Beside being semantically wrong, this unnecessarily ties all subsystems
that need to allocate code, such as ftrace, kprobes and BPF to modules and
puts the burden of code allocation to the modules code.

Several architectures override module_alloc() because of various
constraints where the executable memory can be located and this causes
additional obstacles for improvements of code allocation.

Start splitting code allocation from modules by introducing execmem_alloc()
and execmem_free() APIs.

Initially, execmem_alloc() is a wrapper for module_alloc() and
execmem_free() is a replacement of module_memfree() to allow updating all
call sites to use the new APIs.

Since architectures define different restrictions on placement,
permissions, alignment and other parameters for memory that can be used by
different subsystems that allocate executable memory, execmem_alloc() takes
a type argument, that will be used to identify the calling subsystem and to
allow architectures define parameters for ranges suitable for that
subsystem.

No functional changes.

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
Acked-by: Masami Hiramatsu (Google) <[email protected]>
---
arch/powerpc/kernel/kprobes.c | 6 ++--
arch/s390/kernel/ftrace.c | 4 +--
arch/s390/kernel/kprobes.c | 4 +--
arch/s390/kernel/module.c | 5 +--
arch/sparc/net/bpf_jit_comp_32.c | 8 ++---
arch/x86/kernel/ftrace.c | 6 ++--
arch/x86/kernel/kprobes/core.c | 4 +--
include/linux/execmem.h | 57 ++++++++++++++++++++++++++++++++
include/linux/moduleloader.h | 3 --
kernel/bpf/core.c | 6 ++--
kernel/kprobes.c | 8 ++---
kernel/module/Kconfig | 1 +
kernel/module/main.c | 25 +++++---------
mm/Kconfig | 3 ++
mm/Makefile | 1 +
mm/execmem.c | 32 ++++++++++++++++++
16 files changed, 128 insertions(+), 45 deletions(-)
create mode 100644 include/linux/execmem.h
create mode 100644 mm/execmem.c

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bbca90a5e2ec..9fcd01bb2ce6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -19,8 +19,8 @@
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
-#include <linux/moduleloader.h>
#include <linux/set_memory.h>
+#include <linux/execmem.h>
#include <asm/code-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
@@ -130,7 +130,7 @@ void *alloc_insn_page(void)
{
void *page;

- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;

@@ -142,7 +142,7 @@ void *alloc_insn_page(void)
}
return page;
error:
- module_memfree(page);
+ execmem_free(page);
return NULL;
}

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index c46381ea04ec..798249ef5646 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -7,13 +7,13 @@
* Author(s): Martin Schwidefsky <[email protected]>
*/

-#include <linux/moduleloader.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/kprobes.h>
+#include <linux/execmem.h>
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/text-patching.h>
@@ -220,7 +220,7 @@ static int __init ftrace_plt_init(void)
{
const char *start, *end;

- ftrace_plt = module_alloc(PAGE_SIZE);
+ ftrace_plt = execmem_alloc(EXECMEM_FTRACE, PAGE_SIZE);
if (!ftrace_plt)
panic("cannot allocate ftrace plt\n");

diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f0cf20d4b3c5..3c1b1be744de 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -9,7 +9,6 @@

#define pr_fmt(fmt) "kprobes: " fmt

-#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,6 +20,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
+#include <linux/execmem.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
@@ -38,7 +38,7 @@ void *alloc_insn_page(void)
{
void *page;

- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;
set_memory_rox((unsigned long)page, 1);
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 42215f9404af..ac97a905e8cd 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -21,6 +21,7 @@
#include <linux/moduleloader.h>
#include <linux/bug.h>
#include <linux/memory.h>
+#include <linux/execmem.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -76,7 +77,7 @@ void *module_alloc(unsigned long size)
#ifdef CONFIG_FUNCTION_TRACER
void module_arch_cleanup(struct module *mod)
{
- module_memfree(mod->arch.trampolines_start);
+ execmem_free(mod->arch.trampolines_start);
}
#endif

@@ -510,7 +511,7 @@ static int module_alloc_ftrace_hotpatch_trampolines(struct module *me,

size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size);
numpages = DIV_ROUND_UP(size, PAGE_SIZE);
- start = module_alloc(numpages * PAGE_SIZE);
+ start = execmem_alloc(EXECMEM_FTRACE, numpages * PAGE_SIZE);
if (!start)
return -ENOMEM;
set_memory_rox((unsigned long)start, numpages);
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index da2df1e84ed4..bda2dbd3f4c5 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/moduleloader.h>
#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/cache.h>
#include <linux/if_vlan.h>
+#include <linux/execmem.h>

#include <asm/cacheflush.h>
#include <asm/ptrace.h>
@@ -713,7 +713,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
if (unlikely(proglen + ilen > oldproglen)) {
pr_err("bpb_jit_compile fatal error\n");
kfree(addrs);
- module_memfree(image);
+ execmem_free(image);
return;
}
memcpy(image + proglen, temp, ilen);
@@ -736,7 +736,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
break;
}
if (proglen == oldproglen) {
- image = module_alloc(proglen);
+ image = execmem_alloc(EXECMEM_BPF, proglen);
if (!image)
goto out;
}
@@ -758,7 +758,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
void bpf_jit_free(struct bpf_prog *fp)
{
if (fp->jited)
- module_memfree(fp->bpf_func);
+ execmem_free(fp->bpf_func);

bpf_prog_unlock_free(fp);
}
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 70139d9d2e01..c8ddb7abda7c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -25,6 +25,7 @@
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
+#include <linux/execmem.h>

#include <trace/syscall.h>

@@ -261,15 +262,14 @@ void arch_ftrace_update_code(int command)
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
-#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
- return module_alloc(size);
+ return execmem_alloc(EXECMEM_FTRACE, size);
}
static inline void tramp_free(void *tramp)
{
- module_memfree(tramp);
+ execmem_free(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d0e49bd7c6f3..72e6a45e7ec2 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -40,12 +40,12 @@
#include <linux/kgdb.h>
#include <linux/ftrace.h>
#include <linux/kasan.h>
-#include <linux/moduleloader.h>
#include <linux/objtool.h>
#include <linux/vmalloc.h>
#include <linux/pgtable.h>
#include <linux/set_memory.h>
#include <linux/cfi.h>
+#include <linux/execmem.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
@@ -495,7 +495,7 @@ void *alloc_insn_page(void)
{
void *page;

- page = module_alloc(PAGE_SIZE);
+ page = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
if (!page)
return NULL;

diff --git a/include/linux/execmem.h b/include/linux/execmem.h
new file mode 100644
index 000000000000..8eebc8ef66e7
--- /dev/null
+++ b/include/linux/execmem.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_EXECMEM_ALLOC_H
+#define _LINUX_EXECMEM_ALLOC_H
+
+#include <linux/types.h>
+#include <linux/moduleloader.h>
+
+/**
+ * enum execmem_type - types of executable memory ranges
+ *
+ * There are several subsystems that allocate executable memory.
+ * Architectures define different restrictions on placement,
+ * permissions, alignment and other parameters for memory that can be used
+ * by these subsystems.
+ * Types in this enum identify subsystems that allocate executable memory
+ * and let architectures define parameters for ranges suitable for
+ * allocations by each subsystem.
+ *
+ * @EXECMEM_DEFAULT: default parameters that would be used for types that
+ * are not explicitly defined.
+ * @EXECMEM_MODULE_TEXT: parameters for module text sections
+ * @EXECMEM_KPROBES: parameters for kprobes
+ * @EXECMEM_FTRACE: parameters for ftrace
+ * @EXECMEM_BPF: parameters for BPF
+ * @EXECMEM_TYPE_MAX:
+ */
+enum execmem_type {
+ EXECMEM_DEFAULT,
+ EXECMEM_MODULE_TEXT = EXECMEM_DEFAULT,
+ EXECMEM_KPROBES,
+ EXECMEM_FTRACE,
+ EXECMEM_BPF,
+ EXECMEM_TYPE_MAX,
+};
+
+/**
+ * execmem_alloc - allocate executable memory
+ * @type: type of the allocation
+ * @size: how many bytes of memory are required
+ *
+ * Allocates memory that will contain executable code, either generated or
+ * loaded from kernel modules.
+ *
+ * The memory will have protections defined by architecture for executable
+ * region of the @type.
+ *
+ * Return: a pointer to the allocated memory or %NULL
+ */
+void *execmem_alloc(enum execmem_type type, size_t size);
+
+/**
+ * execmem_free - free executable memory
+ * @ptr: pointer to the memory that should be freed
+ */
+void execmem_free(void *ptr);
+
+#endif /* _LINUX_EXECMEM_ALLOC_H */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 89b1e0ed9811..a3b8caee9405 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -29,9 +29,6 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section);
sections. Returns NULL on failure. */
void *module_alloc(unsigned long size);

-/* Free memory returned from module_alloc. */
-void module_memfree(void *module_region);
-
/* Determines if the section name is an init section (that is only used during
* module loading).
*/
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 696bc55de8e8..75a54024e2f4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -22,7 +22,6 @@
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
-#include <linux/moduleloader.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/objtool.h>
@@ -37,6 +36,7 @@
#include <linux/nospec.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/memcontrol.h>
+#include <linux/execmem.h>

#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -1050,12 +1050,12 @@ void bpf_jit_uncharge_modmem(u32 size)

void *__weak bpf_jit_alloc_exec(unsigned long size)
{
- return module_alloc(size);
+ return execmem_alloc(EXECMEM_BPF, size);
}

void __weak bpf_jit_free_exec(void *addr)
{
- module_memfree(addr);
+ execmem_free(addr);
}

struct bpf_binary_header *
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 65adc815fc6e..ddd7cdc16edf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -26,7 +26,6 @@
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/export.h>
-#include <linux/moduleloader.h>
#include <linux/kallsyms.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
@@ -39,6 +38,7 @@
#include <linux/jump_label.h>
#include <linux/static_call.h>
#include <linux/perf_event.h>
+#include <linux/execmem.h>

#include <asm/sections.h>
#include <asm/cacheflush.h>
@@ -113,17 +113,17 @@ enum kprobe_slot_state {
void __weak *alloc_insn_page(void)
{
/*
- * Use module_alloc() so this page is within +/- 2GB of where the
+ * Use execmem_alloc() so this page is within +/- 2GB of where the
* kernel image and loaded module images reside. This is required
* for most of the architectures.
* (e.g. x86-64 needs this to handle the %rip-relative fixups.)
*/
- return module_alloc(PAGE_SIZE);
+ return execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE);
}

static void free_insn_page(void *page)
{
- module_memfree(page);
+ execmem_free(page);
}

struct kprobe_insn_cache kprobe_insn_slots = {
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index f3e0329337f6..744383c1eed1 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -2,6 +2,7 @@
menuconfig MODULES
bool "Enable loadable module support"
modules
+ select EXECMEM
help
Kernel modules are small pieces of compiled code which can
be inserted in the running kernel, rather than being
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 5b82b069e0d3..d56b7df0cbb6 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -57,6 +57,7 @@
#include <linux/audit.h>
#include <linux/cfi.h>
#include <linux/debugfs.h>
+#include <linux/execmem.h>
#include <uapi/linux/module.h>
#include "internal.h"

@@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod,
return ksym;
}

-void __weak module_memfree(void *module_region)
-{
- /*
- * This memory may be RO, and freeing RO memory in an interrupt is not
- * supported by vmalloc.
- */
- WARN_ON(in_interrupt());
- vfree(module_region);
-}
-
void __weak module_arch_cleanup(struct module *mod)
{
}
@@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
if (mod_mem_use_vmalloc(type))
ptr = vmalloc(size);
else
- ptr = module_alloc(size);
+ ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size);

if (!ptr)
return -ENOMEM;
@@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type)
if (mod_mem_use_vmalloc(type))
vfree(ptr);
else
- module_memfree(ptr);
+ execmem_free(ptr);
}

static void free_mod_mem(struct module *mod)
@@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w)

llist_for_each_safe(pos, n, list) {
initfree = container_of(pos, struct mod_initfree, node);
- module_memfree(initfree->init_text);
- module_memfree(initfree->init_data);
- module_memfree(initfree->init_rodata);
+ execmem_free(initfree->init_text);
+ execmem_free(initfree->init_data);
+ execmem_free(initfree->init_rodata);
kfree(initfree);
}
}
@@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod)
* We want to free module_init, but be aware that kallsyms may be
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_rcu(), but we don't want to slow down the success
- * path. module_memfree() cannot be called in an interrupt, so do the
+ * path. execmem_free() cannot be called in an interrupt, so do the
* work and call synchronize_rcu() in a work queue.
*
- * Note that module_alloc() on most architectures creates W+X page
+ * Note that execmem_alloc() on most architectures creates W+X page
* mappings which won't be cleaned up until do_free_init() runs. Any
* code such as mark_rodata_ro() which depends on those mappings to
* be cleaned up needs to sync with the queued work by invoking
diff --git a/mm/Kconfig b/mm/Kconfig
index b1448aa81e15..f08a216d4793 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1241,6 +1241,9 @@ config LOCK_MM_AND_FIND_VMA
config IOMMU_MM_DATA
bool

+config EXECMEM
+ bool
+
source "mm/damon/Kconfig"

endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 4abb40b911ec..001336c91864 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -133,3 +133,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o
obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
+obj-$(CONFIG_EXECMEM) += execmem.o
diff --git a/mm/execmem.c b/mm/execmem.c
new file mode 100644
index 000000000000..480adc69b20d
--- /dev/null
+++ b/mm/execmem.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2002 Richard Henderson
+ * Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
+ * Copyright (C) 2023 Luis Chamberlain <[email protected]>
+ * Copyright (C) 2024 Mike Rapoport IBM.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/execmem.h>
+#include <linux/moduleloader.h>
+
+static void *__execmem_alloc(size_t size)
+{
+ return module_alloc(size);
+}
+
+void *execmem_alloc(enum execmem_type type, size_t size)
+{
+ return __execmem_alloc(size);
+}
+
+void execmem_free(void *ptr)
+{
+ /*
+ * This memory may be RO, and freeing RO memory in an interrupt is not
+ * supported by vmalloc.
+ */
+ WARN_ON(in_interrupt());
+ vfree(ptr);
+}
--
2.43.0


2024-04-22 08:51:56

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 08/15] mm/execmem, arch: convert remaining overrides of module_alloc to execmem

From: "Mike Rapoport (IBM)" <[email protected]>

Extend execmem parameters to accommodate more complex overrides of
module_alloc() by architectures.

This includes specification of a fallback range required by arm, arm64
and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for
allocation of KASAN shadow required by s390 and x86 and support for
early initialization of execmem required by x86.

The core implementation of execmem_alloc() takes care of suppressing
warnings when the initial allocation fails but there is a fallback range
defined.

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/Kconfig | 6 +++
arch/arm/kernel/module.c | 41 ++++++++++-------
arch/arm64/kernel/module.c | 67 ++++++++++++++++++----------
arch/arm64/kernel/probes/kprobes.c | 7 ---
arch/arm64/net/bpf_jit_comp.c | 11 -----
arch/powerpc/kernel/module.c | 60 ++++++++++++++++---------
arch/s390/kernel/module.c | 54 ++++++++++-------------
arch/x86/Kconfig | 1 +
arch/x86/kernel/module.c | 70 ++++++++++--------------------
include/linux/execmem.h | 34 +++++++++++++++
include/linux/moduleloader.h | 12 -----
kernel/module/main.c | 26 +++--------
mm/execmem.c | 70 +++++++++++++++++++++++++-----
mm/mm_init.c | 2 +
14 files changed, 259 insertions(+), 202 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 65afb1de48b3..7006f71f0110 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -960,6 +960,12 @@ config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
For architectures like powerpc/32 which have constraints on module
allocation and need to allocate module data outside of module area.

+config ARCH_WANTS_EXECMEM_EARLY
+ bool
+ help
+ For architectures that might allocate executable memory early on
+ boot, for instance ftrace on x86.
+
config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e74d84f58b77..a98fdf6ff26c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/gfp.h>
+#include <linux/execmem.h>

#include <asm/sections.h>
#include <asm/smp_plat.h>
@@ -34,23 +35,31 @@
#endif

#ifdef CONFIG_MMU
-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- /* Silence the initial allocation */
- if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS))
- gfp_mask |= __GFP_NOWARN;
-
- p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
- gfp_mask, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
- return p;
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ unsigned long fallback_start = 0, fallback_end = 0;
+
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS)) {
+ fallback_start = VMALLOC_START;
+ fallback_end = VMALLOC_END;
+ }
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = MODULES_VADDR,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL_EXEC,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ },
+ };
+
+ return &execmem_info;
}
#endif

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index e92da4da1b2a..a52240ea084b 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>
+#include <linux/execmem.h>

#include <asm/alternative.h>
#include <asm/insn.h>
@@ -108,41 +109,59 @@ static int __init module_init_limits(void)

return 0;
}
-subsys_initcall(module_init_limits);

-void *module_alloc(unsigned long size)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- void *p = NULL;
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start = 0, end = 0;
+
+ module_init_limits();

/*
* Where possible, prefer to allocate within direct branch range of the
* kernel such that no PLTs are necessary.
*/
if (module_direct_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_direct_base,
- module_direct_base + SZ_128M,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
+ start = module_direct_base;
+ end = module_direct_base + SZ_128M;

- if (!p && module_plt_base) {
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- module_plt_base,
- module_plt_base + SZ_2G,
- GFP_KERNEL | __GFP_NOWARN,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
- }
-
- if (!p) {
- pr_warn_ratelimited("%s: unable to allocate memory\n",
- __func__);
+ if (module_plt_base) {
+ fallback_start = module_plt_base;
+ fallback_end = module_plt_base + SZ_2G;
+ }
+ } else if (module_plt_base) {
+ start = module_plt_base;
+ end = module_plt_base + SZ_2G;
}

- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(p);
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_ROX,
+ .alignment = 1,
+ },
+ [EXECMEM_BPF] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}

enum aarch64_reloc_op {
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..4268678d0e86 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -129,13 +129,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}

-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..456f5af239fc 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -1793,17 +1793,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return VMALLOC_END - VMALLOC_START;
}

-void *bpf_jit_alloc_exec(unsigned long size)
-{
- /* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(vmalloc(size));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
-
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index f6d6ae0a1692..ac80559015a3 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -10,6 +10,7 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/bug.h>
+#include <linux/execmem.h>
#include <asm/module.h>
#include <linux/uaccess.h>
#include <asm/firmware.h>
@@ -89,39 +90,56 @@ int module_finalize(const Elf_Ehdr *hdr,
return 0;
}

-static __always_inline void *
-__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
- gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start, end;

/*
- * Don't do huge page allocations for modules yet until more testing
- * is done. STRICT_MODULE_RWX may require extra work to support this
- * too.
+ * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and
+ * allow allocating data in the entire vmalloc space
*/
- return __vmalloc_node_range(size, 1, start, end, gfp, prot,
- VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
-void *module_alloc(unsigned long size)
-{
#ifdef MODULES_VADDR
unsigned long limit = (unsigned long)_etext - SZ_32M;
- void *ptr = NULL;

BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);

/* First try within 32M limit from _etext to avoid branch trampolines */
- if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
- ptr = __module_alloc(size, limit, MODULES_END, true);
-
- if (!ptr)
- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) {
+ start = limit;
+ fallback_start = MODULES_VADDR;
+ fallback_end = MODULES_END;
+ } else {
+ start = MODULES_VADDR;
+ }

- return ptr;
+ end = MODULES_END;
#else
- return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
+ start = VMALLOC_START;
+ end = VMALLOC_END;
#endif
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = prot,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_MODULE_DATA] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index ac97a905e8cd..7fee64fdc1bb 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -37,41 +37,31 @@

#define PLT_ENTRY_SIZE 22

-static unsigned long get_module_load_offset(void)
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
{
- static DEFINE_MUTEX(module_kaslr_mutex);
- static unsigned long module_load_offset;
-
- if (!kaslr_enabled())
- return 0;
- /*
- * Calculate the module_load_offset the first time this code
- * is called. Once calculated it stays the same until reboot.
- */
- mutex_lock(&module_kaslr_mutex);
- if (!module_load_offset)
+ unsigned long module_load_offset = 0;
+ unsigned long start;
+
+ if (kaslr_enabled())
module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- return module_load_offset;
-}

-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
- return p;
+ start = MODULES_VADDR + module_load_offset;
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ },
+ };
+
+ return &execmem_info;
}

#ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4474bf32d0a4..3f5ba72c9480 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -135,6 +135,7 @@ config X86
select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if X86_64
select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP if X86_64
select ARCH_WANTS_THP_SWAP if X86_64
+ select ARCH_WANTS_EXECMEM_EARLY if EXECMEM
select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index e18914c0e38a..45b1a7c03379 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -19,6 +19,7 @@
#include <linux/jump_label.h>
#include <linux/random.h>
#include <linux/memory.h>
+#include <linux/execmem.h>

#include <asm/text-patching.h>
#include <asm/page.h>
@@ -36,55 +37,30 @@ do { \
} while (0)
#endif

-#ifdef CONFIG_RANDOMIZE_BASE
-static unsigned long module_load_offset;
+static struct execmem_info execmem_info __ro_after_init;

-/* Mutex protects the module_load_offset. */
-static DEFINE_MUTEX(module_kaslr_mutex);
-
-static unsigned long int get_module_load_offset(void)
-{
- if (kaslr_enabled()) {
- mutex_lock(&module_kaslr_mutex);
- /*
- * Calculate the module_load_offset the first time this
- * code is called. Once calculated it stays the same until
- * reboot.
- */
- if (module_load_offset == 0)
- module_load_offset =
- get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
- mutex_unlock(&module_kaslr_mutex);
- }
- return module_load_offset;
-}
-#else
-static unsigned long int get_module_load_offset(void)
+struct execmem_info __init *execmem_arch_setup(void)
{
- return 0;
-}
-#endif
-
-void *module_alloc(unsigned long size)
-{
- gfp_t gfp_mask = GFP_KERNEL;
- void *p;
-
- if (PAGE_ALIGN(size) > MODULES_LEN)
- return NULL;
-
- p = __vmalloc_node_range(size, MODULE_ALIGN,
- MODULES_VADDR + get_module_load_offset(),
- MODULES_END, gfp_mask, PAGE_KERNEL,
- VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
- NUMA_NO_NODE, __builtin_return_address(0));
-
- if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
- vfree(p);
- return NULL;
- }
-
- return p;
+ unsigned long start, offset = 0;
+
+ if (kaslr_enabled())
+ offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE;
+
+ start = MODULES_VADDR + offset;
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .flags = EXECMEM_KASAN_SHADOW,
+ .start = start,
+ .end = MODULES_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = MODULE_ALIGN,
+ },
+ },
+ };
+
+ return &execmem_info;
}

#ifdef CONFIG_X86_32
diff --git a/include/linux/execmem.h b/include/linux/execmem.h
index ad5d99bb2871..36686f013cd2 100644
--- a/include/linux/execmem.h
+++ b/include/linux/execmem.h
@@ -5,6 +5,14 @@
#include <linux/types.h>
#include <linux/moduleloader.h>

+#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
+ !defined(CONFIG_KASAN_VMALLOC)
+#include <linux/kasan.h>
+#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define MODULE_ALIGN PAGE_SIZE
+#endif
+
/**
* enum execmem_type - types of executable memory ranges
*
@@ -22,6 +30,7 @@
* @EXECMEM_KPROBES: parameters for kprobes
* @EXECMEM_FTRACE: parameters for ftrace
* @EXECMEM_BPF: parameters for BPF
+ * @EXECMEM_MODULE_DATA: parameters for module data sections
* @EXECMEM_TYPE_MAX:
*/
enum execmem_type {
@@ -30,22 +39,38 @@ enum execmem_type {
EXECMEM_KPROBES,
EXECMEM_FTRACE,
EXECMEM_BPF,
+ EXECMEM_MODULE_DATA,
EXECMEM_TYPE_MAX,
};

+/**
+ * enum execmem_range_flags - options for executable memory allocations
+ * @EXECMEM_KASAN_SHADOW: allocate kasan shadow
+ */
+enum execmem_range_flags {
+ EXECMEM_KASAN_SHADOW = (1 << 0),
+};
+
/**
* struct execmem_range - definition of an address space suitable for code and
* related data allocations
* @start: address space start
* @end: address space end (inclusive)
+ * @fallback_start: start of the secondary address space range for fallback
+ * allocations on architectures that require it
+ * @fallback_end: start of the secondary address space (inclusive)
* @pgprot: permissions for memory in this address space
* @alignment: alignment required for text allocations
+ * @flags: options for memory allocations for this range
*/
struct execmem_range {
unsigned long start;
unsigned long end;
+ unsigned long fallback_start;
+ unsigned long fallback_end;
pgprot_t pgprot;
unsigned int alignment;
+ enum execmem_range_flags flags;
};

/**
@@ -82,6 +107,9 @@ struct execmem_info *execmem_arch_setup(void);
* Allocates memory that will contain executable code, either generated or
* loaded from kernel modules.
*
+ * Allocates memory that will contain data coupled with executable code,
+ * like data sections in kernel modules.
+ *
* The memory will have protections defined by architecture for executable
* region of the @type.
*
@@ -95,4 +123,10 @@ void *execmem_alloc(enum execmem_type type, size_t size);
*/
void execmem_free(void *ptr);

+#ifdef CONFIG_ARCH_WANTS_EXECMEM_EARLY
+void execmem_early_init(void);
+#else
+static inline void execmem_early_init(void) {}
+#endif
+
#endif /* _LINUX_EXECMEM_ALLOC_H */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index a3b8caee9405..e395461d59e5 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -25,10 +25,6 @@ int module_frob_arch_sections(Elf_Ehdr *hdr,
/* Additional bytes needed by arch in front of individual sections */
unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section);

-/* Allocator used for allocating struct module, core sections and init
- sections. Returns NULL on failure. */
-void *module_alloc(unsigned long size);
-
/* Determines if the section name is an init section (that is only used during
* module loading).
*/
@@ -126,12 +122,4 @@ void module_arch_cleanup(struct module *mod);
/* Any cleanup before freeing mod->module_init */
void module_arch_freeing_init(struct module *mod);

-#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
- !defined(CONFIG_KASAN_VMALLOC)
-#include <linux/kasan.h>
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
-#else
-#define MODULE_ALIGN PAGE_SIZE
-#endif
-
#endif
diff --git a/kernel/module/main.c b/kernel/module/main.c
index d56b7df0cbb6..91e185607d4b 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1188,24 +1188,20 @@ void __weak module_arch_freeing_init(struct module *mod)
{
}

-static bool mod_mem_use_vmalloc(enum mod_mem_type type)
-{
- return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) &&
- mod_mem_type_is_core_data(type);
-}
-
static int module_memory_alloc(struct module *mod, enum mod_mem_type type)
{
unsigned int size = PAGE_ALIGN(mod->mem[type].size);
+ enum execmem_type execmem_type;
void *ptr;

mod->mem[type].size = size;

- if (mod_mem_use_vmalloc(type))
- ptr = vmalloc(size);
+ if (mod_mem_type_is_data(type))
+ execmem_type = EXECMEM_MODULE_DATA;
else
- ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size);
+ execmem_type = EXECMEM_MODULE_TEXT;

+ ptr = execmem_alloc(execmem_type, size);
if (!ptr)
return -ENOMEM;

@@ -1232,10 +1228,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type)
{
void *ptr = mod->mem[type].base;

- if (mod_mem_use_vmalloc(type))
- vfree(ptr);
- else
- execmem_free(ptr);
+ execmem_free(ptr);
}

static void free_mod_mem(struct module *mod)
@@ -1630,13 +1623,6 @@ static void free_modinfo(struct module *mod)
}
}

-void * __weak module_alloc(unsigned long size)
-{
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
-}
-
bool __weak module_init_section(const char *name)
{
return strstarts(name, ".init");
diff --git a/mm/execmem.c b/mm/execmem.c
index d062bc21c6b2..33bf473e508e 100644
--- a/mm/execmem.c
+++ b/mm/execmem.c
@@ -12,27 +12,49 @@
#include <linux/moduleloader.h>

static struct execmem_info *execmem_info __ro_after_init;
+static struct execmem_info default_execmem_info __ro_after_init;

static void *__execmem_alloc(struct execmem_range *range, size_t size)
{
+ bool kasan = range->flags & EXECMEM_KASAN_SHADOW;
+ unsigned long vm_flags = VM_FLUSH_RESET_PERMS;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_NOWARN;
unsigned long start = range->start;
unsigned long end = range->end;
unsigned int align = range->alignment;
pgprot_t pgprot = range->pgprot;
+ void *p;
+
+ if (kasan)
+ vm_flags |= VM_DEFER_KMEMLEAK;
+
+ p = __vmalloc_node_range(size, align, start, end, gfp_flags,
+ pgprot, vm_flags, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!p && range->fallback_start) {
+ start = range->fallback_start;
+ end = range->fallback_end;
+ p = __vmalloc_node_range(size, align, start, end, gfp_flags,
+ pgprot, vm_flags, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ }
+
+ if (!p) {
+ pr_warn_ratelimited("execmem: unable to allocate memory\n");
+ return NULL;
+ }
+
+ if (kasan && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) {
+ vfree(p);
+ return NULL;
+ }

- return __vmalloc_node_range(size, align, start, end,
- GFP_KERNEL, pgprot, VM_FLUSH_RESET_PERMS,
- NUMA_NO_NODE, __builtin_return_address(0));
+ return kasan_reset_tag(p);
}

void *execmem_alloc(enum execmem_type type, size_t size)
{
- struct execmem_range *range;
-
- if (!execmem_info)
- return module_alloc(size);
-
- range = &execmem_info->ranges[type];
+ struct execmem_range *range = &execmem_info->ranges[type];

return __execmem_alloc(range, size);
}
@@ -67,10 +89,16 @@ static void execmem_init_missing(struct execmem_info *info)
struct execmem_range *r = &info->ranges[i];

if (!r->start) {
- r->pgprot = default_range->pgprot;
+ if (i == EXECMEM_MODULE_DATA)
+ r->pgprot = PAGE_KERNEL;
+ else
+ r->pgprot = default_range->pgprot;
r->alignment = default_range->alignment;
r->start = default_range->start;
r->end = default_range->end;
+ r->flags = default_range->flags;
+ r->fallback_start = default_range->fallback_start;
+ r->fallback_end = default_range->fallback_end;
}
}
}
@@ -80,12 +108,18 @@ struct execmem_info * __weak execmem_arch_setup(void)
return NULL;
}

-static int __init execmem_init(void)
+static int __init __execmem_init(void)
{
struct execmem_info *info = execmem_arch_setup();

- if (!info)
+ if (!info) {
+ info = execmem_info = &default_execmem_info;
+ info->ranges[EXECMEM_DEFAULT].start = VMALLOC_START;
+ info->ranges[EXECMEM_DEFAULT].end = VMALLOC_END;
+ info->ranges[EXECMEM_DEFAULT].pgprot = PAGE_KERNEL_EXEC;
+ info->ranges[EXECMEM_DEFAULT].alignment = 1;
return 0;
+ }

if (!execmem_validate(info))
return -EINVAL;
@@ -96,4 +130,16 @@ static int __init execmem_init(void)

return 0;
}
+
+#ifndef CONFIG_ARCH_WANTS_EXECMEM_EARLY
+static int __init execmem_init(void)
+{
+ return __execmem_init();
+}
core_initcall(execmem_init);
+#else
+void __init execmem_early_init(void)
+{
+ (void)__execmem_init();
+}
+#endif
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 549e76af8f82..dae777234a31 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/cma.h>
#include <linux/crash_dump.h>
+#include <linux/execmem.h>
#include "internal.h"
#include "slab.h"
#include "shuffle.h"
@@ -2793,4 +2794,5 @@ void __init mm_core_init(void)
pti_init();
kmsan_init_runtime();
mm_cache_init();
+ execmem_early_init();
}
--
2.43.0


2024-04-22 08:52:39

by Mike Rapoport

[permalink] [raw]
Subject: [PATCH v5 09/15] riscv: extend execmem_params for generated code allocations

From: "Mike Rapoport (IBM)" <[email protected]>

The memory allocations for kprobes and BPF on RISC-V are not placed in
the modules area and these custom allocations are implemented with
overrides of alloc_insn_page() and bpf_jit_alloc_exec().

Slightly reorder execmem_params initialization to support both 32 and 64
bit variants, define EXECMEM_KPROBES and EXECMEM_BPF ranges in
riscv::execmem_params and drop overrides of alloc_insn_page() and
bpf_jit_alloc_exec().

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
Reviewed-by: Alexandre Ghiti <[email protected]>
---
arch/riscv/kernel/module.c | 28 +++++++++++++++++++++++++---
arch/riscv/kernel/probes/kprobes.c | 10 ----------
arch/riscv/net/bpf_jit_core.c | 13 -------------
3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 182904127ba0..2ecbacbc9993 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -906,19 +906,41 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
return 0;
}

-#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
+#ifdef CONFIG_MMU
static struct execmem_info execmem_info __ro_after_init;

struct execmem_info __init *execmem_arch_setup(void)
{
+ unsigned long start, end;
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ start = MODULES_VADDR;
+ end = MODULES_END;
+ } else {
+ start = VMALLOC_START;
+ end = VMALLOC_END;
+ }
+
execmem_info = (struct execmem_info){
.ranges = {
[EXECMEM_DEFAULT] = {
- .start = MODULES_VADDR,
- .end = MODULES_END,
+ .start = start,
+ .end = end,
.pgprot = PAGE_KERNEL,
.alignment = 1,
},
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL_READ_EXEC,
+ .alignment = 1,
+ },
+ [EXECMEM_BPF] = {
+ .start = BPF_JIT_REGION_START,
+ .end = BPF_JIT_REGION_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = PAGE_SIZE,
+ },
},
};

diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 2f08c14a933d..e64f2f3064eb 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -104,16 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return 0;
}

-#ifdef CONFIG_MMU
-void *alloc_insn_page(void)
-{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
- VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-#endif
-
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c
index 6b3acac30c06..e238fdbd5dbc 100644
--- a/arch/riscv/net/bpf_jit_core.c
+++ b/arch/riscv/net/bpf_jit_core.c
@@ -219,19 +219,6 @@ u64 bpf_jit_alloc_exec_limit(void)
return BPF_JIT_REGION_SIZE;
}

-void *bpf_jit_alloc_exec(unsigned long size)
-{
- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
- BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
-
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
int ret;
--
2.43.0