2022-01-24 18:57:45

by Christophe Leroy

Subject: [PATCH 0/7] Allocate module text and data separately

This series allows architectures to request that module data be placed in
the vmalloc area instead of the module area.

This is required on powerpc book3s/32 in order to make module data
non-executable: executability cannot be set on a per-page basis there,
only per 256 Mbytes segment. The module area is in an exec segment,
while the vmalloc area is noexec.

This can also be useful on other powerpc/32 variants in order to maximize
the chance of module code being close enough to the kernel core to avoid
branch trampolines.
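
Conceptually the split amounts to something like the sketch below (simplified,
not the actual patch code; the real allocation is done in move_module(), see
patch 6):

#include <linux/moduleloader.h>
#include <linux/vmalloc.h>

/*
 * Sketch only: with the new option selected, module text keeps coming
 * from the exec module area, while module data is placed in the noexec
 * vmalloc area.
 */
static void *alloc_module_text(unsigned long size)
{
	return module_alloc(size);	/* MODULES_VADDR..MODULES_END, exec */
}

static void *alloc_module_data(unsigned long size)
{
	return vmalloc(size);		/* vmalloc area, noexec on book3s/32 */
}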

Christophe Leroy (7):
modules: Refactor within_module_core() and within_module_init()
modules: Add within_module_text() macro
modules: Always have struct mod_tree_root
modules: Prepare for handling several RB trees
modules: Introduce data_layout
modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
powerpc: Select ARCH_WANTS_MODULES_DATA_IN_VMALLOC on book3s/32 and
8xx

arch/Kconfig | 6 ++
arch/powerpc/Kconfig | 1 +
include/linux/module.h | 38 ++++++-
kernel/debug/kdb/kdb_main.c | 10 +-
kernel/module.c | 207 ++++++++++++++++++++++++------------
5 files changed, 186 insertions(+), 76 deletions(-)

--
2.33.1


2022-01-24 18:57:46

by Christophe Leroy

Subject: [PATCH 4/7] modules: Prepare for handling several RB trees

In order to separate text and data, we need to set up two RB trees.
So modify the helper functions to take the tree as a parameter.

Signed-off-by: Christophe Leroy <[email protected]>
---
kernel/module.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 346bc2e7a150..051fecef416b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -159,14 +159,14 @@ static const struct latch_tree_ops mod_tree_ops = {
.comp = mod_tree_comp,
};

-static noinline void __mod_tree_insert(struct mod_tree_node *node)
+static noinline void __mod_tree_insert(struct mod_tree_node *node, struct mod_tree_root *tree)
{
- latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+ latch_tree_insert(&node->node, &tree->root, &mod_tree_ops);
}

-static void __mod_tree_remove(struct mod_tree_node *node)
+static void __mod_tree_remove(struct mod_tree_node *node, struct mod_tree_root *tree)
{
- latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+ latch_tree_erase(&node->node, &tree->root, &mod_tree_ops);
}

/*
@@ -178,28 +178,28 @@ static void mod_tree_insert(struct module *mod)
mod->core_layout.mtn.mod = mod;
mod->init_layout.mtn.mod = mod;

- __mod_tree_insert(&mod->core_layout.mtn);
+ __mod_tree_insert(&mod->core_layout.mtn, &mod_tree);
if (mod->init_layout.size)
- __mod_tree_insert(&mod->init_layout.mtn);
+ __mod_tree_insert(&mod->init_layout.mtn, &mod_tree);
}

static void mod_tree_remove_init(struct module *mod)
{
if (mod->init_layout.size)
- __mod_tree_remove(&mod->init_layout.mtn);
+ __mod_tree_remove(&mod->init_layout.mtn, &mod_tree);
}

static void mod_tree_remove(struct module *mod)
{
- __mod_tree_remove(&mod->core_layout.mtn);
+ __mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
mod_tree_remove_init(mod);
}

-static struct module *mod_find(unsigned long addr)
+static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
{
struct latch_tree_node *ltn;

- ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ ltn = latch_tree_find((void *)addr, &tree->root, &mod_tree_ops);
if (!ltn)
return NULL;

@@ -212,7 +212,7 @@ static void mod_tree_insert(struct module *mod) { }
static void mod_tree_remove_init(struct module *mod) { }
static void mod_tree_remove(struct module *mod) { }

-static struct module *mod_find(unsigned long addr)
+static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
{
struct module *mod;

@@ -231,22 +231,22 @@ static struct module *mod_find(unsigned long addr)
* Bounds of module text, for speeding up __module_address.
* Protected by module_mutex.
*/
-static void __mod_update_bounds(void *base, unsigned int size)
+static void __mod_update_bounds(void *base, unsigned int size, struct mod_tree_root *tree)
{
unsigned long min = (unsigned long)base;
unsigned long max = min + size;

- if (min < mod_tree.addr_min)
- mod_tree.addr_min = min;
- if (max > mod_tree.addr_max)
- mod_tree.addr_max = max;
+ if (min < tree->addr_min)
+ tree->addr_min = min;
+ if (max > tree->addr_max)
+ tree->addr_max = max;
}

static void mod_update_bounds(struct module *mod)
{
- __mod_update_bounds(mod->core_layout.base, mod->core_layout.size);
+ __mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree);
if (mod->init_layout.size)
- __mod_update_bounds(mod->init_layout.base, mod->init_layout.size);
+ __mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree);
}

#ifdef CONFIG_KGDB_KDB
@@ -4719,7 +4719,7 @@ struct module *__module_address(unsigned long addr)

module_assert_mutex_or_preempt();

- mod = mod_find(addr);
+ mod = mod_find(addr, &mod_tree);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
--
2.33.1

2022-01-24 18:57:52

by Christophe Leroy

Subject: [PATCH 5/7] modules: Introduce data_layout

In order to allow separation of data from text, add another layout,
called data_layout. For architectures requesting separation of text
and data, only text will go in core_layout and data will go in
data_layout.

For architectures which keep text and data together, make data_layout an
alias of core_layout; that way data_layout can be used for all data
manipulations, regardless of whether the data actually lives in core_layout
or in a separate data_layout.
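
As a toy illustration of the aliasing trick (the struct and field names here
are made up, only the #define is the real mechanism):

struct layout_like { unsigned int size; };

struct module_like {
	struct layout_like core_layout;
	/* no separate data_layout member when text and data stay together */
};

#define data_layout core_layout

static unsigned int module_data_size(struct module_like *mod)
{
	return mod->data_layout.size;	/* expands to mod->core_layout.size */
}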

Signed-off-by: Christophe Leroy <[email protected]>
---
kernel/module.c | 52 ++++++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 051fecef416b..de1a9de6a544 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -81,6 +81,8 @@
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))

+#define data_layout core_layout
+
/*
* Mutex protects:
* 1) List of modules (also safely readable with preempt_disable),
@@ -2012,19 +2014,20 @@ static void module_enable_ro(const struct module *mod, bool after_init)
set_vm_flush_reset_perms(mod->init_layout.base);
frob_text(&mod->core_layout, set_memory_ro);

- frob_rodata(&mod->core_layout, set_memory_ro);
+ frob_rodata(&mod->data_layout, set_memory_ro);
+
frob_text(&mod->init_layout, set_memory_ro);
frob_rodata(&mod->init_layout, set_memory_ro);

if (after_init)
- frob_ro_after_init(&mod->core_layout, set_memory_ro);
+ frob_ro_after_init(&mod->data_layout, set_memory_ro);
}

static void module_enable_nx(const struct module *mod)
{
- frob_rodata(&mod->core_layout, set_memory_nx);
- frob_ro_after_init(&mod->core_layout, set_memory_nx);
- frob_writable_data(&mod->core_layout, set_memory_nx);
+ frob_rodata(&mod->data_layout, set_memory_nx);
+ frob_ro_after_init(&mod->data_layout, set_memory_nx);
+ frob_writable_data(&mod->data_layout, set_memory_nx);
frob_rodata(&mod->init_layout, set_memory_nx);
frob_writable_data(&mod->init_layout, set_memory_nx);
}
@@ -2202,7 +2205,7 @@ static void free_module(struct module *mod)
percpu_modfree(mod);

/* Free lock-classes; relies on the preceding sync_rcu(). */
- lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
+ lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);

/* Finally, free the core (containing the module structure) */
module_memfree(mod->core_layout.base);
@@ -2449,7 +2452,10 @@ static void layout_sections(struct module *mod, struct load_info *info)
|| s->sh_entsize != ~0UL
|| module_init_layout_section(sname))
continue;
- s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
+ if (m)
+ s->sh_entsize = get_offset(mod, &mod->data_layout.size, s, i);
+ else
+ s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
pr_debug("\t%s\n", sname);
}
switch (m) {
@@ -2458,15 +2464,15 @@ static void layout_sections(struct module *mod, struct load_info *info)
mod->core_layout.text_size = mod->core_layout.size;
break;
case 1: /* RO: text and ro-data */
- mod->core_layout.size = debug_align(mod->core_layout.size);
- mod->core_layout.ro_size = mod->core_layout.size;
+ mod->data_layout.size = debug_align(mod->data_layout.size);
+ mod->data_layout.ro_size = mod->data_layout.size;
break;
case 2: /* RO after init */
- mod->core_layout.size = debug_align(mod->core_layout.size);
- mod->core_layout.ro_after_init_size = mod->core_layout.size;
+ mod->data_layout.size = debug_align(mod->data_layout.size);
+ mod->data_layout.ro_after_init_size = mod->data_layout.size;
break;
case 4: /* whole core */
- mod->core_layout.size = debug_align(mod->core_layout.size);
+ mod->data_layout.size = debug_align(mod->data_layout.size);
break;
}
}
@@ -2719,12 +2725,12 @@ static void layout_symtab(struct module *mod, struct load_info *info)
}

/* Append room for core symbols at end of core part. */
- info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1);
- info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
- mod->core_layout.size += strtab_size;
- info->core_typeoffs = mod->core_layout.size;
- mod->core_layout.size += ndst * sizeof(char);
- mod->core_layout.size = debug_align(mod->core_layout.size);
+ info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
+ info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod->data_layout.size += strtab_size;
+ info->core_typeoffs = mod->data_layout.size;
+ mod->data_layout.size += ndst * sizeof(char);
+ mod->data_layout.size = debug_align(mod->data_layout.size);

/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
@@ -2768,9 +2774,9 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
* Now populate the cut down core kallsyms for after init
* and set types up while we still have access to sections.
*/
- mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
- mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
- mod->core_kallsyms.typetab = mod->core_layout.base + info->core_typeoffs;
+ mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
+ mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
+ mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
src = mod->kallsyms->symtab;
for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
mod->kallsyms->typetab[i] = elf_type(src + i, info);
@@ -3462,6 +3468,8 @@ static int move_module(struct module *mod, struct load_info *info)
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->init_layout.base
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
+ else if (!(shdr->sh_flags & SHF_EXECINSTR))
+ dest = mod->data_layout.base + shdr->sh_entsize;
else
dest = mod->core_layout.base + shdr->sh_entsize;

@@ -4167,7 +4175,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
- lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
+ lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);

module_deallocate(mod, info);
free_copy:
--
2.33.1

2022-01-24 18:57:54

by Christophe Leroy

Subject: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC

Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC to allow architectures
to request that module data be placed in the vmalloc area instead of the
module area.

This is required on powerpc book3s/32 in order to make module data
non-executable: executability cannot be set on a per-page basis there,
only per 256 Mbytes segment. The module area is in an exec segment,
while the vmalloc area is noexec.

This can also be useful on other powerpc/32 variants in order to maximize
the chance of module code being close enough to the kernel core to avoid
branch trampolines.

Signed-off-by: Christophe Leroy <[email protected]>
Cc: Jason Wessel <[email protected]>
Cc: Daniel Thompson <[email protected]>
Cc: Douglas Anderson <[email protected]>
---
arch/Kconfig | 6 +++
include/linux/module.h | 8 ++++
kernel/debug/kdb/kdb_main.c | 10 ++++-
kernel/module.c | 73 ++++++++++++++++++++++++++++++++++++-
4 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 847fde3d22cd..ed6a5ab8796d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -883,6 +883,12 @@ config MODULES_USE_ELF_REL
Modules only use ELF REL relocations. Modules with ELF RELA
relocations will give an error.

+config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ bool
+ help
+ For architectures like powerpc/32 which have constraints on module
+ allocation and need to allocate module data outside of module area.
+
config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/include/linux/module.h b/include/linux/module.h
index fc7adb110a81..3d908bb7ed08 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -421,6 +421,9 @@ struct module {
/* Core layout: rbtree is accessed frequently, so keep together. */
struct module_layout core_layout __module_layout_align;
struct module_layout init_layout;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ struct module_layout data_layout;
+#endif

/* Arch-specific module values */
struct mod_arch_specific arch;
@@ -592,7 +595,12 @@ static inline bool within_module_layout(unsigned long addr,
static inline bool within_module_core(unsigned long addr,
const struct module *mod)
{
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ return within_module_layout(addr, &mod->core_layout) ||
+ within_module_layout(addr, &mod->data_layout);
+#else
return within_module_layout(addr, &mod->core_layout);
+#endif
}

static inline bool within_module_init(unsigned long addr,
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0852a537dad4..b09e92f2c78d 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2022,8 +2022,11 @@ static int kdb_lsmod(int argc, const char **argv)
if (mod->state == MODULE_STATE_UNFORMED)
continue;

- kdb_printf("%-20s%8u 0x%px ", mod->name,
- mod->core_layout.size, (void *)mod);
+ kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ kdb_printf("/%8u 0x%px ", mod->data_layout.size);
+#endif
+ kdb_printf(" 0x%px ", (void *)mod);
#ifdef CONFIG_MODULE_UNLOAD
kdb_printf("%4d ", module_refcount(mod));
#endif
@@ -2034,6 +2037,9 @@ static int kdb_lsmod(int argc, const char **argv)
else
kdb_printf(" (Live)");
kdb_printf(" 0x%px", mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ kdb_printf("/0x%px", mod->data_layout.base);
+#endif

#ifdef CONFIG_MODULE_UNLOAD
{
diff --git a/kernel/module.c b/kernel/module.c
index de1a9de6a544..53486a65750e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -81,7 +81,9 @@
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))

+#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
#define data_layout core_layout
+#endif

/*
* Mutex protects:
@@ -108,6 +110,12 @@ static struct mod_tree_root {
.addr_min = -1UL,
};

+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+static struct mod_tree_root mod_data_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+#endif
+
#ifdef CONFIG_MODULES_TREE_LOOKUP

/*
@@ -183,6 +191,11 @@ static void mod_tree_insert(struct module *mod)
__mod_tree_insert(&mod->core_layout.mtn, &mod_tree);
if (mod->init_layout.size)
__mod_tree_insert(&mod->init_layout.mtn, &mod_tree);
+
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ mod->data_layout.mtn.mod = mod;
+ __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree);
+#endif
}

static void mod_tree_remove_init(struct module *mod)
@@ -195,6 +208,9 @@ static void mod_tree_remove(struct module *mod)
{
__mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
mod_tree_remove_init(mod);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ __mod_tree_remove(&mod->core_layout.mtn, &mod_data_tree);
+#endif
}

static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
@@ -249,6 +265,9 @@ static void mod_update_bounds(struct module *mod)
__mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree);
if (mod->init_layout.size)
__mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ __mod_update_bounds(mod->data_layout.base, mod->data_layout.size, &mod_data_tree);
+#endif
}

#ifdef CONFIG_KGDB_KDB
@@ -1179,6 +1198,17 @@ static ssize_t show_coresize(struct module_attribute *mattr,
static struct module_attribute modinfo_coresize =
__ATTR(coresize, 0444, show_coresize, NULL);

+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+static ssize_t show_datasize(struct module_attribute *mattr,
+ struct module_kobject *mk, char *buffer)
+{
+ return sprintf(buffer, "%u\n", mk->mod->data_layout.size);
+}
+
+static struct module_attribute modinfo_datasize =
+ __ATTR(datasize, 0444, show_datasize, NULL);
+#endif
+
static ssize_t show_initsize(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
@@ -1207,6 +1237,9 @@ static struct module_attribute *modinfo_attrs[] = {
&modinfo_srcversion,
&modinfo_initstate,
&modinfo_coresize,
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ &modinfo_datasize,
+#endif
&modinfo_initsize,
&modinfo_taint,
#ifdef CONFIG_MODULE_UNLOAD
@@ -2209,6 +2242,9 @@ static void free_module(struct module *mod)

/* Finally, free the core (containing the module structure) */
module_memfree(mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ vfree(mod->data_layout.base);
+#endif
}

void *__symbol_get(const char *symbol)
@@ -3456,6 +3492,24 @@ static int move_module(struct module *mod, struct load_info *info)
} else
mod->init_layout.base = NULL;

+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ /* Do the allocs. */
+ ptr = vmalloc(mod->data_layout.size);
+ /*
+ * The pointer to this block is stored in the module structure
+ * which is inside the block. Just mark it as not being a
+ * leak.
+ */
+ kmemleak_not_leak(ptr);
+ if (!ptr) {
+ module_memfree(mod->core_layout.base);
+ module_memfree(mod->init_layout.base);
+ return -ENOMEM;
+ }
+
+ memset(ptr, 0, mod->data_layout.size);
+ mod->data_layout.base = ptr;
+#endif
/* Transfer each section which specifies SHF_ALLOC */
pr_debug("final section addresses:\n");
for (i = 0; i < info->hdr->e_shnum; i++) {
@@ -3631,6 +3685,9 @@ static void module_deallocate(struct module *mod, struct load_info *info)
module_arch_freeing_init(mod);
module_memfree(mod->init_layout.base);
module_memfree(mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ vfree(mod->data_layout.base);
+#endif
}

int __weak module_finalize(const Elf_Ehdr *hdr,
@@ -4597,8 +4654,13 @@ static int m_show(struct seq_file *m, void *p)
if (mod->state == MODULE_STATE_UNFORMED)
return 0;

+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ seq_printf(m, "%s %u", mod->name,
+ mod->init_layout.size + mod->core_layout.size + mod->data_layout.size);
+#else
seq_printf(m, "%s %u",
mod->name, mod->init_layout.size + mod->core_layout.size);
+#endif
print_unload_info(m, mod);

/* Informative for users. */
@@ -4721,13 +4783,20 @@ bool is_module_address(unsigned long addr)
struct module *__module_address(unsigned long addr)
{
struct module *mod;
+ struct mod_tree_root *tree;

- if (addr < mod_tree.addr_min || addr > mod_tree.addr_max)
+ if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max)
+ tree = &mod_tree;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ else if (addr >= mod_data_tree.addr_min && addr <= mod_data_tree.addr_max)
+ tree = &mod_data_tree;
+#endif
+ else
return NULL;

module_assert_mutex_or_preempt();

- mod = mod_find(addr, &mod_tree);
+ mod = mod_find(addr, tree);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
--
2.33.1

2022-01-24 18:57:57

by Christophe Leroy

Subject: [PATCH 7/7] powerpc: Select ARCH_WANTS_MODULES_DATA_IN_VMALLOC on book3s/32 and 8xx

book3s/32 and 8xx have a separate area for allocating modules,
defined by MODULES_VADDR / MODULES_END.

On book3s/32, it is not possible to protect against execution on a
per-page basis. A full 256M segment is either Exec or NoExec.
The module area is in an Exec segment while the vmalloc area is
in a NoExec segment.

In order to protect module data against execution, select
ARCH_WANTS_MODULES_DATA_IN_VMALLOC.

For the 8xx (and possibly other 32-bit platforms in the future), there
is no such constraint on Exec/NoExec protection; however, the distance
between kernel functions and their callers needs to remain below
32 Mbytes in order to avoid costly trampolines. By allocating data
outside of the module area, we increase the chance for module text to
remain within acceptable distance of kernel core text.

So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
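
The 32 Mbytes figure comes from the reach of a ppc32 relative branch: the
"bl" instruction encodes a signed 26-bit byte offset. A sketch of the check
this implies (helper name and layout are illustrative):

static bool needs_trampoline(unsigned long caller, unsigned long callee)
{
	long delta = (long)callee - (long)caller;

	/* +/- 32 Mbytes reachable with a direct relative branch */
	return delta < -0x2000000L || delta >= 0x2000000L;
}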

Signed-off-by: Christophe Leroy <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
---
arch/powerpc/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 0631c9241af3..0360d6438359 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -161,6 +161,7 @@ config PPC
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
--
2.33.1

2022-01-24 19:12:36

by kernel test robot

Subject: Re: [PATCH 7/7] powerpc: Select ARCH_WANTS_MODULES_DATA_IN_VMALLOC on book3s/32 and 8xx

Hi Christophe,

I love your patch! Perhaps something to improve:

[auto build test WARNING on mcgrof/modules-next]
[also build test WARNING on powerpc/next linus/master jeyu/modules-next v5.17-rc1 next-20220124]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/Allocate-module-text-and-data-separately/20220124-172517
base: https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git modules-next
config: powerpc-allmodconfig (https://download.01.org/0day-ci/archive/20220124/[email protected]/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/2a5f7a254dd5c1efcfb852f5747632c85582016d
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Christophe-Leroy/Allocate-module-text-and-data-separately/20220124-172517
git checkout 2a5f7a254dd5c1efcfb852f5747632c85582016d
# save the config file to linux build tree
mkdir build_dir
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash kernel/debug/kdb/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

kernel/debug/kdb/kdb_main.c: In function 'kdb_lsmod':
>> kernel/debug/kdb/kdb_main.c:2027:38: warning: format '%p' expects a matching 'void *' argument [-Wformat=]
2027 | kdb_printf("/%8u 0x%px ", mod->data_layout.size);
| ~^
| |
| void *


vim +2027 kernel/debug/kdb/kdb_main.c

5d5314d6795f3c1 Jason Wessel 2010-05-20 2006
5d5314d6795f3c1 Jason Wessel 2010-05-20 2007 #if defined(CONFIG_MODULES)
5d5314d6795f3c1 Jason Wessel 2010-05-20 2008 /*
5d5314d6795f3c1 Jason Wessel 2010-05-20 2009 * kdb_lsmod - This function implements the 'lsmod' command. Lists
5d5314d6795f3c1 Jason Wessel 2010-05-20 2010 * currently loaded kernel modules.
5d5314d6795f3c1 Jason Wessel 2010-05-20 2011 * Mostly taken from userland lsmod.
5d5314d6795f3c1 Jason Wessel 2010-05-20 2012 */
5d5314d6795f3c1 Jason Wessel 2010-05-20 2013 static int kdb_lsmod(int argc, const char **argv)
5d5314d6795f3c1 Jason Wessel 2010-05-20 2014 {
5d5314d6795f3c1 Jason Wessel 2010-05-20 2015 struct module *mod;
5d5314d6795f3c1 Jason Wessel 2010-05-20 2016
5d5314d6795f3c1 Jason Wessel 2010-05-20 2017 if (argc != 0)
5d5314d6795f3c1 Jason Wessel 2010-05-20 2018 return KDB_ARGCOUNT;
5d5314d6795f3c1 Jason Wessel 2010-05-20 2019
5d5314d6795f3c1 Jason Wessel 2010-05-20 2020 kdb_printf("Module Size modstruct Used by\n");
5d5314d6795f3c1 Jason Wessel 2010-05-20 2021 list_for_each_entry(mod, kdb_modules, list) {
0d21b0e3477395e Rusty Russell 2013-01-12 2022 if (mod->state == MODULE_STATE_UNFORMED)
0d21b0e3477395e Rusty Russell 2013-01-12 2023 continue;
5d5314d6795f3c1 Jason Wessel 2010-05-20 2024
299a20e0bead4b7 Christophe Leroy 2022-01-24 2025 kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
299a20e0bead4b7 Christophe Leroy 2022-01-24 2026 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
299a20e0bead4b7 Christophe Leroy 2022-01-24 @2027 kdb_printf("/%8u 0x%px ", mod->data_layout.size);
299a20e0bead4b7 Christophe Leroy 2022-01-24 2028 #endif
299a20e0bead4b7 Christophe Leroy 2022-01-24 2029 kdb_printf(" 0x%px ", (void *)mod);
5d5314d6795f3c1 Jason Wessel 2010-05-20 2030 #ifdef CONFIG_MODULE_UNLOAD
d5db139ab376464 Rusty Russell 2015-01-22 2031 kdb_printf("%4d ", module_refcount(mod));
5d5314d6795f3c1 Jason Wessel 2010-05-20 2032 #endif
5d5314d6795f3c1 Jason Wessel 2010-05-20 2033 if (mod->state == MODULE_STATE_GOING)
5d5314d6795f3c1 Jason Wessel 2010-05-20 2034 kdb_printf(" (Unloading)");
5d5314d6795f3c1 Jason Wessel 2010-05-20 2035 else if (mod->state == MODULE_STATE_COMING)
5d5314d6795f3c1 Jason Wessel 2010-05-20 2036 kdb_printf(" (Loading)");
5d5314d6795f3c1 Jason Wessel 2010-05-20 2037 else
5d5314d6795f3c1 Jason Wessel 2010-05-20 2038 kdb_printf(" (Live)");
568fb6f42ac6851 Christophe Leroy 2018-09-27 2039 kdb_printf(" 0x%px", mod->core_layout.base);
299a20e0bead4b7 Christophe Leroy 2022-01-24 2040 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
299a20e0bead4b7 Christophe Leroy 2022-01-24 2041 kdb_printf("/0x%px", mod->data_layout.base);
299a20e0bead4b7 Christophe Leroy 2022-01-24 2042 #endif
5d5314d6795f3c1 Jason Wessel 2010-05-20 2043

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]

2022-01-25 08:17:58

by Doug Anderson

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC

Hi,

On Mon, Jan 24, 2022 at 1:22 AM Christophe Leroy
<[email protected]> wrote:
>
> --- a/kernel/debug/kdb/kdb_main.c
> +++ b/kernel/debug/kdb/kdb_main.c
> @@ -2022,8 +2022,11 @@ static int kdb_lsmod(int argc, const char **argv)
> if (mod->state == MODULE_STATE_UNFORMED)
> continue;
>
> - kdb_printf("%-20s%8u 0x%px ", mod->name,
> - mod->core_layout.size, (void *)mod);
> + kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> + kdb_printf("/%8u 0x%px ", mod->data_layout.size);

Just counting percentages and arguments, it seems like something's
wrong in the above print statement.

-Doug
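
The hunk as posted indeed has two conversions but only one argument;
presumably the intended version (a guess at what v2 will carry) is:

	kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
	kdb_printf("/%8u", mod->data_layout.size);
#endif
	kdb_printf(" 0x%px ", (void *)mod);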

2022-01-25 08:56:43

by Christophe Leroy

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC



On 24/01/2022 at 22:43, Doug Anderson wrote:
> Hi,
>
> On Mon, Jan 24, 2022 at 1:22 AM Christophe Leroy
> <[email protected]> wrote:
>>
>> --- a/kernel/debug/kdb/kdb_main.c
>> +++ b/kernel/debug/kdb/kdb_main.c
>> @@ -2022,8 +2022,11 @@ static int kdb_lsmod(int argc, const char **argv)
>> if (mod->state == MODULE_STATE_UNFORMED)
>> continue;
>>
>> - kdb_printf("%-20s%8u 0x%px ", mod->name,
>> - mod->core_layout.size, (void *)mod);
>> + kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
>> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
>> + kdb_printf("/%8u 0x%px ", mod->data_layout.size);
>
> Just counting percentages and arguments, it seems like something's
> wrong in the above print statement.
>

Yes, so it seems; the build robot reported something here as well.

Thanks
Christophe

2022-01-26 09:46:15

by Luis Chamberlain

Subject: Re: [PATCH 0/7] Allocate module text and data separately

On Mon, Jan 24, 2022 at 09:22:11AM +0000, Christophe Leroy wrote:
> This series allow architectures to request having modules data in
> vmalloc area instead of module area.
>
> This is required on powerpc book3s/32 in order to set data non
> executable, because it is not possible to set executability on page
> basis, this is done per 256 Mbytes segments. The module area has exec
> right, vmalloc area has noexec.
>
> This can also be useful on other powerpc/32 in order to maximize the
> chance of code being close enough to kernel core to avoid branch
> trampolines.

Am I understanding that this entire effort is for 32-bit powerpc?
If so, why such an interest in 32-bit these days?

Luis

2022-01-26 09:54:31

by Luis Chamberlain

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC

On Mon, Jan 24, 2022 at 09:22:34AM +0000, Christophe Leroy wrote:
> This can also be useful on other powerpc/32 in order to maximize the
> chance of code being close enough to kernel core to avoid branch
> trampolines.

Curious about all this branch trampoline talk. Do you have data to show
negative impact with things as-is?

Also, was powerpc/32 broken then without this? The commit log seems to
suggest so, but I don't think that's the case. How was this issue noticed?

Are there other future CPU families being planned where this is all true
as well? Are they going to be 32-bit as well?

Luis

2022-01-26 19:51:29

by Christophe Leroy

Subject: Re: [PATCH 0/7] Allocate module text and data separately



On 25/01/2022 at 21:52, Luis Chamberlain wrote:
> On Mon, Jan 24, 2022 at 09:22:11AM +0000, Christophe Leroy wrote:
>> This series allow architectures to request having modules data in
>> vmalloc area instead of module area.
>>
>> This is required on powerpc book3s/32 in order to set data non
>> executable, because it is not possible to set executability on page
>> basis, this is done per 256 Mbytes segments. The module area has exec
>> right, vmalloc area has noexec.
>>
>> This can also be useful on other powerpc/32 in order to maximize the
>> chance of code being close enough to kernel core to avoid branch
>> trampolines.
>
> Am I understanding that this entire effort is for 32-bit powerpc?
> If so, why such an interest in 32-bit these days?
>

32-bit powerpc processors are still manufactured and are widely used in
embedded products like internet boxes, small routers, etc. One of the
reasons is that their power consumption, and hence their heat dissipation,
is much lower than that of 64-bit variants.

I found the effort quite small compared to the benefit it provides.

Christophe

2022-01-26 20:07:53

by Christophe Leroy

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC



On 25/01/2022 at 22:10, Luis Chamberlain wrote:
> On Mon, Jan 24, 2022 at 09:22:34AM +0000, Christophe Leroy wrote:
>> This can also be useful on other powerpc/32 in order to maximize the
>> chance of code being close enough to kernel core to avoid branch
>> trampolines.
>
> Curious about all this branch trampoline talk. Do you have data to show
> negative impact with things as-is?

See
https://github.com/linuxppc/linux/commit/2ec13df167040cd153c25c4d96d0ffc573ac4c40

Or
https://github.com/linuxppc/linux/commit/7d485f647c1f4a6976264c90447fb0dbf07b111d


>
> Also, was powerpc/32 broken then without this? The commit log seems to
> suggest so, but I don't think that's the case. How was this issue noticed?


Is your question related to the trampoline topic or to the exec/noexec
flagging?

Regarding trampolines, everything is working OK. That's just the cherry on
the cake: by putting data elsewhere, more code can stay close to the
kernel. But that would not have been a reason in itself for this series.

Regarding the exec/noexec discussion, it's a real issue. powerpc/32
doesn't honor the page exec flag, so when you select STRICT_MODULES_RWX
and flag module data as no-exec, it remains executable. That's because
the powerpc/32 MMU doesn't have a per-page exec flag, only a per-256
Mbytes-segment exec flag.

Typical PPC32 layout:
0xf0000000-0xffffffff : VMALLOC AREA ==> NO EXEC
0xc0000000-0xefffffff : Linear kernel memory mapping
0xb0000000-0xbfffffff : MODULES AREA ==> EXEC
0x00000000-0xafffffff : User space ==> EXEC

So STRICT_MODULES_RWX is broken on some powerpc/32 platforms.
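
To make that concrete, here is roughly what module_enable_nx() asks for
today versus what the hardware can honor (illustrative, based on the
existing code in kernel/module.c):

	/* STRICT_MODULES_RWX requests NX on module data: */
	frob_rodata(&mod->core_layout, set_memory_nx);
	frob_writable_data(&mod->core_layout, set_memory_nx);
	/*
	 * On book3s/32 those requests have no effect: exec permission is
	 * only tracked per 256 Mbytes segment, and the whole MODULES
	 * segment, including that data, stays executable.
	 */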

>
> Are there other future CPU families being planned where this is all true for
> as well? Are they goin to be 32-bit as well?

Future I don't know.

Regarding the trampoline stuff, I see at least the following existing
architectures with a similar constraint:

ARM:

https://elixir.bootlin.com/linux/v5.16/source/arch/arm/include/asm/memory.h#L55

ARM even has a config item to allow trampolines or not. I might add the
same to powerpc to reduce the number of pages used by modules.

https://elixir.bootlin.com/linux/v5.16/source/arch/arm/Kconfig#L1514

NDS32 has the same constraint:

https://elixir.bootlin.com/linux/v5.16/source/arch/nds32/include/asm/memory.h#L41

NIOS2 has the same constraint, although they handled it in a different way:

https://elixir.bootlin.com/linux/v5.16/source/arch/nios2/kernel/module.c#L30



Even ARM64 benefits from modules being closer to the kernel:

https://elixir.bootlin.com/linux/v5.16/source/arch/arm64/Kconfig#L1848


Another future opportunity with the ability to allocate module parts
separately is the possibility to then use huge vmalloc mappings.

Today huge vmalloc mappings cannot be used for modules, see recent
discussion at
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/[email protected]/

Christophe

2022-01-28 08:41:51

by Miroslav Benes

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC

> @@ -195,6 +208,9 @@ static void mod_tree_remove(struct module *mod)
> {
> __mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
> mod_tree_remove_init(mod);
> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> + __mod_tree_remove(&mod->core_layout.mtn, &mod_data_tree);

s/core_layout/data_layout/ ?

> +#endif
> }

Miroslav
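
i.e. presumably the hunk should read (assumption based on the remark above):

static void mod_tree_remove(struct module *mod)
{
	__mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
	mod_tree_remove_init(mod);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
	__mod_tree_remove(&mod->data_layout.mtn, &mod_data_tree);
#endif
}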

2022-01-28 13:20:18

by Christophe Leroy

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC



On 27/01/2022 at 17:05, Miroslav Benes wrote:
>> @@ -195,6 +208,9 @@ static void mod_tree_remove(struct module *mod)
>> {
>> __mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
>> mod_tree_remove_init(mod);
>> +#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
>> + __mod_tree_remove(&mod->core_layout.mtn, &mod_data_tree);
>
> s/core_layout/data_layout/ ?

Oops, you are right. I should have waited a few more hours before
sending v2.

Thanks

Christophe

2022-02-04 01:18:09

by Luis Chamberlain

Subject: Re: [PATCH 6/7] modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC

On Wed, Jan 26, 2022 at 06:38:30AM +0000, Christophe Leroy wrote:
>
>
On 25/01/2022 at 22:10, Luis Chamberlain wrote:
> > On Mon, Jan 24, 2022 at 09:22:34AM +0000, Christophe Leroy wrote:
> >> This can also be useful on other powerpc/32 in order to maximize the
> >> chance of code being close enough to kernel core to avoid branch
> >> trampolines.
> >
> > Curious about all this branch trampoline talk. Do you have data to show
> > negative impact with things as-is?
>
> See
> https://github.com/linuxppc/linux/commit/2ec13df167040cd153c25c4d96d0ffc573ac4c40
>
> Or
> https://github.com/linuxppc/linux/commit/7d485f647c1f4a6976264c90447fb0dbf07b111d


This was useful and fun to read, thanks.

> > Also, was powerpc/32 broken then without this? The commit log seems to
> > suggest so, but I don't think that's the case. How was this issue noticed?
>
>
> Your question is related to the trampoline topic or the exec/noexec
> flagging ?
>
> Regarding trampoline, everything is working OK. That's just cherry on
> the cake, when putting data away you can have more code closer to the
> kernel. But that would not have been a reason in itself for this series.
>
> Regarding the exec/noexec discussion, it's a real issue. powerpc/32
> doesn't honor page exec flag, so when you select STRICT_MODULES_RWX and
> flag module data as no-exec, it remains executable. That's because
> powerpc/32 MMU doesn't have a per page exec flag but only a per
> 256Mbytes segment exec flag.
>
> Typical PPC32 layount:
> 0xf0000000-0xffffffff : VMALLOC AREA ==> NO EXEC
> 0xc0000000-0xefffffff : Linear kernel memory mapping
> 0xb0000000-0xbfffffff : MODULES AREA ==> EXEC
> 0x00000000-0xafffffff : User space ==> EXEC
>
> So STRICT_MODULES_RWX is broken on some powerpc/32

You know, this is the sort of information that I think would be
very useful for the commit log. Can you amend?

> >
> > Are there other future CPU families being planned where this is all true for
> > as well? Are they goin to be 32-bit as well?
>
> Future I don't know.
>
> Regarding the trampoline stuff, I see at least the following existing
> architectures with a similar constraint:
>
> ARM:
>
> https://elixir.bootlin.com/linux/v5.16/source/arch/arm/include/asm/memory.h#L55
>
> ARM even has a config item to allow trampolines or not. I might add the
> same to powerpc to reduce number of pages used by modules.
>
> https://elixir.bootlin.com/linux/v5.16/source/arch/arm/Kconfig#L1514
>
> NDS32 has the constraint
>
> https://elixir.bootlin.com/linux/v5.16/source/arch/nds32/include/asm/memory.h#L41
>
> NIOS2 has the constraint, allthough they handled it in a different way:
>
> https://elixir.bootlin.com/linux/v5.16/source/arch/nios2/kernel/module.c#L30
>
>
>
> Even ARM64 benefits from modules closer to kernel:
>
> https://elixir.bootlin.com/linux/v5.16/source/arch/arm64/Kconfig#L1848
>
>
> Another future opportunity with the ability to allocate module parts
> separately is the possibility to then use huge vmalloc mappings.
>
> Today huge vmalloc mappings cannot be used for modules, see recent
> discussion at
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/[email protected]/

Alrighty, this is sufficient information, thanks!

Luis