This series allows architectures to request having module data in the
vmalloc area instead of the module area.
This is required on powerpc book3s/32 in order to make data
non-executable: on that platform executability cannot be set on a
per-page basis, only per 256 Mbyte segment. The module area has exec
right, vmalloc area has noexec. Without this change module data
remains executable regardless of CONFIG_STRICT_MODULES_RWX.
This can also be useful on other powerpc/32 in order to maximize the
chance of code being close enough to kernel core to avoid branch
trampolines.
Changes in v2:
- Dropped first two patches which are not necessary. They may be added back later as a follow-up series.
- Fixed the printks in KDB
Christophe Leroy (5):
modules: Always have struct mod_tree_root
modules: Prepare for handling several RB trees
modules: Introduce data_layout
modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
powerpc: Select ARCH_WANTS_MODULES_DATA_IN_VMALLOC on book3s/32 and
8xx
arch/Kconfig | 6 ++
arch/powerpc/Kconfig | 1 +
include/linux/module.h | 8 ++
kernel/debug/kdb/kdb_main.c | 10 +-
kernel/module.c | 193 +++++++++++++++++++++++++-----------
5 files changed, 156 insertions(+), 62 deletions(-)
--
2.33.1
In order to separate text and data, we need to set up
two rb trees.
This also means that struct mod_tree_root is required even without
MODULES_TREE_LOOKUP.
Also remove module_addr_min and module_addr_max as there will
be one min and one max for each tree.
Signed-off-by: Christophe Leroy <[email protected]>
---
kernel/module.c | 39 ++++++++++++++++++---------------------
1 file changed, 18 insertions(+), 21 deletions(-)
diff --git a/kernel/module.c b/kernel/module.c
index 24dab046e16c..c0f9d63d3f05 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -85,7 +85,7 @@
* Mutex protects:
* 1) List of modules (also safely readable with preempt_disable),
* 2) module_use links,
- * 3) module_addr_min/module_addr_max.
+ * 3) mod_tree.addr_min/mod_tree.addr_max.
* (delete and add uses RCU list operations).
*/
static DEFINE_MUTEX(module_mutex);
@@ -96,6 +96,16 @@ static void do_free_init(struct work_struct *w);
static DECLARE_WORK(init_free_wq, do_free_init);
static LLIST_HEAD(init_free_list);
+static struct mod_tree_root {
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+ struct latch_tree_root root;
+#endif
+ unsigned long addr_min;
+ unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+
#ifdef CONFIG_MODULES_TREE_LOOKUP
/*
@@ -149,17 +159,6 @@ static const struct latch_tree_ops mod_tree_ops = {
.comp = mod_tree_comp,
};
-static struct mod_tree_root {
- struct latch_tree_root root;
- unsigned long addr_min;
- unsigned long addr_max;
-} mod_tree __cacheline_aligned = {
- .addr_min = -1UL,
-};
-
-#define module_addr_min mod_tree.addr_min
-#define module_addr_max mod_tree.addr_max
-
static noinline void __mod_tree_insert(struct mod_tree_node *node)
{
latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
@@ -209,8 +208,6 @@ static struct module *mod_find(unsigned long addr)
#else /* MODULES_TREE_LOOKUP */
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
static void mod_tree_insert(struct module *mod) { }
static void mod_tree_remove_init(struct module *mod) { }
static void mod_tree_remove(struct module *mod) { }
@@ -239,10 +236,10 @@ static void __mod_update_bounds(void *base, unsigned int size)
unsigned long min = (unsigned long)base;
unsigned long max = min + size;
- if (min < module_addr_min)
- module_addr_min = min;
- if (max > module_addr_max)
- module_addr_max = max;
+ if (min < mod_tree.addr_min)
+ mod_tree.addr_min = min;
+ if (max > mod_tree.addr_max)
+ mod_tree.addr_max = max;
}
static void mod_update_bounds(struct module *mod)
@@ -4546,14 +4543,14 @@ static void cfi_init(struct module *mod)
mod->exit = *exit;
#endif
- cfi_module_add(mod, module_addr_min);
+ cfi_module_add(mod, mod_tree.addr_min);
#endif
}
static void cfi_cleanup(struct module *mod)
{
#ifdef CONFIG_CFI_CLANG
- cfi_module_remove(mod, module_addr_min);
+ cfi_module_remove(mod, mod_tree.addr_min);
#endif
}
@@ -4737,7 +4734,7 @@ struct module *__module_address(unsigned long addr)
{
struct module *mod;
- if (addr < module_addr_min || addr > module_addr_max)
+ if (addr < mod_tree.addr_min || addr > mod_tree.addr_max)
return NULL;
module_assert_mutex_or_preempt();
--
2.33.1
Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC to allow architectures
to request having modules data in vmalloc area instead of module area.
This is required on powerpc book3s/32 in order to set data non
executable, because it is not possible to set executability on page
basis, this is done per 256 Mbytes segments. The module area has exec
right, vmalloc area has noexec.
This can also be useful on other powerpc/32 in order to maximize the
chance of code being close enough to kernel core to avoid branch
trampolines.
Signed-off-by: Christophe Leroy <[email protected]>
Cc: Jason Wessel <[email protected]>
Cc: Daniel Thompson <[email protected]>
Cc: Douglas Anderson <[email protected]>
---
arch/Kconfig | 6 +++
include/linux/module.h | 8 ++++
kernel/debug/kdb/kdb_main.c | 10 ++++-
kernel/module.c | 76 +++++++++++++++++++++++++++++++++++--
4 files changed, 94 insertions(+), 6 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 678a80713b21..b5d1f2c19c27 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -882,6 +882,12 @@ config MODULES_USE_ELF_REL
Modules only use ELF REL relocations. Modules with ELF RELA
relocations will give an error.
+config ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ bool
+ help
+ For architectures like powerpc/32 which have constraints on module
+ allocation and need to allocate module data outside of module area.
+
config HAVE_IRQ_EXIT_ON_IRQ_STACK
bool
help
diff --git a/include/linux/module.h b/include/linux/module.h
index 1e135fd5c076..3a892bdcbb5f 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -422,6 +422,9 @@ struct module {
/* Core layout: rbtree is accessed frequently, so keep together. */
struct module_layout core_layout __module_layout_align;
struct module_layout init_layout;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ struct module_layout data_layout;
+#endif
/* Arch-specific module values */
struct mod_arch_specific arch;
@@ -569,6 +572,11 @@ bool is_module_text_address(unsigned long addr);
static inline bool within_module_core(unsigned long addr,
const struct module *mod)
{
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ if ((unsigned long)mod->data_layout.base <= addr &&
+ addr < (unsigned long)mod->data_layout.base + mod->data_layout.size)
+ return true;
+#endif
return (unsigned long)mod->core_layout.base <= addr &&
addr < (unsigned long)mod->core_layout.base + mod->core_layout.size;
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0852a537dad4..85d3fd40b7fe 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2022,8 +2022,11 @@ static int kdb_lsmod(int argc, const char **argv)
if (mod->state == MODULE_STATE_UNFORMED)
continue;
- kdb_printf("%-20s%8u 0x%px ", mod->name,
- mod->core_layout.size, (void *)mod);
+ kdb_printf("%-20s%8u", mod->name, mod->core_layout.size);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ kdb_printf("/%8u", mod->data_layout.size);
+#endif
+ kdb_printf(" 0x%px ", (void *)mod);
#ifdef CONFIG_MODULE_UNLOAD
kdb_printf("%4d ", module_refcount(mod));
#endif
@@ -2034,6 +2037,9 @@ static int kdb_lsmod(int argc, const char **argv)
else
kdb_printf(" (Live)");
kdb_printf(" 0x%px", mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ kdb_printf("/0x%px", mod->data_layout.base);
+#endif
#ifdef CONFIG_MODULE_UNLOAD
{
diff --git a/kernel/module.c b/kernel/module.c
index 2b70b997a36d..884c9fb11813 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -81,7 +81,9 @@
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
+#ifndef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
#define data_layout core_layout
+#endif
/*
* Mutex protects:
@@ -108,6 +110,12 @@ static struct mod_tree_root {
.addr_min = -1UL,
};
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+static struct mod_tree_root mod_data_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+#endif
+
#ifdef CONFIG_MODULES_TREE_LOOKUP
/*
@@ -183,6 +191,11 @@ static void mod_tree_insert(struct module *mod)
__mod_tree_insert(&mod->core_layout.mtn, &mod_tree);
if (mod->init_layout.size)
__mod_tree_insert(&mod->init_layout.mtn, &mod_tree);
+
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ mod->data_layout.mtn.mod = mod;
+ __mod_tree_insert(&mod->data_layout.mtn, &mod_data_tree);
+#endif
}
static void mod_tree_remove_init(struct module *mod)
@@ -195,6 +208,9 @@ static void mod_tree_remove(struct module *mod)
{
__mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
mod_tree_remove_init(mod);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ __mod_tree_remove(&mod->data_layout.mtn, &mod_data_tree);
+#endif
}
static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
@@ -249,6 +265,9 @@ static void mod_update_bounds(struct module *mod)
__mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree);
if (mod->init_layout.size)
__mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ __mod_update_bounds(mod->data_layout.base, mod->data_layout.size, &mod_data_tree);
+#endif
}
#ifdef CONFIG_KGDB_KDB
@@ -1178,6 +1197,17 @@ static ssize_t show_coresize(struct module_attribute *mattr,
static struct module_attribute modinfo_coresize =
__ATTR(coresize, 0444, show_coresize, NULL);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+static ssize_t show_datasize(struct module_attribute *mattr,
+ struct module_kobject *mk, char *buffer)
+{
+ return sprintf(buffer, "%u\n", mk->mod->data_layout.size);
+}
+
+static struct module_attribute modinfo_datasize =
+ __ATTR(datasize, 0444, show_datasize, NULL);
+#endif
+
static ssize_t show_initsize(struct module_attribute *mattr,
struct module_kobject *mk, char *buffer)
{
@@ -1206,6 +1236,9 @@ static struct module_attribute *modinfo_attrs[] = {
&modinfo_srcversion,
&modinfo_initstate,
&modinfo_coresize,
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ &modinfo_datasize,
+#endif
&modinfo_initsize,
&modinfo_taint,
#ifdef CONFIG_MODULE_UNLOAD
@@ -2208,6 +2241,9 @@ static void free_module(struct module *mod)
/* Finally, free the core (containing the module structure) */
module_memfree(mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ vfree(mod->data_layout.base);
+#endif
}
void *__symbol_get(const char *symbol)
@@ -3459,6 +3495,24 @@ static int move_module(struct module *mod, struct load_info *info)
} else
mod->init_layout.base = NULL;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ /* Do the allocs. */
+ ptr = vmalloc(mod->data_layout.size);
+ /*
+ * The pointer to this block is stored in the module structure
+ * which is inside the block. Just mark it as not being a
+ * leak.
+ */
+ kmemleak_not_leak(ptr);
+ if (!ptr) {
+ module_memfree(mod->core_layout.base);
+ module_memfree(mod->init_layout.base);
+ return -ENOMEM;
+ }
+
+ memset(ptr, 0, mod->data_layout.size);
+ mod->data_layout.base = ptr;
+#endif
/* Transfer each section which specifies SHF_ALLOC */
pr_debug("final section addresses:\n");
for (i = 0; i < info->hdr->e_shnum; i++) {
@@ -3634,6 +3688,9 @@ static void module_deallocate(struct module *mod, struct load_info *info)
module_arch_freeing_init(mod);
module_memfree(mod->init_layout.base);
module_memfree(mod->core_layout.base);
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ vfree(mod->data_layout.base);
+#endif
}
int __weak module_finalize(const Elf_Ehdr *hdr,
@@ -4612,13 +4669,17 @@ static int m_show(struct seq_file *m, void *p)
struct module *mod = list_entry(p, struct module, list);
char buf[MODULE_FLAGS_BUF_SIZE];
void *value;
+ unsigned int size;
/* We always ignore unformed modules. */
if (mod->state == MODULE_STATE_UNFORMED)
return 0;
- seq_printf(m, "%s %u",
- mod->name, mod->init_layout.size + mod->core_layout.size);
+ size = mod->init_layout.size + mod->core_layout.size;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ size += mod->data_layout.size;
+#endif
+ seq_printf(m, "%s %u", mod->name, size);
print_unload_info(m, mod);
/* Informative for users. */
@@ -4741,13 +4802,20 @@ bool is_module_address(unsigned long addr)
struct module *__module_address(unsigned long addr)
{
struct module *mod;
+ struct mod_tree_root *tree;
- if (addr < mod_tree.addr_min || addr > mod_tree.addr_max)
+ if (addr >= mod_tree.addr_min && addr <= mod_tree.addr_max)
+ tree = &mod_tree;
+#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
+ else if (addr >= mod_data_tree.addr_min && addr <= mod_data_tree.addr_max)
+ tree = &mod_data_tree;
+#endif
+ else
return NULL;
module_assert_mutex_or_preempt();
- mod = mod_find(addr, &mod_tree);
+ mod = mod_find(addr, tree);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
--
2.33.1
In order to allow separation of data from text, add another layout,
called data_layout. For architectures requesting separation of text
and data, only text will go in core_layout and data will go in
data_layout.
For architectures which keep text and data together, make data_layout
an alias of core_layout, that way data_layout can be used for all
data manipulations, regardless of whether data is in core_layout or
data_layout.
Signed-off-by: Christophe Leroy <[email protected]>
---
kernel/module.c | 52 ++++++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/kernel/module.c b/kernel/module.c
index 2b9a3d9d3c0d..2b70b997a36d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -81,6 +81,8 @@
/* If this is set, the section belongs in the init part of the module */
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
+#define data_layout core_layout
+
/*
* Mutex protects:
* 1) List of modules (also safely readable with preempt_disable),
@@ -2011,19 +2013,20 @@ static void module_enable_ro(const struct module *mod, bool after_init)
set_vm_flush_reset_perms(mod->init_layout.base);
frob_text(&mod->core_layout, set_memory_ro);
- frob_rodata(&mod->core_layout, set_memory_ro);
+ frob_rodata(&mod->data_layout, set_memory_ro);
+
frob_text(&mod->init_layout, set_memory_ro);
frob_rodata(&mod->init_layout, set_memory_ro);
if (after_init)
- frob_ro_after_init(&mod->core_layout, set_memory_ro);
+ frob_ro_after_init(&mod->data_layout, set_memory_ro);
}
static void module_enable_nx(const struct module *mod)
{
- frob_rodata(&mod->core_layout, set_memory_nx);
- frob_ro_after_init(&mod->core_layout, set_memory_nx);
- frob_writable_data(&mod->core_layout, set_memory_nx);
+ frob_rodata(&mod->data_layout, set_memory_nx);
+ frob_ro_after_init(&mod->data_layout, set_memory_nx);
+ frob_writable_data(&mod->data_layout, set_memory_nx);
frob_rodata(&mod->init_layout, set_memory_nx);
frob_writable_data(&mod->init_layout, set_memory_nx);
}
@@ -2201,7 +2204,7 @@ static void free_module(struct module *mod)
percpu_modfree(mod);
/* Free lock-classes; relies on the preceding sync_rcu(). */
- lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
+ lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
/* Finally, free the core (containing the module structure) */
module_memfree(mod->core_layout.base);
@@ -2448,7 +2451,10 @@ static void layout_sections(struct module *mod, struct load_info *info)
|| s->sh_entsize != ~0UL
|| module_init_layout_section(sname))
continue;
- s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
+ if (m)
+ s->sh_entsize = get_offset(mod, &mod->data_layout.size, s, i);
+ else
+ s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
pr_debug("\t%s\n", sname);
}
switch (m) {
@@ -2457,15 +2463,15 @@ static void layout_sections(struct module *mod, struct load_info *info)
mod->core_layout.text_size = mod->core_layout.size;
break;
case 1: /* RO: text and ro-data */
- mod->core_layout.size = debug_align(mod->core_layout.size);
- mod->core_layout.ro_size = mod->core_layout.size;
+ mod->data_layout.size = debug_align(mod->data_layout.size);
+ mod->data_layout.ro_size = mod->data_layout.size;
break;
case 2: /* RO after init */
- mod->core_layout.size = debug_align(mod->core_layout.size);
- mod->core_layout.ro_after_init_size = mod->core_layout.size;
+ mod->data_layout.size = debug_align(mod->data_layout.size);
+ mod->data_layout.ro_after_init_size = mod->data_layout.size;
break;
case 4: /* whole core */
- mod->core_layout.size = debug_align(mod->core_layout.size);
+ mod->data_layout.size = debug_align(mod->data_layout.size);
break;
}
}
@@ -2718,12 +2724,12 @@ static void layout_symtab(struct module *mod, struct load_info *info)
}
/* Append room for core symbols at end of core part. */
- info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1);
- info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
- mod->core_layout.size += strtab_size;
- info->core_typeoffs = mod->core_layout.size;
- mod->core_layout.size += ndst * sizeof(char);
- mod->core_layout.size = debug_align(mod->core_layout.size);
+ info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
+ info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod->data_layout.size += strtab_size;
+ info->core_typeoffs = mod->data_layout.size;
+ mod->data_layout.size += ndst * sizeof(char);
+ mod->data_layout.size = debug_align(mod->data_layout.size);
/* Put string table section at end of init part of module. */
strsect->sh_flags |= SHF_ALLOC;
@@ -2767,9 +2773,9 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
* Now populate the cut down core kallsyms for after init
* and set types up while we still have access to sections.
*/
- mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
- mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
- mod->core_kallsyms.typetab = mod->core_layout.base + info->core_typeoffs;
+ mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
+ mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
+ mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
src = mod->kallsyms->symtab;
for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
mod->kallsyms->typetab[i] = elf_type(src + i, info);
@@ -3465,6 +3471,8 @@ static int move_module(struct module *mod, struct load_info *info)
if (shdr->sh_entsize & INIT_OFFSET_MASK)
dest = mod->init_layout.base
+ (shdr->sh_entsize & ~INIT_OFFSET_MASK);
+ else if (!(shdr->sh_flags & SHF_EXECINSTR))
+ dest = mod->data_layout.base + shdr->sh_entsize;
else
dest = mod->core_layout.base + shdr->sh_entsize;
@@ -4170,7 +4178,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
- lockdep_free_key_range(mod->core_layout.base, mod->core_layout.size);
+ lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
module_deallocate(mod, info);
free_copy:
--
2.33.1
book3s/32 and 8xx have a separate area for allocating modules,
defined by MODULES_VADDR / MODULES_END.
On book3s/32, it is not possible to protect against execution
on a page basis. A full 256M segment is either Exec or NoExec.
The module area is in an Exec segment while vmalloc area is
in a NoExec segment.
In order to protect module data against execution, select
ARCH_WANTS_MODULES_DATA_IN_VMALLOC.
For the 8xx (and possibly other 32-bit platforms in the future),
there is no such constraint on Exec/NoExec protection, however
there is a critical distance between kernel functions and callers
that needs to remain below 32Mbytes in order to avoid costly
trampolines. By allocating data outside of module area, we
increase the chance for module text to remain within acceptable
distance from kernel core text.
So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
Signed-off-by: Christophe Leroy <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Paul Mackerras <[email protected]>
---
arch/powerpc/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b779603978e1..242eed8cedf8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -152,6 +152,7 @@ config PPC
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
select BUILDTIME_TABLE_SORT
--
2.33.1
In order to separate text and data, we need to set up
two rb trees. So modify functions to give the tree
as a parameter.
Signed-off-by: Christophe Leroy <[email protected]>
---
kernel/module.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/kernel/module.c b/kernel/module.c
index c0f9d63d3f05..2b9a3d9d3c0d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -159,14 +159,14 @@ static const struct latch_tree_ops mod_tree_ops = {
.comp = mod_tree_comp,
};
-static noinline void __mod_tree_insert(struct mod_tree_node *node)
+static noinline void __mod_tree_insert(struct mod_tree_node *node, struct mod_tree_root *tree)
{
- latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+ latch_tree_insert(&node->node, &tree->root, &mod_tree_ops);
}
-static void __mod_tree_remove(struct mod_tree_node *node)
+static void __mod_tree_remove(struct mod_tree_node *node, struct mod_tree_root *tree)
{
- latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+ latch_tree_erase(&node->node, &tree->root, &mod_tree_ops);
}
/*
@@ -178,28 +178,28 @@ static void mod_tree_insert(struct module *mod)
mod->core_layout.mtn.mod = mod;
mod->init_layout.mtn.mod = mod;
- __mod_tree_insert(&mod->core_layout.mtn);
+ __mod_tree_insert(&mod->core_layout.mtn, &mod_tree);
if (mod->init_layout.size)
- __mod_tree_insert(&mod->init_layout.mtn);
+ __mod_tree_insert(&mod->init_layout.mtn, &mod_tree);
}
static void mod_tree_remove_init(struct module *mod)
{
if (mod->init_layout.size)
- __mod_tree_remove(&mod->init_layout.mtn);
+ __mod_tree_remove(&mod->init_layout.mtn, &mod_tree);
}
static void mod_tree_remove(struct module *mod)
{
- __mod_tree_remove(&mod->core_layout.mtn);
+ __mod_tree_remove(&mod->core_layout.mtn, &mod_tree);
mod_tree_remove_init(mod);
}
-static struct module *mod_find(unsigned long addr)
+static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
{
struct latch_tree_node *ltn;
- ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ ltn = latch_tree_find((void *)addr, &tree->root, &mod_tree_ops);
if (!ltn)
return NULL;
@@ -212,7 +212,7 @@ static void mod_tree_insert(struct module *mod) { }
static void mod_tree_remove_init(struct module *mod) { }
static void mod_tree_remove(struct module *mod) { }
-static struct module *mod_find(unsigned long addr)
+static struct module *mod_find(unsigned long addr, struct mod_tree_root *tree)
{
struct module *mod;
@@ -231,22 +231,22 @@ static struct module *mod_find(unsigned long addr)
* Bounds of module text, for speeding up __module_address.
* Protected by module_mutex.
*/
-static void __mod_update_bounds(void *base, unsigned int size)
+static void __mod_update_bounds(void *base, unsigned int size, struct mod_tree_root *tree)
{
unsigned long min = (unsigned long)base;
unsigned long max = min + size;
- if (min < mod_tree.addr_min)
- mod_tree.addr_min = min;
- if (max > mod_tree.addr_max)
- mod_tree.addr_max = max;
+ if (min < tree->addr_min)
+ tree->addr_min = min;
+ if (max > tree->addr_max)
+ tree->addr_max = max;
}
static void mod_update_bounds(struct module *mod)
{
- __mod_update_bounds(mod->core_layout.base, mod->core_layout.size);
+ __mod_update_bounds(mod->core_layout.base, mod->core_layout.size, &mod_tree);
if (mod->init_layout.size)
- __mod_update_bounds(mod->init_layout.base, mod->init_layout.size);
+ __mod_update_bounds(mod->init_layout.base, mod->init_layout.size, &mod_tree);
}
#ifdef CONFIG_KGDB_KDB
@@ -4739,7 +4739,7 @@ struct module *__module_address(unsigned long addr)
module_assert_mutex_or_preempt();
- mod = mod_find(addr);
+ mod = mod_find(addr, &mod_tree);
if (mod) {
BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
--
2.33.1
On Thu, Jan 27, 2022 at 11:28:09AM +0000, Christophe Leroy wrote:
> Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC to allow architectures
> to request having modules data in vmalloc area instead of module area.
>
> This is required on powerpc book3s/32 in order to set data non
> executable, because it is not possible to set executability on page
> basis, this is done per 256 Mbytes segments. The module area has exec
> right, vmalloc area has noexec.
>
> This can also be useful on other powerpc/32 in order to maximize the
> chance of code being close enough to kernel core to avoid branch
> trampolines.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> Cc: Jason Wessel <[email protected]>
> Cc: Daniel Thompson <[email protected]>
> Cc: Douglas Anderson <[email protected]>
Thanks for diligence in making sure kdb is up to date!
Acked-by: Daniel Thompson <[email protected]>
Daniel.
On Thu, Jan 27, 2022 at 11:28:12AM +0000, Christophe Leroy wrote:
> book3s/32 and 8xx have a separate area for allocating modules,
> defined by MODULES_VADDR / MODULES_END.
>
> On book3s/32, it is not possible to protect against execution
> on a page basis. A full 256M segment is either Exec or NoExec.
> The module area is in an Exec segment while vmalloc area is
> in a NoExec segment.
>
> In order to protect module data against execution, select
> ARCH_WANTS_MODULES_DATA_IN_VMALLOC.
>
> For the 8xx (and possibly other 32 bits platform in the future),
> there is no such constraint on Exec/NoExec protection, however
> there is a critical distance between kernel functions and callers
> that needs to remain below 32Mbytes in order to avoid costly
> trampolines. By allocating data outside of module area, we
> increase the chance for module text to remain within acceptable
> distance from kernel core text.
>
> So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
>
> Signed-off-by: Christophe Leroy <[email protected]>
> Cc: Michael Ellerman <[email protected]>
> Cc: Benjamin Herrenschmidt <[email protected]>
> Cc: Paul Mackerras <[email protected]>
Cc list first and then the SOB.
Luis
Le 03/02/2022 à 06:39, Michael Ellerman a écrit :
> Luis Chamberlain <[email protected]> writes:
>> On Thu, Jan 27, 2022 at 11:28:12AM +0000, Christophe Leroy wrote:
>>> book3s/32 and 8xx have a separate area for allocating modules,
>>> defined by MODULES_VADDR / MODULES_END.
>>>
>>> On book3s/32, it is not possible to protect against execution
>>> on a page basis. A full 256M segment is either Exec or NoExec.
>>> The module area is in an Exec segment while vmalloc area is
>>> in a NoExec segment.
>>>
>>> In order to protect module data against execution, select
>>> ARCH_WANTS_MODULES_DATA_IN_VMALLOC.
>>>
>>> For the 8xx (and possibly other 32 bits platform in the future),
>>> there is no such constraint on Exec/NoExec protection, however
>>> there is a critical distance between kernel functions and callers
>>> that needs to remain below 32Mbytes in order to avoid costly
>>> trampolines. By allocating data outside of module area, we
>>> increase the chance for module text to remain within acceptable
>>> distance from kernel core text.
>>>
>>> So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
>>>
>>> Signed-off-by: Christophe Leroy <[email protected]>
>>> Cc: Michael Ellerman <[email protected]>
>>> Cc: Benjamin Herrenschmidt <[email protected]>
>>> Cc: Paul Mackerras <[email protected]>
>>
>> Cc list first and then the SOB.
>
> Just delete the Cc: list, it's meaningless.
>
Was an easy way to copy you automatically with 'git send-email', but
getting it through linuxppc-dev list is enough I guess ?
Christophe
Luis Chamberlain <[email protected]> writes:
> On Thu, Jan 27, 2022 at 11:28:12AM +0000, Christophe Leroy wrote:
>> book3s/32 and 8xx have a separate area for allocating modules,
>> defined by MODULES_VADDR / MODULES_END.
>>
>> On book3s/32, it is not possible to protect against execution
>> on a page basis. A full 256M segment is either Exec or NoExec.
>> The module area is in an Exec segment while vmalloc area is
>> in a NoExec segment.
>>
>> In order to protect module data against execution, select
>> ARCH_WANTS_MODULES_DATA_IN_VMALLOC.
>>
>> For the 8xx (and possibly other 32 bits platform in the future),
>> there is no such constraint on Exec/NoExec protection, however
>> there is a critical distance between kernel functions and callers
>> that needs to remain below 32Mbytes in order to avoid costly
>> trampolines. By allocating data outside of module area, we
>> increase the chance for module text to remain within acceptable
>> distance from kernel core text.
>>
>> So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
>>
>> Signed-off-by: Christophe Leroy <[email protected]>
>> Cc: Michael Ellerman <[email protected]>
>> Cc: Benjamin Herrenschmidt <[email protected]>
>> Cc: Paul Mackerras <[email protected]>
>
> Cc list first and then the SOB.
Just delete the Cc: list, it's meaningless.
cheers
Christophe Leroy <[email protected]> writes:
> Le 03/02/2022 à 06:39, Michael Ellerman a écrit :
>> Luis Chamberlain <[email protected]> writes:
>>> On Thu, Jan 27, 2022 at 11:28:12AM +0000, Christophe Leroy wrote:
>>>> book3s/32 and 8xx have a separate area for allocating modules,
>>>> defined by MODULES_VADDR / MODULES_END.
>>>>
>>>> On book3s/32, it is not possible to protect against execution
>>>> on a page basis. A full 256M segment is either Exec or NoExec.
>>>> The module area is in an Exec segment while vmalloc area is
>>>> in a NoExec segment.
>>>>
>>>> In order to protect module data against execution, select
>>>> ARCH_WANTS_MODULES_DATA_IN_VMALLOC.
>>>>
>>>> For the 8xx (and possibly other 32 bits platform in the future),
>>>> there is no such constraint on Exec/NoExec protection, however
>>>> there is a critical distance between kernel functions and callers
>>>> that needs to remain below 32Mbytes in order to avoid costly
>>>> trampolines. By allocating data outside of module area, we
>>>> increase the chance for module text to remain within acceptable
>>>> distance from kernel core text.
>>>>
>>>> So select ARCH_WANTS_MODULES_DATA_IN_VMALLOC for 8xx as well.
>>>>
>>>> Signed-off-by: Christophe Leroy <[email protected]>
>>>> Cc: Michael Ellerman <[email protected]>
>>>> Cc: Benjamin Herrenschmidt <[email protected]>
>>>> Cc: Paul Mackerras <[email protected]>
>>>
>>> Cc list first and then the SOB.
>>
>> Just delete the Cc: list, it's meaningless.
>>
>
> Was an easy way to copy you automatically with 'git send-email', but
> getting it through linuxppc-dev list is enough I guess ?
It's useful for making the tooling Cc the right people, it's fine to use
them for that.
But there's no value in committing them to the git history, I actively
strip them when applying. The fact that someone is Cc'ed on a patch
tells you nothing, given the volume of mail maintainers receive.
The link tag back to the original submission gives you the Cc list
anyway.
cheers
On Thu, 27 Jan 2022, Christophe Leroy wrote:
> This series allow architectures to request having modules data in
> vmalloc area instead of module area.
>
> This is required on powerpc book3s/32 in order to set data non
> executable, because it is not possible to set executability on page
> basis, this is done per 256 Mbytes segments. The module area has exec
> right, vmalloc area has noexec. Without this change module data
> remains executable regardless of CONFIG_STRICT_MODULES_RWX.
>
> This can also be useful on other powerpc/32 in order to maximize the
> chance of code being close enough to kernel core to avoid branch
> trampolines.
>
> Changes in v2:
> - Dropped first two patches which are not necessary. They may be added back later as a follow-up series.
> - Fixed the printks in GDB
>
> Christophe Leroy (5):
> modules: Always have struct mod_tree_root
> modules: Prepare for handling several RB trees
> modules: Introduce data_layout
> modules: Add CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
> powerpc: Select ARCH_WANTS_MODULES_DATA_IN_VMALLOC on book3s/32 and
> 8xx
>
> arch/Kconfig | 6 ++
> arch/powerpc/Kconfig | 1 +
> include/linux/module.h | 8 ++
> kernel/debug/kdb/kdb_main.c | 10 +-
> kernel/module.c | 193 +++++++++++++++++++++++++-----------
> 5 files changed, 156 insertions(+), 62 deletions(-)
Looks good to me apart from the typo I mentioned at v1. I will review
again once it is rebased on Aaron's patch set.
Regards,
Miroslav