Remove MODULES dependency by migrating from module_alloc() to the new
text_alloc() API. Essentially these changes provide preliminaries for
allowing to compile a static kernel with a proper tracing support.
The same API can be used later on in other sites that allocate space for
trampolines, and trivially scaled to other arch's. An arch can inform
with CONFIG_ARCH_HAS_TEXT_ALLOC that it's providing implementation for
text_alloc().
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
v4:
* Squash lock_modules() patches into one.
* Remove fallback versions of text_alloc() and text_free(). Instead, use
ARCH_HAS_TEXT_ALLOC at site when required.
* Use lockdep_assert_irqs_enabled() in text_free() instead of
WARN_ON(in_interrupt()).
v3:
* Make text_alloc() API disjoint.
* Remove all the possible extra clutter not absolutely required and
split into more logical pieces.
Jarkko Sakkinen (6):
kprobes: Remove dependency to the module_mutex
vmalloc: Add text_alloc() and text_free()
arch/x86: Implement text_alloc() and text_free()
arch/x86: kprobes: Use text_alloc() and text_free()
kprobes: Use text_alloc() and text_free()
kprobes: Remove CONFIG_MODULES dependency
arch/Kconfig | 2 +-
arch/x86/Kconfig | 3 ++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/kprobes/core.c | 4 +--
arch/x86/kernel/text_alloc.c | 41 +++++++++++++++++++++++
include/linux/module.h | 32 ++++++++++++++----
include/linux/vmalloc.h | 17 ++++++++++
kernel/kprobes.c | 61 +++++++++++++++++++++++-----------
kernel/trace/trace_kprobe.c | 20 ++++++++---
9 files changed, 147 insertions(+), 34 deletions(-)
create mode 100644 arch/x86/kernel/text_alloc.c
--
2.25.1
Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
in order to remove the compile time dependency to it.
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Suggested-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
---
include/linux/module.h | 18 ++++++++++++++++++
kernel/kprobes.c | 4 ++--
kernel/trace/trace_kprobe.c | 4 ++--
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/include/linux/module.h b/include/linux/module.h
index 2e6670860d27..8850b9692b8f 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -705,6 +705,16 @@ static inline bool is_livepatch_module(struct module *mod)
bool is_module_sig_enforced(void);
void set_module_sig_enforced(void);
+static inline void lock_modules(void)
+{
+ mutex_lock(&module_mutex);
+}
+
+static inline void unlock_modules(void)
+{
+ mutex_unlock(&module_mutex);
+}
+
#else /* !CONFIG_MODULES... */
static inline struct module *__module_address(unsigned long addr)
@@ -852,6 +862,14 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
return ptr;
}
+static inline void lock_modules(void)
+{
+}
+
+static inline void unlock_modules(void)
+{
+}
+
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2e97febeef77..4e46d96d4e16 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
cpus_read_lock();
mutex_lock(&text_mutex);
/* Lock modules while optimizing kprobes */
- mutex_lock(&module_mutex);
+ lock_modules();
/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes();
- mutex_unlock(&module_mutex);
+ unlock_modules();
mutex_unlock(&text_mutex);
cpus_read_unlock();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aefb6065b508..710ec6a6aa8f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -122,9 +122,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
if (!p)
return true;
*p = '\0';
- mutex_lock(&module_mutex);
+ lock_modules();
ret = !!find_module(tk->symbol);
- mutex_unlock(&module_mutex);
+ unlock_modules();
*p = ':';
return ret;
--
2.25.1
Introduce functions for allocating memory for dynamic trampolines, such
as kprobes. An arch can promote the availability of these functions with
CONFIG_ARCH_HAS_TEXT_ALLOC.
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
---
include/linux/vmalloc.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0221f852a7e1..6c151a0ac02a 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -249,4 +249,21 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
int register_vmap_purge_notifier(struct notifier_block *nb);
int unregister_vmap_purge_notifier(struct notifier_block *nb);
+/*
+ * These functions can be optionally implemented by an arch in order to
+ * provide specialized functions for allocating trampoline code.
+ */
+
+#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
+/*
+ * Allocate memory to be used for trampoline code.
+ */
+void *text_alloc(unsigned long size);
+
+/*
+ * Free memory returned from text_alloc().
+ */
+void text_free(void *region);
+#endif /* CONFIG_ARCH_HAS_TEXT_ALLOC */
+
#endif /* _LINUX_VMALLOC_H */
--
2.25.1
Implement text_alloc() and text_free() with with the vmalloc API. These can
be used to allocate pages for trampoline code without relying on the module
loader code.
Cc: [email protected]
Cc: Masami Hiramatsu <[email protected]>
Cc: Andi Kleen <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
---
arch/x86/Kconfig | 3 +++
arch/x86/kernel/Makefile | 1 +
arch/x86/kernel/text_alloc.c | 41 ++++++++++++++++++++++++++++++++++++
3 files changed, 45 insertions(+)
create mode 100644 arch/x86/kernel/text_alloc.c
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0dea7fdd7a00..a4ee5d1300f6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2035,6 +2035,9 @@ config KEXEC_FILE
config ARCH_HAS_KEXEC_PURGATORY
def_bool KEXEC_FILE
+config ARCH_HAS_TEXT_ALLOC
+ def_bool y
+
config KEXEC_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on KEXEC_FILE
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77261db2391..fa1415424ae3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -68,6 +68,7 @@ obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
obj-y += irqflags.o
+obj-y += text_alloc.o
obj-y += process.o
obj-y += fpu/
diff --git a/arch/x86/kernel/text_alloc.c b/arch/x86/kernel/text_alloc.c
new file mode 100644
index 000000000000..f31c2d82c258
--- /dev/null
+++ b/arch/x86/kernel/text_alloc.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Kernel module help for x86.
+ * Copyright (C) 2001 Rusty Russell.
+ */
+
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/moduleloader.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
+
+void *text_alloc(unsigned long size)
+{
+ void *p;
+
+ if (PAGE_ALIGN(size) > MODULES_LEN)
+ return NULL;
+
+ p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+
+ if (p && (kasan_module_alloc(p, size) < 0)) {
+ vfree(p);
+ return NULL;
+ }
+
+ return p;
+}
+
+void text_free(void *region)
+{
+ /*
+ * This memory may be RO, and freeing RO memory in an interrupt is not
+ * supported by vmalloc.
+ */
+ lockdep_assert_irqs_enabled();
+
+ vfree(region);
+}
--
2.25.1
Use text_alloc() and text_free() to implement alloc_insn_page() and
free_insn_page().
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>Im
---
arch/x86/kernel/kprobes/core.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ada39ddbc922..9e57452b3a51 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -423,7 +423,7 @@ void *alloc_insn_page(void)
{
void *page;
- page = module_alloc(PAGE_SIZE);
+ page = text_alloc(PAGE_SIZE);
if (!page)
return NULL;
@@ -446,7 +446,7 @@ void *alloc_insn_page(void)
/* Recover page to RW mode before releasing it */
void free_insn_page(void *page)
{
- module_memfree(page);
+ text_free(page);
}
static int arch_copy_kprobe(struct kprobe *p)
--
2.25.1
Use text_alloc() and text_free() instead of module_alloc() and
module_memfree() when an arch provides them.
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
---
kernel/kprobes.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4e46d96d4e16..611fcda9f6bf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -40,6 +40,7 @@
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#define KPROBE_HASH_BITS 6
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
@@ -111,12 +112,20 @@ enum kprobe_slot_state {
void __weak *alloc_insn_page(void)
{
+#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
+ return text_alloc(PAGE_SIZE);
+#else
return module_alloc(PAGE_SIZE);
+#endif
}
void __weak free_insn_page(void *page)
{
+#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
+ text_free(page);
+#else
module_memfree(page);
+#endif
}
struct kprobe_insn_cache kprobe_insn_slots = {
--
2.25.1
Remove CONFIG_MODULES dependency by flagging out the dependent code. This
allows to use kprobes in a kernel without support for loadable modules,
which is an useful feature for test kernels and embedded kernels.
Cc: [email protected]
Cc: Andi Kleen <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Jarkko Sakkinen <[email protected]>
---
arch/Kconfig | 2 +-
include/linux/module.h | 14 +++++------
kernel/kprobes.c | 48 +++++++++++++++++++++++--------------
kernel/trace/trace_kprobe.c | 16 +++++++++++--
4 files changed, 52 insertions(+), 28 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 8cc35dc556c7..e3d6b6868ccb 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -61,7 +61,7 @@ config OPROFILE_NMI_TIMER
config KPROBES
bool "Kprobes"
- depends on MODULES
+ depends on MODULES || ARCH_HAS_TEXT_ALLOC
depends on HAVE_KPROBES
select KALLSYMS
help
diff --git a/include/linux/module.h b/include/linux/module.h
index 8850b9692b8f..22c646cff6bd 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -290,6 +290,13 @@ extern typeof(name) __mod_##type##__##name##_device_table \
struct notifier_block;
+enum module_state {
+ MODULE_STATE_LIVE, /* Normal state. */
+ MODULE_STATE_COMING, /* Full formed, running module_init. */
+ MODULE_STATE_GOING, /* Going away. */
+ MODULE_STATE_UNFORMED, /* Still setting it up. */
+};
+
#ifdef CONFIG_MODULES
extern int modules_disabled; /* for sysctl */
@@ -305,13 +312,6 @@ struct module_use {
struct module *source, *target;
};
-enum module_state {
- MODULE_STATE_LIVE, /* Normal state. */
- MODULE_STATE_COMING, /* Full formed, running module_init. */
- MODULE_STATE_GOING, /* Going away. */
- MODULE_STATE_UNFORMED, /* Still setting it up. */
-};
-
struct mod_tree_node {
struct module *mod;
struct latch_tree_node node;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 611fcda9f6bf..bb2e3070481a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1617,6 +1617,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
goto out;
}
+#ifdef CONFIG_MODULES
/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
@@ -1627,6 +1628,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
*probed_mod = NULL;
ret = -ENOENT;
}
+#endif
}
out:
preempt_enable();
@@ -2223,24 +2225,6 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
return 0;
}
-/* Remove all symbols in given area from kprobe blacklist */
-static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
-{
- struct kprobe_blacklist_entry *ent, *n;
-
- list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
- if (ent->start_addr < start || ent->start_addr >= end)
- continue;
- list_del(&ent->list);
- kfree(ent);
- }
-}
-
-static void kprobe_remove_ksym_blacklist(unsigned long entry)
-{
- kprobe_remove_area_blacklist(entry, entry + 1);
-}
-
int __init __weak arch_populate_kprobe_blacklist(void)
{
return 0;
@@ -2283,6 +2267,25 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
return ret ? : arch_populate_kprobe_blacklist();
}
+#ifdef CONFIG_MODULES
+/* Remove all symbols in given area from kprobe blacklist */
+static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
+{
+ struct kprobe_blacklist_entry *ent, *n;
+
+ list_for_each_entry_safe(ent, n, &kprobe_blacklist, list) {
+ if (ent->start_addr < start || ent->start_addr >= end)
+ continue;
+ list_del(&ent->list);
+ kfree(ent);
+ }
+}
+
+static void kprobe_remove_ksym_blacklist(unsigned long entry)
+{
+ kprobe_remove_area_blacklist(entry, entry + 1);
+}
+
static void add_module_kprobe_blacklist(struct module *mod)
{
unsigned long start, end;
@@ -2328,6 +2331,15 @@ static void remove_module_kprobe_blacklist(struct module *mod)
kprobe_remove_area_blacklist(start, end);
}
}
+#else
+static void add_module_kprobe_blacklist(struct module *mod)
+{
+}
+
+static void remove_module_kprobe_blacklist(struct module *mod)
+{
+}
+#endif /* CONFIG_MODULES */
/* Module notifier call back, checking kprobes on the module */
static int kprobes_module_callback(struct notifier_block *nb,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 710ec6a6aa8f..881c998d0162 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -103,8 +103,9 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
return !!(kprobe_gone(&tk->rp.kp));
}
+#ifdef CONFIG_MODULES
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
- struct module *mod)
+ struct module *mod)
{
int len = strlen(mod->name);
const char *name = trace_kprobe_symbol(tk);
@@ -129,6 +130,17 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
return ret;
}
+#else
+static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
+ struct module *mod)
+{
+ return false;
+}
+static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
+{
+ return false;
+}
+#endif
static bool trace_kprobe_is_busy(struct dyn_event *ev)
{
@@ -688,7 +700,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
trace_probe_name(&tk->tp),
- mod->name, ret);
+ module_name(mod), ret);
}
}
mutex_unlock(&event_mutex);
--
2.25.1
On Fri, Jul 24, 2020 at 08:05:47AM +0300, Jarkko Sakkinen wrote:
> Remove MODULES dependency by migrating from module_alloc() to the new
> text_alloc() API. Essentially these changes provide preliminaries for
> allowing to compile a static kernel with a proper tracing support.
>
> The same API can be used later on in other sites that allocate space for
> trampolines, and trivially scaled to other arch's. An arch can inform
> with CONFIG_ARCH_HAS_TEXT_ALLOC that it's providing implementation for
> text_alloc().
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
>
> v4:
> * Squash lock_modules() patches into one.
> * Remove fallback versions of text_alloc() and text_free(). Instead, use
> ARCH_HAS_TEXT_ALLOC at site when required.
> * Use lockdep_assert_irqs_enabled() in text_free() instead of
> WARN_ON(in_interrupt()).
>
> v3:
> * Make text_alloc() API disjoint.
> * Remove all the possible extra clutter not absolutely required and
> split into more logical pieces.
>
> Jarkko Sakkinen (6):
> kprobes: Remove dependency to the module_mutex
> vmalloc: Add text_alloc() and text_free()
> arch/x86: Implement text_alloc() and text_free()
> arch/x86: kprobes: Use text_alloc() and text_free()
> kprobes: Use text_alloc() and text_free()
> kprobes: Remove CONFIG_MODULES dependency
>
> arch/Kconfig | 2 +-
> arch/x86/Kconfig | 3 ++
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/kprobes/core.c | 4 +--
> arch/x86/kernel/text_alloc.c | 41 +++++++++++++++++++++++
> include/linux/module.h | 32 ++++++++++++++----
> include/linux/vmalloc.h | 17 ++++++++++
> kernel/kprobes.c | 61 +++++++++++++++++++++++-----------
> kernel/trace/trace_kprobe.c | 20 ++++++++---
> 9 files changed, 147 insertions(+), 34 deletions(-)
> create mode 100644 arch/x86/kernel/text_alloc.c
>
> --
> 2.25.1
>
Duplicates were caused by Internet connection breaking up in the
middle. Apologies.
/Jarkko
* Jarkko Sakkinen <[email protected]> wrote:
> Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> in order to remove the compile time dependency to it.
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Suggested-by: Masami Hiramatsu <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> include/linux/module.h | 18 ++++++++++++++++++
> kernel/kprobes.c | 4 ++--
> kernel/trace/trace_kprobe.c | 4 ++--
> 3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 2e6670860d27..8850b9692b8f 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -705,6 +705,16 @@ static inline bool is_livepatch_module(struct module *mod)
> bool is_module_sig_enforced(void);
> void set_module_sig_enforced(void);
>
> +static inline void lock_modules(void)
> +{
> + mutex_lock(&module_mutex);
> +}
> +
> +static inline void unlock_modules(void)
> +{
> + mutex_unlock(&module_mutex);
> +}
> +
> #else /* !CONFIG_MODULES... */
>
> static inline struct module *__module_address(unsigned long addr)
> @@ -852,6 +862,14 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
> return ptr;
> }
>
> +static inline void lock_modules(void)
> +{
> +}
> +
> +static inline void unlock_modules(void)
> +{
> +}
Minor namespace nit: when introducing new locking wrappers please
standardize on the XYZ_lock()/XYZ_unlock() nomenclature, i.e.:
modules_lock()
...
modules_unlock()
Similarly to the mutex_lock/unlock(&module_mutex) API that it is
wrapping.
Also, JFYI, the overwhelming majority of the modules related APIs use
module_*(), i.e. singular - not plural, so
module_lock()/module_unlock() would be the more canonical choice.
(But both sound fine to me)
Thanks,
Ingo
* Jarkko Sakkinen <[email protected]> wrote:
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> cpus_read_lock();
> mutex_lock(&text_mutex);
> /* Lock modules while optimizing kprobes */
> - mutex_lock(&module_mutex);
> + lock_modules();
>
> /*
> * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> /* Step 4: Free cleaned kprobes after quiesence period */
> do_free_cleaned_kprobes();
>
> - mutex_unlock(&module_mutex);
> + unlock_modules();
> mutex_unlock(&text_mutex);
> cpus_read_unlock();
BTW., it would be nice to expand on the comments above - exactly which
parts of the modules code is being serialized against and why?
We already hold the text_mutex here, which should protect against most
kprobes related activities interfering - and it's unclear (to me)
which part of the modules code is being serialized with here, and the
'lock modules while optimizing kprobes' comments is unhelpful. :-)
Thanks,
Ingo
* Jarkko Sakkinen <[email protected]> wrote:
> +void text_free(void *region)
> +{
> + /*
> + * This memory may be RO, and freeing RO memory in an interrupt is not
> + * supported by vmalloc.
> + */
> + lockdep_assert_irqs_enabled();
> +
> + vfree(region);
Had to dig around a bit to find the source of this restriction. Might
make sense to clarify this comment to:
/*
* This memory may be read-only, and freeing VM_FLUSH_RESET_PERMS memory
* in an interrupt is not supported by vmalloc.
*/
Thanks,
Ingo
* Jarkko Sakkinen <[email protected]> wrote:
> Use text_alloc() and text_free() instead of module_alloc() and
> module_memfree() when an arch provides them.
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> kernel/kprobes.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 4e46d96d4e16..611fcda9f6bf 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -40,6 +40,7 @@
> #include <asm/cacheflush.h>
> #include <asm/errno.h>
> #include <linux/uaccess.h>
> +#include <linux/vmalloc.h>
>
> #define KPROBE_HASH_BITS 6
> #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> @@ -111,12 +112,20 @@ enum kprobe_slot_state {
>
> void __weak *alloc_insn_page(void)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + return text_alloc(PAGE_SIZE);
> +#else
> return module_alloc(PAGE_SIZE);
> +#endif
> }
>
> void __weak free_insn_page(void *page)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + text_free(page);
> +#else
> module_memfree(page);
> +#endif
> }
I've read the observations in the other threads, but this #ifdef
jungle is silly, it's a de-facto open coded text_alloc() with a
module_alloc() fallback...
Thanks,
Ingo
On Fri, Jul 24, 2020 at 08:05:48AM +0300, Jarkko Sakkinen wrote:
> Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> in order to remove the compile time dependency to it.
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Suggested-by: Masami Hiramatsu <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> include/linux/module.h | 18 ++++++++++++++++++
> kernel/kprobes.c | 4 ++--
> kernel/trace/trace_kprobe.c | 4 ++--
> 3 files changed, 22 insertions(+), 4 deletions(-)
Any reason to convert only kprobes to the new API and leave others with
opencoded implementation?
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 2e6670860d27..8850b9692b8f 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -705,6 +705,16 @@ static inline bool is_livepatch_module(struct module *mod)
> bool is_module_sig_enforced(void);
> void set_module_sig_enforced(void);
>
> +static inline void lock_modules(void)
> +{
> + mutex_lock(&module_mutex);
> +}
> +
> +static inline void unlock_modules(void)
> +{
> + mutex_unlock(&module_mutex);
> +}
> +
> #else /* !CONFIG_MODULES... */
>
> static inline struct module *__module_address(unsigned long addr)
> @@ -852,6 +862,14 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
> return ptr;
> }
>
> +static inline void lock_modules(void)
> +{
> +}
> +
> +static inline void unlock_modules(void)
> +{
> +}
> +
> #endif /* CONFIG_MODULES */
>
> #ifdef CONFIG_SYSFS
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 2e97febeef77..4e46d96d4e16 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> cpus_read_lock();
> mutex_lock(&text_mutex);
> /* Lock modules while optimizing kprobes */
> - mutex_lock(&module_mutex);
> + lock_modules();
>
> /*
> * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> /* Step 4: Free cleaned kprobes after quiesence period */
> do_free_cleaned_kprobes();
>
> - mutex_unlock(&module_mutex);
> + unlock_modules();
> mutex_unlock(&text_mutex);
> cpus_read_unlock();
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index aefb6065b508..710ec6a6aa8f 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -122,9 +122,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
> if (!p)
> return true;
> *p = '\0';
> - mutex_lock(&module_mutex);
> + lock_modules();
> ret = !!find_module(tk->symbol);
> - mutex_unlock(&module_mutex);
> + unlock_modules();
> *p = ':';
>
> return ret;
> --
> 2.25.1
>
--
Sincerely yours,
Mike.
On Fri, Jul 24, 2020 at 08:05:49AM +0300, Jarkko Sakkinen wrote:
> Introduce functions for allocating memory for dynamic trampolines, such
> as kprobes. An arch can promote the availability of these functions with
> CONFIG_ARCH_HAS_TEXT_ALLOC.
As it was pointed out at the discussion on the previous version [1],
text_alloc() alone won't necessarily suit other architectures.
I don't see a point in defining a "generic" interface that apriory could
not be imeplemented by several architectures.
[1] https://lore.kernel.org/lkml/[email protected]/
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Suggested-by: Peter Zijlstra <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> include/linux/vmalloc.h | 17 +++++++++++++++++
> 1 file changed, 17 insertions(+)
>
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 0221f852a7e1..6c151a0ac02a 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -249,4 +249,21 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
> int register_vmap_purge_notifier(struct notifier_block *nb);
> int unregister_vmap_purge_notifier(struct notifier_block *nb);
>
> +/*
> + * These functions can be optionally implemented by an arch in order to
> + * provide specialized functions for allocating trampoline code.
> + */
> +
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> +/*
> + * Allocate memory to be used for trampoline code.
> + */
> +void *text_alloc(unsigned long size);
> +
> +/*
> + * Free memory returned from text_alloc().
> + */
> +void text_free(void *region);
> +#endif /* CONFIG_ARCH_HAS_TEXT_ALLOC */
> +
> #endif /* _LINUX_VMALLOC_H */
> --
> 2.25.1
>
--
Sincerely yours,
Mike.
(cc people whi particpaged in v2 disuccsion)
On Fri, Jul 24, 2020 at 08:05:47AM +0300, Jarkko Sakkinen wrote:
> Remove MODULES dependency by migrating from module_alloc() to the new
> text_alloc() API. Essentially these changes provide preliminaries for
> allowing to compile a static kernel with a proper tracing support.
>
> The same API can be used later on in other sites that allocate space for
> trampolines, and trivially scaled to other arch's. An arch can inform
> with CONFIG_ARCH_HAS_TEXT_ALLOC that it's providing implementation for
> text_alloc().
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
>
> v4:
> * Squash lock_modules() patches into one.
> * Remove fallback versions of text_alloc() and text_free(). Instead, use
> ARCH_HAS_TEXT_ALLOC at site when required.
> * Use lockdep_assert_irqs_enabled() in text_free() instead of
> WARN_ON(in_interrupt()).
>
> v3:
> * Make text_alloc() API disjoint.
> * Remove all the possible extra clutter not absolutely required and
> split into more logical pieces.
>
> Jarkko Sakkinen (6):
> kprobes: Remove dependency to the module_mutex
> vmalloc: Add text_alloc() and text_free()
> arch/x86: Implement text_alloc() and text_free()
> arch/x86: kprobes: Use text_alloc() and text_free()
> kprobes: Use text_alloc() and text_free()
> kprobes: Remove CONFIG_MODULES dependency
>
> arch/Kconfig | 2 +-
> arch/x86/Kconfig | 3 ++
> arch/x86/kernel/Makefile | 1 +
> arch/x86/kernel/kprobes/core.c | 4 +--
> arch/x86/kernel/text_alloc.c | 41 +++++++++++++++++++++++
> include/linux/module.h | 32 ++++++++++++++----
> include/linux/vmalloc.h | 17 ++++++++++
> kernel/kprobes.c | 61 +++++++++++++++++++++++-----------
> kernel/trace/trace_kprobe.c | 20 ++++++++---
> 9 files changed, 147 insertions(+), 34 deletions(-)
> create mode 100644 arch/x86/kernel/text_alloc.c
>
> --
> 2.25.1
>
--
Sincerely yours,
Mike.
On Fri, Jul 24, 2020 at 08:05:52AM +0300, Jarkko Sakkinen wrote:
> Use text_alloc() and text_free() instead of module_alloc() and
> module_memfree() when an arch provides them.
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> kernel/kprobes.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 4e46d96d4e16..611fcda9f6bf 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -40,6 +40,7 @@
> #include <asm/cacheflush.h>
> #include <asm/errno.h>
> #include <linux/uaccess.h>
> +#include <linux/vmalloc.h>
>
> #define KPROBE_HASH_BITS 6
> #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> @@ -111,12 +112,20 @@ enum kprobe_slot_state {
>
> void __weak *alloc_insn_page(void)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + return text_alloc(PAGE_SIZE);
> +#else
> return module_alloc(PAGE_SIZE);
> +#endif
> }
>
> void __weak free_insn_page(void *page)
> {
> +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> + text_free(page);
> +#else
> module_memfree(page);
> +#endif
> }
Both alloc_insn_page() and free_insn_page() are __weak and can be simple
overriden in arch/x86 code.
> struct kprobe_insn_cache kprobe_insn_slots = {
> --
> 2.25.1
>
--
Sincerely yours,
Mike.
On Fri, 24 Jul 2020 at 12:27, Ingo Molnar <[email protected]> wrote:
>
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > Use text_alloc() and text_free() instead of module_alloc() and
> > module_memfree() when an arch provides them.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Masami Hiramatsu <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > kernel/kprobes.c | 9 +++++++++
> > 1 file changed, 9 insertions(+)
> >
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 4e46d96d4e16..611fcda9f6bf 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -40,6 +40,7 @@
> > #include <asm/cacheflush.h>
> > #include <asm/errno.h>
> > #include <linux/uaccess.h>
> > +#include <linux/vmalloc.h>
> >
> > #define KPROBE_HASH_BITS 6
> > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> >
> > void __weak *alloc_insn_page(void)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + return text_alloc(PAGE_SIZE);
> > +#else
> > return module_alloc(PAGE_SIZE);
> > +#endif
> > }
> >
> > void __weak free_insn_page(void *page)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + text_free(page);
> > +#else
> > module_memfree(page);
> > +#endif
> > }
>
> I've read the observations in the other threads, but this #ifdef
> jungle is silly, it's a de-facto open coded text_alloc() with a
> module_alloc() fallback...
>
Also, as I attempted to explain before, there is no reason to allocate
kasan shadow for any of these use cases, so cloning module_alloc() to
implement text_alloc() is not the correct approach even on x86.
I suppose module_alloc() could be reimplemented in terms of
text_alloc() in this case, but simply relabelling it like this seems
inappropriate on all architectures.
On Fri, 24 Jul 2020 08:05:48 +0300
Jarkko Sakkinen <[email protected]> wrote:
> Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> in order to remove the compile time dependency to it.
This subject is a bit confusing. This is just wrapping modules_mutex in
kpprobes. We still have compile time dependency, e.g. module_state, right?
Thank you,
>
> Cc: [email protected]
> Cc: Andi Kleen <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Suggested-by: Masami Hiramatsu <[email protected]>
> Signed-off-by: Jarkko Sakkinen <[email protected]>
> ---
> include/linux/module.h | 18 ++++++++++++++++++
> kernel/kprobes.c | 4 ++--
> kernel/trace/trace_kprobe.c | 4 ++--
> 3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 2e6670860d27..8850b9692b8f 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -705,6 +705,16 @@ static inline bool is_livepatch_module(struct module *mod)
> bool is_module_sig_enforced(void);
> void set_module_sig_enforced(void);
>
> +static inline void lock_modules(void)
> +{
> + mutex_lock(&module_mutex);
> +}
> +
> +static inline void unlock_modules(void)
> +{
> + mutex_unlock(&module_mutex);
> +}
> +
> #else /* !CONFIG_MODULES... */
>
> static inline struct module *__module_address(unsigned long addr)
> @@ -852,6 +862,14 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
> return ptr;
> }
>
> +static inline void lock_modules(void)
> +{
> +}
> +
> +static inline void unlock_modules(void)
> +{
> +}
> +
> #endif /* CONFIG_MODULES */
>
> #ifdef CONFIG_SYSFS
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 2e97febeef77..4e46d96d4e16 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> cpus_read_lock();
> mutex_lock(&text_mutex);
> /* Lock modules while optimizing kprobes */
> - mutex_lock(&module_mutex);
> + lock_modules();
>
> /*
> * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> /* Step 4: Free cleaned kprobes after quiesence period */
> do_free_cleaned_kprobes();
>
> - mutex_unlock(&module_mutex);
> + unlock_modules();
> mutex_unlock(&text_mutex);
> cpus_read_unlock();
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index aefb6065b508..710ec6a6aa8f 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -122,9 +122,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
> if (!p)
> return true;
> *p = '\0';
> - mutex_lock(&module_mutex);
> + lock_modules();
> ret = !!find_module(tk->symbol);
> - mutex_unlock(&module_mutex);
> + unlock_modules();
> *p = ':';
>
> return ret;
> --
> 2.25.1
>
--
Masami Hiramatsu <[email protected]>
On Fri, 24 Jul 2020 13:27:48 +0300
Mike Rapoport <[email protected]> wrote:
> On Fri, Jul 24, 2020 at 08:05:52AM +0300, Jarkko Sakkinen wrote:
> > Use text_alloc() and text_free() instead of module_alloc() and
> > module_memfree() when an arch provides them.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Masami Hiramatsu <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > kernel/kprobes.c | 9 +++++++++
> > 1 file changed, 9 insertions(+)
> >
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 4e46d96d4e16..611fcda9f6bf 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -40,6 +40,7 @@
> > #include <asm/cacheflush.h>
> > #include <asm/errno.h>
> > #include <linux/uaccess.h>
> > +#include <linux/vmalloc.h>
> >
> > #define KPROBE_HASH_BITS 6
> > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> >
> > void __weak *alloc_insn_page(void)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + return text_alloc(PAGE_SIZE);
> > +#else
> > return module_alloc(PAGE_SIZE);
> > +#endif
> > }
> >
> > void __weak free_insn_page(void *page)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + text_free(page);
> > +#else
> > module_memfree(page);
> > +#endif
> > }
>
> Both alloc_insn_page() and free_insn_page() are __weak and can be simple
> overriden in arch/x86 code.
No, we can't use module_alloc/memfree() without CONFIG_MODULES, so
we can not escape from this #ifdefs. (and I think this is not so bad.)
Thank you,
--
Masami Hiramatsu <[email protected]>
On Fri, Jul 24, 2020 at 01:27:48PM +0300, Mike Rapoport wrote:
> On Fri, Jul 24, 2020 at 08:05:52AM +0300, Jarkko Sakkinen wrote:
> > Use text_alloc() and text_free() instead of module_alloc() and
> > module_memfree() when an arch provides them.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Masami Hiramatsu <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > kernel/kprobes.c | 9 +++++++++
> > 1 file changed, 9 insertions(+)
> >
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 4e46d96d4e16..611fcda9f6bf 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -40,6 +40,7 @@
> > #include <asm/cacheflush.h>
> > #include <asm/errno.h>
> > #include <linux/uaccess.h>
> > +#include <linux/vmalloc.h>
> >
> > #define KPROBE_HASH_BITS 6
> > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> >
> > void __weak *alloc_insn_page(void)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + return text_alloc(PAGE_SIZE);
> > +#else
> > return module_alloc(PAGE_SIZE);
> > +#endif
> > }
> >
> > void __weak free_insn_page(void *page)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + text_free(page);
> > +#else
> > module_memfree(page);
> > +#endif
> > }
>
> Both alloc_insn_page() and free_insn_page() are __weak and can be simple
> overriden in arch/x86 code.
That is correct, but the override is obviously happening at linking time.
module_alloc() and module_memfree() will still leave a compile time
dependency to CONFIG_MODULES.
/Jarkko
On Fri, Jul 24, 2020 at 11:22:58AM +0200, Ingo Molnar wrote:
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > +void text_free(void *region)
> > +{
> > + /*
> > + * This memory may be RO, and freeing RO memory in an interrupt is not
> > + * supported by vmalloc.
> > + */
> > + lockdep_assert_irqs_enabled();
> > +
> > + vfree(region);
>
> Had to dig around a bit to find the source of this restriction. Might
> make sense to clarify this comment to:
>
> /*
> * This memory may be read-only, and freeing VM_FLUSH_RESET_PERMS memory
> * in an interrupt is not supported by vmalloc.
> */
This definitely clarifies and is a better explanation. I updated the
commit accordingly. Thank you.
>
> Thanks,
>
> Ingo
/Jarkko
On Fri, Jul 24, 2020 at 01:22:43PM +0300, Mike Rapoport wrote:
> On Fri, Jul 24, 2020 at 08:05:49AM +0300, Jarkko Sakkinen wrote:
> > Introduce functions for allocating memory for dynamic trampolines, such
> > as kprobes. An arch can promote the availability of these functions with
> > CONFIG_ARCH_HAS_TEXT_ALLOC.
>
> As it was pointed out at the discussion on the previous version [1],
> text_alloc() alone won't necessarily suit other architectures.
>
> I don't see a point in defining a "generic" interface that apriory could
> not be imeplemented by several architectures.
>
> [1] https://lore.kernel.org/lkml/[email protected]/
These changes do actually acknowledge the feedback [1][2][3]. They do not
interfere with module_alloc() and are fully optional.
[1] https://lore.kernel.org/linux-riscv/20200714102826.GB4756@willie-the-truck/
[2] https://lore.kernel.org/linux-riscv/[email protected]/
[3] https://lore.kernel.org/linux-riscv/20200714135651.GA27819@linux-8ccs/
/Jarkko
On Fri, Jul 24, 2020 at 11:13:19AM +0200, Ingo Molnar wrote:
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> > in order to remove the compile time dependency to it.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Suggested-by: Masami Hiramatsu <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > include/linux/module.h | 18 ++++++++++++++++++
> > kernel/kprobes.c | 4 ++--
> > kernel/trace/trace_kprobe.c | 4 ++--
> > 3 files changed, 22 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/linux/module.h b/include/linux/module.h
> > index 2e6670860d27..8850b9692b8f 100644
> > --- a/include/linux/module.h
> > +++ b/include/linux/module.h
> > @@ -705,6 +705,16 @@ static inline bool is_livepatch_module(struct module *mod)
> > bool is_module_sig_enforced(void);
> > void set_module_sig_enforced(void);
> >
> > +static inline void lock_modules(void)
> > +{
> > + mutex_lock(&module_mutex);
> > +}
> > +
> > +static inline void unlock_modules(void)
> > +{
> > + mutex_unlock(&module_mutex);
> > +}
> > +
> > #else /* !CONFIG_MODULES... */
> >
> > static inline struct module *__module_address(unsigned long addr)
> > @@ -852,6 +862,14 @@ void *dereference_module_function_descriptor(struct module *mod, void *ptr)
> > return ptr;
> > }
> >
> > +static inline void lock_modules(void)
> > +{
> > +}
> > +
> > +static inline void unlock_modules(void)
> > +{
> > +}
>
> Minor namespace nit: when introducing new locking wrappers please
> standardize on the XYZ_lock()/XYZ_unlock() nomenclature, i.e.:
>
> modules_lock()
> ...
> modules_unlock()
>
> Similarly to the mutex_lock/unlock(&module_mutex) API that it is
> wrapping.
>
> Also, JFYI, the overwhelming majority of the modules related APIs use
> module_*(), i.e. singular - not plural, so
> module_lock()/module_unlock() would be the more canonical choice.
> (But both sound fine to me)
Thanks, I renamed them as module_lock() and module_unlock().
> Thanks,
>
> Ingo
/Jarkko
On Fri, Jul 24, 2020 at 01:22:58PM +0300, Mike Rapoport wrote:
> On Fri, Jul 24, 2020 at 08:05:48AM +0300, Jarkko Sakkinen wrote:
> > Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> > in order to remove the compile time dependency to it.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Suggested-by: Masami Hiramatsu <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > include/linux/module.h | 18 ++++++++++++++++++
> > kernel/kprobes.c | 4 ++--
> > kernel/trace/trace_kprobe.c | 4 ++--
> > 3 files changed, 22 insertions(+), 4 deletions(-)
>
> Any reason to convert only kprobes to the new API and leave others with
> opencoded implementation?
Not anything particular.
Lets see:
$ git --no-pager grep "mutex_lock(&module_mutex)"
arch/powerpc/platforms/powernv/pci-cxl.c: mutex_lock(&module_mutex);
drivers/gpu/drm/drm_fb_helper.c: mutex_lock(&module_mutex);
include/linux/module.h: mutex_lock(&module_mutex);
kernel/livepatch/core.c: mutex_lock(&module_mutex);
kernel/livepatch/core.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
kernel/module.c: mutex_lock(&module_mutex);
I could refine this commit to patch these sites. Or should I split it
into multiple?
/Jarkko
On Fri, Jul 24, 2020 at 11:46:31PM +0900, Masami Hiramatsu wrote:
> On Fri, 24 Jul 2020 08:05:48 +0300
> Jarkko Sakkinen <[email protected]> wrote:
>
> > Add lock_modules() and unlock_modules() wrappers for acquiring module_mutex
> > in order to remove the compile time dependency to it.
>
> This subject is a bit confusing. This is just wrapping modules_mutex in
> kpprobes. We still have compile time dependency, e.g. module_state, right?
Yes. This more like a preliminary change to make that happen. The
actual flagging is in 6/6 ("Remove CONFIG_MODULE dependency").
Maybe a better angle would be to make this update all sites that
deal with module_mutex [*] and base the whole rationale on that?
[*] https://lore.kernel.org/lkml/[email protected]/
/Jarkko
On Fri, Jul 24, 2020 at 11:17:11AM +0200, Ingo Molnar wrote:
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > cpus_read_lock();
> > mutex_lock(&text_mutex);
> > /* Lock modules while optimizing kprobes */
> > - mutex_lock(&module_mutex);
> > + lock_modules();
> >
> > /*
> > * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> > @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > /* Step 4: Free cleaned kprobes after quiesence period */
> > do_free_cleaned_kprobes();
> >
> > - mutex_unlock(&module_mutex);
> > + unlock_modules();
> > mutex_unlock(&text_mutex);
> > cpus_read_unlock();
>
> BTW., it would be nice to expand on the comments above - exactly which
> parts of the modules code is being serialized against and why?
>
> We already hold the text_mutex here, which should protect against most
> kprobes related activities interfering - and it's unclear (to me)
> which part of the modules code is being serialized with here, and the
> 'lock modules while optimizing kprobes' comments is unhelpful. :-)
>
> Thanks,
>
> Ingo
AFAIK, only if you need to call find_module(), you ever need to acquire
this mutex. 99% of time it is internally taken care by kernel/module.c.
I cannot make up any obvious reason to acquire it here.
/Jarkko
On Fri, Jul 24, 2020 at 11:27:46AM +0200, Ingo Molnar wrote:
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > Use text_alloc() and text_free() instead of module_alloc() and
> > module_memfree() when an arch provides them.
> >
> > Cc: [email protected]
> > Cc: Andi Kleen <[email protected]>
> > Cc: Masami Hiramatsu <[email protected]>
> > Cc: Peter Zijlstra <[email protected]>
> > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > ---
> > kernel/kprobes.c | 9 +++++++++
> > 1 file changed, 9 insertions(+)
> >
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 4e46d96d4e16..611fcda9f6bf 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -40,6 +40,7 @@
> > #include <asm/cacheflush.h>
> > #include <asm/errno.h>
> > #include <linux/uaccess.h>
> > +#include <linux/vmalloc.h>
> >
> > #define KPROBE_HASH_BITS 6
> > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> >
> > void __weak *alloc_insn_page(void)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + return text_alloc(PAGE_SIZE);
> > +#else
> > return module_alloc(PAGE_SIZE);
> > +#endif
> > }
> >
> > void __weak free_insn_page(void *page)
> > {
> > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > + text_free(page);
> > +#else
> > module_memfree(page);
> > +#endif
> > }
>
> I've read the observations in the other threads, but this #ifdef
> jungle is silly, it's a de-facto open coded text_alloc() with a
> module_alloc() fallback...
In the previous version I had:
https://lore.kernel.org/lkml/[email protected]/
and I had just calls to text_alloc() and text_free() in corresponding
snippet to the above.
I got this feedback from Mike:
https://lore.kernel.org/lkml/[email protected]/
I'm not still sure that I fully understand this feedback as I don't see
any inherent and obvious difference to the v4. In that version fallbacks
are to module_alloc() and module_memfree() and text_alloc() and
text_memfree() can be overridden by arch.
> Thanks,
>
> Ingo
/Jarkko
On Fri, Jul 24, 2020 at 03:16:08PM +0300, Ard Biesheuvel wrote:
> On Fri, 24 Jul 2020 at 12:27, Ingo Molnar <[email protected]> wrote:
> >
> >
> > * Jarkko Sakkinen <[email protected]> wrote:
> >
> > > Use text_alloc() and text_free() instead of module_alloc() and
> > > module_memfree() when an arch provides them.
> > >
> > > Cc: [email protected]
> > > Cc: Andi Kleen <[email protected]>
> > > Cc: Masami Hiramatsu <[email protected]>
> > > Cc: Peter Zijlstra <[email protected]>
> > > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > > ---
> > > kernel/kprobes.c | 9 +++++++++
> > > 1 file changed, 9 insertions(+)
> > >
> > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > > index 4e46d96d4e16..611fcda9f6bf 100644
> > > --- a/kernel/kprobes.c
> > > +++ b/kernel/kprobes.c
> > > @@ -40,6 +40,7 @@
> > > #include <asm/cacheflush.h>
> > > #include <asm/errno.h>
> > > #include <linux/uaccess.h>
> > > +#include <linux/vmalloc.h>
> > >
> > > #define KPROBE_HASH_BITS 6
> > > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> > >
> > > void __weak *alloc_insn_page(void)
> > > {
> > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > + return text_alloc(PAGE_SIZE);
> > > +#else
> > > return module_alloc(PAGE_SIZE);
> > > +#endif
> > > }
> > >
> > > void __weak free_insn_page(void *page)
> > > {
> > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > + text_free(page);
> > > +#else
> > > module_memfree(page);
> > > +#endif
> > > }
> >
> > I've read the observations in the other threads, but this #ifdef
> > jungle is silly, it's a de-facto open coded text_alloc() with a
> > module_alloc() fallback...
> >
>
> Also, as I attempted to explain before, there is no reason to allocate
> kasan shadow for any of these use cases, so cloning module_alloc() to
> implement text_alloc() is not the correct approach even on x86.
>
> I suppose module_alloc() could be reimplemented in terms of
> text_alloc() in this case, but simply relabelling it like this seems
> inappropriate on all architectures.
I agree with this. Even if there was chance to do a merge of some
kind, it should probably happen over time and accept some redundancy
first.
/Jarkko
* Jarkko Sakkinen <[email protected]> wrote:
> On Fri, Jul 24, 2020 at 11:17:11AM +0200, Ingo Molnar wrote:
> >
> > * Jarkko Sakkinen <[email protected]> wrote:
> >
> > > --- a/kernel/kprobes.c
> > > +++ b/kernel/kprobes.c
> > > @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > cpus_read_lock();
> > > mutex_lock(&text_mutex);
> > > /* Lock modules while optimizing kprobes */
> > > - mutex_lock(&module_mutex);
> > > + lock_modules();
> > >
> > > /*
> > > * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> > > @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > /* Step 4: Free cleaned kprobes after quiesence period */
> > > do_free_cleaned_kprobes();
> > >
> > > - mutex_unlock(&module_mutex);
> > > + unlock_modules();
> > > mutex_unlock(&text_mutex);
> > > cpus_read_unlock();
> >
> > BTW., it would be nice to expand on the comments above - exactly which
> > parts of the modules code is being serialized against and why?
> >
> > We already hold the text_mutex here, which should protect against most
> > kprobes related activities interfering - and it's unclear (to me)
> > which part of the modules code is being serialized with here, and the
> > 'lock modules while optimizing kprobes' comments is unhelpful. :-)
> >
> > Thanks,
> >
> > Ingo
>
> AFAIK, only if you need to call find_module(), you ever need to acquire
> this mutex. 99% of time it is internally taken care by kernel/module.c.
>
> I cannot make up any obvious reason to acquire it here.
If it's unnecessary, then it needs to be removed.
If it's necessary, then it needs to be documented better.
Thanks,
Ingo
On Sat, Jul 25, 2020 at 06:16:48AM +0300, Jarkko Sakkinen wrote:
> On Fri, Jul 24, 2020 at 11:27:46AM +0200, Ingo Molnar wrote:
> >
> > * Jarkko Sakkinen <[email protected]> wrote:
> >
> > > Use text_alloc() and text_free() instead of module_alloc() and
> > > module_memfree() when an arch provides them.
> > >
> > > Cc: [email protected]
> > > Cc: Andi Kleen <[email protected]>
> > > Cc: Masami Hiramatsu <[email protected]>
> > > Cc: Peter Zijlstra <[email protected]>
> > > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > > ---
> > > kernel/kprobes.c | 9 +++++++++
> > > 1 file changed, 9 insertions(+)
> > >
> > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > > index 4e46d96d4e16..611fcda9f6bf 100644
> > > --- a/kernel/kprobes.c
> > > +++ b/kernel/kprobes.c
> > > @@ -40,6 +40,7 @@
> > > #include <asm/cacheflush.h>
> > > #include <asm/errno.h>
> > > #include <linux/uaccess.h>
> > > +#include <linux/vmalloc.h>
> > >
> > > #define KPROBE_HASH_BITS 6
> > > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> > >
> > > void __weak *alloc_insn_page(void)
> > > {
> > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > + return text_alloc(PAGE_SIZE);
> > > +#else
> > > return module_alloc(PAGE_SIZE);
> > > +#endif
> > > }
> > >
> > > void __weak free_insn_page(void *page)
> > > {
> > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > + text_free(page);
> > > +#else
> > > module_memfree(page);
> > > +#endif
> > > }
> >
> > I've read the observations in the other threads, but this #ifdef
> > jungle is silly, it's a de-facto open coded text_alloc() with a
> > module_alloc() fallback...
>
> In the previous version I had:
>
> https://lore.kernel.org/lkml/[email protected]/
>
> and I had just calls to text_alloc() and text_free() in corresponding
> snippet to the above.
>
> I got this feedback from Mike:
>
> https://lore.kernel.org/lkml/[email protected]/
>
> I'm not still sure that I fully understand this feedback as I don't see
> any inherent and obvious difference to the v4. In that version fallbacks
> are to module_alloc() and module_memfree() and text_alloc() and
> text_memfree() can be overridden by arch.
Let me try to elaborate.
There are several subsystems that need to allocate memory for executable
text. As it happens, they use module_alloc() with some abilities for
architectures to override this behaviour.
For many architectures, it would be enough to rename modules_alloc() to
text_alloc(), make it built-in and this way allow removing dependency on
MODULES.
Yet, some architectures have different restrictions for code allocation
for different subsystems so it would make sense to have more than one
variant of text_alloc() and a single config option ARCH_HAS_TEXT_ALLOC
won't be sufficient.
I liked Mark's suggestion to have text_alloc_<something>() and proposed
a way to introduce text_alloc_kprobes() along with
HAVE_KPROBES_TEXT_ALLOC to enable arch overrides of this function.
The major difference between your v4 and my suggestion is that I'm not
trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
MODULES but rather to use per subsystem config option, e.g.
HAVE_KPROBES_TEXT_ALLOC.
Another thing, which might be worth doing regardless of the outcome of
this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
because the former is way too generic and does not emphasize that the
instruction page is actually used by kprobes only.
> /Jarkko
>
--
Sincerely yours,
Mike.
On Sun, 26 Jul 2020 at 11:14, Mike Rapoport <[email protected]> wrote:
>
> On Sat, Jul 25, 2020 at 06:16:48AM +0300, Jarkko Sakkinen wrote:
> > On Fri, Jul 24, 2020 at 11:27:46AM +0200, Ingo Molnar wrote:
> > >
> > > * Jarkko Sakkinen <[email protected]> wrote:
> > >
> > > > Use text_alloc() and text_free() instead of module_alloc() and
> > > > module_memfree() when an arch provides them.
> > > >
> > > > Cc: [email protected]
> > > > Cc: Andi Kleen <[email protected]>
> > > > Cc: Masami Hiramatsu <[email protected]>
> > > > Cc: Peter Zijlstra <[email protected]>
> > > > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > > > ---
> > > > kernel/kprobes.c | 9 +++++++++
> > > > 1 file changed, 9 insertions(+)
> > > >
> > > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > > > index 4e46d96d4e16..611fcda9f6bf 100644
> > > > --- a/kernel/kprobes.c
> > > > +++ b/kernel/kprobes.c
> > > > @@ -40,6 +40,7 @@
> > > > #include <asm/cacheflush.h>
> > > > #include <asm/errno.h>
> > > > #include <linux/uaccess.h>
> > > > +#include <linux/vmalloc.h>
> > > >
> > > > #define KPROBE_HASH_BITS 6
> > > > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > > > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> > > >
> > > > void __weak *alloc_insn_page(void)
> > > > {
> > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > + return text_alloc(PAGE_SIZE);
> > > > +#else
> > > > return module_alloc(PAGE_SIZE);
> > > > +#endif
> > > > }
> > > >
> > > > void __weak free_insn_page(void *page)
> > > > {
> > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > + text_free(page);
> > > > +#else
> > > > module_memfree(page);
> > > > +#endif
> > > > }
> > >
> > > I've read the observations in the other threads, but this #ifdef
> > > jungle is silly, it's a de-facto open coded text_alloc() with a
> > > module_alloc() fallback...
> >
> > In the previous version I had:
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > and I had just calls to text_alloc() and text_free() in corresponding
> > snippet to the above.
> >
> > I got this feedback from Mike:
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > I'm not still sure that I fully understand this feedback as I don't see
> > any inherent and obvious difference to the v4. In that version fallbacks
> > are to module_alloc() and module_memfree() and text_alloc() and
> > text_memfree() can be overridden by arch.
>
> Let me try to elaborate.
>
> There are several subsystems that need to allocate memory for executable
> text. As it happens, they use module_alloc() with some abilities for
> architectures to override this behaviour.
>
> For many architectures, it would be enough to rename modules_alloc() to
> text_alloc(), make it built-in and this way allow removing dependency on
> MODULES.
>
> Yet, some architectures have different restrictions for code allocation
> for different subsystems so it would make sense to have more than one
> variant of text_alloc() and a single config option ARCH_HAS_TEXT_ALLOC
> won't be sufficient.
>
> I liked Mark's suggestion to have text_alloc_<something>() and proposed
> a way to introduce text_alloc_kprobes() along with
> HAVE_KPROBES_TEXT_ALLOC to enable arch overrides of this function.
>
> The major difference between your v4 and my suggestion is that I'm not
> trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> MODULES but rather to use per subsystem config option, e.g.
> HAVE_KPROBES_TEXT_ALLOC.
>
> Another thing, which might be worth doing regardless of the outcome of
> this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> because the former is way too generic and does not emphasize that the
> instruction page is actually used by kprobes only.
>
Masami or Peter should correct me if I am wrong, but it seems to me
that the way kprobes uses these pages does not require them to be in
relative branching range of the core kernel on any architecture, given
that they are populated with individual instruction opcodes that are
executed in single step mode, and relative branches are emulated (when
needed)
So for kprobes in particular, we should be able to come up with a
generic sequence that does not involve module_alloc(), and therefore
removes the kprobes dependency on module support entirely (with the
exception of power which maps the vmalloc space nx when module support
is disabled). Renaming alloc_insn_page() to something more descriptive
makes sense imo, but is a separate issue.
On Sat, 25 Jul 2020 12:21:10 +0200
Ingo Molnar <[email protected]> wrote:
>
> * Jarkko Sakkinen <[email protected]> wrote:
>
> > On Fri, Jul 24, 2020 at 11:17:11AM +0200, Ingo Molnar wrote:
> > >
> > > * Jarkko Sakkinen <[email protected]> wrote:
> > >
> > > > --- a/kernel/kprobes.c
> > > > +++ b/kernel/kprobes.c
> > > > @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > > cpus_read_lock();
> > > > mutex_lock(&text_mutex);
> > > > /* Lock modules while optimizing kprobes */
> > > > - mutex_lock(&module_mutex);
> > > > + lock_modules();
> > > >
> > > > /*
> > > > * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> > > > @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > > /* Step 4: Free cleaned kprobes after quiesence period */
> > > > do_free_cleaned_kprobes();
> > > >
> > > > - mutex_unlock(&module_mutex);
> > > > + unlock_modules();
> > > > mutex_unlock(&text_mutex);
> > > > cpus_read_unlock();
> > >
> > > BTW., it would be nice to expand on the comments above - exactly which
> > > parts of the modules code is being serialized against and why?
> > >
> > > We already hold the text_mutex here, which should protect against most
> > > kprobes related activities interfering - and it's unclear (to me)
> > > which part of the modules code is being serialized with here, and the
> > > 'lock modules while optimizing kprobes' comments is unhelpful. :-)
> > >
> > > Thanks,
> > >
> > > Ingo
> >
> > AFAIK, only if you need to call find_module(), you ever need to acquire
> > this mutex. 99% of time it is internally taken care by kernel/module.c.
> >
> > I cannot make up any obvious reason to acquire it here.
>
> If it's unnecessary, then it needs to be removed.
>
> If it's necessary, then it needs to be documented better.
Good catch! This is not needed anymore.
It has been introduced to avoid conflict with text modification, at that
point we didn't get text_mutex. But after commit f1c6ece23729 ("kprobes: Fix
potential deadlock in kprobe_optimizer()") moved the text_mutex in the current
position, we don't need it. (and anyway, keeping kprobe_mutex locked means
any module unloading will be stopped inside kprobes_module_callback())
This may help?
From 2355ecd941c3234b12a6de7443592848ed4e2087 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <[email protected]>
Date: Tue, 28 Jul 2020 16:32:34 +0900
Subject: [PATCH] kprobes: Remove unneeded module_mutex lock from the optimizer
Remove unneeded module_mutex locking from the optimizer. Since
we already locks both kprobe_mutex and text_mutex in the optimizer,
text will not be changed and the module unloading will be stopped
inside kprobes_module_callback().
Suggested-by: Ingo Molnar <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
kernel/kprobes.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4a904cc56d68..d1b02e890793 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -563,8 +563,6 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_lock(&kprobe_mutex);
cpus_read_lock();
mutex_lock(&text_mutex);
- /* Lock modules while optimizing kprobes */
- mutex_lock(&module_mutex);
/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -589,7 +587,6 @@ static void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes();
- mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
--
2.25.1
--
Masami Hiramatsu <[email protected]>
On Sun, 26 Jul 2020 19:06:20 +0300
Ard Biesheuvel <[email protected]> wrote:
> On Sun, 26 Jul 2020 at 11:14, Mike Rapoport <[email protected]> wrote:
> >
> > On Sat, Jul 25, 2020 at 06:16:48AM +0300, Jarkko Sakkinen wrote:
> > > On Fri, Jul 24, 2020 at 11:27:46AM +0200, Ingo Molnar wrote:
> > > >
> > > > * Jarkko Sakkinen <[email protected]> wrote:
> > > >
> > > > > Use text_alloc() and text_free() instead of module_alloc() and
> > > > > module_memfree() when an arch provides them.
> > > > >
> > > > > Cc: [email protected]
> > > > > Cc: Andi Kleen <[email protected]>
> > > > > Cc: Masami Hiramatsu <[email protected]>
> > > > > Cc: Peter Zijlstra <[email protected]>
> > > > > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > > > > ---
> > > > > kernel/kprobes.c | 9 +++++++++
> > > > > 1 file changed, 9 insertions(+)
> > > > >
> > > > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > > > > index 4e46d96d4e16..611fcda9f6bf 100644
> > > > > --- a/kernel/kprobes.c
> > > > > +++ b/kernel/kprobes.c
> > > > > @@ -40,6 +40,7 @@
> > > > > #include <asm/cacheflush.h>
> > > > > #include <asm/errno.h>
> > > > > #include <linux/uaccess.h>
> > > > > +#include <linux/vmalloc.h>
> > > > >
> > > > > #define KPROBE_HASH_BITS 6
> > > > > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > > > > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> > > > >
> > > > > void __weak *alloc_insn_page(void)
> > > > > {
> > > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > > + return text_alloc(PAGE_SIZE);
> > > > > +#else
> > > > > return module_alloc(PAGE_SIZE);
> > > > > +#endif
> > > > > }
> > > > >
> > > > > void __weak free_insn_page(void *page)
> > > > > {
> > > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > > + text_free(page);
> > > > > +#else
> > > > > module_memfree(page);
> > > > > +#endif
> > > > > }
> > > >
> > > > I've read the observations in the other threads, but this #ifdef
> > > > jungle is silly, it's a de-facto open coded text_alloc() with a
> > > > module_alloc() fallback...
> > >
> > > In the previous version I had:
> > >
> > > https://lore.kernel.org/lkml/[email protected]/
> > >
> > > and I had just calls to text_alloc() and text_free() in corresponding
> > > snippet to the above.
> > >
> > > I got this feedback from Mike:
> > >
> > > https://lore.kernel.org/lkml/[email protected]/
> > >
> > > I'm not still sure that I fully understand this feedback as I don't see
> > > any inherent and obvious difference to the v4. In that version fallbacks
> > > are to module_alloc() and module_memfree() and text_alloc() and
> > > text_memfree() can be overridden by arch.
> >
> > Let me try to elaborate.
> >
> > There are several subsystems that need to allocate memory for executable
> > text. As it happens, they use module_alloc() with some abilities for
> > architectures to override this behaviour.
> >
> > For many architectures, it would be enough to rename modules_alloc() to
> > text_alloc(), make it built-in and this way allow removing dependency on
> > MODULES.
> >
> > Yet, some architectures have different restrictions for code allocation
> > for different subsystems so it would make sense to have more than one
> > variant of text_alloc() and a single config option ARCH_HAS_TEXT_ALLOC
> > won't be sufficient.
> >
> > I liked Mark's suggestion to have text_alloc_<something>() and proposed
> > a way to introduce text_alloc_kprobes() along with
> > HAVE_KPROBES_TEXT_ALLOC to enable arch overrides of this function.
> >
> > The major difference between your v4 and my suggestion is that I'm not
> > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > MODULES but rather to use per subsystem config option, e.g.
> > HAVE_KPROBES_TEXT_ALLOC.
> >
> > Another thing, which might be worth doing regardless of the outcome of
> > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > because the former is way too generic and does not emphasize that the
> > instruction page is actually used by kprobes only.
The name of the insn_pages came from the struct kprobe_insn_page, so
if there is a text_alloc_kprobe(), I'm OK to rename it. (anyway, that
is an allocation operator, we don't call it directly.)
> Masami or Peter should correct me if I am wrong, but it seems to me
> that the way kprobes uses these pages does not require them to be in
> relative branching range of the core kernel on any architecture, given
> that they are populated with individual instruction opcodes that are
> executed in single step mode, and relative branches are emulated (when
> needed)
Actually, x86 and arm has the "relative branching range" requirements
for the jump optimized kprobes. For the other architectures, I think
we don't need it. Only executable text buffer is needed.
Thank you,
> So for kprobes in particular, we should be able to come up with a
> generic sequence that does not involve module_alloc(), and therefore
> removes the kprobes dependency on module support entirely (with the
> exception of power which maps the vmalloc space nx when module support
> is disabled). Renaming alloc_insn_page() to something more descriptive
> makes sense imo, but is a separate issue.
--
Masami Hiramatsu <[email protected]>
On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
>
> On Sun, 26 Jul 2020 19:06:20 +0300
> Ard Biesheuvel <[email protected]> wrote:
>
> > On Sun, 26 Jul 2020 at 11:14, Mike Rapoport <[email protected]> wrote:
> > >
> > > On Sat, Jul 25, 2020 at 06:16:48AM +0300, Jarkko Sakkinen wrote:
> > > > On Fri, Jul 24, 2020 at 11:27:46AM +0200, Ingo Molnar wrote:
> > > > >
> > > > > * Jarkko Sakkinen <[email protected]> wrote:
> > > > >
> > > > > > Use text_alloc() and text_free() instead of module_alloc() and
> > > > > > module_memfree() when an arch provides them.
> > > > > >
> > > > > > Cc: [email protected]
> > > > > > Cc: Andi Kleen <[email protected]>
> > > > > > Cc: Masami Hiramatsu <[email protected]>
> > > > > > Cc: Peter Zijlstra <[email protected]>
> > > > > > Signed-off-by: Jarkko Sakkinen <[email protected]>
> > > > > > ---
> > > > > > kernel/kprobes.c | 9 +++++++++
> > > > > > 1 file changed, 9 insertions(+)
> > > > > >
> > > > > > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > > > > > index 4e46d96d4e16..611fcda9f6bf 100644
> > > > > > --- a/kernel/kprobes.c
> > > > > > +++ b/kernel/kprobes.c
> > > > > > @@ -40,6 +40,7 @@
> > > > > > #include <asm/cacheflush.h>
> > > > > > #include <asm/errno.h>
> > > > > > #include <linux/uaccess.h>
> > > > > > +#include <linux/vmalloc.h>
> > > > > >
> > > > > > #define KPROBE_HASH_BITS 6
> > > > > > #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
> > > > > > @@ -111,12 +112,20 @@ enum kprobe_slot_state {
> > > > > >
> > > > > > void __weak *alloc_insn_page(void)
> > > > > > {
> > > > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > > > + return text_alloc(PAGE_SIZE);
> > > > > > +#else
> > > > > > return module_alloc(PAGE_SIZE);
> > > > > > +#endif
> > > > > > }
> > > > > >
> > > > > > void __weak free_insn_page(void *page)
> > > > > > {
> > > > > > +#ifdef CONFIG_ARCH_HAS_TEXT_ALLOC
> > > > > > + text_free(page);
> > > > > > +#else
> > > > > > module_memfree(page);
> > > > > > +#endif
> > > > > > }
> > > > >
> > > > > I've read the observations in the other threads, but this #ifdef
> > > > > jungle is silly, it's a de-facto open coded text_alloc() with a
> > > > > module_alloc() fallback...
> > > >
> > > > In the previous version I had:
> > > >
> > > > https://lore.kernel.org/lkml/[email protected]/
> > > >
> > > > and I had just calls to text_alloc() and text_free() in corresponding
> > > > snippet to the above.
> > > >
> > > > I got this feedback from Mike:
> > > >
> > > > https://lore.kernel.org/lkml/[email protected]/
> > > >
> > > > I'm not still sure that I fully understand this feedback as I don't see
> > > > any inherent and obvious difference to the v4. In that version fallbacks
> > > > are to module_alloc() and module_memfree() and text_alloc() and
> > > > text_memfree() can be overridden by arch.
> > >
> > > Let me try to elaborate.
> > >
> > > There are several subsystems that need to allocate memory for executable
> > > text. As it happens, they use module_alloc() with some abilities for
> > > architectures to override this behaviour.
> > >
> > > For many architectures, it would be enough to rename modules_alloc() to
> > > text_alloc(), make it built-in and this way allow removing dependency on
> > > MODULES.
> > >
> > > Yet, some architectures have different restrictions for code allocation
> > > for different subsystems so it would make sense to have more than one
> > > variant of text_alloc() and a single config option ARCH_HAS_TEXT_ALLOC
> > > won't be sufficient.
> > >
> > > I liked Mark's suggestion to have text_alloc_<something>() and proposed
> > > a way to introduce text_alloc_kprobes() along with
> > > HAVE_KPROBES_TEXT_ALLOC to enable arch overrides of this function.
> > >
> > > The major difference between your v4 and my suggestion is that I'm not
> > > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > > MODULES but rather to use per subsystem config option, e.g.
> > > HAVE_KPROBES_TEXT_ALLOC.
> > >
> > > Another thing, which might be worth doing regardless of the outcome of
> > > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > > because the former is way too generic and does not emphasize that the
> > > instruction page is actually used by kprobes only.
>
> The name of the insn_pages came from the struct kprobe_insn_page, so
> if there is a text_alloc_kprobe(), I'm OK to rename it. (anyway, that
> is an allocation operator, we don't call it directly.)
>
> > Masami or Peter should correct me if I am wrong, but it seems to me
> > that the way kprobes uses these pages does not require them to be in
> > relative branching range of the core kernel on any architecture, given
> > that they are populated with individual instruction opcodes that are
> > executed in single step mode, and relative branches are emulated (when
> > needed)
>
> Actually, x86 and arm has the "relative branching range" requirements
> for the jump optimized kprobes. For the other architectures, I think
> we don't need it. Only executable text buffer is needed.
>
Thanks for the explanation. Today, arm64 uses the definition below.
void *alloc_insn_page(void)
{
return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}
Do you think we could use that as the generic implementation if we use
MODULES_START/_END as the allocation window?
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 112a0e4171e111e963aada3fe790c71accf4d705
Gitweb: https://git.kernel.org/tip/112a0e4171e111e963aada3fe790c71accf4d705
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Tue, 28 Jul 2020 16:34:00 +09:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Tue, 28 Jul 2020 13:19:05 +02:00
kprobes: Remove unnecessary module_mutex locking from kprobe_optimizer()
Since we already lock both kprobe_mutex and text_mutex in the optimizer,
text will not be changed and the module unloading will be stopped
inside kprobes_module_callback().
The mutex_lock() has originally been introduced to avoid conflict with text modification,
at that point we didn't hold text_mutex.
But after:
f1c6ece23729 ("kprobes: Fix potential deadlock in kprobe_optimizer()")
We started holding the text_mutex and don't need the modules mutex anyway.
So remove the module_mutex locking.
[ mingo: Amended the changelog. ]
Suggested-by: Ingo Molnar <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: Jarkko Sakkinen <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/kprobes.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 146c648..e87679a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -598,8 +598,6 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_lock(&kprobe_mutex);
cpus_read_lock();
mutex_lock(&text_mutex);
- /* Lock modules while optimizing kprobes */
- mutex_lock(&module_mutex);
/*
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -624,7 +622,6 @@ static void kprobe_optimizer(struct work_struct *work)
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes();
- mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
On Tue, 28 Jul 2020 13:56:43 +0300
Ard Biesheuvel <[email protected]> wrote:
> On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
> > > Masami or Peter should correct me if I am wrong, but it seems to me
> > > that the way kprobes uses these pages does not require them to be in
> > > relative branching range of the core kernel on any architecture, given
> > > that they are populated with individual instruction opcodes that are
> > > executed in single step mode, and relative branches are emulated (when
> > > needed)
> >
> > Actually, x86 and arm has the "relative branching range" requirements
> > for the jump optimized kprobes. For the other architectures, I think
> > we don't need it. Only executable text buffer is needed.
> >
>
> Thanks for the explanation. Today, arm64 uses the definition below.
>
> void *alloc_insn_page(void)
> {
> return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
> GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
> NUMA_NO_NODE, __builtin_return_address(0));
> }
>
> Do you think we could use that as the generic implementation if we use
> MODULES_START/_END as the allocation window?
Yes, but for the generic implementation, we don't need to consider the
relative branching range since we can override it for x86 and arm.
(and that will be almost same as module_alloc() default code)
BTW, is PAGE_KERNEL_ROX flag available generically?
Thank you,
--
Masami Hiramatsu <[email protected]>
On Tue, 28 Jul 2020 at 16:35, Masami Hiramatsu <[email protected]> wrote:
>
> On Tue, 28 Jul 2020 13:56:43 +0300
> Ard Biesheuvel <[email protected]> wrote:
>
> > On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
> > > > Masami or Peter should correct me if I am wrong, but it seems to me
> > > > that the way kprobes uses these pages does not require them to be in
> > > > relative branching range of the core kernel on any architecture, given
> > > > that they are populated with individual instruction opcodes that are
> > > > executed in single step mode, and relative branches are emulated (when
> > > > needed)
> > >
> > > Actually, x86 and arm has the "relative branching range" requirements
> > > for the jump optimized kprobes. For the other architectures, I think
> > > we don't need it. Only executable text buffer is needed.
> > >
> >
> > Thanks for the explanation. Today, arm64 uses the definition below.
> >
> > void *alloc_insn_page(void)
> > {
> > return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
> > GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
> > NUMA_NO_NODE, __builtin_return_address(0));
> > }
> >
> > Do you think we could use that as the generic implementation if we use
> > MODULES_START/_END as the allocation window?
>
> Yes, but for the generic implementation, we don't need to consider the
> relative branching range since we can override it for x86 and arm.
> (and that will be almost same as module_alloc() default code)
Indeed. So having kprobes specific macros that default to
VMALLOC_START/END but can be overridden would be sufficient.
> BTW, is PAGE_KERNEL_ROX flag available generically?
>
Turns out that it is not :-(
> Thank you,
> --
> Masami Hiramatsu <[email protected]>
On Tue, 28 Jul 2020 20:51:08 +0300
Ard Biesheuvel <[email protected]> wrote:
> On Tue, 28 Jul 2020 at 16:35, Masami Hiramatsu <[email protected]> wrote:
> >
> > On Tue, 28 Jul 2020 13:56:43 +0300
> > Ard Biesheuvel <[email protected]> wrote:
> >
> > > On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
> > > > > Masami or Peter should correct me if I am wrong, but it seems to me
> > > > > that the way kprobes uses these pages does not require them to be in
> > > > > relative branching range of the core kernel on any architecture, given
> > > > > that they are populated with individual instruction opcodes that are
> > > > > executed in single step mode, and relative branches are emulated (when
> > > > > needed)
> > > >
> > > > Actually, x86 and arm has the "relative branching range" requirements
> > > > for the jump optimized kprobes. For the other architectures, I think
> > > > we don't need it. Only executable text buffer is needed.
> > > >
> > >
> > > Thanks for the explanation. Today, arm64 uses the definition below.
> > >
> > > void *alloc_insn_page(void)
> > > {
> > > return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
> > > GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
> > > NUMA_NO_NODE, __builtin_return_address(0));
> > > }
> > >
> > > Do you think we could use that as the generic implementation if we use
> > > MODULES_START/_END as the allocation window?
> >
> > Yes, but for the generic implementation, we don't need to consider the
> > relative branching range since we can override it for x86 and arm.
> > (and that will be almost same as module_alloc() default code)
>
> Indeed. So having kprobes specific macros that default to
> VMALLOC_START/END but can be overridden would be sufficient.
>
> > BTW, is PAGE_KERNEL_ROX flag available generically?
> >
>
> Turns out that it is not :-(
Hmm, in that case, we need to use PAGE_KERNEL_EXEC.
In the result, may it be similar to this? :)
void * __weak module_alloc(unsigned long size)
{
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}
The major difference between module_alloc() and kprobe's alloc_page_insn()
is the alloc_page_insn() makes the page ROX after allocating the pages *ONLY*
on x86 and arm64.
$ git grep -w alloc_insn_page -- arch
arch/arm64/kernel/probes/kprobes.c:void *alloc_insn_page(void)
arch/x86/kernel/kprobes/core.c:void *alloc_insn_page(void)
However since the module_alloc() owns its arch-dependent implementations
most of major architectures, if we implement independent text_alloc_kprobe(),
we need to make deadcopies of module_alloc() for each architecture.
$ git grep 'module_alloc(unsigned' arch/
arch/arm/kernel/module.c:void *module_alloc(unsigned long size)
arch/arm64/kernel/module.c:void *module_alloc(unsigned long size)
arch/mips/kernel/module.c:void *module_alloc(unsigned long size)
arch/nds32/kernel/module.c:void *module_alloc(unsigned long size)
arch/nios2/kernel/module.c:void *module_alloc(unsigned long size)
arch/parisc/kernel/module.c:void *module_alloc(unsigned long size)
arch/riscv/kernel/module.c:void *module_alloc(unsigned long size)
arch/s390/kernel/module.c:void *module_alloc(unsigned long size)
arch/sparc/kernel/module.c:void *module_alloc(unsigned long size)
arch/unicore32/kernel/module.c:void *module_alloc(unsigned long size)
arch/x86/kernel/module.c:void *module_alloc(unsigned long size)
It seems that some constrains for module_alloc() exists for above
architectures.
Anyway, for kprobe's text_alloc() requirements are
- It must be executable for the arch which uses a single-step out-of-line.
(and need to be registered to KASAN?)
- It must be ROX if implemented (currently only for x86 and arm64)
- It must be in the range of relative branching only for x86 and arm.
Thank you,
--
Masami Hiramatsu <[email protected]>
On Wed, 29 Jul 2020 at 04:51, Masami Hiramatsu <[email protected]> wrote:
>
> On Tue, 28 Jul 2020 20:51:08 +0300
> Ard Biesheuvel <[email protected]> wrote:
>
> > On Tue, 28 Jul 2020 at 16:35, Masami Hiramatsu <[email protected]> wrote:
> > >
> > > On Tue, 28 Jul 2020 13:56:43 +0300
> > > Ard Biesheuvel <[email protected]> wrote:
> > >
> > > > On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
> > > > > > Masami or Peter should correct me if I am wrong, but it seems to me
> > > > > > that the way kprobes uses these pages does not require them to be in
> > > > > > relative branching range of the core kernel on any architecture, given
> > > > > > that they are populated with individual instruction opcodes that are
> > > > > > executed in single step mode, and relative branches are emulated (when
> > > > > > needed)
> > > > >
> > > > > Actually, x86 and arm has the "relative branching range" requirements
> > > > > for the jump optimized kprobes. For the other architectures, I think
> > > > > we don't need it. Only executable text buffer is needed.
> > > > >
> > > >
> > > > Thanks for the explanation. Today, arm64 uses the definition below.
> > > >
> > > > void *alloc_insn_page(void)
> > > > {
> > > > return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
> > > > GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
> > > > NUMA_NO_NODE, __builtin_return_address(0));
> > > > }
> > > >
> > > > Do you think we could use that as the generic implementation if we use
> > > > MODULES_START/_END as the allocation window?
> > >
> > > Yes, but for the generic implementation, we don't need to consider the
> > > relative branching range since we can override it for x86 and arm.
> > > (and that will be almost same as module_alloc() default code)
> >
> > Indeed. So having kprobes specific macros that default to
> > VMALLOC_START/END but can be overridden would be sufficient.
> >
> > > BTW, is PAGE_KERNEL_ROX flag available generically?
> > >
> >
> > Turns out that it is not :-(
>
> Hmm, in that case, we need to use PAGE_KERNEL_EXEC.
>
> In the result, may it be similar to this? :)
>
> void * __weak module_alloc(unsigned long size)
> {
> return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
> GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
> NUMA_NO_NODE, __builtin_return_address(0));
> }
>
> The major difference between module_alloc() and kprobe's alloc_page_insn()
> is the alloc_page_insn() makes the page ROX after allocating the pages *ONLY*
> on x86 and arm64.
>
Right.
> $ git grep -w alloc_insn_page -- arch
> arch/arm64/kernel/probes/kprobes.c:void *alloc_insn_page(void)
> arch/x86/kernel/kprobes/core.c:void *alloc_insn_page(void)
>
> However since the module_alloc() owns its arch-dependent implementations
> most of major architectures, if we implement independent text_alloc_kprobe(),
> we need to make deadcopies of module_alloc() for each architecture.
>
No, that is what we are trying to avoid.
> $ git grep 'module_alloc(unsigned' arch/
> arch/arm/kernel/module.c:void *module_alloc(unsigned long size)
> arch/arm64/kernel/module.c:void *module_alloc(unsigned long size)
> arch/mips/kernel/module.c:void *module_alloc(unsigned long size)
> arch/nds32/kernel/module.c:void *module_alloc(unsigned long size)
> arch/nios2/kernel/module.c:void *module_alloc(unsigned long size)
> arch/parisc/kernel/module.c:void *module_alloc(unsigned long size)
> arch/riscv/kernel/module.c:void *module_alloc(unsigned long size)
> arch/s390/kernel/module.c:void *module_alloc(unsigned long size)
> arch/sparc/kernel/module.c:void *module_alloc(unsigned long size)
> arch/unicore32/kernel/module.c:void *module_alloc(unsigned long size)
> arch/x86/kernel/module.c:void *module_alloc(unsigned long size)
>
> It seems that some constrains for module_alloc() exists for above
> architectures.
>
> Anyway, for kprobe's text_alloc() requirements are
> - It must be executable for the arch which uses a single-step out-of-line.
> (and need to be registered to KASAN?)
No, kasan shadow is not needed here.
> - It must be ROX if implemented (currently only for x86 and arm64)
x86 does not actually define thr macro, but the result is the same.
> - It must be in the range of relative branching only for x86 and arm.
>
So in summary, the generic module_alloc() above can be reused for
kprobes on all arches except x86 and arm64, right? Then we can remove
the call to it, and drop the modules dependency.
On Wed, 29 Jul 2020 09:13:21 +0300
Ard Biesheuvel <[email protected]> wrote:
> On Wed, 29 Jul 2020 at 04:51, Masami Hiramatsu <[email protected]> wrote:
> >
> > On Tue, 28 Jul 2020 20:51:08 +0300
> > Ard Biesheuvel <[email protected]> wrote:
> >
> > > On Tue, 28 Jul 2020 at 16:35, Masami Hiramatsu <[email protected]> wrote:
> > > >
> > > > On Tue, 28 Jul 2020 13:56:43 +0300
> > > > Ard Biesheuvel <[email protected]> wrote:
> > > >
> > > > > On Tue, 28 Jul 2020 at 11:17, Masami Hiramatsu <[email protected]> wrote:
> > > > > > > Masami or Peter should correct me if I am wrong, but it seems to me
> > > > > > > that the way kprobes uses these pages does not require them to be in
> > > > > > > relative branching range of the core kernel on any architecture, given
> > > > > > > that they are populated with individual instruction opcodes that are
> > > > > > > executed in single step mode, and relative branches are emulated (when
> > > > > > > needed)
> > > > > >
> > > > > > Actually, x86 and arm has the "relative branching range" requirements
> > > > > > for the jump optimized kprobes. For the other architectures, I think
> > > > > > we don't need it. Only executable text buffer is needed.
> > > > > >
> > > > >
> > > > > Thanks for the explanation. Today, arm64 uses the definition below.
> > > > >
> > > > > void *alloc_insn_page(void)
> > > > > {
> > > > > return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
> > > > > GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
> > > > > NUMA_NO_NODE, __builtin_return_address(0));
> > > > > }
> > > > >
> > > > > Do you think we could use that as the generic implementation if we use
> > > > > MODULES_START/_END as the allocation window?
> > > >
> > > > Yes, but for the generic implementation, we don't need to consider the
> > > > relative branching range since we can override it for x86 and arm.
> > > > (and that will be almost same as module_alloc() default code)
> > >
> > > Indeed. So having kprobes specific macros that default to
> > > VMALLOC_START/END but can be overridden would be sufficient.
> > >
> > > > BTW, is PAGE_KERNEL_ROX flag available generically?
> > > >
> > >
> > > Turns out that it is not :-(
> >
> > Hmm, in that case, we need to use PAGE_KERNEL_EXEC.
> >
> > In the result, may it be similar to this? :)
> >
> > void * __weak module_alloc(unsigned long size)
> > {
> > return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
> > GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
> > NUMA_NO_NODE, __builtin_return_address(0));
> > }
> >
> > The major difference between module_alloc() and kprobe's alloc_page_insn()
> > is the alloc_page_insn() makes the page ROX after allocating the pages *ONLY*
> > on x86 and arm64.
> >
>
> Right.
>
> > $ git grep -w alloc_insn_page -- arch
> > arch/arm64/kernel/probes/kprobes.c:void *alloc_insn_page(void)
> > arch/x86/kernel/kprobes/core.c:void *alloc_insn_page(void)
> >
> > However since the module_alloc() owns its arch-dependent implementations
> > most of major architectures, if we implement independent text_alloc_kprobe(),
> > we need to make deadcopies of module_alloc() for each architecture.
> >
>
> No, that is what we are trying to avoid.
>
> > $ git grep 'module_alloc(unsigned' arch/
> > arch/arm/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/arm64/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/mips/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/nds32/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/nios2/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/parisc/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/riscv/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/s390/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/sparc/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/unicore32/kernel/module.c:void *module_alloc(unsigned long size)
> > arch/x86/kernel/module.c:void *module_alloc(unsigned long size)
> >
> > It seems that some constrains for module_alloc() exists for above
> > architectures.
> >
> > Anyway, for kprobe's text_alloc() requirements are
> > - It must be executable for the arch which uses a single-step out-of-line.
> > (and need to be registered to KASAN?)
>
> No, kasan shadow is not needed here.
OK, I just saw the KASAN shadow was generated (kasan_module_alloc was
called) in module_alloc() on x86.
So current x86 alloc_insn_page() has it.
> > - It must be ROX if implemented (currently only for x86 and arm64)
>
> x86 does not actually define thr macro, but the result is the same.
Yes, alloc_insn_page() for x86 is adding the RO and X flag on the page.
> > - It must be in the range of relative branching only for x86 and arm.
> >
>
> So in summary, the generic module_alloc() above can be reused for
> kprobes on all arches except x86 and arm64, right? Then we can remove
> the call to it, and drop the modules dependency.
Yes, that's correct. If there is a generic module_alloc(), kprobes is
happy to use it :)
Thank you,
--
Masami Hiramatsu <[email protected]>
On Tue, Jul 28, 2020 at 04:34:00PM +0900, Masami Hiramatsu wrote:
> On Sat, 25 Jul 2020 12:21:10 +0200
> Ingo Molnar <[email protected]> wrote:
>
> >
> > * Jarkko Sakkinen <[email protected]> wrote:
> >
> > > On Fri, Jul 24, 2020 at 11:17:11AM +0200, Ingo Molnar wrote:
> > > >
> > > > * Jarkko Sakkinen <[email protected]> wrote:
> > > >
> > > > > --- a/kernel/kprobes.c
> > > > > +++ b/kernel/kprobes.c
> > > > > @@ -564,7 +564,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > > > cpus_read_lock();
> > > > > mutex_lock(&text_mutex);
> > > > > /* Lock modules while optimizing kprobes */
> > > > > - mutex_lock(&module_mutex);
> > > > > + lock_modules();
> > > > >
> > > > > /*
> > > > > * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> > > > > @@ -589,7 +589,7 @@ static void kprobe_optimizer(struct work_struct *work)
> > > > > /* Step 4: Free cleaned kprobes after quiesence period */
> > > > > do_free_cleaned_kprobes();
> > > > >
> > > > > - mutex_unlock(&module_mutex);
> > > > > + unlock_modules();
> > > > > mutex_unlock(&text_mutex);
> > > > > cpus_read_unlock();
> > > >
> > > > BTW., it would be nice to expand on the comments above - exactly which
> > > > parts of the modules code is being serialized against and why?
> > > >
> > > > We already hold the text_mutex here, which should protect against most
> > > > kprobes related activities interfering - and it's unclear (to me)
> > > > which part of the modules code is being serialized with here, and the
> > > > 'lock modules while optimizing kprobes' comments is unhelpful. :-)
> > > >
> > > > Thanks,
> > > >
> > > > Ingo
> > >
> > > AFAIK, only if you need to call find_module(), you ever need to acquire
> > > this mutex. 99% of time it is internally taken care by kernel/module.c.
> > >
> > > I cannot make up any obvious reason to acquire it here.
> >
> > If it's unnecessary, then it needs to be removed.
> >
> > If it's necessary, then it needs to be documented better.
>
> Good catch! This is not needed anymore.
> It has been introduced to avoid conflict with text modification, at that
> point we didn't get text_mutex. But after commit f1c6ece23729 ("kprobes: Fix
> potential deadlock in kprobe_optimizer()") moved the text_mutex in the current
> position, we don't need it. (and anyway, keeping kprobe_mutex locked means
> any module unloading will be stopped inside kprobes_module_callback())
>
> This may help?
Hey, thanks a lot. This will help to clean my patch set. I'll send a
follow up version as soon I'm on track with my work. I have to recall
my set of changes and backtrack some of the discussion.
I was two weeks in vacation and last week had bunch of network
connectivity issues last week. Anyway, enough time for details to fade
away :-)
/Jarkko
>
> From 2355ecd941c3234b12a6de7443592848ed4e2087 Mon Sep 17 00:00:00 2001
> From: Masami Hiramatsu <[email protected]>
> Date: Tue, 28 Jul 2020 16:32:34 +0900
> Subject: [PATCH] kprobes: Remove unneeded module_mutex lock from the optimizer
>
> Remove unneeded module_mutex locking from the optimizer. Since
> we already locks both kprobe_mutex and text_mutex in the optimizer,
> text will not be changed and the module unloading will be stopped
> inside kprobes_module_callback().
>
> Suggested-by: Ingo Molnar <[email protected]>
> Signed-off-by: Masami Hiramatsu <[email protected]>
> ---
> kernel/kprobes.c | 3 ---
> 1 file changed, 3 deletions(-)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 4a904cc56d68..d1b02e890793 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -563,8 +563,6 @@ static void kprobe_optimizer(struct work_struct *work)
> mutex_lock(&kprobe_mutex);
> cpus_read_lock();
> mutex_lock(&text_mutex);
> - /* Lock modules while optimizing kprobes */
> - mutex_lock(&module_mutex);
>
> /*
> * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> @@ -589,7 +587,6 @@ static void kprobe_optimizer(struct work_struct *work)
> /* Step 4: Free cleaned kprobes after quiesence period */
> do_free_cleaned_kprobes();
>
> - mutex_unlock(&module_mutex);
> mutex_unlock(&text_mutex);
> cpus_read_unlock();
>
> --
> 2.25.1
> --
> Masami Hiramatsu <[email protected]>
On Sun, Jul 26, 2020 at 11:14:08AM +0300, Mike Rapoport wrote:
> On Sat, Jul 25, 2020 at 06:16:48AM +0300, Jarkko Sakkinen wrote:
> > > I've read the observations in the other threads, but this #ifdef
> > > jungle is silly, it's a de-facto open coded text_alloc() with a
> > > module_alloc() fallback...
> >
> > In the previous version I had:
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > and I had just calls to text_alloc() and text_free() in corresponding
> > snippet to the above.
> >
> > I got this feedback from Mike:
> >
> > https://lore.kernel.org/lkml/[email protected]/
> >
> > I'm not still sure that I fully understand this feedback as I don't see
> > any inherent and obvious difference to the v4. In that version fallbacks
> > are to module_alloc() and module_memfree() and text_alloc() and
> > text_memfree() can be overridden by arch.
>
> Let me try to elaborate.
>
> There are several subsystems that need to allocate memory for executable
> text. As it happens, they use module_alloc() with some abilities for
> architectures to override this behaviour.
>
> For many architectures, it would be enough to rename modules_alloc() to
> text_alloc(), make it built-in and this way allow removing dependency on
> MODULES.
>
> Yet, some architectures have different restrictions for code allocation
> for different subsystems so it would make sense to have more than one
> variant of text_alloc() and a single config option ARCH_HAS_TEXT_ALLOC
> won't be sufficient.
>
> I liked Mark's suggestion to have text_alloc_<something>() and proposed
> a way to introduce text_alloc_kprobes() along with
> HAVE_KPROBES_TEXT_ALLOC to enable arch overrides of this function.
>
> The major difference between your v4 and my suggestion is that I'm not
> trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> MODULES but rather to use per subsystem config option, e.g.
> HAVE_KPROBES_TEXT_ALLOC.
>
> Another thing, which might be worth doing regardless of the outcome of
> this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> because the former is way too generic and does not emphasize that the
> instruction page is actually used by kprobes only.
What if we in kernel/kprobes.c just:
#ifndef CONFIG_ARCH_HAS_TEXT_ALLOC
void __weak *alloc_insn_page(void)
{
return module_alloc(PAGE_SIZE);
}
void __weak free_insn_page(void *page)
{
module_memfree(page);
}
#endif
In Kconfig (as in v5):
config KPROBES
bool "Kprobes"
depends on MODULES || ARCH_HAS_TEXT_ALLOC
I checked architectures that override alloc_insn_page(). With the
exception of x86, they do not call module_alloc().
If no rename was done, then with this approach a more consistent.
config flag name would be CONFIG_ARCH_HAS_ALLOC_INSN_PAGE.
I'd call the function just as kprobes_alloc_page(). Then the
config flag would become CONFIG_HAS_KPROBES_ALLOC_PAGE.
> --
> Sincerely yours,
> Mike.
Thanks for the feedback!
/Jarkko
On Tue, Aug 18, 2020 at 08:30:29AM +0300, Jarkko Sakkinen wrote:
> On Sun, Jul 26, 2020 at 11:14:08AM +0300, Mike Rapoport wrote:
> > >
> > > I'm not still sure that I fully understand this feedback as I don't see
> > > any inherent and obvious difference to the v4. In that version fallbacks
> > > are to module_alloc() and module_memfree() and text_alloc() and
> > > text_memfree() can be overridden by arch.
> >
> > The major difference between your v4 and my suggestion is that I'm not
> > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > MODULES but rather to use per subsystem config option, e.g.
> > HAVE_KPROBES_TEXT_ALLOC.
> >
> > Another thing, which might be worth doing regardless of the outcome of
> > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > because the former is way too generic and does not emphasize that the
> > instruction page is actually used by kprobes only.
>
> What if we in kernel/kprobes.c just:
>
> #ifndef CONFIG_ARCH_HAS_TEXT_ALLOC
I don't think that CONFIG_ARCH_HAS_TEXT_ALLOC will work for all
architectures.
If an architecture has different restrictions for allocation of text
for, say, modules, kprobes, bfp, it won't be able to use a single
ARCH_HAS_TEXT_ALLOC. Which means that this architecture is stuck with
dependency of kprobes on MODULES until the next rework.
> void __weak *alloc_insn_page(void)
> {
> return module_alloc(PAGE_SIZE);
> }
>
> void __weak free_insn_page(void *page)
> {
> module_memfree(page);
> }
> #endif
>
> In Kconfig (as in v5):
>
> config KPROBES
> bool "Kprobes"
> depends on MODULES || ARCH_HAS_TEXT_ALLOC
>
> I checked architectures that override alloc_insn_page(). With the
> exception of x86, they do not call module_alloc().
>
> If no rename was done, then with this approach a more consistent.
> config flag name would be CONFIG_ARCH_HAS_ALLOC_INSN_PAGE.
>
> I'd call the function just as kprobes_alloc_page(). Then the
> config flag would become CONFIG_HAS_KPROBES_ALLOC_PAGE.
>
> > --
> > Sincerely yours,
> > Mike.
>
> Thanks for the feedback!
>
> /Jarkko
--
Sincerely yours,
Mike.
On Tue, Aug 18, 2020 at 02:51:41PM +0300, Mike Rapoport wrote:
> On Tue, Aug 18, 2020 at 08:30:29AM +0300, Jarkko Sakkinen wrote:
> > On Sun, Jul 26, 2020 at 11:14:08AM +0300, Mike Rapoport wrote:
> > > >
> > > > I'm not still sure that I fully understand this feedback as I don't see
> > > > any inherent and obvious difference to the v4. In that version fallbacks
> > > > are to module_alloc() and module_memfree() and text_alloc() and
> > > > text_memfree() can be overridden by arch.
> > >
> > > The major difference between your v4 and my suggestion is that I'm not
> > > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > > MODULES but rather to use per subsystem config option, e.g.
> > > HAVE_KPROBES_TEXT_ALLOC.
> > >
> > > Another thing, which might be worth doing regardless of the outcome of
> > > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > > because the former is way too generic and does not emphasize that the
> > > instruction page is actually used by kprobes only.
> >
> > What if we in kernel/kprobes.c just:
> >
> > #ifndef CONFIG_ARCH_HAS_TEXT_ALLOC
>
> I don't think that CONFIG_ARCH_HAS_TEXT_ALLOC will work for all
> architectures.
>
> If an architecture has different restrictions for allocation of text
> for, say, modules, kprobes, bfp, it won't be able to use a single
> ARCH_HAS_TEXT_ALLOC. Which means that this architecture is stuck with
> dependency of kprobes on MODULES until the next rework.
I was thinking to name it as CONFIG_HAS_KPROBES_ALLOC_PAGE, alogn the
lines described below, so it is merely a glitch in my example.
>
> > void __weak *alloc_insn_page(void)
> > {
> > return module_alloc(PAGE_SIZE);
> > }
> >
> > void __weak free_insn_page(void *page)
> > {
> > module_memfree(page);
> > }
> > #endif
> >
> > In Kconfig (as in v5):
> >
> > config KPROBES
> > bool "Kprobes"
> > depends on MODULES || ARCH_HAS_TEXT_ALLOC
> >
> > I checked architectures that override alloc_insn_page(). With the
> > exception of x86, they do not call module_alloc().
> >
> > If no rename was done, then with this approach a more consistent.
> > config flag name would be CONFIG_ARCH_HAS_ALLOC_INSN_PAGE.
> >
> > I'd call the function just as kprobes_alloc_page(). Then the
> > config flag would become CONFIG_HAS_KPROBES_ALLOC_PAGE.
> >
> > > --
> > > Sincerely yours,
> > > Mike.
> >
> > Thanks for the feedback!
> >
> > /Jarkko
>
> --
> Sincerely yours,
> Mike.
BR,
/Jarkko
On Tue, Aug 18, 2020 at 07:30:33PM +0300, Jarkko Sakkinen wrote:
> On Tue, Aug 18, 2020 at 02:51:41PM +0300, Mike Rapoport wrote:
> > On Tue, Aug 18, 2020 at 08:30:29AM +0300, Jarkko Sakkinen wrote:
> > > On Sun, Jul 26, 2020 at 11:14:08AM +0300, Mike Rapoport wrote:
> > > > >
> > > > > I'm not still sure that I fully understand this feedback as I don't see
> > > > > any inherent and obvious difference to the v4. In that version fallbacks
> > > > > are to module_alloc() and module_memfree() and text_alloc() and
> > > > > text_memfree() can be overridden by arch.
> > > >
> > > > The major difference between your v4 and my suggestion is that I'm not
> > > > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > > > MODULES but rather to use per subsystem config option, e.g.
> > > > HAVE_KPROBES_TEXT_ALLOC.
> > > >
> > > > Another thing, which might be worth doing regardless of the outcome of
> > > > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > > > because the former is way too generic and does not emphasize that the
> > > > instruction page is actually used by kprobes only.
> > >
> > > What if we in kernel/kprobes.c just:
> > >
> > > #ifndef CONFIG_ARCH_HAS_TEXT_ALLOC
> >
> > I don't think that CONFIG_ARCH_HAS_TEXT_ALLOC will work for all
> > architectures.
> >
> > If an architecture has different restrictions for allocation of text
> > for, say, modules, kprobes, bfp, it won't be able to use a single
> > ARCH_HAS_TEXT_ALLOC. Which means that this architecture is stuck with
> > dependency of kprobes on MODULES until the next rework.
>
> I was thinking to name it as CONFIG_HAS_KPROBES_ALLOC_PAGE, alogn the
> lines described below, so it is merely a glitch in my example.
IMHO, it would be better to emphasize that this is text page. I liked
Mark's idea [1] to have text_alloc_<subsys>() naming for the allocation
functions. The Kconfig options then would become
HAVE_TEXT_ALLOC_<SUBSYS>.
[1] https://lore.kernel.org/linux-riscv/[email protected]/
> > > void __weak *alloc_insn_page(void)
> > > {
> > > return module_alloc(PAGE_SIZE);
> > > }
> > >
> > > void __weak free_insn_page(void *page)
> > > {
> > > module_memfree(page);
> > > }
> > > #endif
> > >
> > > In Kconfig (as in v5):
> > >
> > > config KPROBES
> > > bool "Kprobes"
> > > depends on MODULES || ARCH_HAS_TEXT_ALLOC
> > >
> > > I checked architectures that override alloc_insn_page(). With the
> > > exception of x86, they do not call module_alloc().
> > >
> > > If no rename was done, then with this approach a more consistent.
> > > config flag name would be CONFIG_ARCH_HAS_ALLOC_INSN_PAGE.
> > >
> > > I'd call the function just as kprobes_alloc_page(). Then the
> > > config flag would become CONFIG_HAS_KPROBES_ALLOC_PAGE.
> > >
> > > > --
> > > > Sincerely yours,
> > > > Mike.
> > >
> > > Thanks for the feedback!
> > >
> > > /Jarkko
> >
>
> > --
> > Sincerely yours,
> > Mike.
>
> BR,
> /Jarkko
--
Sincerely yours,
Mike.
On Wed, Aug 19, 2020 at 09:47:18AM +0300, Mike Rapoport wrote:
> On Tue, Aug 18, 2020 at 07:30:33PM +0300, Jarkko Sakkinen wrote:
> > On Tue, Aug 18, 2020 at 02:51:41PM +0300, Mike Rapoport wrote:
> > > On Tue, Aug 18, 2020 at 08:30:29AM +0300, Jarkko Sakkinen wrote:
> > > > On Sun, Jul 26, 2020 at 11:14:08AM +0300, Mike Rapoport wrote:
> > > > > >
> > > > > > I'm not still sure that I fully understand this feedback as I don't see
> > > > > > any inherent and obvious difference to the v4. In that version fallbacks
> > > > > > are to module_alloc() and module_memfree() and text_alloc() and
> > > > > > text_memfree() can be overridden by arch.
> > > > >
> > > > > The major difference between your v4 and my suggestion is that I'm not
> > > > > trying to impose a single ARCH_HAS_TEXT_ALLOC as an alternative to
> > > > > MODULES but rather to use per subsystem config option, e.g.
> > > > > HAVE_KPROBES_TEXT_ALLOC.
> > > > >
> > > > > Another thing, which might be worth doing regardless of the outcome of
> > > > > this discussion is to rename alloc_insn_pages() to text_alloc_kprobes()
> > > > > because the former is way too generic and does not emphasize that the
> > > > > instruction page is actually used by kprobes only.
> > > >
> > > > What if we in kernel/kprobes.c just:
> > > >
> > > > #ifndef CONFIG_ARCH_HAS_TEXT_ALLOC
> > >
> > > I don't think that CONFIG_ARCH_HAS_TEXT_ALLOC will work for all
> > > architectures.
> > >
> > > If an architecture has different restrictions for allocation of text
> > > for, say, modules, kprobes, bfp, it won't be able to use a single
> > > ARCH_HAS_TEXT_ALLOC. Which means that this architecture is stuck with
> > > dependency of kprobes on MODULES until the next rework.
> >
> > I was thinking to name it as CONFIG_HAS_KPROBES_ALLOC_PAGE, alogn the
> > lines described below, so it is merely a glitch in my example.
>
> IMHO, it would be better to emphasize that this is text page. I liked
> Mark's idea [1] to have text_alloc_<subsys>() naming for the allocation
> functions. The Kconfig options then would become
> HAVE_TEXT_ALLOC_<SUBSYS>.
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/
I think I got the point now, the point being in future other subsystems
could use the same naming convention for analogous stuff?
I'll follow this convention. Thank you for the patience with this!
/Jarkko