2021-11-24 17:33:36

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH] module: Introduce module unload taint tracking

Currently, only the initial module that tainted the kernel is
recorded e.g. when an out-of-tree module is loaded.

So the purpose of this patch is to allow the kernel to maintain a record of
each unloaded module that taints the kernel. Now, in addition to displaying
a list of linked modules (see print_modules()) e.g. in the event of an
Oops, unloaded modules that carried a taint (or taints) are also displayed.
If the previously unloaded module is loaded once again it will be removed
from the list only if the taints bitmask is the same.

The number of tracked modules is not fixed and can be modified accordingly.
This feature is disabled by default.

Signed-off-by: Aaron Tomlin <[email protected]>
---
include/linux/module.h | 5 ++
init/Kconfig | 9 ++++
kernel/module.c | 106 +++++++++++++++++++++++++++++++++++++++--
kernel/sysctl.c | 10 ++++
4 files changed, 126 insertions(+), 4 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index 8a298d820dbc..6f089953f28a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -672,6 +672,11 @@ static inline bool is_livepatch_module(struct module *mod)
bool is_module_sig_enforced(void);
void set_module_sig_enforced(void);

+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+
+extern int tainted_list_max_count; /* for sysctl */
+
+#endif
#else /* !CONFIG_MODULES... */

static inline struct module *__module_address(unsigned long addr)
diff --git a/init/Kconfig b/init/Kconfig
index bb0d6e6262b1..699c6cf948d8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2087,6 +2087,15 @@ config MODULE_FORCE_UNLOAD
rmmod). This is mainly for kernel developers and desperate users.
If unsure, say N.

+config MODULE_UNLOAD_TAINT_TRACKING
+ bool "Tainted module unload tracking"
+ default n
+ help
+ This option allows you to maintain a record of each unloaded
+ module that taints the kernel. Now in addition to displaying a
+ list of linked modules e.g. in the event of an Oops, the
+ aforementioned details are also displayed. If unsure, say N.
+
config MODVERSIONS
bool "Module versioning support"
help
diff --git a/kernel/module.c b/kernel/module.c
index ed13917ea5f3..11e10b571d64 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -90,6 +90,17 @@
*/
static DEFINE_MUTEX(module_mutex);
static LIST_HEAD(modules);
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+static LIST_HEAD(unloaded_tainted_modules);
+static int tainted_list_count;
+int __read_mostly tainted_list_max_count = 20;
+
+struct mod_unloaded_taint {
+ struct list_head list;
+ char name[MODULE_NAME_LEN];
+ unsigned long taints;
+};
+#endif

/* Work queue for freeing init sections in success case */
static void do_free_init(struct work_struct *w);
@@ -310,6 +321,47 @@ int unregister_module_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_module_notifier);

+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+
+static int try_add_tainted_module(struct module *mod)
+{
+ struct mod_unload_taint *mod_taint;
+
+ module_assert_mutex_or_preempt();
+
+ if (tainted_list_max_count >= 0 && mod->taints) {
+ if (!tainted_list_max_count &&
+ tainted_list_count >= tainted_list_max_count) {
+ pr_warn_once("%s: limit reached on the unloaded tainted modules list (count: %d).\n",
+ mod->name, tainted_list_count);
+ goto out;
+ }
+
+ mod_taint = kmalloc(sizeof(*mod_taint), GFP_KERNEL);
+ if (unlikely(!mod_taint))
+ return -ENOMEM;
+ else {
+ strlcpy(mod_taint->name, mod->name,
+ MODULE_NAME_LEN);
+ mod_taint->taints = mod->taints;
+ list_add_rcu(&mod_taint->list,
+ &unloaded_tainted_modules);
+ tainted_list_count++;
+ }
+out:
+ }
+ return 0;
+}
+
+#else /* MODULE_UNLOAD_TAINT_TRACKING */
+
+static int try_add_tainted_module(struct module *mod)
+{
+ return 0;
+}
+
+#endif /* MODULE_UNLOAD_TAINT_TRACKING */
+
/*
* We require a truly strong try_module_get(): 0 means success.
* Otherwise an error is returned due to ongoing or failed
@@ -579,6 +631,23 @@ struct module *find_module(const char *name)
{
return find_module_all(name, strlen(name), false);
}
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+struct mod_unload_taint *find_mod_unload_taint(const char *name, size_t len,
+ unsigned long taints)
+{
+ struct mod_unload_taint *mod_taint;
+
+ module_assert_mutex_or_preempt();
+
+ list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules, list,
+ lockdep_is_held(&module_mutex)) {
+ if (strlen(mod_taint->name) == len && !memcmp(mod_taint->name,
+ name, len) && mod_taint->taints & taints) {
+ return mod_taint;
+ }
+ }
+ return NULL;
+#endif

#ifdef CONFIG_SMP

@@ -1121,13 +1190,13 @@ static inline int module_unload_init(struct module *mod)
}
#endif /* CONFIG_MODULE_UNLOAD */

-static size_t module_flags_taint(struct module *mod, char *buf)
+static size_t module_flags_taint(unsigned long taints, char *buf)
{
size_t l = 0;
int i;

for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
- if (taint_flags[i].module && test_bit(i, &mod->taints))
+ if (taint_flags[i].module && test_bit(i, &taints))
buf[l++] = taint_flags[i].c_true;
}

@@ -1194,7 +1263,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
{
size_t l;

- l = module_flags_taint(mk->mod, buffer);
+ l = module_flags_taint(mk->mod->taints, buffer);
buffer[l++] = '\n';
return l;
}
@@ -2193,6 +2262,9 @@ static void free_module(struct module *mod)
module_bug_cleanup(mod);
/* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
synchronize_rcu();
+ if (try_add_tainted_module(mod))
+ pr_error("%s: adding tainted module to the unloaded tainted modules list failed.\n",
+ mod->name);
mutex_unlock(&module_mutex);

/* Clean up CFI for the module. */
@@ -3670,6 +3742,9 @@ static noinline int do_init_module(struct module *mod)
{
int ret = 0;
struct mod_initfree *freeinit;
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+ struct mod_unload_taint *old;
+#endif

freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL);
if (!freeinit) {
@@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
mod->state = MODULE_STATE_LIVE;
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+ mutex_lock(&module_mutex);
+ old = find_mod_unload_taint(mod->name, strlen(mod->name),
+ mod->taints);
+ if (old) {
+ list_del_rcu(&old->list);
+ synchronize_rcu();
+ }
+ mutex_unlock(&module_mutex);
+#endif

/* Delay uevent until module has finished its init routine */
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
@@ -4511,7 +4596,7 @@ static char *module_flags(struct module *mod, char *buf)
mod->state == MODULE_STATE_GOING ||
mod->state == MODULE_STATE_COMING) {
buf[bx++] = '(';
- bx += module_flags_taint(mod, buf + bx);
+ bx += module_flags_taint(mod->taints, buf + bx);
/* Show a - for module-is-being-unloaded */
if (mod->state == MODULE_STATE_GOING)
buf[bx++] = '-';
@@ -4735,6 +4820,10 @@ void print_modules(void)
{
struct module *mod;
char buf[MODULE_FLAGS_BUF_SIZE];
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+ struct mod_unload_taint *mod_taint;
+ size_t l;
+#endif

printk(KERN_DEFAULT "Modules linked in:");
/* Most callers should already have preempt disabled, but make sure */
@@ -4744,6 +4833,15 @@ void print_modules(void)
continue;
pr_cont(" %s%s", mod->name, module_flags(mod, buf));
}
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+ printk(KERN_DEFAULT "\nUnloaded tainted modules:");
+ list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules,
+ list) {
+ l = module_flags_taint(mod_taint->taints, buf);
+ buf[l++] = '\0';
+ pr_cont(" %s(%s)", mod_taint->name, buf);
+ }
+#endif
preempt_enable();
if (last_unloaded_module[0])
pr_cont(" [last unloaded: %s]", last_unloaded_module);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 272f4a272f8c..290ffaa5b553 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2078,6 +2078,16 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = SYSCTL_ONE,
},
+#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
+ {
+ .procname = "tainted_list_max_count",
+ .data = &tainted_list_max_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &neg_one,
+ },
+#endif
#endif
#ifdef CONFIG_UEVENT_HELPER
{
--
2.31.1



2021-12-08 20:47:39

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

Hey Aaron thanks for your patch! Please Cc the folks I added in future
iterations. My review below.

Andrew,

just Cc'ing you in case the sysctl changes in here need to rely on
your changes. If so then we'll have these changes (if and once reviewed)
go through your tree.

On Wed, Nov 24, 2021 at 05:33:27PM +0000, Aaron Tomlin wrote:
> Currently, only the initial module that tainted the kernel is
> recorded e.g. when an out-of-tree module is loaded.
>
> So the purpose of this patch is to allow the kernel to maintain a record of
> each unloaded module that taints the kernel. Now, in addition to displaying
> a list of linked modules (see print_modules()) e.g. in the event of an
> Oops, unloaded modules that carried a taint/or taints are also displayed.

This all does indeed seem useful to me.

> If the previously unloaded module is loaded once again it will be removed
> from the list only if the taints bitmask is the same.

That doesn't seem to be clear. What if say a user loads a module which
taints the kernel, and then unloads it, and then tries to load a similar
module with the same name but that it does not taint the kernel?

Would't we loose visibility that at one point the tainting module was
loaded? OK I see after reviewing the patch that we keep track of each
module instance unloaded with an attached unsigned long taints. So if
a module was unloaded with a different taint, we'd see it twice. Is that
right?

> The number of tracked modules is not fixed and can be modified accordingly.

The commit should mention what happens if the limit is reached.

> This feature is disabled by default.
>
> Signed-off-by: Aaron Tomlin <[email protected]>
> ---
> include/linux/module.h | 5 ++
> init/Kconfig | 9 ++++
> kernel/module.c | 106 +++++++++++++++++++++++++++++++++++++++--
> kernel/sysctl.c | 10 ++++
> 4 files changed, 126 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 8a298d820dbc..6f089953f28a 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -672,6 +672,11 @@ static inline bool is_livepatch_module(struct module *mod)
> bool is_module_sig_enforced(void);
> void set_module_sig_enforced(void);
>
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> +
> +extern int tainted_list_max_count; /* for sysctl */
> +
> +#endif
> #else /* !CONFIG_MODULES... */
>
> static inline struct module *__module_address(unsigned long addr)
> diff --git a/init/Kconfig b/init/Kconfig
> index bb0d6e6262b1..699c6cf948d8 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -2087,6 +2087,15 @@ config MODULE_FORCE_UNLOAD
> rmmod). This is mainly for kernel developers and desperate users.
> If unsure, say N.
>
> +config MODULE_UNLOAD_TAINT_TRACKING
> + bool "Tainted module unload tracking"
> + default n
> + help
> + This option allows you to maintain a record of each unloaded
> + module that taints the kernel. Now in addition to displaying a
> + list of linked modules e.g. in the event of an Oops, the
> + aforementioned details are also displayed. If unsure, say N.
> +
> config MODVERSIONS
> bool "Module versioning support"
> help
> diff --git a/kernel/module.c b/kernel/module.c
> index ed13917ea5f3..11e10b571d64 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -90,6 +90,17 @@
> */
> static DEFINE_MUTEX(module_mutex);
> static LIST_HEAD(modules);
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING

wc -l kernel/*.c| sort -r -n -k 1| head
84550 total
6143 kernel/workqueue.c
4810 kernel/module.c
4789 kernel/signal.c
3170 kernel/fork.c
2997 kernel/auditsc.c
2902 kernel/kprobes.c
2857 kernel/sysctl.c
2760 kernel/sys.c
2712 kernel/cpu.c

I think it is time we start splitting module.c out into components,
and here we might have a good opportunity to do that. There are tons
of nasty cob webs I'd like to start cleaning up from module.c. So
how about we start by moving module stuff out to kernel/modules/main.c
and then you can bring in your taint friend into that directory.

That way we can avoid the #ifdefs, which seem to attract huge spiders.

Maybe live patch stuff go in its own file too?

> +static LIST_HEAD(unloaded_tainted_modules);
> +static int tainted_list_count;
> +int __read_mostly tainted_list_max_count = 20;

Please read the guidance for __read_mostly on include/linux/cache.h.
I don't see performance metrics on your commit log to justify this use.
We don't want people to just be using that for anything they think is
read often... but not really in the context of what it was originally
designed for.

Loading and unloading modules... to keep track of *which ones are
tainted*. I'd find it extremely hard to believe this is such a common
thing and hot path that we need this.

In any case, since a linked list is used, I'm curious why did you
decide to bound this to an arbitrary limit of say 20? If this
feature is enabled why not make this boundless?

> +struct mod_unloaded_taint {
> + struct list_head list;
> + char name[MODULE_NAME_LEN];
> + unsigned long taints;
> +};
> +#endif
>
> /* Work queue for freeing init sections in success case */
> static void do_free_init(struct work_struct *w);
> @@ -310,6 +321,47 @@ int unregister_module_notifier(struct notifier_block *nb)
> }
> EXPORT_SYMBOL(unregister_module_notifier);
>
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> +
> +static int try_add_tainted_module(struct module *mod)
> +{
> + struct mod_unload_taint *mod_taint;
> +
> + module_assert_mutex_or_preempt();
> +
> + if (tainted_list_max_count >= 0 && mod->taints) {
> + if (!tainted_list_max_count &&
> + tainted_list_count >= tainted_list_max_count) {
> + pr_warn_once("%s: limit reached on the unloaded tainted modules list (count: %d).\n",
> + mod->name, tainted_list_count);
> + goto out;
> + }
> +
> + mod_taint = kmalloc(sizeof(*mod_taint), GFP_KERNEL);
> + if (unlikely(!mod_taint))
> + return -ENOMEM;
> + else {
> + strlcpy(mod_taint->name, mod->name,
> + MODULE_NAME_LEN);
> + mod_taint->taints = mod->taints;
> + list_add_rcu(&mod_taint->list,
> + &unloaded_tainted_modules);
> + tainted_list_count++;
> + }
> +out:
> + }
> + return 0;
> +}
> +
> +#else /* MODULE_UNLOAD_TAINT_TRACKING */
> +
> +static int try_add_tainted_module(struct module *mod)
> +{
> + return 0;
> +}
> +
> +#endif /* MODULE_UNLOAD_TAINT_TRACKING */
> +
> /*
> * We require a truly strong try_module_get(): 0 means success.
> * Otherwise an error is returned due to ongoing or failed
> @@ -579,6 +631,23 @@ struct module *find_module(const char *name)
> {
> return find_module_all(name, strlen(name), false);
> }
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> +struct mod_unload_taint *find_mod_unload_taint(const char *name, size_t len,
> + unsigned long taints)
> +{
> + struct mod_unload_taint *mod_taint;
> +
> + module_assert_mutex_or_preempt();
> +
> + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules, list,
> + lockdep_is_held(&module_mutex)) {
> + if (strlen(mod_taint->name) == len && !memcmp(mod_taint->name,
> + name, len) && mod_taint->taints & taints) {
> + return mod_taint;
> + }
> + }
> + return NULL;
> +#endif
>
> #ifdef CONFIG_SMP
>
> @@ -1121,13 +1190,13 @@ static inline int module_unload_init(struct module *mod)
> }
> #endif /* CONFIG_MODULE_UNLOAD */
>
> -static size_t module_flags_taint(struct module *mod, char *buf)
> +static size_t module_flags_taint(unsigned long taints, char *buf)
> {
> size_t l = 0;
> int i;
>
> for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
> - if (taint_flags[i].module && test_bit(i, &mod->taints))
> + if (taint_flags[i].module && test_bit(i, &taints))
> buf[l++] = taint_flags[i].c_true;
> }

Please make this its own separate patch. This makes it easier to review
the other changes.

>
> @@ -1194,7 +1263,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
> {
> size_t l;
>
> - l = module_flags_taint(mk->mod, buffer);
> + l = module_flags_taint(mk->mod->taints, buffer);
> buffer[l++] = '\n';
> return l;
> }
> @@ -2193,6 +2262,9 @@ static void free_module(struct module *mod)
> module_bug_cleanup(mod);
> /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
> synchronize_rcu();
> + if (try_add_tainted_module(mod))
> + pr_error("%s: adding tainted module to the unloaded tainted modules list failed.\n",
> + mod->name);
> mutex_unlock(&module_mutex);
>
> /* Clean up CFI for the module. */
> @@ -3670,6 +3742,9 @@ static noinline int do_init_module(struct module *mod)
> {
> int ret = 0;
> struct mod_initfree *freeinit;
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> + struct mod_unload_taint *old;
> +#endif
>
> freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL);
> if (!freeinit) {
> @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> mod->state = MODULE_STATE_LIVE;
> blocking_notifier_call_chain(&module_notify_list,
> MODULE_STATE_LIVE, mod);
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> + mutex_lock(&module_mutex);
> + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> + mod->taints);
> + if (old) {
> + list_del_rcu(&old->list);
> + synchronize_rcu();
> + }
> + mutex_unlock(&module_mutex);

But here we seem to delete an old instance of the module taint
history if it is loaded again and has the same taint properties.
Why?

I mean, if a taint happened once, and our goal is to keep track
of them, I'd imagine I'd want to know that this had happened
before, so instead how about just an increment counter for this,
so know how many times this has happened? Please use u64 for that.
I have some test environments where module unloading happens *a lot*.

> +#endif
>
> /* Delay uevent until module has finished its init routine */
> kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
> @@ -4511,7 +4596,7 @@ static char *module_flags(struct module *mod, char *buf)
> mod->state == MODULE_STATE_GOING ||
> mod->state == MODULE_STATE_COMING) {
> buf[bx++] = '(';
> - bx += module_flags_taint(mod, buf + bx);
> + bx += module_flags_taint(mod->taints, buf + bx);

This change can be its own separate patch.

> /* Show a - for module-is-being-unloaded */
> if (mod->state == MODULE_STATE_GOING)
> buf[bx++] = '-';
> @@ -4735,6 +4820,10 @@ void print_modules(void)
> {
> struct module *mod;
> char buf[MODULE_FLAGS_BUF_SIZE];
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> + struct mod_unload_taint *mod_taint;
> + size_t l;
> +#endif
>
> printk(KERN_DEFAULT "Modules linked in:");
> /* Most callers should already have preempt disabled, but make sure */
> @@ -4744,6 +4833,15 @@ void print_modules(void)
> continue;
> pr_cont(" %s%s", mod->name, module_flags(mod, buf));
> }
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> + printk(KERN_DEFAULT "\nUnloaded tainted modules:");
> + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules,
> + list) {
> + l = module_flags_taint(mod_taint->taints, buf);
> + buf[l++] = '\0';
> + pr_cont(" %s(%s)", mod_taint->name, buf);
> + }
> +#endif

Ugh yeah no, this has to be in its own file. Reading this file
is just one huge effort right now. Please make this a helper so we
don't have to see this eye blinding code.

> preempt_enable();
> if (last_unloaded_module[0])
> pr_cont(" [last unloaded: %s]", last_unloaded_module);
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 272f4a272f8c..290ffaa5b553 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2078,6 +2078,16 @@ static struct ctl_table kern_table[] = {
> .extra1 = SYSCTL_ONE,
> .extra2 = SYSCTL_ONE,
> },
> +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> + {
> + .procname = "tainted_list_max_count",
> + .data = &tainted_list_max_count,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec_minmax,
> + .extra1 = &neg_one,
> + },
> +#endif
> #endif
> #ifdef CONFIG_UEVENT_HELPER

Please see kernel/sysctl.c changes on linux-next, we're moving away
from everyone stuffing their sysctls in kernel/sysctl.c and there
you can find helpers and examples of how *not* to do this. It's
on the kernel table so you should be able to just
register_sysctl_init("kernel", modules_sysctls) and while at it,
if you spot any sysctls for module under the kern_table, please
move those over and then your patch would be adding just one new
entry to that new local modules_sysctls table.

We'll have to coordinate with Andrew given that if your changes
depend on those changes then we might as well get all your
changes through Andrew for the next release cycle.

Luis

2021-12-09 16:49:26

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Wed 2021-12-08 12:47 -0800, Luis Chamberlain wrote:
> Hey Aaron thanks for your patch!

Hi Luis,

Firstly, thank you for your review and feedback thus far.

> Please Cc the folks I added in future iterations.

All right.

> > If the previously unloaded module is loaded once again it will be removed
> > from the list only if the taints bitmask is the same.
>
> That doesn't seem to be clear. What if say a user loads a module which
> taints the kernel, and then unloads it, and then tries to load a similar
> module with the same name but that it does not taint the kernel?
>
> Would't we loose visibility that at one point the tainting module was
> loaded? OK I see after reviewing the patch that we keep track of each
> module instance unloaded with an attached unsigned long taints. So if
> a module was unloaded with a different taint, we'd see it twice. Is that
> right?

Indeed - is this acceptable to you? I prefer this approach rather than
remove it from the aforementioned list solely based on the module name.

> > The number of tracked modules is not fixed and can be modified accordingly.
>
> The commit should mention what happens if the limit is reached.

I will mention this accordingly.

> wc -l kernel/*.c| sort -r -n -k 1| head
> 84550 total
> 6143 kernel/workqueue.c
> 4810 kernel/module.c
> 4789 kernel/signal.c
> 3170 kernel/fork.c
> 2997 kernel/auditsc.c
> 2902 kernel/kprobes.c
> 2857 kernel/sysctl.c
> 2760 kernel/sys.c
> 2712 kernel/cpu.c
>
> I think it is time we start splitting module.c out into components,
> and here we might have a good opportunity to do that. There are tons
> of nasty cob webs I'd like to start cleaning up from module.c. So
> how about we start by moving module stuff out to kernel/modules/main.c
> and then you can bring in your taint friend into that directory.
>
> That way we can avoid the #ifdefs, which seem to attract huge spiders.

Agreed. This makes sense. I'll work on it.

> Maybe live patch stuff go in its own file too?

At first glance, I believe this is possible too.

>
> > +static LIST_HEAD(unloaded_tainted_modules);
> > +static int tainted_list_count;
> > +int __read_mostly tainted_list_max_count = 20;
>
> Please read the guidance for __read_mostly on include/linux/cache.h.
> I don't see performance metrics on your commit log to justify this use.
> We don't want people to just be using that for anything they think is
> read often... but not really in the context of what it was originally
> desinged for.

Understood.

> Loading and unloading modules... to keep track of *which ones are
> tainted*. I'd find it extremely hard to believe this is such a common
> thing and hot path that we need this.
>
> In any case, since a linked list is used, I'm curious why did you
> decide to bound this to an arbitrary limit of say 20? If this
> feature is enabled why not make this boundless?

It can be, once set to 0. Indeed, the limit specified above is arbitrary.
Personally, I prefer to have some limit that can be controlled by the user.
In fact, if agreed, I can incorporate the limit [when specified] into the
output generated via print_modules().

>
> > +struct mod_unloaded_taint {
> > + struct list_head list;
> > + char name[MODULE_NAME_LEN];
> > + unsigned long taints;
> > +};
> > +#endif
> >
> > /* Work queue for freeing init sections in success case */
> > static void do_free_init(struct work_struct *w);
> > @@ -310,6 +321,47 @@ int unregister_module_notifier(struct notifier_block *nb)
> > }
> > EXPORT_SYMBOL(unregister_module_notifier);
> >
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > +
> > +static int try_add_tainted_module(struct module *mod)
> > +{
> > + struct mod_unload_taint *mod_taint;
> > +
> > + module_assert_mutex_or_preempt();
> > +
> > + if (tainted_list_max_count >= 0 && mod->taints) {
> > + if (!tainted_list_max_count &&
> > + tainted_list_count >= tainted_list_max_count) {
> > + pr_warn_once("%s: limit reached on the unloaded tainted modules list (count: %d).\n",
> > + mod->name, tainted_list_count);
> > + goto out;
> > + }
> > +
> > + mod_taint = kmalloc(sizeof(*mod_taint), GFP_KERNEL);
> > + if (unlikely(!mod_taint))
> > + return -ENOMEM;
> > + else {
> > + strlcpy(mod_taint->name, mod->name,
> > + MODULE_NAME_LEN);
> > + mod_taint->taints = mod->taints;
> > + list_add_rcu(&mod_taint->list,
> > + &unloaded_tainted_modules);
> > + tainted_list_count++;
> > + }
> > +out:
> > + }
> > + return 0;
> > +}
> > +
> > +#else /* MODULE_UNLOAD_TAINT_TRACKING */
> > +
> > +static int try_add_tainted_module(struct module *mod)
> > +{
> > + return 0;
> > +}
> > +
> > +#endif /* MODULE_UNLOAD_TAINT_TRACKING */
> > +
> > /*
> > * We require a truly strong try_module_get(): 0 means success.
> > * Otherwise an error is returned due to ongoing or failed
> > @@ -579,6 +631,23 @@ struct module *find_module(const char *name)
> > {
> > return find_module_all(name, strlen(name), false);
> > }
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > +struct mod_unload_taint *find_mod_unload_taint(const char *name, size_t len,
> > + unsigned long taints)
> > +{
> > + struct mod_unload_taint *mod_taint;
> > +
> > + module_assert_mutex_or_preempt();
> > +
> > + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules, list,
> > + lockdep_is_held(&module_mutex)) {
> > + if (strlen(mod_taint->name) == len && !memcmp(mod_taint->name,
> > + name, len) && mod_taint->taints & taints) {
> > + return mod_taint;
> > + }
> > + }
> > + return NULL;
> > +#endif
> >
> > #ifdef CONFIG_SMP
> >
> > @@ -1121,13 +1190,13 @@ static inline int module_unload_init(struct module *mod)
> > }
> > #endif /* CONFIG_MODULE_UNLOAD */
> >
> > -static size_t module_flags_taint(struct module *mod, char *buf)
> > +static size_t module_flags_taint(unsigned long taints, char *buf)
> > {
> > size_t l = 0;
> > int i;
> >
> > for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
> > - if (taint_flags[i].module && test_bit(i, &mod->taints))
> > + if (taint_flags[i].module && test_bit(i, &taints))
> > buf[l++] = taint_flags[i].c_true;
> > }
>
> Please make this its own separate patch. This makes it easier to review
> the other changes.

No problem, will do.

> >
> > @@ -1194,7 +1263,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
> > {
> > size_t l;
> >
> > - l = module_flags_taint(mk->mod, buffer);
> > + l = module_flags_taint(mk->mod->taints, buffer);
> > buffer[l++] = '\n';
> > return l;
> > }
> > @@ -2193,6 +2262,9 @@ static void free_module(struct module *mod)
> > module_bug_cleanup(mod);
> > /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
> > synchronize_rcu();
> > + if (try_add_tainted_module(mod))
> > + pr_error("%s: adding tainted module to the unloaded tainted modules list failed.\n",
> > + mod->name);
> > mutex_unlock(&module_mutex);
> >
> > /* Clean up CFI for the module. */
> > @@ -3670,6 +3742,9 @@ static noinline int do_init_module(struct module *mod)
> > {
> > int ret = 0;
> > struct mod_initfree *freeinit;
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > + struct mod_unload_taint *old;
> > +#endif
> >
> > freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL);
> > if (!freeinit) {
> > @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> > mod->state = MODULE_STATE_LIVE;
> > blocking_notifier_call_chain(&module_notify_list,
> > MODULE_STATE_LIVE, mod);
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > + mutex_lock(&module_mutex);
> > + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> > + mod->taints);
> > + if (old) {
> > + list_del_rcu(&old->list);
> > + synchronize_rcu();
> > + }
> > + mutex_unlock(&module_mutex);
>
> But here we seem to delete an old instance of the module taint
> history if it is loaded again and has the same taint properties.
> Why?

At first glance, in this particular case, I believe this makes sense to
avoid duplication i.e. the taint module would be stored in the 'modules'
list thus should be shown once via print_modules(). So, the initial
objective was to only track a "tainted" module when unloaded and once
added/or loaded again [with the same taint(s)] further tracking cease.

> I mean, if a taint happened once, and our goal is to keep track
> of them, I'd imagine I'd want to know that this had happened
> before, so instead how about just an increment counter for this,
> so know how many times this has happened? Please use u64 for that.
> I have some test environments where module unloaded happens *a lot*.

If I understand correctly, I do not like this approach but indeed it could
work. Personally, I would like to incorporate the above idea i.e. track
the unload count, into the initial goal.

>
> > +#endif
> >
> > /* Delay uevent until module has finished its init routine */
> > kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
> > @@ -4511,7 +4596,7 @@ static char *module_flags(struct module *mod, char *buf)
> > mod->state == MODULE_STATE_GOING ||
> > mod->state == MODULE_STATE_COMING) {
> > buf[bx++] = '(';
> > - bx += module_flags_taint(mod, buf + bx);
> > + bx += module_flags_taint(mod->taints, buf + bx);
>
> This change can be its own separate patch.

Will do.

>
> > /* Show a - for module-is-being-unloaded */
> > if (mod->state == MODULE_STATE_GOING)
> > buf[bx++] = '-';
> > @@ -4735,6 +4820,10 @@ void print_modules(void)
> > {
> > struct module *mod;
> > char buf[MODULE_FLAGS_BUF_SIZE];
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > + struct mod_unload_taint *mod_taint;
> > + size_t l;
> > +#endif
> >
> > printk(KERN_DEFAULT "Modules linked in:");
> > /* Most callers should already have preempt disabled, but make sure */
> > @@ -4744,6 +4833,15 @@ void print_modules(void)
> > continue;
> > pr_cont(" %s%s", mod->name, module_flags(mod, buf));
> > }
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > + printk(KERN_DEFAULT "\nUnloaded tainted modules:");
> > + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules,
> > + list) {
> > + l = module_flags_taint(mod_taint->taints, buf);
> > + buf[l++] = '\0';
> > + pr_cont(" %s(%s)", mod_taint->name, buf);
> > + }
> > +#endif
>
> Ugh yeah no, this has to be in its own file. Reading this file
> is just one huge effort right now. Please make this a helper so we
> don't have to see this eye blinding code.

Sure, no problem.

>
> > preempt_enable();
> > if (last_unloaded_module[0])
> > pr_cont(" [last unloaded: %s]", last_unloaded_module);
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 272f4a272f8c..290ffaa5b553 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -2078,6 +2078,16 @@ static struct ctl_table kern_table[] = {
> > .extra1 = SYSCTL_ONE,
> > .extra2 = SYSCTL_ONE,
> > },
> > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > + {
> > + .procname = "tainted_list_max_count",
> > + .data = &tainted_list_max_count,
> > + .maxlen = sizeof(int),
> > + .mode = 0644,
> > + .proc_handler = proc_dointvec_minmax,
> > + .extra1 = &neg_one,
> > + },
> > +#endif
> > #endif
> > #ifdef CONFIG_UEVENT_HELPER
>
> Please see kernel/sysctl.c changes on linux-next, we're moving away
> from everyone stuffing their sysclts in kernel/sysctl.c and there
> you can find helpers and examples of how *not* to do this. Its
> on the kernel table so you should be able to just
> register_sysctl_init("kernel", modules_sysctls) and while at it,
> if you spot any sysctls for module under the kern_table, please
> move those over and then your patch would be adding just one new
> entry to that new local modules_sysctls table.
>
> We'll have to coordinate with Andrew given that if your changes
> depend on those changes then we might as well get all your
> changes through Andrew for the next release cycle.

All right. I will make the required changes. Thanks once again.



Regards,

--
Aaron Tomlin


2021-12-09 23:42:16

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Thu, Dec 09, 2021 at 04:49:17PM +0000, Aaron Tomlin wrote:
> On Wed 2021-12-08 12:47 -0800, Luis Chamberlain wrote:
> > > If the previously unloaded module is loaded once again it will be removed
> > > from the list only if the taints bitmask is the same.
> >
> > That doesn't seem to be clear. What if say a user loads a module which
> > taints the kernel, and then unloads it, and then tries to load a similar
> > module with the same name but that it does not taint the kernel?
> >
> > Wouldn't we lose visibility that at one point the tainting module was
> > loaded? OK I see after reviewing the patch that we keep track of each
> > module instance unloaded with an attached unsigned long taints. So if
> > a module was unloaded with a different taint, we'd see it twice. Is that
> > right?
>
> Indeed - is this acceptable to you? I prefer this approach rather than
> remove it from the aforementioned list solely based on the module name.

Sure, it makes sense to keep all the stupid ways we are harming the
kernel. Makes sense. The other point I made about count though would
be good, in case the taint was the same.

> > wc -l kernel/*.c| sort -r -n -k 1| head
> > 84550 total
> > 6143 kernel/workqueue.c
> > 4810 kernel/module.c
> > 4789 kernel/signal.c
> > 3170 kernel/fork.c
> > 2997 kernel/auditsc.c
> > 2902 kernel/kprobes.c
> > 2857 kernel/sysctl.c
> > 2760 kernel/sys.c
> > 2712 kernel/cpu.c
> >
> > I think it is time we start splitting module.c out into components,
> > and here we might have a good opportunity to do that. There are tons
> > of nasty cob webs I'd like to start cleaning up from module.c. So
> > how about we start by moving module stuff out to kernel/modules/main.c
> > and then you can bring in your taint friend into that directory.
> >
> > That way we can avoid the #ifdefs, which seem to attract huge spiders.
>
> Agreed. This makes sense. I'll work on it.

Wonderful, thanks!

> > Maybe live patch stuff go in its own file too?
>
> At first glance, I believe this is possible too.

Great! Thanks for being willing to do this!

> > Loading and unloading modules... to keep track of *which ones are
> > tainted*. I'd find it extremely hard to believe this is such a common
> > thing and hot path that we need this.
> >
> > In any case, since a linked list is used, I'm curious why did you
> > decide to bound this to an arbitrary limit of say 20? If this
> > feature is enabled why not make this boundless?
>
> It can be, once set to 0. Indeed, the limit specified above is arbitrary.
> Personally, I prefer to have some limit that can be controlled by the user.
> In fact, if agreed, I can incorporate the limit [when specified] into the
> output generated via print_modules().

If someone enables this feature I can't think of a reason why they
would want to limit this to some arbitrary number. So my preference
is to remove that limitation completely. I see no point to it.

> > > @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> > > mod->state = MODULE_STATE_LIVE;
> > > blocking_notifier_call_chain(&module_notify_list,
> > > MODULE_STATE_LIVE, mod);
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + mutex_lock(&module_mutex);
> > > + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> > > + mod->taints);
> > > + if (old) {
> > > + list_del_rcu(&old->list);
> > > + synchronize_rcu();
> > > + }
> > > + mutex_unlock(&module_mutex);
> >
> > But here we seem to delete an old instance of the module taint
> > history if it is loaded again and has the same taint properties.
> > Why?
>
> At first glance, in this particular case, I believe this makes sense to
> avoid duplication

If you just bump the count then its not duplication, it just adds
more information that the same module name with the same taint flag
has been unloaded now more than once.

> i.e. the taint module would be stored in the 'modules'
> list thus should be shown once via print_modules(). So, the initial
> objective was to only track a "tainted" module when unloaded and once
> added/or loaded again [with the same taint(s)] further tracking cease.

This makes me wonder, why not just grow the list at driver insertion
time, rather than removal.

> > I mean, if a taint happened once, and our goal is to keep track
> > of them, I'd imagine I'd want to know that this had happened
> > before, so instead how about just an increment counter for this,
> > so know how many times this has happened? Please use u64 for that.
> > I have some test environments where module unloaded happens *a lot*.
>
> If I understand correctly, I do not like this approach but indeed it could
> work.

I'm a bit confused, because here you seem to suggest you don't like the
idea, and then...

> Personally, I would like to incorporate the above idea i.e. track
> the unload count, into the initial goal.

Here you say you'd like to keep the unload count.

> > Please see kernel/sysctl.c changes on linux-next, we're moving away
> > from everyone stuffing their sysclts in kernel/sysctl.c and there
> > you can find helpers and examples of how *not* to do this. Its
> > on the kernel table so you should be able to just
> > register_sysctl_init("kernel", modules_sysctls) and while at it,
> > if you spot any sysctls for module under the kern_table, please
> > move those over and then your patch would be adding just one new
> > entry to that new local modules_sysctls table.
> >
> > We'll have to coordinate with Andrew given that if your changes
> > depend on those changes then we might as well get all your
> > changes through Andrew for the next release cycle.
>
> All right. I will make the required changes. Thanks once again.

Sure, so hey just one more thing. Can you add a simple selftest
lib/test_taint.c which can be used to test tainting and your new
tracker? You can add a new selftest on

tools/testing/selftests/module/

I had already written some module based testing on
tools/testing/selftests/kmod/kmod.sh so you can borrow stuff
from there if you find it useful. But I think we need to start
doing basic testing for module. I know Lucas has tons of test
on kmod, so we should also look at what is there and what needs
testing outside of that.

Then there is the question of what should be tested using kunit and
or selftests. From my experience, if you need a shell, use selftests.
Also, if you need parallelization, use selftests, given kunit by
default uses a uniprocessor architecture, user-mode-linux. I'll let
you figure out what is the best place to add the test for this. It
could just be its a better place to add these tests to kmod upstream
as there are tons of tests there already. But kunit test can't be
added there.

Live patching already has its own set of selftests.

Luis

2021-12-10 10:00:56

by Petr Mladek

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Thu 2021-12-09 15:42:08, Luis Chamberlain wrote:
> On Thu, Dec 09, 2021 at 04:49:17PM +0000, Aaron Tomlin wrote:
> > On Wed 2021-12-08 12:47 -0800, Luis Chamberlain wrote:
> > > Loading and unloading modules... to keep track of *which ones are
> > > tainted*. I'd find it extremely hard to believe this is such a common
> > > thing and hot path that we need this.
> > >
> > > In any case, since a linked list is used, I'm curious why did you
> > > decide to bound this to an arbitrary limit of say 20? If this
> > > feature is enabled why not make this boundless?
> >
> > It can be, once set to 0. Indeed, the limit specified above is arbitrary.
> > Personally, I prefer to have some limit that can be controlled by the user.
> > In fact, if agreed, I can incorporate the limit [when specified] into the
> > output generated via print_modules().
>
> If someone enables this feature I can't think of a reason why they
> would want to limit this to some arbitrary number. So my preference
> is to remove that limitation completely. I see no point to it.

I agree with Luis here. We could always add the limit later when
people report some real life problems with too long list. It is
always good to know that someone did some heavy lifting in
the system.

It might be even interesting to remember timestamp of the removal
to match it with another events reported in the system log.

> > > > @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> > > > mod->state = MODULE_STATE_LIVE;
> > > > blocking_notifier_call_chain(&module_notify_list,
> > > > MODULE_STATE_LIVE, mod);
> > > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > > + mutex_lock(&module_mutex);
> > > > + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> > > > + mod->taints);
> > > > + if (old) {
> > > > + list_del_rcu(&old->list);
> > > > + synchronize_rcu();
> > > > + }
> > > > + mutex_unlock(&module_mutex);
> > >
> > > But here we seem to delete an old instance of the module taint
> > > history if it is loaded again and has the same taint properties.
> > > Why?
> >
> > At first glance, in this particular case, I believe this makes sense to
> > avoid duplication
>
> If you just bump the count then its not duplication, it just adds
> more information that the same module name with the same taint flag
> has been unloaded now more than once.

Please, do not remove records that a module was removed. IMHO, it
might be useful to track all removed modules, including the non-tainted
ones. Module removal is always tricky and not much tested. The taint
flags might be just shown as extra information in the output.

Best Regards,
Petr

2021-12-10 15:49:10

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Thu 2021-12-09 15:42 -0800, Luis Chamberlain wrote:
> > Indeed - is this acceptable to you? I prefer this approach rather than
> > remove it from the aforementioned list solely based on the module name.
>
> Sure, it makes sense to keep all the stupid ways we are harming the
> kernel. Makes sense. The other point I made about count though would
> be good, in case the taint was the same.

Agreed. So, just to confirm you'd prefer not to remove any module that tainted
the kernel from the aforementioned list when the same module and taints
bitmask is reintroduced? If I understand correctly, we'd simply maintain a
list of modules that tainted the kernel during module deletion/or unload
and their respective unload count? If so then this was not my original
objective yet I'm happy with this approach too - I'll take on this
implementation in the next iteration.

> > It can be, once set to 0. Indeed, the limit specified above is arbitrary.
> > Personally, I prefer to have some limit that can be controlled by the user.
> > In fact, if agreed, I can incorporate the limit [when specified] into the
> > output generated via print_modules().
>
> If someone enables this feature I can't think of a reason why they
> would want to limit this to some arbitrary number. So my preference
> is to remove that limitation completely. I see no point to it.

Fair enough. If necessary we could introduce the above later.

> > > > @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> > > > mod->state = MODULE_STATE_LIVE;
> > > > blocking_notifier_call_chain(&module_notify_list,
> > > > MODULE_STATE_LIVE, mod);
> > > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > > + mutex_lock(&module_mutex);
> > > > + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> > > > + mod->taints);
> > > > + if (old) {
> > > > + list_del_rcu(&old->list);
> > > > + synchronize_rcu();
> > > > + }
> > > > + mutex_unlock(&module_mutex);
> > >
> > > But here we seem to delete an old instance of the module taint
> > > history if it is loaded again and has the same taint properties.
> > > Why?

Yes, this was my original approach. Once the same module [with the same
taints bitmask] is reintroduced it will be listed on the 'modules' list
thus no need to track it on the unloaded list anymore. That being said, as
per the above, let's now keep track of each removal and maintain an unload
count.

> > At first glance, in this particular case, I believe this makes sense to
> > avoid duplication
>
> If you just bump the count then its not duplication, it just adds
> more information that the same module name with the same taint flag
> has been unloaded now more than once.

Agreed.

> > All right. I will make the required changes. Thanks once again.
>
> Sure, so hey just one more thing. Can you add a simple selftest
> lib/test_taint.c which can be used to test tainting and you new
> tracker ? You can add a new selftest on
>
> tools/testing/selftests/module/
>
> I had already written some module based testing on
> tools/testing/selftests/kmod/kmod.sh so you can borrow stuff
> from there if you find it useful. But I think we need to start
> doing basic testing for module. I know Lucas has tons of test
> on kmod, so we should also look at what is there and what needs
> testing outside of that.
>
> Then there is the question of what should be tested using kunit and
> or selftests. From my experience, if you need a shell, use selftests.
> Also, if you need parallelization, use selftests, given kunit by
> default uses a uniprocessor architecture, user-mode-linux. I'll let
> you figure out what is the best place to add the test for this. It
> could just be its a better place to add these tests to kmod upstream
> as there are tons of tests there already. But kunit test can't be
> added there.
>
> Live patching already has its own set of selftests.


Sure - will do. Thanks once again!

--
Aaron Tomlin


2021-12-10 16:09:38

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Fri 2021-12-10 11:00 +0100, Petr Mladek wrote:
> > If someone enables this feature I can't think of a reason why they
> > would want to limit this to some arbitrary number. So my preference
> > is to remove that limitation completely. I see no point to it.
>
> I agree with Luis here. We could always add the limit later when
> people report some real life problems with too long list. It is
> always good to know that someone did some heavy lifting in
> the system.

Fair enough.

> It might be even interesting to remember timestamp of the removal
> to match it with another events reported in the system log.

I'm not so sure about this. We could gather such details already via Ftrace
(e.g. see load_module()). Personally, I'd prefer to maintain a simple list.

> > If you just bump the count then its not duplication, it just adds
> > more information that the same module name with the same taint flag
> > has been unloaded now more than once.
>
> Please, do not remove records that a module was removed. IMHO, it
> might be useful to track all removed module, including the non-tainted
> ones. Module removal is always tricky and not much tested. The tain
> flags might be just shown as extra information in the output.

This is an interesting suggestion. Albeit, as per the subject, I prefer to
just keep track of any module that tainted the kernel. That being said,
Petr, if you'd prefer to track each module unload/or deletion event, then I
would suggest for instance to remove a module once it has been reintroduced
or maintain an unload count as suggested by Luis.

Please let me know your thoughts.


Kind regards,

--
Aaron Tomlin


2021-12-10 17:03:41

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Fri, Dec 10, 2021 at 11:00:52AM +0100, Petr Mladek wrote:
> Please, do not remove records that a module was removed. IMHO, it
> might be useful to track all removed module, including the non-tainted
> ones.

Then we'd need two features. One modules removed, and another which
limits this to only tainted modules. On kernel-ci systems where I
try to reproduce issues with fstests or blktests I might be unloading
a module 10,000 times, and so for those systems I'd like to disable
the tracking of all modules removed, otherwise we'd end up with
-ENOMEM eventually.

> Module removal is always tricky and not much tested.

It is tricky but I have been trying to correct issues along that path
and given that fstests and blktests uses it heavily I want to start
dispelling the false narrative that this is not a common use case.

> The tain flags might be just shown as extra information in the output.

Yes!

Luis

2021-12-10 17:09:34

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Fri, Dec 10, 2021 at 04:09:31PM +0000, Aaron Tomlin wrote:
> On Fri 2021-12-10 11:00 +0100, Petr Mladek wrote:
> This is an interesting suggestion. Albeit, as per the subject, I prefer to
> just keep track of any module that tainted the kernel. That being said,
> Petr, if you'd prefer to track each module unload/or deletion event, then I
> would suggest for instance to remove a module once it has been reintroduced
> or maintain an unload count as suggested by Luis.

Come to think of this again, although at first it might be enticing to
keep track of module unloads (without taint), we have to ask ourselves
who would need / enable such a feature. And I think Aaron is right that
this might be better tracked in userspace.

The taint though, that seems critical due to the potential harm.

Maybe how many unloads one has done though, that might be useful
debugging information and does not create a huge overhead like a
potential -ENOMEM.

Luis

2021-12-13 13:00:23

by Allen

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

>
> Hi Luis,
>
> Firstly, thank you for your review and feedback thus far.
>
> > Please Cc the folks I added in future iterations.
>
> All right.
>
> > > If the previously unloaded module is loaded once again it will be removed
> > > from the list only if the taints bitmask is the same.
> >
> > That doesn't seem to be clear. What if say a user loads a module which
> > taints the kernel, and then unloads it, and then tries to load a similar
> > module with the same name but that it does not taint the kernel?
> >
> > Would't we loose visibility that at one point the tainting module was
> > loaded? OK I see after reviewing the patch that we keep track of each
> > module instance unloaded with an attached unsigned long taints. So if
> > a module was unloaded with a different taint, we'd see it twice. Is that
> > right?
>
> Indeed - is this acceptable to you? I prefer this approach rather than
> remove it from the aforementioned list solely based on the module name.
>
> > > The number of tracked modules is not fixed and can be modified accordingly.
> >
> > The commit should mention what happens if the limit is reached.
>
> I will mention this accordingly.
>
> > wc -l kernel/*.c| sort -r -n -k 1| head
> > 84550 total
> > 6143 kernel/workqueue.c
> > 4810 kernel/module.c
> > 4789 kernel/signal.c
> > 3170 kernel/fork.c
> > 2997 kernel/auditsc.c
> > 2902 kernel/kprobes.c
> > 2857 kernel/sysctl.c
> > 2760 kernel/sys.c
> > 2712 kernel/cpu.c
> >
> > I think it is time we start splitting module.c out into components,
> > and here we might have a good opportunity to do that. There are tons
> > of nasty cob webs I'd like to start cleaning up from module.c. So
> > how about we start by moving module stuff out to kernel/modules/main.c
> > and then you can bring in your taint friend into that directory.
> >
> > That way we can avoid the #ifdefs, which seem to attract huge spiders.
>
> Agreed. This makes sense. I'll work on it.

Aaron, Luis,

I have some ideas and did some work on it. Let me know if we could
work together on this.

- Allen

>
> > Maybe live patch stuff go in its own file too?
>
> At first glance, I believe this is possible too.
>
> >
> > > +static LIST_HEAD(unloaded_tainted_modules);
> > > +static int tainted_list_count;
> > > +int __read_mostly tainted_list_max_count = 20;
> >
> > Please read the guidance for __read_mostly on include/linux/cache.h.
> > I don't see performance metrics on your commit log to justify this use.
> > We don't want people to just be using that for anything they think is
> > read often... but not really in the context of what it was originally
> > desinged for.
>
> Understood.
>
> > Loading and unloading modules... to keep track of *which ones are
> > tainted*. I'd find it extremely hard to believe this is such a common
> > thing and hot path that we need this.
> >
> > In any case, since a linked list is used, I'm curious why did you
> > decide to bound this to an arbitrary limit of say 20? If this
> > feature is enabled why not make this boundless?
>
> It can be, once set to 0. Indeed, the limit specified above is arbitrary.
> Personally, I prefer to have some limit that can be controlled by the user.
> In fact, if agreed, I can incorporate the limit [when specified] into the
> output generated via print_modules().
>
> >
> > > +struct mod_unloaded_taint {
> > > + struct list_head list;
> > > + char name[MODULE_NAME_LEN];
> > > + unsigned long taints;
> > > +};
> > > +#endif
> > >
> > > /* Work queue for freeing init sections in success case */
> > > static void do_free_init(struct work_struct *w);
> > > @@ -310,6 +321,47 @@ int unregister_module_notifier(struct notifier_block *nb)
> > > }
> > > EXPORT_SYMBOL(unregister_module_notifier);
> > >
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > +
> > > +static int try_add_tainted_module(struct module *mod)
> > > +{
> > > + struct mod_unload_taint *mod_taint;
> > > +
> > > + module_assert_mutex_or_preempt();
> > > +
> > > + if (tainted_list_max_count >= 0 && mod->taints) {
> > > + if (!tainted_list_max_count &&
> > > + tainted_list_count >= tainted_list_max_count) {
> > > + pr_warn_once("%s: limit reached on the unloaded tainted modules list (count: %d).\n",
> > > + mod->name, tainted_list_count);
> > > + goto out;
> > > + }
> > > +
> > > + mod_taint = kmalloc(sizeof(*mod_taint), GFP_KERNEL);
> > > + if (unlikely(!mod_taint))
> > > + return -ENOMEM;
> > > + else {
> > > + strlcpy(mod_taint->name, mod->name,
> > > + MODULE_NAME_LEN);
> > > + mod_taint->taints = mod->taints;
> > > + list_add_rcu(&mod_taint->list,
> > > + &unloaded_tainted_modules);
> > > + tainted_list_count++;
> > > + }
> > > +out:
> > > + }
> > > + return 0;
> > > +}
> > > +
> > > +#else /* MODULE_UNLOAD_TAINT_TRACKING */
> > > +
> > > +static int try_add_tainted_module(struct module *mod)
> > > +{
> > > + return 0;
> > > +}
> > > +
> > > +#endif /* MODULE_UNLOAD_TAINT_TRACKING */
> > > +
> > > /*
> > > * We require a truly strong try_module_get(): 0 means success.
> > > * Otherwise an error is returned due to ongoing or failed
> > > @@ -579,6 +631,23 @@ struct module *find_module(const char *name)
> > > {
> > > return find_module_all(name, strlen(name), false);
> > > }
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > +struct mod_unload_taint *find_mod_unload_taint(const char *name, size_t len,
> > > + unsigned long taints)
> > > +{
> > > + struct mod_unload_taint *mod_taint;
> > > +
> > > + module_assert_mutex_or_preempt();
> > > +
> > > + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules, list,
> > > + lockdep_is_held(&module_mutex)) {
> > > + if (strlen(mod_taint->name) == len && !memcmp(mod_taint->name,
> > > + name, len) && mod_taint->taints & taints) {
> > > + return mod_taint;
> > > + }
> > > + }
> > > + return NULL;
> > > +#endif
> > >
> > > #ifdef CONFIG_SMP
> > >
> > > @@ -1121,13 +1190,13 @@ static inline int module_unload_init(struct module *mod)
> > > }
> > > #endif /* CONFIG_MODULE_UNLOAD */
> > >
> > > -static size_t module_flags_taint(struct module *mod, char *buf)
> > > +static size_t module_flags_taint(unsigned long taints, char *buf)
> > > {
> > > size_t l = 0;
> > > int i;
> > >
> > > for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
> > > - if (taint_flags[i].module && test_bit(i, &mod->taints))
> > > + if (taint_flags[i].module && test_bit(i, &taints))
> > > buf[l++] = taint_flags[i].c_true;
> > > }
> >
> > Please make this its own separate patch. This makes it easier to review
> > the other changes.
>
> No problem, will do.
>
> > >
> > > @@ -1194,7 +1263,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
> > > {
> > > size_t l;
> > >
> > > - l = module_flags_taint(mk->mod, buffer);
> > > + l = module_flags_taint(mk->mod->taints, buffer);
> > > buffer[l++] = '\n';
> > > return l;
> > > }
> > > @@ -2193,6 +2262,9 @@ static void free_module(struct module *mod)
> > > module_bug_cleanup(mod);
> > > /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
> > > synchronize_rcu();
> > > + if (try_add_tainted_module(mod))
> > > + pr_error("%s: adding tainted module to the unloaded tainted modules list failed.\n",
> > > + mod->name);
> > > mutex_unlock(&module_mutex);
> > >
> > > /* Clean up CFI for the module. */
> > > @@ -3670,6 +3742,9 @@ static noinline int do_init_module(struct module *mod)
> > > {
> > > int ret = 0;
> > > struct mod_initfree *freeinit;
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + struct mod_unload_taint *old;
> > > +#endif
> > >
> > > freeinit = kmalloc(sizeof(*freeinit), GFP_KERNEL);
> > > if (!freeinit) {
> > > @@ -3703,6 +3778,16 @@ static noinline int do_init_module(struct module *mod)
> > > mod->state = MODULE_STATE_LIVE;
> > > blocking_notifier_call_chain(&module_notify_list,
> > > MODULE_STATE_LIVE, mod);
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + mutex_lock(&module_mutex);
> > > + old = find_mod_unload_taint(mod->name, strlen(mod->name),
> > > + mod->taints);
> > > + if (old) {
> > > + list_del_rcu(&old->list);
> > > + synchronize_rcu();
> > > + }
> > > + mutex_unlock(&module_mutex);
> >
> > But here we seem to delete an old instance of the module taint
> > history if it is loaded again and has the same taint properties.
> > Why?
>
> At first glance, in this particular case, I believe this makes sense to
> avoid duplication i.e. the taint module would be stored in the 'modules'
> list thus should be shown once via print_modules(). So, the initial
> objective was to only track a "tainted" module when unloaded and once
> added/or loaded again [with the same taint(s)] further tracking cease.
>
> > I mean, if a taint happened once, and our goal is to keep track
> > of them, I'd imagine I'd want to know that this had happened
> > before, so instead how about just an increment counter for this,
> > so know how many times this has happened? Please use u64 for that.
> > I have some test environments where module unloaded happens *a lot*.
>
> If I understand correctly, I do not like this approach but indeed it could
> work. Personally, I would like to incorporate the above idea i.e. track
> the unload count, into the initial goal.
>
> >
> > > +#endif
> > >
> > > /* Delay uevent until module has finished its init routine */
> > > kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
> > > @@ -4511,7 +4596,7 @@ static char *module_flags(struct module *mod, char *buf)
> > > mod->state == MODULE_STATE_GOING ||
> > > mod->state == MODULE_STATE_COMING) {
> > > buf[bx++] = '(';
> > > - bx += module_flags_taint(mod, buf + bx);
> > > + bx += module_flags_taint(mod->taints, buf + bx);
> >
> > This change can be its own separate patch.
>
> Will do.
>
> >
> > > /* Show a - for module-is-being-unloaded */
> > > if (mod->state == MODULE_STATE_GOING)
> > > buf[bx++] = '-';
> > > @@ -4735,6 +4820,10 @@ void print_modules(void)
> > > {
> > > struct module *mod;
> > > char buf[MODULE_FLAGS_BUF_SIZE];
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + struct mod_unload_taint *mod_taint;
> > > + size_t l;
> > > +#endif
> > >
> > > printk(KERN_DEFAULT "Modules linked in:");
> > > /* Most callers should already have preempt disabled, but make sure */
> > > @@ -4744,6 +4833,15 @@ void print_modules(void)
> > > continue;
> > > pr_cont(" %s%s", mod->name, module_flags(mod, buf));
> > > }
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + printk(KERN_DEFAULT "\nUnloaded tainted modules:");
> > > + list_for_each_entry_rcu(mod_taint, &unloaded_tainted_modules,
> > > + list) {
> > > + l = module_flags_taint(mod_taint->taints, buf);
> > > + buf[l++] = '\0';
> > > + pr_cont(" %s(%s)", mod_taint->name, buf);
> > > + }
> > > +#endif
> >
> > Ugh yeah no, this has to be in its own file. Reading this file
> > is just one huge effort right now. Please make this a helper so we
> > don't have to see this eye blinding code.
>
> Sure, no problem.
>
> >
> > > preempt_enable();
> > > if (last_unloaded_module[0])
> > > pr_cont(" [last unloaded: %s]", last_unloaded_module);
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 272f4a272f8c..290ffaa5b553 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -2078,6 +2078,16 @@ static struct ctl_table kern_table[] = {
> > > .extra1 = SYSCTL_ONE,
> > > .extra2 = SYSCTL_ONE,
> > > },
> > > +#ifdef CONFIG_MODULE_UNLOAD_TAINT_TRACKING
> > > + {
> > > + .procname = "tainted_list_max_count",
> > > + .data = &tainted_list_max_count,
> > > + .maxlen = sizeof(int),
> > > + .mode = 0644,
> > > + .proc_handler = proc_dointvec_minmax,
> > > + .extra1 = &neg_one,
> > > + },
> > > +#endif
> > > #endif
> > > #ifdef CONFIG_UEVENT_HELPER
> >
> > Please see kernel/sysctl.c changes on linux-next, we're moving away
> > from everyone stuffing their sysclts in kernel/sysctl.c and there
> > you can find helpers and examples of how *not* to do this. Its
> > on the kernel table so you should be able to just
> > register_sysctl_init("kernel", modules_sysctls) and while at it,
> > if you spot any sysctls for module under the kern_table, please
> > move those over and then your patch would be adding just one new
> > entry to that new local modules_sysctls table.
> >
> > We'll have to coordinate with Andrew given that if your changes
> > depend on those changes then we might as well get all your
> > changes through Andrew for the next release cycle.
>
> All right. I will make the required changes. Thanks once again.
>
>
>
> Regards,
>
> --
> Aaron Tomlin
>


--
- Allen

2021-12-13 15:16:40

by Petr Mladek

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Fri 2021-12-10 16:09:31, Aaron Tomlin wrote:
> On Fri 2021-12-10 11:00 +0100, Petr Mladek wrote:
> > > If someone enables this feature I can't think of a reason why they
> > > would want to limit this to some arbitrary number. So my preference
> > > is to remove that limitation completely. I see no point to it.
> >
> > I agree with Luis here. We could always add the limit later when
> > people report some real life problems with too long list. It is
> > always good to know that someone did some heavy lifting in
> > the system.
>
> Fair enough.
>
> > It might be even interesting to remember timestamp of the removal
> > to match it with another events reported in the system log.
>
> I'm not so sure about this. We could gather such details already via Ftrace
> (e.g. see load_module()). Personally, I'd prefer to maintain a simple list.

Fair enough. It was just an idea. Simple list is a good start. We
could always add more details if people find it useful.


> > > If you just bump the count then its not duplication, it just adds
> > > more information that the same module name with the same taint flag
> > > has been unloaded now more than once.
> >
> > Please, do not remove records that a module was removed. IMHO, it
> > might be useful to track all removed module, including the non-tainted
> > ones. Module removal is always tricky and not much tested. The tain
> > flags might be just shown as extra information in the output.
>
> This is an interesting suggestion. Albeit, as per the subject, I prefer to
> just keep track of any module that tainted the kernel. That being said,
> Petr, if you'd prefer to track each module unload/or deletion event, then I
> would suggest for instance to remove a module once it has been reintroduced
> or maintain an unload count as suggested by Luis.

I just have fresh in mind the patchset
https://lore.kernel.org/r/[email protected]
It is about how removing a sysfs interface is tricky and might lead to
use after free problems. I could imagine many other similar problems
that might happen with any module.

But I agree that the information about modules that tainted the kernel is
more important. I do not want to block the feature by adding more
requirements.


Also we should keep in mind that the default panic() message should
be reasonably short. Only the last lines might be visible on screen.
Serial consoles might be really slow.

It is perfectly fine to add a few lines, like the existing list of
loaded modules. Any potentially extensive output should be optional.
There already is support for optional info, see panic_print_sys_info().

Best Regards,
Petr

2021-12-20 19:23:39

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Mon, Dec 13, 2021 at 06:30:07PM +0530, Allen wrote:
> Aaron, Luis,
>
> I have some ideas and did some work on it. Let me know if we could
> work together on this.

Patches welcomed.

Luis

2021-12-21 11:44:45

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Mon 2021-12-13 18:30 +0530, Allen wrote:
> Aaron, Luis,

Hi Allen

> I have some ideas and did some work on it. Let me know if we could work
> together on this.

Yes, we can. What have you done so far?

Kind regards,

--
Aaron Tomlin


2021-12-21 11:58:35

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH] module: Introduce module unload taint tracking

On Mon 2021-12-13 16:16 +0100, Petr Mladek wrote:
> > I'm not so sure about this. We could gather such details already via Ftrace
> > (e.g. see load_module()). Personally, I'd prefer to maintain a simple list.
>
> Fair enough. It was just an idea. Simple list is a good start. We
> could always add more details if people find it useful.

Indeed we could.

> Also we should keep in mind that the default panic() message should
> be reasonably short. Only the last lines might be visible on screen.
> Serial consoles might be really slow.

Absolutely, I agree. This feature should be entirely optional. In fact, it
is likely only useful while reviewing the data via /proc/vmcore given the
potential amount of data generated, in addition to that seen in
panic_print_sys_info(), when explicitly enabled.


Kind regards,

--
Aaron Tomlin


2021-12-28 21:30:47

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 00/12] module: core code clean up

Hi Luis, Allen,

I had some free time so decided to make a quick start.
There is more outstanding; albeit, I wanted to share what
was accomplished thus far. Unfortunately, nothing has been
thoroughly tested yet. Please let me know your thoughts.


Aaron Tomlin (12):
module: Move all into module/
module: Simple refactor in preparation for split
module: Move livepatch support to a separate file
module: Move latched RB-tree support to a separate file
module: Move arch strict rwx support to a separate file
module: Move strict rwx support to a separate file
module: Move extra signature support out of core code
module: Move kmemleak support to a separate file
module: Move kallsyms support into a separate file
module: Move procfs support into a separate file
module: Move sysfs support into a separate file
module: Move kdb_modules list out of core code

include/linux/module.h | 76 +-
kernel/Makefile | 4 +-
kernel/debug/kdb/kdb_main.c | 5 +
kernel/module-internal.h | 31 -
kernel/module/Makefile | 16 +
kernel/module/arch_strict_rwx.c | 44 +
kernel/module/debug_kmemleak.c | 30 +
kernel/module/internal.h | 121 ++
kernel/module/kallsyms.c | 506 ++++++
kernel/module/livepatch.c | 75 +
kernel/{module.c => module/main.c} | 1616 +----------------
kernel/module/procfs.c | 111 ++
.../signature.c} | 0
kernel/module/signing.c | 120 ++
kernel/module/strict_rwx.c | 83 +
kernel/module/sysfs.c | 426 +++++
kernel/module/tree_lookup.c | 108 ++
kernel/module_signing.c | 45 -
18 files changed, 1751 insertions(+), 1666 deletions(-)
delete mode 100644 kernel/module-internal.h
create mode 100644 kernel/module/Makefile
create mode 100644 kernel/module/arch_strict_rwx.c
create mode 100644 kernel/module/debug_kmemleak.c
create mode 100644 kernel/module/internal.h
create mode 100644 kernel/module/kallsyms.c
create mode 100644 kernel/module/livepatch.c
rename kernel/{module.c => module/main.c} (66%)
create mode 100644 kernel/module/procfs.c
rename kernel/{module_signature.c => module/signature.c} (100%)
create mode 100644 kernel/module/signing.c
create mode 100644 kernel/module/strict_rwx.c
create mode 100644 kernel/module/sysfs.c
create mode 100644 kernel/module/tree_lookup.c
delete mode 100644 kernel/module_signing.c

--
2.31.1


2021-12-28 21:30:51

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 01/12] module: Move all into module/

No functional changes.

This patch moves all module related code into a separate directory,
modifies each file name and creates a new Makefile. Note: this effort
is in preparation to refactor core module code.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/Makefile | 4 +---
kernel/module/Makefile | 8 ++++++++
kernel/{module-internal.h => module/internal.h} | 0
kernel/{module.c => module/main.c} | 2 +-
kernel/{module_signature.c => module/signature.c} | 0
kernel/{module_signing.c => module/signing.c} | 2 +-
6 files changed, 11 insertions(+), 5 deletions(-)
create mode 100644 kernel/module/Makefile
rename kernel/{module-internal.h => module/internal.h} (100%)
rename kernel/{module.c => module/main.c} (99%)
rename kernel/{module_signature.c => module/signature.c} (100%)
rename kernel/{module_signing.c => module/signing.c} (97%)

diff --git a/kernel/Makefile b/kernel/Makefile
index 4df609be42d0..466477d4dafe 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -53,6 +53,7 @@ obj-y += rcu/
obj-y += livepatch/
obj-y += dma/
obj-y += entry/
+obj-y += module/

obj-$(CONFIG_KCMP) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
@@ -66,9 +67,6 @@ ifneq ($(CONFIG_SMP),y)
obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
-obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_MODULE_SIG) += module_signing.o
-obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_CRASH_CORE) += crash_core.o
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
new file mode 100644
index 000000000000..a9cf6e822075
--- /dev/null
+++ b/kernel/module/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for linux kernel module support
+#
+
+obj-$(CONFIG_MODULES) += main.o
+obj-$(CONFIG_MODULE_SIG) += signing.o
+obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
diff --git a/kernel/module-internal.h b/kernel/module/internal.h
similarity index 100%
rename from kernel/module-internal.h
rename to kernel/module/internal.h
diff --git a/kernel/module.c b/kernel/module/main.c
similarity index 99%
rename from kernel/module.c
rename to kernel/module/main.c
index ed13917ea5f3..bc997c3e2c95 100644
--- a/kernel/module.c
+++ b/kernel/module/main.c
@@ -58,7 +58,7 @@
#include <linux/dynamic_debug.h>
#include <linux/audit.h>
#include <uapi/linux/module.h>
-#include "module-internal.h"
+#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/module.h>
diff --git a/kernel/module_signature.c b/kernel/module/signature.c
similarity index 100%
rename from kernel/module_signature.c
rename to kernel/module/signature.c
diff --git a/kernel/module_signing.c b/kernel/module/signing.c
similarity index 97%
rename from kernel/module_signing.c
rename to kernel/module/signing.c
index 8723ae70ea1f..8aeb6d2ee94b 100644
--- a/kernel/module_signing.c
+++ b/kernel/module/signing.c
@@ -12,7 +12,7 @@
#include <linux/string.h>
#include <linux/verification.h>
#include <crypto/public_key.h>
-#include "module-internal.h"
+#include "internal.h"

/*
* Verify the signature on a module.
--
2.31.1


2021-12-28 21:30:53

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 02/12] module: Simple refactor in preparation for split

No functional change.

This patch makes it possible to move non-essential code
out of core module code.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/internal.h | 22 ++++++++++++++++++++++
kernel/module/main.c | 23 ++---------------------
2 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 33783abc377b..ffc50df010a7 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -7,6 +7,28 @@

#include <linux/elf.h>
#include <asm/module.h>
+#include <linux/mutex.h>
+
+#ifndef ARCH_SHF_SMALL
+#define ARCH_SHF_SMALL 0
+#endif
+
+/* If this is set, the section belongs in the init part of the module */
+#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
+/* Maximum number of characters written by module_flags() */
+#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
+#define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4))
+
+extern struct mutex module_mutex;
+extern struct list_head modules;
+
+/* Provided by the linker */
+extern const struct kernel_symbol __start___ksymtab[];
+extern const struct kernel_symbol __stop___ksymtab[];
+extern const struct kernel_symbol __start___ksymtab_gpl[];
+extern const struct kernel_symbol __stop___ksymtab_gpl[];
+extern const s32 __start___kcrctab[];
+extern const s32 __start___kcrctab_gpl[];

struct load_info {
const char *name;
diff --git a/kernel/module/main.c b/kernel/module/main.c
index bc997c3e2c95..2a6b859716c0 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -63,10 +63,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>

-#ifndef ARCH_SHF_SMALL
-#define ARCH_SHF_SMALL 0
-#endif
-
/*
* Modules' sections will be aligned on page boundaries
* to ensure complete separation of code and data, but
@@ -78,9 +74,6 @@
# define debug_align(X) (X)
#endif

-/* If this is set, the section belongs in the init part of the module */
-#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
-
/*
* Mutex protects:
* 1) List of modules (also safely readable with preempt_disable),
@@ -88,8 +81,8 @@
* 3) module_addr_min/module_addr_max.
* (delete and add uses RCU list operations).
*/
-static DEFINE_MUTEX(module_mutex);
-static LIST_HEAD(modules);
+DEFINE_MUTEX(module_mutex);
+LIST_HEAD(modules);

/* Work queue for freeing init sections in success case */
static void do_free_init(struct work_struct *w);
@@ -408,14 +401,6 @@ static __maybe_unused void *any_section_objs(const struct load_info *info,
return (void *)info->sechdrs[sec].sh_addr;
}

-/* Provided by the linker */
-extern const struct kernel_symbol __start___ksymtab[];
-extern const struct kernel_symbol __stop___ksymtab[];
-extern const struct kernel_symbol __start___ksymtab_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_gpl[];
-extern const s32 __start___kcrctab[];
-extern const s32 __start___kcrctab_gpl[];
-
#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
#else
@@ -1491,7 +1476,6 @@ struct module_sect_attrs {
struct module_sect_attr attrs[];
};

-#define MODULE_SECT_READ_SIZE (3 /* "0x", "\n" */ + (BITS_PER_LONG / 4))
static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
struct bin_attribute *battr,
char *buf, loff_t pos, size_t count)
@@ -4498,9 +4482,6 @@ static void cfi_cleanup(struct module *mod)
#endif
}

-/* Maximum number of characters written by module_flags() */
-#define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4)
-
/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
static char *module_flags(struct module *mod, char *buf)
{
--
2.31.1


2021-12-28 21:30:54

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 03/12] module: Move livepatch support to a separate file

No functional change.

This patch migrates livepatch support (i.e. used during module
add/or load and remove/or deletion) from core module code into
kernel/module/livepatch.c. At the moment it contains code to
persist Elf information about a given livepatch module, only.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/Makefile | 1 +
kernel/module/internal.h | 12 ++++++
kernel/module/livepatch.c | 75 +++++++++++++++++++++++++++++++++
kernel/module/main.c | 89 +--------------------------------------
4 files changed, 89 insertions(+), 88 deletions(-)
create mode 100644 kernel/module/livepatch.c

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index a9cf6e822075..47d70bb18da3 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -6,3 +6,4 @@
obj-$(CONFIG_MODULES) += main.o
obj-$(CONFIG_MODULE_SIG) += signing.o
obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index ffc50df010a7..91ef152aeffb 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -51,3 +51,15 @@ struct load_info {
};

extern int mod_verify_sig(const void *mod, struct load_info *info);
+
+#ifdef CONFIG_LIVEPATCH
+extern int copy_module_elf(struct module *mod, struct load_info *info);
+extern void free_module_elf(struct module *mod);
+extern int check_modinfo_livepatch(struct module *mod, struct load_info *info);
+#else /* !CONFIG_LIVEPATCH */
+static inline int copy_module_elf(struct module *mod, struct load_info *info)
+{
+ return 0;
+}
+static inline void free_module_elf(struct module *mod) { }
+#endif /* CONFIG_LIVEPATCH */
diff --git a/kernel/module/livepatch.c b/kernel/module/livepatch.c
new file mode 100644
index 000000000000..e147f5418327
--- /dev/null
+++ b/kernel/module/livepatch.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/livepatch.c - module livepatch support
+ *
+ * Copyright (C) 2016 Jessica Yu <[email protected]>
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/*
+ * Persist Elf information about a module. Copy the Elf header,
+ * section header table, section string table, and symtab section
+ * index from info to mod->klp_info.
+ */
+int copy_module_elf(struct module *mod, struct load_info *info)
+{
+ unsigned int size, symndx;
+ int ret;
+
+ size = sizeof(*mod->klp_info);
+ mod->klp_info = kmalloc(size, GFP_KERNEL);
+ if (mod->klp_info == NULL)
+ return -ENOMEM;
+
+ /* Elf header */
+ size = sizeof(mod->klp_info->hdr);
+ memcpy(&mod->klp_info->hdr, info->hdr, size);
+
+ /* Elf section header table */
+ size = sizeof(*info->sechdrs) * info->hdr->e_shnum;
+ mod->klp_info->sechdrs = kmemdup(info->sechdrs, size, GFP_KERNEL);
+ if (mod->klp_info->sechdrs == NULL) {
+ ret = -ENOMEM;
+ goto free_info;
+ }
+
+ /* Elf section name string table */
+ size = info->sechdrs[info->hdr->e_shstrndx].sh_size;
+ mod->klp_info->secstrings = kmemdup(info->secstrings, size, GFP_KERNEL);
+ if (mod->klp_info->secstrings == NULL) {
+ ret = -ENOMEM;
+ goto free_sechdrs;
+ }
+
+ /* Elf symbol section index */
+ symndx = info->index.sym;
+ mod->klp_info->symndx = symndx;
+
+ /*
+ * For livepatch modules, core_kallsyms.symtab is a complete
+ * copy of the original symbol table. Adjust sh_addr to point
+ * to core_kallsyms.symtab since the copy of the symtab in module
+ * init memory is freed at the end of do_init_module().
+ */
+ mod->klp_info->sechdrs[symndx].sh_addr = \
+ (unsigned long) mod->core_kallsyms.symtab;
+
+ return 0;
+
+free_sechdrs:
+ kfree(mod->klp_info->sechdrs);
+free_info:
+ kfree(mod->klp_info);
+ return ret;
+}
+
+void free_module_elf(struct module *mod)
+{
+ kfree(mod->klp_info->sechdrs);
+ kfree(mod->klp_info->secstrings);
+ kfree(mod->klp_info);
+}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 2a6b859716c0..9bcaf251e109 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2043,81 +2043,6 @@ static int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
}
#endif /* CONFIG_STRICT_MODULE_RWX */

-#ifdef CONFIG_LIVEPATCH
-/*
- * Persist Elf information about a module. Copy the Elf header,
- * section header table, section string table, and symtab section
- * index from info to mod->klp_info.
- */
-static int copy_module_elf(struct module *mod, struct load_info *info)
-{
- unsigned int size, symndx;
- int ret;
-
- size = sizeof(*mod->klp_info);
- mod->klp_info = kmalloc(size, GFP_KERNEL);
- if (mod->klp_info == NULL)
- return -ENOMEM;
-
- /* Elf header */
- size = sizeof(mod->klp_info->hdr);
- memcpy(&mod->klp_info->hdr, info->hdr, size);
-
- /* Elf section header table */
- size = sizeof(*info->sechdrs) * info->hdr->e_shnum;
- mod->klp_info->sechdrs = kmemdup(info->sechdrs, size, GFP_KERNEL);
- if (mod->klp_info->sechdrs == NULL) {
- ret = -ENOMEM;
- goto free_info;
- }
-
- /* Elf section name string table */
- size = info->sechdrs[info->hdr->e_shstrndx].sh_size;
- mod->klp_info->secstrings = kmemdup(info->secstrings, size, GFP_KERNEL);
- if (mod->klp_info->secstrings == NULL) {
- ret = -ENOMEM;
- goto free_sechdrs;
- }
-
- /* Elf symbol section index */
- symndx = info->index.sym;
- mod->klp_info->symndx = symndx;
-
- /*
- * For livepatch modules, core_kallsyms.symtab is a complete
- * copy of the original symbol table. Adjust sh_addr to point
- * to core_kallsyms.symtab since the copy of the symtab in module
- * init memory is freed at the end of do_init_module().
- */
- mod->klp_info->sechdrs[symndx].sh_addr = \
- (unsigned long) mod->core_kallsyms.symtab;
-
- return 0;
-
-free_sechdrs:
- kfree(mod->klp_info->sechdrs);
-free_info:
- kfree(mod->klp_info);
- return ret;
-}
-
-static void free_module_elf(struct module *mod)
-{
- kfree(mod->klp_info->sechdrs);
- kfree(mod->klp_info->secstrings);
- kfree(mod->klp_info);
-}
-#else /* !CONFIG_LIVEPATCH */
-static int copy_module_elf(struct module *mod, struct load_info *info)
-{
- return 0;
-}
-
-static void free_module_elf(struct module *mod)
-{
-}
-#endif /* CONFIG_LIVEPATCH */
-
void __weak module_memfree(void *module_region)
{
/*
@@ -3052,19 +2977,7 @@ static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned l
return 0;
}

-#ifdef CONFIG_LIVEPATCH
-static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
-{
- if (get_modinfo(info, "livepatch")) {
- mod->klp = true;
- add_taint_module(mod, TAINT_LIVEPATCH, LOCKDEP_STILL_OK);
- pr_notice_once("%s: tainting kernel with TAINT_LIVEPATCH\n",
- mod->name);
- }
-
- return 0;
-}
-#else /* !CONFIG_LIVEPATCH */
+#ifndef CONFIG_LIVEPATCH
static int check_modinfo_livepatch(struct module *mod, struct load_info *info)
{
if (get_modinfo(info, "livepatch")) {
--
2.31.1


2021-12-28 21:30:55

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 04/12] module: Move latched RB-tree support to a separate file

No functional change.

This patch migrates module latched RB-tree support
(e.g. see __module_address()) from core module code
into kernel/module/tree_lookup.c.

Signed-off-by: Aaron Tomlin <[email protected]>
---
include/linux/module.h | 37 +++++++++-
kernel/module/Makefile | 1 +
kernel/module/main.c | 134 ------------------------------------
kernel/module/tree_lookup.c | 108 +++++++++++++++++++++++++++++
4 files changed, 144 insertions(+), 136 deletions(-)
create mode 100644 kernel/module/tree_lookup.c

diff --git a/include/linux/module.h b/include/linux/module.h
index 8a298d820dbc..8b4d254a84c0 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -339,11 +339,44 @@ struct module_layout {
};

#ifdef CONFIG_MODULES_TREE_LOOKUP
+struct mod_tree_root {
+ struct latch_tree_root root;
+ unsigned long addr_min;
+ unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+
+#define module_addr_min mod_tree.addr_min
+#define module_addr_max mod_tree.addr_max
+
+extern void mod_tree_insert(struct module *mod);
+extern void mod_tree_remove_init(struct module *mod);
+extern void mod_tree_remove(struct module *mod);
+extern struct module *mod_find(unsigned long addr);
/* Only touch one cacheline for common rbtree-for-core-layout case. */
#define __module_layout_align ____cacheline_aligned
-#else
+#else /* !CONFIG_MODULES_TREE_LOOKUP */
+
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
+static void mod_tree_insert(struct module *mod) { }
+static void mod_tree_remove_init(struct module *mod) { }
+static void mod_tree_remove(struct module *mod) { }
+static struct module *mod_find(unsigned long addr)
+{
+ struct module *mod;
+
+ list_for_each_entry_rcu(mod, &modules, list,
+ lockdep_is_held(&module_mutex)) {
+ if (within_module(addr, mod))
+ return mod;
+ }
+
+ return NULL;
+}
#define __module_layout_align
-#endif
+#endif /* CONFIG_MODULES_TREE_LOOKUP */

struct mod_kallsyms {
Elf_Sym *symtab;
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 47d70bb18da3..9d593362156d 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_MODULES) += main.o
obj-$(CONFIG_MODULE_SIG) += signing.o
obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
obj-$(CONFIG_LIVEPATCH) += livepatch.o
+obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 9bcaf251e109..692cc520420d 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -89,140 +89,6 @@ static void do_free_init(struct work_struct *w);
static DECLARE_WORK(init_free_wq, do_free_init);
static LLIST_HEAD(init_free_list);

-#ifdef CONFIG_MODULES_TREE_LOOKUP
-
-/*
- * Use a latched RB-tree for __module_address(); this allows us to use
- * RCU-sched lookups of the address from any context.
- *
- * This is conditional on PERF_EVENTS || TRACING because those can really hit
- * __module_address() hard by doing a lot of stack unwinding; potentially from
- * NMI context.
- */
-
-static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
-{
- struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
-
- return (unsigned long)layout->base;
-}
-
-static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
-{
- struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
-
- return (unsigned long)layout->size;
-}
-
-static __always_inline bool
-mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
-{
- return __mod_tree_val(a) < __mod_tree_val(b);
-}
-
-static __always_inline int
-mod_tree_comp(void *key, struct latch_tree_node *n)
-{
- unsigned long val = (unsigned long)key;
- unsigned long start, end;
-
- start = __mod_tree_val(n);
- if (val < start)
- return -1;
-
- end = start + __mod_tree_size(n);
- if (val >= end)
- return 1;
-
- return 0;
-}
-
-static const struct latch_tree_ops mod_tree_ops = {
- .less = mod_tree_less,
- .comp = mod_tree_comp,
-};
-
-static struct mod_tree_root {
- struct latch_tree_root root;
- unsigned long addr_min;
- unsigned long addr_max;
-} mod_tree __cacheline_aligned = {
- .addr_min = -1UL,
-};
-
-#define module_addr_min mod_tree.addr_min
-#define module_addr_max mod_tree.addr_max
-
-static noinline void __mod_tree_insert(struct mod_tree_node *node)
-{
- latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
-}
-
-static void __mod_tree_remove(struct mod_tree_node *node)
-{
- latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
-}
-
-/*
- * These modifications: insert, remove_init and remove; are serialized by the
- * module_mutex.
- */
-static void mod_tree_insert(struct module *mod)
-{
- mod->core_layout.mtn.mod = mod;
- mod->init_layout.mtn.mod = mod;
-
- __mod_tree_insert(&mod->core_layout.mtn);
- if (mod->init_layout.size)
- __mod_tree_insert(&mod->init_layout.mtn);
-}
-
-static void mod_tree_remove_init(struct module *mod)
-{
- if (mod->init_layout.size)
- __mod_tree_remove(&mod->init_layout.mtn);
-}
-
-static void mod_tree_remove(struct module *mod)
-{
- __mod_tree_remove(&mod->core_layout.mtn);
- mod_tree_remove_init(mod);
-}
-
-static struct module *mod_find(unsigned long addr)
-{
- struct latch_tree_node *ltn;
-
- ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
- if (!ltn)
- return NULL;
-
- return container_of(ltn, struct mod_tree_node, node)->mod;
-}
-
-#else /* MODULES_TREE_LOOKUP */
-
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
-static void mod_tree_insert(struct module *mod) { }
-static void mod_tree_remove_init(struct module *mod) { }
-static void mod_tree_remove(struct module *mod) { }
-
-static struct module *mod_find(unsigned long addr)
-{
- struct module *mod;
-
- list_for_each_entry_rcu(mod, &modules, list,
- lockdep_is_held(&module_mutex)) {
- if (within_module(addr, mod))
- return mod;
- }
-
- return NULL;
-}
-
-#endif /* MODULES_TREE_LOOKUP */
-
/*
* Bounds of module text, for speeding up __module_address.
* Protected by module_mutex.
diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c
new file mode 100644
index 000000000000..7703687c6e2e
--- /dev/null
+++ b/kernel/module/tree_lookup.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/tree_lookup.c - modules tree lookup
+ *
+ * Copyright (C) 2015 Peter Zijlstra
+ * Copyright (C) 2015 Rusty Russell
+ */
+
+#include <linux/module.h>
+#include <linux/rbtree_latch.h>
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * This is conditional on PERF_EVENTS || TRACING because those can really hit
+ * __module_address() hard by doing a lot of stack unwinding; potentially from
+ * NMI context.
+ */
+
+__always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
+{
+ struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
+
+ return (unsigned long)layout->base;
+}
+
+__always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+ struct module_layout *layout = container_of(n, struct module_layout, mtn.node);
+
+ return (unsigned long)layout->size;
+}
+
+__always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+ return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+__always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long start, end;
+
+ start = __mod_tree_val(n);
+ if (val < start)
+ return -1;
+
+ end = start + __mod_tree_size(n);
+ if (val >= end)
+ return 1;
+
+ return 0;
+}
+
+const struct latch_tree_ops mod_tree_ops = {
+ .less = mod_tree_less,
+ .comp = mod_tree_comp,
+};
+
+noinline void __mod_tree_insert(struct mod_tree_node *node)
+{
+ latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+void __mod_tree_remove(struct mod_tree_node *node)
+{
+ latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+void mod_tree_insert(struct module *mod)
+{
+ mod->core_layout.mtn.mod = mod;
+ mod->init_layout.mtn.mod = mod;
+
+ __mod_tree_insert(&mod->core_layout.mtn);
+ if (mod->init_layout.size)
+ __mod_tree_insert(&mod->init_layout.mtn);
+}
+
+void mod_tree_remove_init(struct module *mod)
+{
+ if (mod->init_layout.size)
+ __mod_tree_remove(&mod->init_layout.mtn);
+}
+
+void mod_tree_remove(struct module *mod)
+{
+ __mod_tree_remove(&mod->core_layout.mtn);
+ mod_tree_remove_init(mod);
+}
+
+struct module *mod_find(unsigned long addr)
+{
+ struct latch_tree_node *ltn;
+
+ ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ if (!ltn)
+ return NULL;
+
+ return container_of(ltn, struct mod_tree_node, node)->mod;
+}
--
2.31.1


2021-12-28 21:30:59

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 05/12] module: Move arch strict rwx support to a separate file

No functional change.

This patch migrates applicable architecture code
that support strict module rwx from core module code
into kernel/module/arch_strict_rwx.c

Signed-off-by: Aaron Tomlin <[email protected]>
---
include/linux/module.h | 16 +++++++++++
kernel/module/Makefile | 1 +
kernel/module/arch_strict_rwx.c | 44 ++++++++++++++++++++++++++++
kernel/module/main.c | 51 ---------------------------------
4 files changed, 61 insertions(+), 51 deletions(-)
create mode 100644 kernel/module/arch_strict_rwx.c

diff --git a/include/linux/module.h b/include/linux/module.h
index 8b4d254a84c0..caa7212cf754 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -338,6 +338,22 @@ struct module_layout {
#endif
};

+/*
+ * Modules' sections will be aligned on page boundaries
+ * to ensure complete separation of code and data, but
+ * only when CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
+ */
+#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX
+# define debug_align(X) ALIGN(X, PAGE_SIZE)
+
+extern void frob_text(const struct module_layout *layout, int (*set_memory)(unsigned long start, int num_pages));
+extern void module_enable_x(const struct module *mod);
+#else /* !CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
+# define debug_align(X) (X)
+
+static void module_enable_x(const struct module *mod) { }
+#endif /* CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
+
#ifdef CONFIG_MODULES_TREE_LOOKUP
struct mod_tree_root {
struct latch_tree_root root;
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 9d593362156d..95fad95a0549 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_MODULE_SIG) += signing.o
obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
+obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
diff --git a/kernel/module/arch_strict_rwx.c b/kernel/module/arch_strict_rwx.c
new file mode 100644
index 000000000000..68e970671ec5
--- /dev/null
+++ b/kernel/module/arch_strict_rwx.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/arch_strict_rwx.c - module arch strict rwx
+ *
+ * Copyright (C) 2015 Rusty Russell
+ */
+
+#include <linux/module.h>
+#include <linux/set_memory.h>
+
+/*
+ * LKM RO/NX protection: protect module's text/ro-data
+ * from modification and any data from execution.
+ *
+ * General layout of module is:
+ * [text] [read-only-data] [ro-after-init] [writable data]
+ * text_size -----^ ^ ^ ^
+ * ro_size ------------------------| | |
+ * ro_after_init_size -----------------------------| |
+ * size -----------------------------------------------------------|
+ *
+ * These values are always page-aligned (as is base)
+ */
+
+/*
+ * Since some arches are moving towards PAGE_KERNEL module allocations instead
+ * of PAGE_KERNEL_EXEC, keep frob_text() and module_enable_x() outside of the
+ * CONFIG_STRICT_MODULE_RWX block below because they are needed regardless of
+ * whether we are strict.
+ */
+void frob_text(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
+{
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base,
+ layout->text_size >> PAGE_SHIFT);
+}
+
+void module_enable_x(const struct module *mod)
+{
+ frob_text(&mod->core_layout, set_memory_x);
+ frob_text(&mod->init_layout, set_memory_x);
+}
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 692cc520420d..a0619256b343 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -13,7 +13,6 @@
#include <linux/trace_events.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
-#include <linux/buildid.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
@@ -63,17 +62,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>

-/*
- * Modules' sections will be aligned on page boundaries
- * to ensure complete separation of code and data, but
- * only when CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
- */
-#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX
-# define debug_align(X) ALIGN(X, PAGE_SIZE)
-#else
-# define debug_align(X) (X)
-#endif
-
/*
* Mutex protects:
* 1) List of modules (also safely readable with preempt_disable),
@@ -1786,45 +1774,6 @@ static void mod_sysfs_teardown(struct module *mod)
mod_sysfs_fini(mod);
}

-/*
- * LKM RO/NX protection: protect module's text/ro-data
- * from modification and any data from execution.
- *
- * General layout of module is:
- * [text] [read-only-data] [ro-after-init] [writable data]
- * text_size -----^ ^ ^ ^
- * ro_size ------------------------| | |
- * ro_after_init_size -----------------------------| |
- * size -----------------------------------------------------------|
- *
- * These values are always page-aligned (as is base)
- */
-
-/*
- * Since some arches are moving towards PAGE_KERNEL module allocations instead
- * of PAGE_KERNEL_EXEC, keep frob_text() and module_enable_x() outside of the
- * CONFIG_STRICT_MODULE_RWX block below because they are needed regardless of
- * whether we are strict.
- */
-#ifdef CONFIG_ARCH_HAS_STRICT_MODULE_RWX
-static void frob_text(const struct module_layout *layout,
- int (*set_memory)(unsigned long start, int num_pages))
-{
- BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
- set_memory((unsigned long)layout->base,
- layout->text_size >> PAGE_SHIFT);
-}
-
-static void module_enable_x(const struct module *mod)
-{
- frob_text(&mod->core_layout, set_memory_x);
- frob_text(&mod->init_layout, set_memory_x);
-}
-#else /* !CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
-static void module_enable_x(const struct module *mod) { }
-#endif /* CONFIG_ARCH_HAS_STRICT_MODULE_RWX */
-
#ifdef CONFIG_STRICT_MODULE_RWX
static void frob_rodata(const struct module_layout *layout,
int (*set_memory)(unsigned long start, int num_pages))
--
2.31.1


2021-12-28 21:31:02

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 06/12] module: Move strict rwx support to a separate file

No functional change.

This patch migrates code that makes module text
and rodata memory read-only and non-text memory
non-executable from core module code into
kernel/module/strict_rwx.c.

Signed-off-by: Aaron Tomlin <[email protected]>
---
include/linux/module.h | 18 ++++++++
kernel/module/Makefile | 1 +
kernel/module/main.c | 84 --------------------------------------
kernel/module/strict_rwx.c | 83 +++++++++++++++++++++++++++++++++++++
4 files changed, 102 insertions(+), 84 deletions(-)
create mode 100644 kernel/module/strict_rwx.c

diff --git a/include/linux/module.h b/include/linux/module.h
index caa7212cf754..218ac6768433 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -354,6 +354,24 @@ extern void module_enable_x(const struct module *mod);
static void module_enable_x(const struct module *mod) { }
#endif /* CONFIG_ARCH_HAS_STRICT_MODULE_RWX */

+#ifdef CONFIG_STRICT_MODULE_RWX
+extern void frob_rodata(const struct module_layout *layout, int (*set_memory)(unsigned long start, int num_pages));
+extern void frob_ro_after_init(const struct module_layout *layout, int (*set_memory)(unsigned long start, int num_pages));
+extern void frob_writable_data(const struct module_layout *layout, int (*set_memory)(unsigned long start, int num_pages));
+extern void module_enable_ro(const struct module *mod, bool after_init);
+extern void module_enable_nx(const struct module *mod);
+extern int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod);
+
+#else /* !CONFIG_STRICT_MODULE_RWX */
+static void module_enable_nx(const struct module *mod) { }
+static void module_enable_ro(const struct module *mod, bool after_init) {}
+static int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ return 0;
+}
+#endif /* CONFIG_STRICT_MODULE_RWX */
+
#ifdef CONFIG_MODULES_TREE_LOOKUP
struct mod_tree_root {
struct latch_tree_root root;
diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 95fad95a0549..795fe10ac530 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
+obj-$(CONFIG_STRICT_MODULE_RWX) += strict_rwx.o
diff --git a/kernel/module/main.c b/kernel/module/main.c
index a0619256b343..c404d00f7958 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1774,90 +1774,6 @@ static void mod_sysfs_teardown(struct module *mod)
mod_sysfs_fini(mod);
}

-#ifdef CONFIG_STRICT_MODULE_RWX
-static void frob_rodata(const struct module_layout *layout,
- int (*set_memory)(unsigned long start, int num_pages))
-{
- BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
- set_memory((unsigned long)layout->base + layout->text_size,
- (layout->ro_size - layout->text_size) >> PAGE_SHIFT);
-}
-
-static void frob_ro_after_init(const struct module_layout *layout,
- int (*set_memory)(unsigned long start, int num_pages))
-{
- BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
- set_memory((unsigned long)layout->base + layout->ro_size,
- (layout->ro_after_init_size - layout->ro_size) >> PAGE_SHIFT);
-}
-
-static void frob_writable_data(const struct module_layout *layout,
- int (*set_memory)(unsigned long start, int num_pages))
-{
- BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
- BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1));
- set_memory((unsigned long)layout->base + layout->ro_after_init_size,
- (layout->size - layout->ro_after_init_size) >> PAGE_SHIFT);
-}
-
-static void module_enable_ro(const struct module *mod, bool after_init)
-{
- if (!rodata_enabled)
- return;
-
- set_vm_flush_reset_perms(mod->core_layout.base);
- set_vm_flush_reset_perms(mod->init_layout.base);
- frob_text(&mod->core_layout, set_memory_ro);
-
- frob_rodata(&mod->core_layout, set_memory_ro);
- frob_text(&mod->init_layout, set_memory_ro);
- frob_rodata(&mod->init_layout, set_memory_ro);
-
- if (after_init)
- frob_ro_after_init(&mod->core_layout, set_memory_ro);
-}
-
-static void module_enable_nx(const struct module *mod)
-{
- frob_rodata(&mod->core_layout, set_memory_nx);
- frob_ro_after_init(&mod->core_layout, set_memory_nx);
- frob_writable_data(&mod->core_layout, set_memory_nx);
- frob_rodata(&mod->init_layout, set_memory_nx);
- frob_writable_data(&mod->init_layout, set_memory_nx);
-}
-
-static int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
- char *secstrings, struct module *mod)
-{
- const unsigned long shf_wx = SHF_WRITE|SHF_EXECINSTR;
- int i;
-
- for (i = 0; i < hdr->e_shnum; i++) {
- if ((sechdrs[i].sh_flags & shf_wx) == shf_wx) {
- pr_err("%s: section %s (index %d) has invalid WRITE|EXEC flags\n",
- mod->name, secstrings + sechdrs[i].sh_name, i);
- return -ENOEXEC;
- }
- }
-
- return 0;
-}
-
-#else /* !CONFIG_STRICT_MODULE_RWX */
-static void module_enable_nx(const struct module *mod) { }
-static void module_enable_ro(const struct module *mod, bool after_init) {}
-static int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
- char *secstrings, struct module *mod)
-{
- return 0;
-}
-#endif /* CONFIG_STRICT_MODULE_RWX */
-
void __weak module_memfree(void *module_region)
{
/*
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
new file mode 100644
index 000000000000..8a513ced02c6
--- /dev/null
+++ b/kernel/module/strict_rwx.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/strict_rwx.c - module strict rwx
+ *
+ * Copyright (C) 2015 Rusty Russell
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+void frob_rodata(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
+{
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->text_size & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base + layout->text_size,
+ (layout->ro_size - layout->text_size) >> PAGE_SHIFT);
+}
+
+void frob_ro_after_init(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
+{
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base + layout->ro_size,
+ (layout->ro_after_init_size - layout->ro_size) >> PAGE_SHIFT);
+}
+
+void frob_writable_data(const struct module_layout *layout,
+ int (*set_memory)(unsigned long start, int num_pages))
+{
+ BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
+ BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1));
+ set_memory((unsigned long)layout->base + layout->ro_after_init_size,
+ (layout->size - layout->ro_after_init_size) >> PAGE_SHIFT);
+}
+
+void module_enable_ro(const struct module *mod, bool after_init)
+{
+ if (!rodata_enabled)
+ return;
+
+ set_vm_flush_reset_perms(mod->core_layout.base);
+ set_vm_flush_reset_perms(mod->init_layout.base);
+ frob_text(&mod->core_layout, set_memory_ro);
+
+ frob_rodata(&mod->core_layout, set_memory_ro);
+ frob_text(&mod->init_layout, set_memory_ro);
+ frob_rodata(&mod->init_layout, set_memory_ro);
+
+ if (after_init)
+ frob_ro_after_init(&mod->core_layout, set_memory_ro);
+}
+
+void module_enable_nx(const struct module *mod)
+{
+ frob_rodata(&mod->core_layout, set_memory_nx);
+ frob_ro_after_init(&mod->core_layout, set_memory_nx);
+ frob_writable_data(&mod->core_layout, set_memory_nx);
+ frob_rodata(&mod->init_layout, set_memory_nx);
+ frob_writable_data(&mod->init_layout, set_memory_nx);
+}
+
+int module_enforce_rwx_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ const unsigned long shf_wx = SHF_WRITE|SHF_EXECINSTR;
+ int i;
+
+ for (i = 0; i < hdr->e_shnum; i++) {
+ if ((sechdrs[i].sh_flags & shf_wx) == shf_wx) {
+ pr_err("%s: section %s (index %d) has invalid WRITE|EXEC flags\n",
+ mod->name, secstrings + sechdrs[i].sh_name, i);
+ return -ENOEXEC;
+ }
+ }
+
+ return 0;
+}
--
2.31.1


2021-12-28 21:31:03

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 08/12] module: Move kmemleak support to a separate file

No functional change.

This patch migrates kmemleak code out of core module
code into kernel/module/debug_kmemleak.c.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/Makefile | 1 +
kernel/module/debug_kmemleak.c | 30 ++++++++++++++++++++++++++++++
kernel/module/internal.h | 7 +++++++
kernel/module/main.c | 27 ---------------------------
4 files changed, 38 insertions(+), 27 deletions(-)
create mode 100644 kernel/module/debug_kmemleak.c

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 795fe10ac530..2e03da799833 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
obj-$(CONFIG_STRICT_MODULE_RWX) += strict_rwx.o
+obj-$(CONFIG_DEBUG_KMEMLEAK) += debug_kmemleak.o
diff --git a/kernel/module/debug_kmemleak.c b/kernel/module/debug_kmemleak.c
new file mode 100644
index 000000000000..818c9d168aed
--- /dev/null
+++ b/kernel/module/debug_kmemleak.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/debug_kmemleak.c - module kmemleak support
+ *
+ * Copyright (C) 2009 Catalin Marinas
+ */
+
+#include <linux/module.h>
+#include <linux/kmemleak.h>
+#include "internal.h"
+
+void kmemleak_load_module(const struct module *mod,
+ const struct load_info *info)
+{
+ unsigned int i;
+
+ /* only scan the sections containing data */
+ kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
+
+ for (i = 1; i < info->hdr->e_shnum; i++) {
+ /* Scan all writable sections that's not executable */
+ if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
+ !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
+ (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
+ continue;
+
+ kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
+ info->sechdrs[i].sh_size, GFP_KERNEL);
+ }
+}
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index b4db57bafcd3..31d767416f0c 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -6,6 +6,7 @@
*/

#include <linux/elf.h>
+#include <linux/compiler.h>
#include <asm/module.h>
#include <linux/mutex.h>

@@ -72,3 +73,9 @@ static int module_sig_check(struct load_info *info, int flags)
return 0;
}
#endif /* !CONFIG_MODULE_SIG */
+
+#ifdef CONFIG_DEBUG_KMEMLEAK
+extern void kmemleak_load_module(const struct module *mod, const struct load_info *info);
+#else /* !CONFIG_DEBUG_KMEMLEAK */
+static inline void __maybe_unused kmemleak_load_module(const struct module *mod, const struct load_info *info) { }
+#endif /* CONFIG_DEBUG_KMEMLEAK */
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 8f8a904d5ba7..672a977b1320 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2467,33 +2467,6 @@ bool __weak module_exit_section(const char *name)
return strstarts(name, ".exit");
}

-#ifdef CONFIG_DEBUG_KMEMLEAK
-static void kmemleak_load_module(const struct module *mod,
- const struct load_info *info)
-{
- unsigned int i;
-
- /* only scan the sections containing data */
- kmemleak_scan_area(mod, sizeof(struct module), GFP_KERNEL);
-
- for (i = 1; i < info->hdr->e_shnum; i++) {
- /* Scan all writable sections that's not executable */
- if (!(info->sechdrs[i].sh_flags & SHF_ALLOC) ||
- !(info->sechdrs[i].sh_flags & SHF_WRITE) ||
- (info->sechdrs[i].sh_flags & SHF_EXECINSTR))
- continue;
-
- kmemleak_scan_area((void *)info->sechdrs[i].sh_addr,
- info->sechdrs[i].sh_size, GFP_KERNEL);
- }
-}
-#else
-static inline void kmemleak_load_module(const struct module *mod,
- const struct load_info *info)
-{
-}
-#endif
-
static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr)
{
unsigned long secend;
--
2.31.1


2021-12-28 21:31:06

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 10/12] module: Move procfs support into a separate file

No functional change.

This patch migrates code that allows one to generate a
list of loaded and/or linked modules via /proc, when procfs
support is enabled, into kernel/module/procfs.c.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/Makefile | 1 +
kernel/module/main.c | 98 -------------------------------------
kernel/module/procfs.c | 108 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 109 insertions(+), 98 deletions(-)
create mode 100644 kernel/module/procfs.c

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 23582011ab08..cba3e608b4ca 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
obj-$(CONFIG_STRICT_MODULE_RWX) += strict_rwx.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += debug_kmemleak.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
+obj-$(CONFIG_PROC_FS) += procfs.o
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 9813e1672d8c..2da580c7b069 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -21,7 +21,6 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/elf.h>
-#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
@@ -3529,103 +3528,6 @@ static char *module_flags(struct module *mod, char *buf)
return buf;
}

-#ifdef CONFIG_PROC_FS
-/* Called by the /proc file system to return a list of modules. */
-static void *m_start(struct seq_file *m, loff_t *pos)
-{
- mutex_lock(&module_mutex);
- return seq_list_start(&modules, *pos);
-}
-
-static void *m_next(struct seq_file *m, void *p, loff_t *pos)
-{
- return seq_list_next(p, &modules, pos);
-}
-
-static void m_stop(struct seq_file *m, void *p)
-{
- mutex_unlock(&module_mutex);
-}
-
-static int m_show(struct seq_file *m, void *p)
-{
- struct module *mod = list_entry(p, struct module, list);
- char buf[MODULE_FLAGS_BUF_SIZE];
- void *value;
-
- /* We always ignore unformed modules. */
- if (mod->state == MODULE_STATE_UNFORMED)
- return 0;
-
- seq_printf(m, "%s %u",
- mod->name, mod->init_layout.size + mod->core_layout.size);
- print_unload_info(m, mod);
-
- /* Informative for users. */
- seq_printf(m, " %s",
- mod->state == MODULE_STATE_GOING ? "Unloading" :
- mod->state == MODULE_STATE_COMING ? "Loading" :
- "Live");
- /* Used by oprofile and other similar tools. */
- value = m->private ? NULL : mod->core_layout.base;
- seq_printf(m, " 0x%px", value);
-
- /* Taints info */
- if (mod->taints)
- seq_printf(m, " %s", module_flags(mod, buf));
-
- seq_puts(m, "\n");
- return 0;
-}
-
-/*
- * Format: modulename size refcount deps address
- *
- * Where refcount is a number or -, and deps is a comma-separated list
- * of depends or -.
- */
-static const struct seq_operations modules_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = m_show
-};
-
-/*
- * This also sets the "private" pointer to non-NULL if the
- * kernel pointers should be hidden (so you can just test
- * "m->private" to see if you should keep the values private).
- *
- * We use the same logic as for /proc/kallsyms.
- */
-static int modules_open(struct inode *inode, struct file *file)
-{
- int err = seq_open(file, &modules_op);
-
- if (!err) {
- struct seq_file *m = file->private_data;
- m->private = kallsyms_show_value(file->f_cred) ? NULL : (void *)8ul;
- }
-
- return err;
-}
-
-static const struct proc_ops modules_proc_ops = {
- .proc_flags = PROC_ENTRY_PERMANENT,
- .proc_open = modules_open,
- .proc_read = seq_read,
- .proc_lseek = seq_lseek,
- .proc_release = seq_release,
-};
-
-static int __init proc_modules_init(void)
-{
- proc_create("modules", 0, NULL, &modules_proc_ops);
- return 0;
-}
-module_init(proc_modules_init);
-#endif
-
/* Given an address, look for it in the module exception tables. */
const struct exception_table_entry *search_module_extables(unsigned long addr)
{
diff --git a/kernel/module/procfs.c b/kernel/module/procfs.c
new file mode 100644
index 000000000000..98d898250475
--- /dev/null
+++ b/kernel/module/procfs.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/proc.c - module proc support
+ *
+ * Copyright (C) 2008 Alexey Dobriyan
+ */
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/mutex.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include "internal.h"
+
+/* Called by the /proc file system to return a list of modules. */
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+ mutex_lock(&module_mutex);
+ return seq_list_start(&modules, *pos);
+}
+
+static void *m_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ return seq_list_next(p, &modules, pos);
+}
+
+static void m_stop(struct seq_file *m, void *p)
+{
+ mutex_unlock(&module_mutex);
+}
+
+static int m_show(struct seq_file *m, void *p)
+{
+ struct module *mod = list_entry(p, struct module, list);
+ char buf[MODULE_FLAGS_BUF_SIZE];
+ void *value;
+
+ /* We always ignore unformed modules. */
+ if (mod->state == MODULE_STATE_UNFORMED)
+ return 0;
+
+ seq_printf(m, "%s %u",
+ mod->name, mod->init_layout.size + mod->core_layout.size);
+ print_unload_info(m, mod);
+
+ /* Informative for users. */
+ seq_printf(m, " %s",
+ mod->state == MODULE_STATE_GOING ? "Unloading" :
+ mod->state == MODULE_STATE_COMING ? "Loading" :
+ "Live");
+ /* Used by oprofile and other similar tools. */
+ value = m->private ? NULL : mod->core_layout.base;
+ seq_printf(m, " 0x%px", value);
+
+ /* Taints info */
+ if (mod->taints)
+ seq_printf(m, " %s", module_flags(mod, buf));
+
+ seq_puts(m, "\n");
+ return 0;
+}
+
+/*
+ * Format: modulename size refcount deps address
+ *
+ * Where refcount is a number or -, and deps is a comma-separated list
+ * of depends or -.
+ */
+static const struct seq_operations modules_op = {
+ .start = m_start,
+ .next = m_next,
+ .stop = m_stop,
+ .show = m_show
+};
+
+/*
+ * This also sets the "private" pointer to non-NULL if the
+ * kernel pointers should be hidden (so you can just test
+ * "m->private" to see if you should keep the values private).
+ *
+ * We use the same logic as for /proc/kallsyms.
+ */
+static int modules_open(struct inode *inode, struct file *file)
+{
+ int err = seq_open(file, &modules_op);
+
+ if (!err) {
+ struct seq_file *m = file->private_data;
+ m->private = kallsyms_show_value(file->f_cred) ? NULL : (void *)8ul;
+ }
+
+ return err;
+}
+
+static const struct proc_ops modules_proc_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = modules_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
+};
+
+static int __init proc_modules_init(void)
+{
+ proc_create("modules", 0, NULL, &modules_proc_ops);
+ return 0;
+}
+module_init(proc_modules_init);
--
2.31.1


2021-12-28 21:31:09

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 07/12] module: Move extra signature support out of core code

No functional change.

This patch migrates additional module signature check
code from core module code into kernel/module/signing.c.

Signed-off-by: Aaron Tomlin <[email protected]>
---
include/linux/module.h | 5 ++-
kernel/module/internal.h | 9 +++++
kernel/module/main.c | 86 ----------------------------------------
kernel/module/signing.c | 75 +++++++++++++++++++++++++++++++++++
4 files changed, 87 insertions(+), 88 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index 218ac6768433..3383912268af 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -736,8 +736,8 @@ static inline bool is_livepatch_module(struct module *mod)
}
#endif /* CONFIG_LIVEPATCH */

-bool is_module_sig_enforced(void);
-void set_module_sig_enforced(void);
+extern bool is_module_sig_enforced(void);
+extern void set_module_sig_enforced(void);

#else /* !CONFIG_MODULES... */

@@ -927,6 +927,7 @@ static inline bool module_sig_ok(struct module *module)
{
return true;
}
+#define sig_enforce false
#endif /* CONFIG_MODULE_SIG */

int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 91ef152aeffb..b4db57bafcd3 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -63,3 +63,12 @@ static inline int copy_module_elf(struct module *mod, struct load_info *info)
}
static inline void free_module_elf(struct module *mod) { }
#endif /* CONFIG_LIVEPATCH */
+
+#ifdef CONFIG_MODULE_SIG
+extern int module_sig_check(struct load_info *info, int flags);
+#else /* !CONFIG_MODULE_SIG */
+static int module_sig_check(struct load_info *info, int flags)
+{
+ return 0;
+}
+#endif /* !CONFIG_MODULE_SIG */
diff --git a/kernel/module/main.c b/kernel/module/main.c
index c404d00f7958..8f8a904d5ba7 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -22,7 +22,6 @@
#include <linux/vmalloc.h>
#include <linux/elf.h>
#include <linux/proc_fs.h>
-#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
@@ -114,28 +113,6 @@ static void module_assert_mutex_or_preempt(void)
#endif
}

-#ifdef CONFIG_MODULE_SIG
-static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
-module_param(sig_enforce, bool_enable_only, 0644);
-
-void set_module_sig_enforced(void)
-{
- sig_enforce = true;
-}
-#else
-#define sig_enforce false
-#endif
-
-/*
- * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
- * on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
- */
-bool is_module_sig_enforced(void)
-{
- return sig_enforce;
-}
-EXPORT_SYMBOL(is_module_sig_enforced);
-
/* Block module loading/unloading? */
int modules_disabled = 0;
core_param(nomodule, modules_disabled, bint, 0);
@@ -2517,69 +2494,6 @@ static inline void kmemleak_load_module(const struct module *mod,
}
#endif

-#ifdef CONFIG_MODULE_SIG
-static int module_sig_check(struct load_info *info, int flags)
-{
- int err = -ENODATA;
- const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
- const char *reason;
- const void *mod = info->hdr;
-
- /*
- * Require flags == 0, as a module with version information
- * removed is no longer the module that was signed
- */
- if (flags == 0 &&
- info->len > markerlen &&
- memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
- /* We truncate the module to discard the signature */
- info->len -= markerlen;
- err = mod_verify_sig(mod, info);
- if (!err) {
- info->sig_ok = true;
- return 0;
- }
- }
-
- /*
- * We don't permit modules to be loaded into the trusted kernels
- * without a valid signature on them, but if we're not enforcing,
- * certain errors are non-fatal.
- */
- switch (err) {
- case -ENODATA:
- reason = "unsigned module";
- break;
- case -ENOPKG:
- reason = "module with unsupported crypto";
- break;
- case -ENOKEY:
- reason = "module with unavailable key";
- break;
-
- default:
- /*
- * All other errors are fatal, including lack of memory,
- * unparseable signatures, and signature check failures --
- * even if signatures aren't required.
- */
- return err;
- }
-
- if (is_module_sig_enforced()) {
- pr_notice("Loading of %s is rejected\n", reason);
- return -EKEYREJECTED;
- }
-
- return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
-}
-#else /* !CONFIG_MODULE_SIG */
-static int module_sig_check(struct load_info *info, int flags)
-{
- return 0;
-}
-#endif /* !CONFIG_MODULE_SIG */
-
static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr)
{
unsigned long secend;
diff --git a/kernel/module/signing.c b/kernel/module/signing.c
index 8aeb6d2ee94b..ff41541e982a 100644
--- a/kernel/module/signing.c
+++ b/kernel/module/signing.c
@@ -11,9 +11,28 @@
#include <linux/module_signature.h>
#include <linux/string.h>
#include <linux/verification.h>
+#include <linux/security.h>
#include <crypto/public_key.h>
#include "internal.h"

+static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
+module_param(sig_enforce, bool_enable_only, 0644);
+
+/*
+ * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
+ * on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
+ */
+bool is_module_sig_enforced(void)
+{
+ return sig_enforce;
+}
+EXPORT_SYMBOL(is_module_sig_enforced);
+
+void set_module_sig_enforced(void)
+{
+ sig_enforce = true;
+}
+
/*
* Verify the signature on a module.
*/
@@ -43,3 +62,59 @@ int mod_verify_sig(const void *mod, struct load_info *info)
VERIFYING_MODULE_SIGNATURE,
NULL, NULL);
}
+
+int module_sig_check(struct load_info *info, int flags)
+{
+ int err = -ENODATA;
+ const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+ const char *reason;
+ const void *mod = info->hdr;
+
+ /*
+ * Require flags == 0, as a module with version information
+ * removed is no longer the module that was signed
+ */
+ if (flags == 0 &&
+ info->len > markerlen &&
+ memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
+ /* We truncate the module to discard the signature */
+ info->len -= markerlen;
+ err = mod_verify_sig(mod, info);
+ if (!err) {
+ info->sig_ok = true;
+ return 0;
+ }
+ }
+
+ /*
+ * We don't permit modules to be loaded into the trusted kernels
+ * without a valid signature on them, but if we're not enforcing,
+ * certain errors are non-fatal.
+ */
+ switch (err) {
+ case -ENODATA:
+ reason = "unsigned module";
+ break;
+ case -ENOPKG:
+ reason = "module with unsupported crypto";
+ break;
+ case -ENOKEY:
+ reason = "module with unavailable key";
+ break;
+
+ default:
+ /*
+ * All other errors are fatal, including lack of memory,
+ * unparseable signatures, and signature check failures --
+ * even if signatures aren't required.
+ */
+ return err;
+ }
+
+ if (is_module_sig_enforced()) {
+ pr_notice("Loading of %s is rejected\n", reason);
+ return -EKEYREJECTED;
+ }
+
+ return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
+}
--
2.31.1


2021-12-28 21:31:11

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 11/12] module: Move sysfs support into a separate file

No functional change.

This patch migrates module sysfs support out of core code into
kernel/module/sysfs.c. In addition, some simple code refactoring
was done to make this possible.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/Makefile | 1 +
kernel/module/internal.h | 20 ++
kernel/module/main.c | 462 +--------------------------------------
kernel/module/procfs.c | 3 +
kernel/module/sysfs.c | 426 ++++++++++++++++++++++++++++++++++++
5 files changed, 455 insertions(+), 457 deletions(-)
create mode 100644 kernel/module/sysfs.c

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index cba3e608b4ca..917910302ac0 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_STRICT_MODULE_RWX) += strict_rwx.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += debug_kmemleak.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PROC_FS) += procfs.o
+obj-$(CONFIG_SYSFS) += sysfs.o
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 119d42c304a1..4d81eb1f58ef 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -99,3 +99,23 @@ static inline char *find_kallsyms_symbol(struct module *mod, unsigned long addr,
return NULL;
}
#endif /* CONFIG_KALLSYMS */
+
+#ifdef CONFIG_SYSFS
+extern int mod_sysfs_setup(struct module *mod, const struct load_info *info, struct kernel_param *kparam, unsigned int num_params);
+extern void mod_sysfs_fini(struct module *mod);
+extern void module_remove_modinfo_attrs(struct module *mod, int end);
+extern void del_usage_links(struct module *mod);
+extern void init_param_lock(struct module *mod);
+#else /* !CONFIG_SYSFS */
+static int mod_sysfs_setup(struct module *mod,
+ const struct load_info *info,
+ struct kernel_param *kparam,
+ unsigned int num_params)
+{
+ return 0;
+}
+static inline void mod_sysfs_fini(struct module *mod) { }
+static inline void module_remove_modinfo_attrs(struct module *mod, int end) { }
+static inline void del_usage_links(struct module *mod) { }
+static inline void init_param_lock(struct module *mod) { }
+#endif /* CONFIG_SYSFS */
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 2da580c7b069..354fb2697188 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -13,9 +13,7 @@
#include <linux/trace_events.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
-#include <linux/file.h>
#include <linux/fs.h>
-#include <linux/sysfs.h>
#include <linux/kernel.h>
#include <linux/kernel_read_file.h>
#include <linux/slab.h>
@@ -801,7 +799,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
return ret;
}

-static inline void print_unload_info(struct seq_file *m, struct module *mod)
+inline void print_unload_info(struct seq_file *m, struct module *mod)
{
struct module_use *use;
int printed_something = 0;
@@ -1017,7 +1015,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
static struct module_attribute modinfo_taint =
__ATTR(taint, 0444, show_taint, NULL);

-static struct module_attribute *modinfo_attrs[] = {
+struct module_attribute *modinfo_attrs[] = {
&module_uevent,
&modinfo_version,
&modinfo_srcversion,
@@ -1031,6 +1029,8 @@ static struct module_attribute *modinfo_attrs[] = {
NULL,
};

+size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs);
+
static const char vermagic[] = VERMAGIC_STRING;

static int try_to_force_load(struct module *mod, const char *reason)
@@ -1281,458 +1281,6 @@ resolve_symbol_wait(struct module *mod,
return ksym;
}

-/*
- * /sys/module/foo/sections stuff
- * J. Corbet <[email protected]>
- */
-#ifdef CONFIG_SYSFS
-
-#ifdef CONFIG_KALLSYMS
-struct module_sect_attr {
- struct bin_attribute battr;
- unsigned long address;
-};
-
-struct module_sect_attrs {
- struct attribute_group grp;
- unsigned int nsections;
- struct module_sect_attr attrs[];
-};
-
-static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *battr,
- char *buf, loff_t pos, size_t count)
-{
- struct module_sect_attr *sattr =
- container_of(battr, struct module_sect_attr, battr);
- char bounce[MODULE_SECT_READ_SIZE + 1];
- size_t wrote;
-
- if (pos != 0)
- return -EINVAL;
-
- /*
- * Since we're a binary read handler, we must account for the
- * trailing NUL byte that sprintf will write: if "buf" is
- * too small to hold the NUL, or the NUL is exactly the last
- * byte, the read will look like it got truncated by one byte.
- * Since there is no way to ask sprintf nicely to not write
- * the NUL, we have to use a bounce buffer.
- */
- wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n",
- kallsyms_show_value(file->f_cred)
- ? (void *)sattr->address : NULL);
- count = min(count, wrote);
- memcpy(buf, bounce, count);
-
- return count;
-}
-
-static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
-{
- unsigned int section;
-
- for (section = 0; section < sect_attrs->nsections; section++)
- kfree(sect_attrs->attrs[section].battr.attr.name);
- kfree(sect_attrs);
-}
-
-static void add_sect_attrs(struct module *mod, const struct load_info *info)
-{
- unsigned int nloaded = 0, i, size[2];
- struct module_sect_attrs *sect_attrs;
- struct module_sect_attr *sattr;
- struct bin_attribute **gattr;
-
- /* Count loaded sections and allocate structures */
- for (i = 0; i < info->hdr->e_shnum; i++)
- if (!sect_empty(&info->sechdrs[i]))
- nloaded++;
- size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded),
- sizeof(sect_attrs->grp.bin_attrs[0]));
- size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]);
- sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
- if (sect_attrs == NULL)
- return;
-
- /* Setup section attributes. */
- sect_attrs->grp.name = "sections";
- sect_attrs->grp.bin_attrs = (void *)sect_attrs + size[0];
-
- sect_attrs->nsections = 0;
- sattr = &sect_attrs->attrs[0];
- gattr = &sect_attrs->grp.bin_attrs[0];
- for (i = 0; i < info->hdr->e_shnum; i++) {
- Elf_Shdr *sec = &info->sechdrs[i];
- if (sect_empty(sec))
- continue;
- sysfs_bin_attr_init(&sattr->battr);
- sattr->address = sec->sh_addr;
- sattr->battr.attr.name =
- kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL);
- if (sattr->battr.attr.name == NULL)
- goto out;
- sect_attrs->nsections++;
- sattr->battr.read = module_sect_read;
- sattr->battr.size = MODULE_SECT_READ_SIZE;
- sattr->battr.attr.mode = 0400;
- *(gattr++) = &(sattr++)->battr;
- }
- *gattr = NULL;
-
- if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp))
- goto out;
-
- mod->sect_attrs = sect_attrs;
- return;
- out:
- free_sect_attrs(sect_attrs);
-}
-
-static void remove_sect_attrs(struct module *mod)
-{
- if (mod->sect_attrs) {
- sysfs_remove_group(&mod->mkobj.kobj,
- &mod->sect_attrs->grp);
- /*
- * We are positive that no one is using any sect attrs
- * at this point. Deallocate immediately.
- */
- free_sect_attrs(mod->sect_attrs);
- mod->sect_attrs = NULL;
- }
-}
-
-/*
- * /sys/module/foo/notes/.section.name gives contents of SHT_NOTE sections.
- */
-
-struct module_notes_attrs {
- struct kobject *dir;
- unsigned int notes;
- struct bin_attribute attrs[];
-};
-
-static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
- char *buf, loff_t pos, size_t count)
-{
- /*
- * The caller checked the pos and count against our size.
- */
- memcpy(buf, bin_attr->private + pos, count);
- return count;
-}
-
-static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
- unsigned int i)
-{
- if (notes_attrs->dir) {
- while (i-- > 0)
- sysfs_remove_bin_file(notes_attrs->dir,
- &notes_attrs->attrs[i]);
- kobject_put(notes_attrs->dir);
- }
- kfree(notes_attrs);
-}
-
-static void add_notes_attrs(struct module *mod, const struct load_info *info)
-{
- unsigned int notes, loaded, i;
- struct module_notes_attrs *notes_attrs;
- struct bin_attribute *nattr;
-
- /* failed to create section attributes, so can't create notes */
- if (!mod->sect_attrs)
- return;
-
- /* Count notes sections and allocate structures. */
- notes = 0;
- for (i = 0; i < info->hdr->e_shnum; i++)
- if (!sect_empty(&info->sechdrs[i]) &&
- (info->sechdrs[i].sh_type == SHT_NOTE))
- ++notes;
-
- if (notes == 0)
- return;
-
- notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes),
- GFP_KERNEL);
- if (notes_attrs == NULL)
- return;
-
- notes_attrs->notes = notes;
- nattr = &notes_attrs->attrs[0];
- for (loaded = i = 0; i < info->hdr->e_shnum; ++i) {
- if (sect_empty(&info->sechdrs[i]))
- continue;
- if (info->sechdrs[i].sh_type == SHT_NOTE) {
- sysfs_bin_attr_init(nattr);
- nattr->attr.name = mod->sect_attrs->attrs[loaded].battr.attr.name;
- nattr->attr.mode = S_IRUGO;
- nattr->size = info->sechdrs[i].sh_size;
- nattr->private = (void *) info->sechdrs[i].sh_addr;
- nattr->read = module_notes_read;
- ++nattr;
- }
- ++loaded;
- }
-
- notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj);
- if (!notes_attrs->dir)
- goto out;
-
- for (i = 0; i < notes; ++i)
- if (sysfs_create_bin_file(notes_attrs->dir,
- &notes_attrs->attrs[i]))
- goto out;
-
- mod->notes_attrs = notes_attrs;
- return;
-
- out:
- free_notes_attrs(notes_attrs, i);
-}
-
-static void remove_notes_attrs(struct module *mod)
-{
- if (mod->notes_attrs)
- free_notes_attrs(mod->notes_attrs, mod->notes_attrs->notes);
-}
-
-#else
-
-static inline void add_sect_attrs(struct module *mod,
- const struct load_info *info)
-{
-}
-
-static inline void remove_sect_attrs(struct module *mod)
-{
-}
-
-static inline void add_notes_attrs(struct module *mod,
- const struct load_info *info)
-{
-}
-
-static inline void remove_notes_attrs(struct module *mod)
-{
-}
-#endif /* CONFIG_KALLSYMS */
-
-static void del_usage_links(struct module *mod)
-{
-#ifdef CONFIG_MODULE_UNLOAD
- struct module_use *use;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list)
- sysfs_remove_link(use->target->holders_dir, mod->name);
- mutex_unlock(&module_mutex);
-#endif
-}
-
-static int add_usage_links(struct module *mod)
-{
- int ret = 0;
-#ifdef CONFIG_MODULE_UNLOAD
- struct module_use *use;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list) {
- ret = sysfs_create_link(use->target->holders_dir,
- &mod->mkobj.kobj, mod->name);
- if (ret)
- break;
- }
- mutex_unlock(&module_mutex);
- if (ret)
- del_usage_links(mod);
-#endif
- return ret;
-}
-
-static void module_remove_modinfo_attrs(struct module *mod, int end);
-
-static int module_add_modinfo_attrs(struct module *mod)
-{
- struct module_attribute *attr;
- struct module_attribute *temp_attr;
- int error = 0;
- int i;
-
- mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) *
- (ARRAY_SIZE(modinfo_attrs) + 1)),
- GFP_KERNEL);
- if (!mod->modinfo_attrs)
- return -ENOMEM;
-
- temp_attr = mod->modinfo_attrs;
- for (i = 0; (attr = modinfo_attrs[i]); i++) {
- if (!attr->test || attr->test(mod)) {
- memcpy(temp_attr, attr, sizeof(*temp_attr));
- sysfs_attr_init(&temp_attr->attr);
- error = sysfs_create_file(&mod->mkobj.kobj,
- &temp_attr->attr);
- if (error)
- goto error_out;
- ++temp_attr;
- }
- }
-
- return 0;
-
-error_out:
- if (i > 0)
- module_remove_modinfo_attrs(mod, --i);
- else
- kfree(mod->modinfo_attrs);
- return error;
-}
-
-static void module_remove_modinfo_attrs(struct module *mod, int end)
-{
- struct module_attribute *attr;
- int i;
-
- for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) {
- if (end >= 0 && i > end)
- break;
- /* pick a field to test for end of list */
- if (!attr->attr.name)
- break;
- sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
- if (attr->free)
- attr->free(mod);
- }
- kfree(mod->modinfo_attrs);
-}
-
-static void mod_kobject_put(struct module *mod)
-{
- DECLARE_COMPLETION_ONSTACK(c);
- mod->mkobj.kobj_completion = &c;
- kobject_put(&mod->mkobj.kobj);
- wait_for_completion(&c);
-}
-
-static int mod_sysfs_init(struct module *mod)
-{
- int err;
- struct kobject *kobj;
-
- if (!module_sysfs_initialized) {
- pr_err("%s: module sysfs not initialized\n", mod->name);
- err = -EINVAL;
- goto out;
- }
-
- kobj = kset_find_obj(module_kset, mod->name);
- if (kobj) {
- pr_err("%s: module is already loaded\n", mod->name);
- kobject_put(kobj);
- err = -EINVAL;
- goto out;
- }
-
- mod->mkobj.mod = mod;
-
- memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
- mod->mkobj.kobj.kset = module_kset;
- err = kobject_init_and_add(&mod->mkobj.kobj, &module_ktype, NULL,
- "%s", mod->name);
- if (err)
- mod_kobject_put(mod);
-
-out:
- return err;
-}
-
-static int mod_sysfs_setup(struct module *mod,
- const struct load_info *info,
- struct kernel_param *kparam,
- unsigned int num_params)
-{
- int err;
-
- err = mod_sysfs_init(mod);
- if (err)
- goto out;
-
- mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
- if (!mod->holders_dir) {
- err = -ENOMEM;
- goto out_unreg;
- }
-
- err = module_param_sysfs_setup(mod, kparam, num_params);
- if (err)
- goto out_unreg_holders;
-
- err = module_add_modinfo_attrs(mod);
- if (err)
- goto out_unreg_param;
-
- err = add_usage_links(mod);
- if (err)
- goto out_unreg_modinfo_attrs;
-
- add_sect_attrs(mod, info);
- add_notes_attrs(mod, info);
-
- return 0;
-
-out_unreg_modinfo_attrs:
- module_remove_modinfo_attrs(mod, -1);
-out_unreg_param:
- module_param_sysfs_remove(mod);
-out_unreg_holders:
- kobject_put(mod->holders_dir);
-out_unreg:
- mod_kobject_put(mod);
-out:
- return err;
-}
-
-static void mod_sysfs_fini(struct module *mod)
-{
- remove_notes_attrs(mod);
- remove_sect_attrs(mod);
- mod_kobject_put(mod);
-}
-
-static void init_param_lock(struct module *mod)
-{
- mutex_init(&mod->param_lock);
-}
-#else /* !CONFIG_SYSFS */
-
-static int mod_sysfs_setup(struct module *mod,
- const struct load_info *info,
- struct kernel_param *kparam,
- unsigned int num_params)
-{
- return 0;
-}
-
-static void mod_sysfs_fini(struct module *mod)
-{
-}
-
-static void module_remove_modinfo_attrs(struct module *mod, int end)
-{
-}
-
-static void del_usage_links(struct module *mod)
-{
-}
-
-static void init_param_lock(struct module *mod)
-{
-}
-#endif /* CONFIG_SYSFS */
-
static void mod_sysfs_teardown(struct module *mod)
{
del_usage_links(mod);
@@ -3505,7 +3053,7 @@ static void cfi_cleanup(struct module *mod)
}

/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
-static char *module_flags(struct module *mod, char *buf)
+char *module_flags(struct module *mod, char *buf)
{
int bx = 0;

diff --git a/kernel/module/procfs.c b/kernel/module/procfs.c
index 98d898250475..365d5e52ac23 100644
--- a/kernel/module/procfs.c
+++ b/kernel/module/procfs.c
@@ -12,6 +12,9 @@
#include <linux/proc_fs.h>
#include "internal.h"

+extern void print_unload_info(struct seq_file *m, struct module *mod);
+extern char *module_flags(struct module *mod, char *buf);
+
/* Called by the /proc file system to return a list of modules. */
static void *m_start(struct seq_file *m, loff_t *pos)
{
diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c
new file mode 100644
index 000000000000..1cd91c214b72
--- /dev/null
+++ b/kernel/module/sysfs.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kernel/module/sysfs.c - module sysfs support
+ *
+ * Copyright (C) 2008 Rusty Russell
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/kallsyms.h>
+#include <linux/mutex.h>
+#include "internal.h"
+
+/*
+ * /sys/module/foo/sections stuff
+ * J. Corbet <[email protected]>
+ */
+#ifdef CONFIG_KALLSYMS
+struct module_sect_attr {
+ struct bin_attribute battr;
+ unsigned long address;
+};
+
+struct module_sect_attrs {
+ struct attribute_group grp;
+ unsigned int nsections;
+ struct module_sect_attr attrs[];
+};
+
+static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *battr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct module_sect_attr *sattr =
+ container_of(battr, struct module_sect_attr, battr);
+ char bounce[MODULE_SECT_READ_SIZE + 1];
+ size_t wrote;
+
+ if (pos != 0)
+ return -EINVAL;
+
+ /*
+ * Since we're a binary read handler, we must account for the
+ * trailing NUL byte that sprintf will write: if "buf" is
+ * too small to hold the NUL, or the NUL is exactly the last
+ * byte, the read will look like it got truncated by one byte.
+ * Since there is no way to ask sprintf nicely to not write
+ * the NUL, we have to use a bounce buffer.
+ */
+ wrote = scnprintf(bounce, sizeof(bounce), "0x%px\n",
+ kallsyms_show_value(file->f_cred)
+ ? (void *)sattr->address : NULL);
+ count = min(count, wrote);
+ memcpy(buf, bounce, count);
+
+ return count;
+}
+
+static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
+{
+ unsigned int section;
+
+ for (section = 0; section < sect_attrs->nsections; section++)
+ kfree(sect_attrs->attrs[section].battr.attr.name);
+ kfree(sect_attrs);
+}
+
+static void add_sect_attrs(struct module *mod, const struct load_info *info)
+{
+ unsigned int nloaded = 0, i, size[2];
+ struct module_sect_attrs *sect_attrs;
+ struct module_sect_attr *sattr;
+ struct bin_attribute **gattr;
+
+ /* Count loaded sections and allocate structures */
+ for (i = 0; i < info->hdr->e_shnum; i++)
+ if (!sect_empty(&info->sechdrs[i]))
+ nloaded++;
+ size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded),
+ sizeof(sect_attrs->grp.bin_attrs[0]));
+ size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]);
+ sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
+ if (sect_attrs == NULL)
+ return;
+
+ /* Setup section attributes. */
+ sect_attrs->grp.name = "sections";
+ sect_attrs->grp.bin_attrs = (void *)sect_attrs + size[0];
+
+ sect_attrs->nsections = 0;
+ sattr = &sect_attrs->attrs[0];
+ gattr = &sect_attrs->grp.bin_attrs[0];
+ for (i = 0; i < info->hdr->e_shnum; i++) {
+ Elf_Shdr *sec = &info->sechdrs[i];
+ if (sect_empty(sec))
+ continue;
+ sysfs_bin_attr_init(&sattr->battr);
+ sattr->address = sec->sh_addr;
+ sattr->battr.attr.name =
+ kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL);
+ if (sattr->battr.attr.name == NULL)
+ goto out;
+ sect_attrs->nsections++;
+ sattr->battr.read = module_sect_read;
+ sattr->battr.size = MODULE_SECT_READ_SIZE;
+ sattr->battr.attr.mode = 0400;
+ *(gattr++) = &(sattr++)->battr;
+ }
+ *gattr = NULL;
+
+ if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp))
+ goto out;
+
+ mod->sect_attrs = sect_attrs;
+ return;
+ out:
+ free_sect_attrs(sect_attrs);
+}
+
+static void remove_sect_attrs(struct module *mod)
+{
+ if (mod->sect_attrs) {
+ sysfs_remove_group(&mod->mkobj.kobj,
+ &mod->sect_attrs->grp);
+ /*
+ * We are positive that no one is using any sect attrs
+ * at this point. Deallocate immediately.
+ */
+ free_sect_attrs(mod->sect_attrs);
+ mod->sect_attrs = NULL;
+ }
+}
+
+/*
+ * /sys/module/foo/notes/.section.name gives contents of SHT_NOTE sections.
+ */
+
+struct module_notes_attrs {
+ struct kobject *dir;
+ unsigned int notes;
+ struct bin_attribute attrs[];
+};
+
+static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ /*
+ * The caller checked the pos and count against our size.
+ */
+ memcpy(buf, bin_attr->private + pos, count);
+ return count;
+}
+
+static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
+ unsigned int i)
+{
+ if (notes_attrs->dir) {
+ while (i-- > 0)
+ sysfs_remove_bin_file(notes_attrs->dir,
+ &notes_attrs->attrs[i]);
+ kobject_put(notes_attrs->dir);
+ }
+ kfree(notes_attrs);
+}
+
+static void add_notes_attrs(struct module *mod, const struct load_info *info)
+{
+ unsigned int notes, loaded, i;
+ struct module_notes_attrs *notes_attrs;
+ struct bin_attribute *nattr;
+
+ /* failed to create section attributes, so can't create notes */
+ if (!mod->sect_attrs)
+ return;
+
+ /* Count notes sections and allocate structures. */
+ notes = 0;
+ for (i = 0; i < info->hdr->e_shnum; i++)
+ if (!sect_empty(&info->sechdrs[i]) &&
+ (info->sechdrs[i].sh_type == SHT_NOTE))
+ ++notes;
+
+ if (notes == 0)
+ return;
+
+ notes_attrs = kzalloc(struct_size(notes_attrs, attrs, notes),
+ GFP_KERNEL);
+ if (notes_attrs == NULL)
+ return;
+
+ notes_attrs->notes = notes;
+ nattr = &notes_attrs->attrs[0];
+ for (loaded = i = 0; i < info->hdr->e_shnum; ++i) {
+ if (sect_empty(&info->sechdrs[i]))
+ continue;
+ if (info->sechdrs[i].sh_type == SHT_NOTE) {
+ sysfs_bin_attr_init(nattr);
+ nattr->attr.name = mod->sect_attrs->attrs[loaded].battr.attr.name;
+ nattr->attr.mode = S_IRUGO;
+ nattr->size = info->sechdrs[i].sh_size;
+ nattr->private = (void *) info->sechdrs[i].sh_addr;
+ nattr->read = module_notes_read;
+ ++nattr;
+ }
+ ++loaded;
+ }
+
+ notes_attrs->dir = kobject_create_and_add("notes", &mod->mkobj.kobj);
+ if (!notes_attrs->dir)
+ goto out;
+
+ for (i = 0; i < notes; ++i)
+ if (sysfs_create_bin_file(notes_attrs->dir,
+ &notes_attrs->attrs[i]))
+ goto out;
+
+ mod->notes_attrs = notes_attrs;
+ return;
+
+ out:
+ free_notes_attrs(notes_attrs, i);
+}
+
+static void remove_notes_attrs(struct module *mod)
+{
+ if (mod->notes_attrs)
+ free_notes_attrs(mod->notes_attrs, mod->notes_attrs->notes);
+}
+
+#else /* !CONFIG_KALLSYMS */
+static inline void add_sect_attrs(struct module *mod, const struct load_info *info) { }
+static inline void remove_sect_attrs(struct module *mod) { }
+static inline void add_notes_attrs(struct module *mod, const struct load_info *info) { }
+static inline void remove_notes_attrs(struct module *mod) { }
+#endif /* CONFIG_KALLSYMS */
+
+void del_usage_links(struct module *mod)
+{
+#ifdef CONFIG_MODULE_UNLOAD
+ struct module_use *use;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(use, &mod->target_list, target_list)
+ sysfs_remove_link(use->target->holders_dir, mod->name);
+ mutex_unlock(&module_mutex);
+#endif
+}
+
+static int add_usage_links(struct module *mod)
+{
+ int ret = 0;
+#ifdef CONFIG_MODULE_UNLOAD
+ struct module_use *use;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(use, &mod->target_list, target_list) {
+ ret = sysfs_create_link(use->target->holders_dir,
+ &mod->mkobj.kobj, mod->name);
+ if (ret)
+ break;
+ }
+ mutex_unlock(&module_mutex);
+ if (ret)
+ del_usage_links(mod);
+#endif
+ return ret;
+}
+
+extern struct module_attribute *modinfo_attrs[];
+extern size_t modinfo_attrs_count;
+
+static int module_add_modinfo_attrs(struct module *mod)
+{
+ struct module_attribute *attr;
+ struct module_attribute *temp_attr;
+ int error = 0;
+ int i;
+
+ mod->modinfo_attrs = kzalloc((sizeof(struct module_attribute) *
+ (modinfo_attrs_count + 1)),
+ GFP_KERNEL);
+ if (!mod->modinfo_attrs)
+ return -ENOMEM;
+
+ temp_attr = mod->modinfo_attrs;
+ for (i = 0; (attr = modinfo_attrs[i]); i++) {
+ if (!attr->test || attr->test(mod)) {
+ memcpy(temp_attr, attr, sizeof(*temp_attr));
+ sysfs_attr_init(&temp_attr->attr);
+ error = sysfs_create_file(&mod->mkobj.kobj,
+ &temp_attr->attr);
+ if (error)
+ goto error_out;
+ ++temp_attr;
+ }
+ }
+
+ return 0;
+
+error_out:
+ if (i > 0)
+ module_remove_modinfo_attrs(mod, --i);
+ else
+ kfree(mod->modinfo_attrs);
+ return error;
+}
+
+void module_remove_modinfo_attrs(struct module *mod, int end)
+{
+ struct module_attribute *attr;
+ int i;
+
+ for (i = 0; (attr = &mod->modinfo_attrs[i]); i++) {
+ if (end >= 0 && i > end)
+ break;
+ /* pick a field to test for end of list */
+ if (!attr->attr.name)
+ break;
+ sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
+ if (attr->free)
+ attr->free(mod);
+ }
+ kfree(mod->modinfo_attrs);
+}
+
+static void mod_kobject_put(struct module *mod)
+{
+ DECLARE_COMPLETION_ONSTACK(c);
+ mod->mkobj.kobj_completion = &c;
+ kobject_put(&mod->mkobj.kobj);
+ wait_for_completion(&c);
+}
+
+static int mod_sysfs_init(struct module *mod)
+{
+ int err;
+ struct kobject *kobj;
+
+ if (!module_sysfs_initialized) {
+ pr_err("%s: module sysfs not initialized\n", mod->name);
+ err = -EINVAL;
+ goto out;
+ }
+
+ kobj = kset_find_obj(module_kset, mod->name);
+ if (kobj) {
+ pr_err("%s: module is already loaded\n", mod->name);
+ kobject_put(kobj);
+ err = -EINVAL;
+ goto out;
+ }
+
+ mod->mkobj.mod = mod;
+
+ memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
+ mod->mkobj.kobj.kset = module_kset;
+ err = kobject_init_and_add(&mod->mkobj.kobj, &module_ktype, NULL,
+ "%s", mod->name);
+ if (err)
+ mod_kobject_put(mod);
+
+out:
+ return err;
+}
+
+int mod_sysfs_setup(struct module *mod,
+ const struct load_info *info,
+ struct kernel_param *kparam,
+ unsigned int num_params)
+{
+ int err;
+
+ err = mod_sysfs_init(mod);
+ if (err)
+ goto out;
+
+ mod->holders_dir = kobject_create_and_add("holders", &mod->mkobj.kobj);
+ if (!mod->holders_dir) {
+ err = -ENOMEM;
+ goto out_unreg;
+ }
+
+ err = module_param_sysfs_setup(mod, kparam, num_params);
+ if (err)
+ goto out_unreg_holders;
+
+ err = module_add_modinfo_attrs(mod);
+ if (err)
+ goto out_unreg_param;
+
+ err = add_usage_links(mod);
+ if (err)
+ goto out_unreg_modinfo_attrs;
+
+ add_sect_attrs(mod, info);
+ add_notes_attrs(mod, info);
+
+ return 0;
+
+out_unreg_modinfo_attrs:
+ module_remove_modinfo_attrs(mod, -1);
+out_unreg_param:
+ module_param_sysfs_remove(mod);
+out_unreg_holders:
+ kobject_put(mod->holders_dir);
+out_unreg:
+ mod_kobject_put(mod);
+out:
+ return err;
+}
+
+void mod_sysfs_fini(struct module *mod)
+{
+ remove_notes_attrs(mod);
+ remove_sect_attrs(mod);
+ mod_kobject_put(mod);
+}
+
+void init_param_lock(struct module *mod)
+{
+ mutex_init(&mod->param_lock);
+}
--
2.31.1


2021-12-28 21:31:13

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 12/12] module: Move kdb_modules list out of core code

No functional change.

This patch migrates the kdb_modules list to core kdb code,
since the list of added (or loaded) modules is no longer
private to the module core.

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/debug/kdb/kdb_main.c | 5 +++++
kernel/module/main.c | 4 ----
2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index d8ee5647b732..ef61fb65671a 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -60,6 +60,11 @@ EXPORT_SYMBOL(kdb_grepping_flag);
int kdb_grep_leading;
int kdb_grep_trailing;

+#ifdef CONFIG_MODULES
+extern struct list_head modules;
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_MODULES */
+
/*
* Kernel debugger state flags
*/
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 354fb2697188..09012246907a 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -95,10 +95,6 @@ static void mod_update_bounds(struct module *mod)
__mod_update_bounds(mod->init_layout.base, mod->init_layout.size);
}

-#ifdef CONFIG_KGDB_KDB
-struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
-#endif /* CONFIG_KGDB_KDB */
-
static void module_assert_mutex_or_preempt(void)
{
#ifdef CONFIG_LOCKDEP
--
2.31.1


2021-12-28 21:31:21

by Aaron Tomlin

[permalink] [raw]
Subject: [RFC PATCH 09/12] module: Move kallsyms support into a separate file

No functional change.

This patch migrates kallsyms code out of core module
code into kernel/module/kallsyms.c

Signed-off-by: Aaron Tomlin <[email protected]>
---
kernel/module/Makefile | 1 +
kernel/module/internal.h | 20 ++
kernel/module/kallsyms.c | 506 ++++++++++++++++++++++++++++++++++++++
kernel/module/main.c | 516 +--------------------------------------
4 files changed, 531 insertions(+), 512 deletions(-)
create mode 100644 kernel/module/kallsyms.c

diff --git a/kernel/module/Makefile b/kernel/module/Makefile
index 2e03da799833..23582011ab08 100644
--- a/kernel/module/Makefile
+++ b/kernel/module/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
obj-$(CONFIG_STRICT_MODULE_RWX) += strict_rwx.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += debug_kmemleak.o
+obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 31d767416f0c..119d42c304a1 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -79,3 +79,23 @@ extern void kmemleak_load_module(const struct module *mod, const struct load_inf
#else /* !CONFIG_DEBUG_KMEMLEAK */
static inline void __maybe_unused kmemleak_load_module(const struct module *mod, const struct load_info *info) { }
#endif /* CONFIG_DEBUG_KMEMLEAK */
+
+#ifdef CONFIG_KALLSYMS
+#ifdef CONFIG_STACKTRACE_BUILD_ID
+extern void init_build_id(struct module *mod, const struct load_info *info);
+#else /* !CONFIG_STACKTRACE_BUILD_ID */
+static inline void init_build_id(struct module *mod, const struct load_info *info) { }
+
+#endif
+extern void layout_symtab(struct module *mod, struct load_info *info);
+extern void add_kallsyms(struct module *mod, const struct load_info *info);
+extern bool sect_empty(const Elf_Shdr *sect);
+extern const char *find_kallsyms_symbol(struct module *mod, unsigned long addr, unsigned long *size, unsigned long *offset);
+#else /* !CONFIG_KALLSYMS */
+static inline void layout_symtab(struct module *mod, struct load_info *info) { }
+static inline void add_kallsyms(struct module *mod, const struct load_info *info) { }
+static inline char *find_kallsyms_symbol(struct module *mod, unsigned long addr, unsigned long *size, unsigned long *offset)
+{
+ return NULL;
+}
+#endif /* CONFIG_KALLSYMS */
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
new file mode 100644
index 000000000000..f682415459fe
--- /dev/null
+++ b/kernel/module/kallsyms.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Module kallsyms support
+ *
+ * Copyright (C) 2010 Rusty Russell
+ */
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/buildid.h>
+#include <linux/bsearch.h>
+#include "internal.h"
+
+extern struct module *find_module_all(const char *name, size_t len, bool even_unformed);
+extern unsigned long kernel_symbol_value(const struct kernel_symbol *sym);
+extern int cmp_name(const void *name, const void *sym);
+extern long get_offset(struct module *mod, unsigned int *size, Elf_Shdr *sechdr, unsigned int section);
+
+/* Lookup exported symbol in given range of kernel_symbols */
+static const struct kernel_symbol *lookup_exported_symbol(const char *name,
+ const struct kernel_symbol *start,
+ const struct kernel_symbol *stop)
+{
+ return bsearch(name, start, stop - start,
+ sizeof(struct kernel_symbol), cmp_name);
+}
+
+static int is_exported(const char *name, unsigned long value,
+ const struct module *mod)
+{
+ const struct kernel_symbol *ks;
+ if (!mod)
+ ks = lookup_exported_symbol(name, __start___ksymtab, __stop___ksymtab);
+ else
+ ks = lookup_exported_symbol(name, mod->syms, mod->syms + mod->num_syms);
+
+ return ks != NULL && kernel_symbol_value(ks) == value;
+}
+
+/* As per nm */
+static char elf_type(const Elf_Sym *sym, const struct load_info *info)
+{
+ const Elf_Shdr *sechdrs = info->sechdrs;
+
+ if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT)
+ return 'v';
+ else
+ return 'w';
+ }
+ if (sym->st_shndx == SHN_UNDEF)
+ return 'U';
+ if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu)
+ return 'a';
+ if (sym->st_shndx >= SHN_LORESERVE)
+ return '?';
+ if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR)
+ return 't';
+ if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC
+ && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) {
+ if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE))
+ return 'r';
+ else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
+ return 'g';
+ else
+ return 'd';
+ }
+ if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+ if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
+ return 's';
+ else
+ return 'b';
+ }
+ if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name,
+ ".debug")) {
+ return 'n';
+ }
+ return '?';
+}
+
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+ unsigned int shnum, unsigned int pcpundx)
+{
+ const Elf_Shdr *sec;
+
+ if (src->st_shndx == SHN_UNDEF
+ || src->st_shndx >= shnum
+ || !src->st_name)
+ return false;
+
+#ifdef CONFIG_KALLSYMS_ALL
+ if (src->st_shndx == pcpundx)
+ return true;
+#endif
+
+ sec = sechdrs + src->st_shndx;
+ if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+ || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+ || (sec->sh_entsize & INIT_OFFSET_MASK))
+ return false;
+
+ return true;
+}
+
+/*
+ * We only allocate and copy the strings needed by the parts of symtab
+ * we keep. This is simple, but has the effect of making multiple
+ * copies of duplicates. We could be more sophisticated, see
+ * linux-kernel thread starting with
+ * <73defb5e4bca04a6431392cc341112b1@localhost>.
+ */
+void layout_symtab(struct module *mod, struct load_info *info)
+{
+ Elf_Shdr *symsect = info->sechdrs + info->index.sym;
+ Elf_Shdr *strsect = info->sechdrs + info->index.str;
+ const Elf_Sym *src;
+ unsigned int i, nsrc, ndst, strtab_size = 0;
+
+ /* Put symbol section at end of init part of module. */
+ symsect->sh_flags |= SHF_ALLOC;
+ symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect,
+ info->index.sym) | INIT_OFFSET_MASK;
+ pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
+
+ src = (void *)info->hdr + symsect->sh_offset;
+ nsrc = symsect->sh_size / sizeof(*src);
+
+ /* Compute total space required for the core symbols' strtab. */
+ for (ndst = i = 0; i < nsrc; i++) {
+ if (i == 0 || is_livepatch_module(mod) ||
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
+ info->index.pcpu)) {
+ strtab_size += strlen(&info->strtab[src[i].st_name])+1;
+ ndst++;
+ }
+ }
+
+ /* Append room for core symbols at end of core part. */
+ info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1);
+ info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
+ mod->core_layout.size += strtab_size;
+ info->core_typeoffs = mod->core_layout.size;
+ mod->core_layout.size += ndst * sizeof(char);
+ mod->core_layout.size = debug_align(mod->core_layout.size);
+
+ /* Put string table section at end of init part of module. */
+ strsect->sh_flags |= SHF_ALLOC;
+ strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
+ info->index.str) | INIT_OFFSET_MASK;
+ pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
+
+ /* We'll tack temporary mod_kallsyms on the end. */
+ mod->init_layout.size = ALIGN(mod->init_layout.size,
+ __alignof__(struct mod_kallsyms));
+ info->mod_kallsyms_init_off = mod->init_layout.size;
+ mod->init_layout.size += sizeof(struct mod_kallsyms);
+ info->init_typeoffs = mod->init_layout.size;
+ mod->init_layout.size += nsrc * sizeof(char);
+ mod->init_layout.size = debug_align(mod->init_layout.size);
+}
+
+/*
+ * We use the full symtab and strtab which layout_symtab arranged to
+ * be appended to the init section. Later we switch to the cut-down
+ * core-only ones.
+ */
+void add_kallsyms(struct module *mod, const struct load_info *info)
+{
+ unsigned int i, ndst;
+ const Elf_Sym *src;
+ Elf_Sym *dst;
+ char *s;
+ Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
+
+ /* Set up to point into init section. */
+ mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
+
+ mod->kallsyms->symtab = (void *)symsec->sh_addr;
+ mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+ /* Make sure we get permanent strtab: don't use info->strtab. */
+ mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
+ mod->kallsyms->typetab = mod->init_layout.base + info->init_typeoffs;
+
+ /*
+ * Now populate the cut down core kallsyms for after init
+ * and set types up while we still have access to sections.
+ */
+ mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
+ mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
+ mod->core_kallsyms.typetab = mod->core_layout.base + info->core_typeoffs;
+ src = mod->kallsyms->symtab;
+ for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
+ mod->kallsyms->typetab[i] = elf_type(src + i, info);
+ if (i == 0 || is_livepatch_module(mod) ||
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
+ info->index.pcpu)) {
+ mod->core_kallsyms.typetab[ndst] =
+ mod->kallsyms->typetab[i];
+ dst[ndst] = src[i];
+ dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+ s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
+ KSYM_NAME_LEN) + 1;
+ }
+ }
+ mod->core_kallsyms.num_symtab = ndst;
+}
+
+inline bool sect_empty(const Elf_Shdr *sect)
+{
+ return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
+}
+
+#ifdef CONFIG_STACKTRACE_BUILD_ID
+void init_build_id(struct module *mod, const struct load_info *info)
+{
+ const Elf_Shdr *sechdr;
+ unsigned int i;
+
+ for (i = 0; i < info->hdr->e_shnum; i++) {
+ sechdr = &info->sechdrs[i];
+ if (!sect_empty(sechdr) && sechdr->sh_type == SHT_NOTE &&
+ !build_id_parse_buf((void *)sechdr->sh_addr, mod->build_id,
+ sechdr->sh_size))
+ break;
+ }
+}
+#endif
+
+/*
+ * This ignores the intensely annoying "mapping symbols" found
+ * in ARM ELF files: $a, $t and $d.
+ */
+static inline int is_arm_mapping_symbol(const char *str)
+{
+ if (str[0] == '.' && str[1] == 'L')
+ return true;
+ return str[0] == '$' && strchr("axtd", str[1])
+ && (str[2] == '\0' || str[2] == '.');
+}
+
+static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, unsigned int symnum)
+{
+ return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
+/*
+ * Given a module and address, find the corresponding symbol and return its name
+ * while providing its size and offset if needed.
+ */
+const char *find_kallsyms_symbol(struct module *mod,
+ unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset)
+{
+ unsigned int i, best = 0;
+ unsigned long nextval, bestval;
+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+
+	/* At worst, next value is at end of module */
+ if (within_module_init(addr, mod))
+ nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size;
+ else
+ nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size;
+
+ bestval = kallsyms_symbol_value(&kallsyms->symtab[best]);
+
+ /*
+ * Scan for closest preceding symbol, and next symbol. (ELF
+ * starts real symbols at 1).
+ */
+ for (i = 1; i < kallsyms->num_symtab; i++) {
+ const Elf_Sym *sym = &kallsyms->symtab[i];
+ unsigned long thisval = kallsyms_symbol_value(sym);
+
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+
+ /*
+ * We ignore unnamed symbols: they're uninformative
+ * and inserted at a whim.
+ */
+ if (*kallsyms_symbol_name(kallsyms, i) == '\0'
+ || is_arm_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
+ continue;
+
+ if (thisval <= addr && thisval > bestval) {
+ best = i;
+ bestval = thisval;
+ }
+ if (thisval > addr && thisval < nextval)
+ nextval = thisval;
+ }
+
+ if (!best)
+ return NULL;
+
+ if (size)
+ *size = nextval - bestval;
+ if (offset)
+ *offset = addr - bestval;
+
+ return kallsyms_symbol_name(kallsyms, best);
+}
+
+void * __weak dereference_module_function_descriptor(struct module *mod,
+ void *ptr)
+{
+ return ptr;
+}
+
+/*
+ * For kallsyms to ask for address resolution. NULL means not found. Careful
+ * not to lock to avoid deadlock on oopses, simply disable preemption.
+ */
+const char *module_address_lookup(unsigned long addr,
+ unsigned long *size,
+ unsigned long *offset,
+ char **modname,
+ const unsigned char **modbuildid,
+ char *namebuf)
+{
+ const char *ret = NULL;
+ struct module *mod;
+
+ preempt_disable();
+ mod = __module_address(addr);
+ if (mod) {
+ if (modname)
+ *modname = mod->name;
+ if (modbuildid) {
+#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
+ *modbuildid = mod->build_id;
+#else
+ *modbuildid = NULL;
+#endif
+ }
+
+ ret = find_kallsyms_symbol(mod, addr, size, offset);
+ }
+ /* Make a copy in here where it's safe */
+ if (ret) {
+ strncpy(namebuf, ret, KSYM_NAME_LEN - 1);
+ ret = namebuf;
+ }
+ preempt_enable();
+
+ return ret;
+}
+
+int lookup_module_symbol_name(unsigned long addr, char *symname)
+{
+ struct module *mod;
+
+ preempt_disable();
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ if (within_module(addr, mod)) {
+ const char *sym;
+
+ sym = find_kallsyms_symbol(mod, addr, NULL, NULL);
+ if (!sym)
+ goto out;
+
+ strlcpy(symname, sym, KSYM_NAME_LEN);
+ preempt_enable();
+ return 0;
+ }
+ }
+out:
+ preempt_enable();
+ return -ERANGE;
+}
+
+int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
+ unsigned long *offset, char *modname, char *name)
+{
+ struct module *mod;
+
+ preempt_disable();
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ if (within_module(addr, mod)) {
+ const char *sym;
+
+ sym = find_kallsyms_symbol(mod, addr, size, offset);
+ if (!sym)
+ goto out;
+ if (modname)
+ strlcpy(modname, mod->name, MODULE_NAME_LEN);
+ if (name)
+ strlcpy(name, sym, KSYM_NAME_LEN);
+ preempt_enable();
+ return 0;
+ }
+ }
+out:
+ preempt_enable();
+ return -ERANGE;
+}
+
+int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
+ char *name, char *module_name, int *exported)
+{
+ struct module *mod;
+
+ preempt_disable();
+ list_for_each_entry_rcu(mod, &modules, list) {
+ struct mod_kallsyms *kallsyms;
+
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ kallsyms = rcu_dereference_sched(mod->kallsyms);
+ if (symnum < kallsyms->num_symtab) {
+ const Elf_Sym *sym = &kallsyms->symtab[symnum];
+
+ *value = kallsyms_symbol_value(sym);
+ *type = kallsyms->typetab[symnum];
+ strlcpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN);
+ strlcpy(module_name, mod->name, MODULE_NAME_LEN);
+ *exported = is_exported(name, *value, mod);
+ preempt_enable();
+ return 0;
+ }
+ symnum -= kallsyms->num_symtab;
+ }
+ preempt_enable();
+ return -ERANGE;
+}
+
+/* Given a module and name of symbol, find and return the symbol's value */
+static unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
+{
+ unsigned int i;
+ struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
+
+ for (i = 0; i < kallsyms->num_symtab; i++) {
+ const Elf_Sym *sym = &kallsyms->symtab[i];
+
+ if (strcmp(name, kallsyms_symbol_name(kallsyms, i)) == 0 &&
+ sym->st_shndx != SHN_UNDEF)
+ return kallsyms_symbol_value(sym);
+ }
+ return 0;
+}
+
+/* Look for this name: can be of form module:name. */
+unsigned long module_kallsyms_lookup_name(const char *name)
+{
+ struct module *mod;
+ char *colon;
+ unsigned long ret = 0;
+
+ /* Don't lock: we're in enough trouble already. */
+ preempt_disable();
+ if ((colon = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
+ if ((mod = find_module_all(name, colon - name, false)) != NULL)
+ ret = find_kallsyms_symbol_value(mod, colon+1);
+ } else {
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ if ((ret = find_kallsyms_symbol_value(mod, name)) != 0)
+ break;
+ }
+ }
+ preempt_enable();
+ return ret;
+}
+
+#ifdef CONFIG_LIVEPATCH
+int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+ struct module *, unsigned long),
+ void *data)
+{
+ struct module *mod;
+ unsigned int i;
+ int ret = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list) {
+ /* We hold module_mutex: no need for rcu_dereference_sched */
+ struct mod_kallsyms *kallsyms = mod->kallsyms;
+
+ if (mod->state == MODULE_STATE_UNFORMED)
+ continue;
+ for (i = 0; i < kallsyms->num_symtab; i++) {
+ const Elf_Sym *sym = &kallsyms->symtab[i];
+
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+
+ ret = fn(data, kallsyms_symbol_name(kallsyms, i),
+ mod, kallsyms_symbol_value(sym));
+ if (ret != 0)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&module_mutex);
+ return ret;
+}
+#endif /* CONFIG_LIVEPATCH */
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 672a977b1320..9813e1672d8c 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -275,7 +275,7 @@ static bool check_exported_symbol(const struct symsearch *syms,
return true;
}

-static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
{
#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
return (unsigned long)offset_to_ptr(&sym->value_offset);
@@ -304,7 +304,7 @@ static const char *kernel_symbol_namespace(const struct kernel_symbol *sym)
#endif
}

-static int cmp_name(const void *name, const void *sym)
+int cmp_name(const void *name, const void *sym)
{
return strcmp(name, kernel_symbol_name(sym));
}
@@ -374,7 +374,7 @@ static bool find_symbol(struct find_symbol_arg *fsa)
* Search for module by name: must hold module_mutex (or preempt disabled
* for read-only access).
*/
-static struct module *find_module_all(const char *name, size_t len,
+struct module *find_module_all(const char *name, size_t len,
bool even_unformed)
{
struct module *mod;
@@ -1282,13 +1282,6 @@ resolve_symbol_wait(struct module *mod,
return ksym;
}

-#ifdef CONFIG_KALLSYMS
-static inline bool sect_empty(const Elf_Shdr *sect)
-{
- return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
-}
-#endif
-
/*
* /sys/module/foo/sections stuff
* J. Corbet <[email protected]>
@@ -2013,7 +2006,7 @@ unsigned int __weak arch_mod_section_prepend(struct module *mod,
}

/* Update size with this section: return offset. */
-static long get_offset(struct module *mod, unsigned int *size,
+long get_offset(struct module *mod, unsigned int *size,
Elf_Shdr *sechdr, unsigned int section)
{
long ret;
@@ -2215,228 +2208,6 @@ static void free_modinfo(struct module *mod)
}
}

-#ifdef CONFIG_KALLSYMS
-
-/* Lookup exported symbol in given range of kernel_symbols */
-static const struct kernel_symbol *lookup_exported_symbol(const char *name,
- const struct kernel_symbol *start,
- const struct kernel_symbol *stop)
-{
- return bsearch(name, start, stop - start,
- sizeof(struct kernel_symbol), cmp_name);
-}
-
-static int is_exported(const char *name, unsigned long value,
- const struct module *mod)
-{
- const struct kernel_symbol *ks;
- if (!mod)
- ks = lookup_exported_symbol(name, __start___ksymtab, __stop___ksymtab);
- else
- ks = lookup_exported_symbol(name, mod->syms, mod->syms + mod->num_syms);
-
- return ks != NULL && kernel_symbol_value(ks) == value;
-}
-
-/* As per nm */
-static char elf_type(const Elf_Sym *sym, const struct load_info *info)
-{
- const Elf_Shdr *sechdrs = info->sechdrs;
-
- if (ELF_ST_BIND(sym->st_info) == STB_WEAK) {
- if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT)
- return 'v';
- else
- return 'w';
- }
- if (sym->st_shndx == SHN_UNDEF)
- return 'U';
- if (sym->st_shndx == SHN_ABS || sym->st_shndx == info->index.pcpu)
- return 'a';
- if (sym->st_shndx >= SHN_LORESERVE)
- return '?';
- if (sechdrs[sym->st_shndx].sh_flags & SHF_EXECINSTR)
- return 't';
- if (sechdrs[sym->st_shndx].sh_flags & SHF_ALLOC
- && sechdrs[sym->st_shndx].sh_type != SHT_NOBITS) {
- if (!(sechdrs[sym->st_shndx].sh_flags & SHF_WRITE))
- return 'r';
- else if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
- return 'g';
- else
- return 'd';
- }
- if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
- if (sechdrs[sym->st_shndx].sh_flags & ARCH_SHF_SMALL)
- return 's';
- else
- return 'b';
- }
- if (strstarts(info->secstrings + sechdrs[sym->st_shndx].sh_name,
- ".debug")) {
- return 'n';
- }
- return '?';
-}
-
-static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
- unsigned int shnum, unsigned int pcpundx)
-{
- const Elf_Shdr *sec;
-
- if (src->st_shndx == SHN_UNDEF
- || src->st_shndx >= shnum
- || !src->st_name)
- return false;
-
-#ifdef CONFIG_KALLSYMS_ALL
- if (src->st_shndx == pcpundx)
- return true;
-#endif
-
- sec = sechdrs + src->st_shndx;
- if (!(sec->sh_flags & SHF_ALLOC)
-#ifndef CONFIG_KALLSYMS_ALL
- || !(sec->sh_flags & SHF_EXECINSTR)
-#endif
- || (sec->sh_entsize & INIT_OFFSET_MASK))
- return false;
-
- return true;
-}
-
-/*
- * We only allocate and copy the strings needed by the parts of symtab
- * we keep. This is simple, but has the effect of making multiple
- * copies of duplicates. We could be more sophisticated, see
- * linux-kernel thread starting with
- * <73defb5e4bca04a6431392cc341112b1@localhost>.
- */
-static void layout_symtab(struct module *mod, struct load_info *info)
-{
- Elf_Shdr *symsect = info->sechdrs + info->index.sym;
- Elf_Shdr *strsect = info->sechdrs + info->index.str;
- const Elf_Sym *src;
- unsigned int i, nsrc, ndst, strtab_size = 0;
-
- /* Put symbol section at end of init part of module. */
- symsect->sh_flags |= SHF_ALLOC;
- symsect->sh_entsize = get_offset(mod, &mod->init_layout.size, symsect,
- info->index.sym) | INIT_OFFSET_MASK;
- pr_debug("\t%s\n", info->secstrings + symsect->sh_name);
-
- src = (void *)info->hdr + symsect->sh_offset;
- nsrc = symsect->sh_size / sizeof(*src);
-
- /* Compute total space required for the core symbols' strtab. */
- for (ndst = i = 0; i < nsrc; i++) {
- if (i == 0 || is_livepatch_module(mod) ||
- is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
- info->index.pcpu)) {
- strtab_size += strlen(&info->strtab[src[i].st_name])+1;
- ndst++;
- }
- }
-
- /* Append room for core symbols at end of core part. */
- info->symoffs = ALIGN(mod->core_layout.size, symsect->sh_addralign ?: 1);
- info->stroffs = mod->core_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
- mod->core_layout.size += strtab_size;
- info->core_typeoffs = mod->core_layout.size;
- mod->core_layout.size += ndst * sizeof(char);
- mod->core_layout.size = debug_align(mod->core_layout.size);
-
- /* Put string table section at end of init part of module. */
- strsect->sh_flags |= SHF_ALLOC;
- strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
- info->index.str) | INIT_OFFSET_MASK;
- pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
-
- /* We'll tack temporary mod_kallsyms on the end. */
- mod->init_layout.size = ALIGN(mod->init_layout.size,
- __alignof__(struct mod_kallsyms));
- info->mod_kallsyms_init_off = mod->init_layout.size;
- mod->init_layout.size += sizeof(struct mod_kallsyms);
- info->init_typeoffs = mod->init_layout.size;
- mod->init_layout.size += nsrc * sizeof(char);
- mod->init_layout.size = debug_align(mod->init_layout.size);
-}
-
-/*
- * We use the full symtab and strtab which layout_symtab arranged to
- * be appended to the init section. Later we switch to the cut-down
- * core-only ones.
- */
-static void add_kallsyms(struct module *mod, const struct load_info *info)
-{
- unsigned int i, ndst;
- const Elf_Sym *src;
- Elf_Sym *dst;
- char *s;
- Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
-
- /* Set up to point into init section. */
- mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
-
- mod->kallsyms->symtab = (void *)symsec->sh_addr;
- mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
- /* Make sure we get permanent strtab: don't use info->strtab. */
- mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
- mod->kallsyms->typetab = mod->init_layout.base + info->init_typeoffs;
-
- /*
- * Now populate the cut down core kallsyms for after init
- * and set types up while we still have access to sections.
- */
- mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
- mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
- mod->core_kallsyms.typetab = mod->core_layout.base + info->core_typeoffs;
- src = mod->kallsyms->symtab;
- for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
- mod->kallsyms->typetab[i] = elf_type(src + i, info);
- if (i == 0 || is_livepatch_module(mod) ||
- is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
- info->index.pcpu)) {
- mod->core_kallsyms.typetab[ndst] =
- mod->kallsyms->typetab[i];
- dst[ndst] = src[i];
- dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
- s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
- KSYM_NAME_LEN) + 1;
- }
- }
- mod->core_kallsyms.num_symtab = ndst;
-}
-#else
-static inline void layout_symtab(struct module *mod, struct load_info *info)
-{
-}
-
-static void add_kallsyms(struct module *mod, const struct load_info *info)
-{
-}
-#endif /* CONFIG_KALLSYMS */
-
-#if IS_ENABLED(CONFIG_KALLSYMS) && IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
-static void init_build_id(struct module *mod, const struct load_info *info)
-{
- const Elf_Shdr *sechdr;
- unsigned int i;
-
- for (i = 0; i < info->hdr->e_shnum; i++) {
- sechdr = &info->sechdrs[i];
- if (!sect_empty(sechdr) && sechdr->sh_type == SHT_NOTE &&
- !build_id_parse_buf((void *)sechdr->sh_addr, mod->build_id,
- sechdr->sh_size))
- break;
- }
-}
-#else
-static void init_build_id(struct module *mod, const struct load_info *info)
-{
-}
-#endif
-
static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsigned int num)
{
if (!debug)
@@ -3702,285 +3473,6 @@ static inline int within(unsigned long addr, void *start, unsigned long size)
return ((void *)addr >= start && (void *)addr < start + size);
}

-#ifdef CONFIG_KALLSYMS
-/*
- * This ignores the intensely annoying "mapping symbols" found
- * in ARM ELF files: $a, $t and $d.
- */
-static inline int is_arm_mapping_symbol(const char *str)
-{
- if (str[0] == '.' && str[1] == 'L')
- return true;
- return str[0] == '$' && strchr("axtd", str[1])
- && (str[2] == '\0' || str[2] == '.');
-}
-
-static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, unsigned int symnum)
-{
- return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
-}
-
-/*
- * Given a module and address, find the corresponding symbol and return its name
- * while providing its size and offset if needed.
- */
-static const char *find_kallsyms_symbol(struct module *mod,
- unsigned long addr,
- unsigned long *size,
- unsigned long *offset)
-{
- unsigned int i, best = 0;
- unsigned long nextval, bestval;
- struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
-
- /* At worse, next value is at end of module */
- if (within_module_init(addr, mod))
- nextval = (unsigned long)mod->init_layout.base+mod->init_layout.text_size;
- else
- nextval = (unsigned long)mod->core_layout.base+mod->core_layout.text_size;
-
- bestval = kallsyms_symbol_value(&kallsyms->symtab[best]);
-
- /*
- * Scan for closest preceding symbol, and next symbol. (ELF
- * starts real symbols at 1).
- */
- for (i = 1; i < kallsyms->num_symtab; i++) {
- const Elf_Sym *sym = &kallsyms->symtab[i];
- unsigned long thisval = kallsyms_symbol_value(sym);
-
- if (sym->st_shndx == SHN_UNDEF)
- continue;
-
- /*
- * We ignore unnamed symbols: they're uninformative
- * and inserted at a whim.
- */
- if (*kallsyms_symbol_name(kallsyms, i) == '\0'
- || is_arm_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
- continue;
-
- if (thisval <= addr && thisval > bestval) {
- best = i;
- bestval = thisval;
- }
- if (thisval > addr && thisval < nextval)
- nextval = thisval;
- }
-
- if (!best)
- return NULL;
-
- if (size)
- *size = nextval - bestval;
- if (offset)
- *offset = addr - bestval;
-
- return kallsyms_symbol_name(kallsyms, best);
-}
-
-void * __weak dereference_module_function_descriptor(struct module *mod,
- void *ptr)
-{
- return ptr;
-}
-
-/*
- * For kallsyms to ask for address resolution. NULL means not found. Careful
- * not to lock to avoid deadlock on oopses, simply disable preemption.
- */
-const char *module_address_lookup(unsigned long addr,
- unsigned long *size,
- unsigned long *offset,
- char **modname,
- const unsigned char **modbuildid,
- char *namebuf)
-{
- const char *ret = NULL;
- struct module *mod;
-
- preempt_disable();
- mod = __module_address(addr);
- if (mod) {
- if (modname)
- *modname = mod->name;
- if (modbuildid) {
-#if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
- *modbuildid = mod->build_id;
-#else
- *modbuildid = NULL;
-#endif
- }
-
- ret = find_kallsyms_symbol(mod, addr, size, offset);
- }
- /* Make a copy in here where it's safe */
- if (ret) {
- strncpy(namebuf, ret, KSYM_NAME_LEN - 1);
- ret = namebuf;
- }
- preempt_enable();
-
- return ret;
-}
-
-int lookup_module_symbol_name(unsigned long addr, char *symname)
-{
- struct module *mod;
-
- preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod)) {
- const char *sym;
-
- sym = find_kallsyms_symbol(mod, addr, NULL, NULL);
- if (!sym)
- goto out;
-
- strlcpy(symname, sym, KSYM_NAME_LEN);
- preempt_enable();
- return 0;
- }
- }
-out:
- preempt_enable();
- return -ERANGE;
-}
-
-int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
- unsigned long *offset, char *modname, char *name)
-{
- struct module *mod;
-
- preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod)) {
- const char *sym;
-
- sym = find_kallsyms_symbol(mod, addr, size, offset);
- if (!sym)
- goto out;
- if (modname)
- strlcpy(modname, mod->name, MODULE_NAME_LEN);
- if (name)
- strlcpy(name, sym, KSYM_NAME_LEN);
- preempt_enable();
- return 0;
- }
- }
-out:
- preempt_enable();
- return -ERANGE;
-}
-
-int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
- char *name, char *module_name, int *exported)
-{
- struct module *mod;
-
- preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- struct mod_kallsyms *kallsyms;
-
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- kallsyms = rcu_dereference_sched(mod->kallsyms);
- if (symnum < kallsyms->num_symtab) {
- const Elf_Sym *sym = &kallsyms->symtab[symnum];
-
- *value = kallsyms_symbol_value(sym);
- *type = kallsyms->typetab[symnum];
- strlcpy(name, kallsyms_symbol_name(kallsyms, symnum), KSYM_NAME_LEN);
- strlcpy(module_name, mod->name, MODULE_NAME_LEN);
- *exported = is_exported(name, *value, mod);
- preempt_enable();
- return 0;
- }
- symnum -= kallsyms->num_symtab;
- }
- preempt_enable();
- return -ERANGE;
-}
-
-/* Given a module and name of symbol, find and return the symbol's value */
-static unsigned long find_kallsyms_symbol_value(struct module *mod, const char *name)
-{
- unsigned int i;
- struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
-
- for (i = 0; i < kallsyms->num_symtab; i++) {
- const Elf_Sym *sym = &kallsyms->symtab[i];
-
- if (strcmp(name, kallsyms_symbol_name(kallsyms, i)) == 0 &&
- sym->st_shndx != SHN_UNDEF)
- return kallsyms_symbol_value(sym);
- }
- return 0;
-}
-
-/* Look for this name: can be of form module:name. */
-unsigned long module_kallsyms_lookup_name(const char *name)
-{
- struct module *mod;
- char *colon;
- unsigned long ret = 0;
-
- /* Don't lock: we're in enough trouble already. */
- preempt_disable();
- if ((colon = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
- if ((mod = find_module_all(name, colon - name, false)) != NULL)
- ret = find_kallsyms_symbol_value(mod, colon+1);
- } else {
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if ((ret = find_kallsyms_symbol_value(mod, name)) != 0)
- break;
- }
- }
- preempt_enable();
- return ret;
-}
-
-#ifdef CONFIG_LIVEPATCH
-int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
- struct module *, unsigned long),
- void *data)
-{
- struct module *mod;
- unsigned int i;
- int ret = 0;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(mod, &modules, list) {
- /* We hold module_mutex: no need for rcu_dereference_sched */
- struct mod_kallsyms *kallsyms = mod->kallsyms;
-
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- for (i = 0; i < kallsyms->num_symtab; i++) {
- const Elf_Sym *sym = &kallsyms->symtab[i];
-
- if (sym->st_shndx == SHN_UNDEF)
- continue;
-
- ret = fn(data, kallsyms_symbol_name(kallsyms, i),
- mod, kallsyms_symbol_value(sym));
- if (ret != 0)
- goto out;
- }
- }
-out:
- mutex_unlock(&module_mutex);
- return ret;
-}
-#endif /* CONFIG_LIVEPATCH */
-#endif /* CONFIG_KALLSYMS */
-
static void cfi_init(struct module *mod)
{
#ifdef CONFIG_CFI_CLANG
--
2.31.1


2021-12-29 08:59:04

by Aaron Tomlin

[permalink] [raw]
Subject: Re: [RFC PATCH 00/12] module: core code clean up

On Tue 2021-12-28 21:30 +0000, Aaron Tomlin wrote:
> Hi Luis, Allen,

Adding Allen.

--
Aaron Tomlin


2022-01-12 16:13:24

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH 05/12] module: Move arch strict rwx support to a separate file

On Tue, Dec 28, 2021 at 09:30:34PM +0000, Aaron Tomlin wrote:
> diff --git a/kernel/module/Makefile b/kernel/module/Makefile
> index 9d593362156d..95fad95a0549 100644
> --- a/kernel/module/Makefile
> +++ b/kernel/module/Makefile
> @@ -8,3 +8,4 @@ obj-$(CONFIG_MODULE_SIG) += signing.o
> obj-$(CONFIG_MODULE_SIG_FORMAT) += signature.o
> obj-$(CONFIG_LIVEPATCH) += livepatch.o
> obj-$(CONFIG_MODULES_TREE_LOOKUP) += tree_lookup.o
> +obj-$(CONFIG_ARCH_HAS_STRICT_MODULE_RWX) += arch_strict_rwx.o
> diff --git a/kernel/module/arch_strict_rwx.c b/kernel/module/arch_strict_rwx.c
> new file mode 100644
> index 000000000000..68e970671ec5
> --- /dev/null
> +++ b/kernel/module/arch_strict_rwx.c
> @@ -0,0 +1,44 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * kernel/module/arch_strict_rwx.c - module arch strict rwx

No need to repeat the name of the file; a simple:

+ * module arch strict rwx

Would do it.

Please get 0day to test your git tree as you work on it, and before
posting patches ensure they get tested by 0day, so we run into fewer
issues once merged into modules-next.

Luis

2022-01-12 16:20:22

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH 11/12] module: Move sysfs support into a separate file

On Tue, Dec 28, 2021 at 09:30:40PM +0000, Aaron Tomlin wrote:
> diff --git a/kernel/module/main.c b/kernel/module/main.c
> index 2da580c7b069..354fb2697188 100644
> --- a/kernel/module/main.c
> +++ b/kernel/module/main.c
> @@ -1017,7 +1015,7 @@ static ssize_t show_taint(struct module_attribute *mattr,
> static struct module_attribute modinfo_taint =
> __ATTR(taint, 0444, show_taint, NULL);
>
> -static struct module_attribute *modinfo_attrs[] = {
> +struct module_attribute *modinfo_attrs[] = {
> &module_uevent,
> &modinfo_version,
> &modinfo_srcversion,
> @@ -1031,6 +1029,8 @@ static struct module_attribute *modinfo_attrs[] = {
> NULL,
> };
>
> +size_t modinfo_attrs_count = ARRAY_SIZE(modinfo_attrs);
> +
> static const char vermagic[] = VERMAGIC_STRING;
>
> static int try_to_force_load(struct module *mod, const char *reason)

Can't modinfo_attrs and modinfo_attrs_count be left static and moved to
the new file?

Luis

2022-01-12 16:21:29

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [RFC PATCH 00/12] module: core code clean up

On Tue, Dec 28, 2021 at 09:30:29PM +0000, Aaron Tomlin wrote:
> Hi Luis, Allen,
>
> I had some free time so decided to make a quick start.
> There is more outstanding; however, I wanted to share what
> was accomplished thus far. Unfortunately, nothing has been
> thoroughly tested yet. Please let me know your thoughts.
>

This is looking good!

Luis