Hi,
This series implements the tracepoint events on modules.
This version separates a patch for tracepoint subsystem from
fprobe-event patch, and adds a selftests for tracepoint
events on modules.
Thank you,
---
Masami Hiramatsu (Google) (3):
tracepoint: Support iterating over tracepoints on modules
tracing/fprobe: Support raw tracepoint events on modules
sefltests/tracing: Add a test for tracepoint events on modules
include/linux/tracepoint.h | 7 +++
kernel/trace/trace_fprobe.c | 46 +++++++++++++++++---
kernel/tracepoint.c | 16 +++++++
tools/testing/selftests/ftrace/config | 1
.../test.d/dynevent/add_remove_tprobe_module.tc | 34 +++++++++++++++
5 files changed, 96 insertions(+), 8 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
--
Masami Hiramatsu (Google) <[email protected]>
From: Masami Hiramatsu (Google) <[email protected]>
Support raw tracepoint event on module by fprobe events.
Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
the tracepoints on modules are not handled. Thus if user specified a
tracepoint on a module, it shows an error.
This adds new for_each_module_tracepoint() API to tracepoint subsystem,
and uses it to find tracepoints on modules.
Reported-by: don <[email protected]>
Closes: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
Changes in v2:
- Fix build errors with CONFIG_MODULES=y.
---
kernel/trace/trace_fprobe.c | 46 ++++++++++++++++++++++++++++++++++++-------
1 file changed, 38 insertions(+), 8 deletions(-)
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..1d8a983e1edc 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *event,
const char *symbol,
struct tracepoint *tpoint,
+ struct module *mod,
int maxactive,
int nargs, bool is_return)
{
@@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
tf->fp.entry_handler = fentry_dispatcher;
tf->tpoint = tpoint;
+ tf->mod = mod;
tf->fp.nr_maxactive = maxactive;
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
@@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
+ struct module *mod;
};
+static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
+{
+ struct __find_tracepoint_cb_data *data = priv;
+
+ if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
+ data->tpoint = tp;
+ data->mod = __module_text_address((unsigned long)tp->probestub);
+ if (!try_module_get(data->mod)) {
+ data->tpoint = NULL;
+ data->mod = NULL;
+ }
+ }
+}
+
static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
{
struct __find_tracepoint_cb_data *data = priv;
@@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
data->tpoint = tp;
}
-static struct tracepoint *find_tracepoint(const char *tp_name)
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is in a module,
+ * this increments the module refcount to prevent unloading until the
+ * trace_fprobe is registered to the list. After registering the trace_fprobe
+ * on the trace_fprobe list, the module refcount is decremented because
+ * tracepoint_probe_module_cb will handle it.
+ */
+static struct tracepoint *find_tracepoint(const char *tp_name,
+ struct module **tp_mod)
{
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
+ .mod = NULL,
};
for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
+ if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
+ for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
+ *tp_mod = data.mod;
+ }
+
return data.tpoint;
}
@@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
char abuf[MAX_BTF_ARGS_LEN];
char *dbuf = NULL;
bool is_tracepoint = false;
+ struct module *tp_mod = NULL;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
@@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
if (is_tracepoint) {
ctx.flags |= TPARG_FL_TPOINT;
- tpoint = find_tracepoint(symbol);
+ tpoint = find_tracepoint(symbol, &tp_mod);
if (!tpoint) {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
@@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
goto out;
/* setup a probe */
- tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
- argc, is_return);
+ tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
+ maxactive, argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */
@@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[])
goto out; /* We know tf is not allocated */
}
- if (is_tracepoint)
- tf->mod = __module_text_address(
- (unsigned long)tf->tpoint->probestub);
-
/* parse arguments */
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
trace_probe_log_set_index(i + 2);
@@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
}
out:
+ if (tp_mod)
+ module_put(tp_mod);
traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
kfree(new_argv);
From: Masami Hiramatsu (Google) <[email protected]>
Add for_each_module_tracepoint() for iterating over tracepoints
on modules. This is similar to the for_each_kernel_tracepoint()
but only for the tracepoints on modules (not including kernel
built-in tracepoints).
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
include/linux/tracepoint.h | 7 +++++++
kernel/tracepoint.c | 16 ++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 689b6d71590e..46e6a5e759fd 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -65,6 +65,8 @@ struct tp_module {
bool trace_module_has_bad_taint(struct module *mod);
extern int register_tracepoint_module_notifier(struct notifier_block *nb);
extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+ void *priv);
#else
static inline bool trace_module_has_bad_taint(struct module *mod)
{
@@ -80,6 +82,11 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
return 0;
}
+static inline
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *, void *),
+ void *priv)
+{
+}
#endif /* CONFIG_MODULES */
/*
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 8d1507dd0724..b9b90dc46ab1 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -735,6 +735,22 @@ static __init int init_tracepoints(void)
return ret;
}
__initcall(init_tracepoints);
+
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
+ void *priv)
+{
+ struct tp_module *tp_mod;
+ struct module *mod;
+
+ mutex_lock(&tracepoint_module_list_mutex);
+ list_for_each_entry(tp_mod, &tracepoint_module_list, list) {
+ mod = tp_mod->mod;
+ for_each_tracepoint_range(mod->tracepoints_ptrs,
+ mod->tracepoints_ptrs + mod->num_tracepoints,
+ fct, priv);
+ }
+ mutex_unlock(&tracepoint_module_list_mutex);
+}
#endif /* CONFIG_MODULES */
/**
From: Masami Hiramatsu (Google) <[email protected]>
Add a test case for tracepoint events on modules. This checks if it can add
and remove the events correctly.
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
---
tools/testing/selftests/ftrace/config | 1 +
.../test.d/dynevent/add_remove_tprobe_module.tc | 34 ++++++++++++++++++++
2 files changed, 35 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index 048a312abf40..544de0db5f58 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -20,6 +20,7 @@ CONFIG_PREEMPT_TRACER=y
CONFIG_PROBE_EVENTS_BTF_ARGS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_TRACE_EVENTS=m
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_SCHED_TRACER=y
CONFIG_STACK_TRACER=y
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
new file mode 100644
index 000000000000..2caed9454caa
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc
@@ -0,0 +1,34 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events on module
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+ echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m"
+ exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=foo_bar
+TRACEPOINT2=foo_bar_with_cond
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote:
> From: Masami Hiramatsu (Google) <[email protected]>
>
> Support raw tracepoint event on module by fprobe events.
> Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
> the tracepoints on modules are not handled. Thus if user specified a
> tracepoint on a module, it shows an error.
> This adds new for_each_module_tracepoint() API to tracepoint subsystem,
> and uses it to find tracepoints on modules.
Hi Masami,
Why prevent module unload when a fprobe tracepoint is attached to a
module ? This changes the kernel's behavior significantly just for the
sake of instrumentation.
As an alternative, LTTng-modules attach/detach to/from modules with the
coming/going notifiers, so the instrumentation gets removed when a
module is unloaded rather than preventing its unload by holding a module
reference count. I would recommend a similar approach for fprobe.
Thanks,
Mathieu
>
> Reported-by: don <[email protected]>
> Closes: https://lore.kernel.org/all/[email protected]/
> Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
> ---
> Changes in v2:
> - Fix build errors with CONFIG_MODULES=y.
> ---
> kernel/trace/trace_fprobe.c | 46 ++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 38 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> index 62e6a8f4aae9..1d8a983e1edc 100644
> --- a/kernel/trace/trace_fprobe.c
> +++ b/kernel/trace/trace_fprobe.c
> @@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
> const char *event,
> const char *symbol,
> struct tracepoint *tpoint,
> + struct module *mod,
> int maxactive,
> int nargs, bool is_return)
> {
> @@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
> tf->fp.entry_handler = fentry_dispatcher;
>
> tf->tpoint = tpoint;
> + tf->mod = mod;
> tf->fp.nr_maxactive = maxactive;
>
> ret = trace_probe_init(&tf->tp, event, group, false, nargs);
> @@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
> struct __find_tracepoint_cb_data {
> const char *tp_name;
> struct tracepoint *tpoint;
> + struct module *mod;
> };
>
> +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
> +{
> + struct __find_tracepoint_cb_data *data = priv;
> +
> + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
> + data->tpoint = tp;
> + data->mod = __module_text_address((unsigned long)tp->probestub);
> + if (!try_module_get(data->mod)) {
> + data->tpoint = NULL;
> + data->mod = NULL;
> + }
> + }
> +}
> +
> static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> {
> struct __find_tracepoint_cb_data *data = priv;
> @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> data->tpoint = tp;
> }
>
> -static struct tracepoint *find_tracepoint(const char *tp_name)
> +/*
> + * Find a tracepoint from kernel and module. If the tracepoint is in a module,
> + * this increments the module refcount to prevent unloading until the
> + * trace_fprobe is registered to the list. After registering the trace_fprobe
> + * on the trace_fprobe list, the module refcount is decremented because
> + * tracepoint_probe_module_cb will handle it.
> + */
> +static struct tracepoint *find_tracepoint(const char *tp_name,
> + struct module **tp_mod)
> {
> struct __find_tracepoint_cb_data data = {
> .tp_name = tp_name,
> + .mod = NULL,
> };
>
> for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
>
> + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
> + for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
> + *tp_mod = data.mod;
> + }
> +
> return data.tpoint;
> }
>
> @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> char abuf[MAX_BTF_ARGS_LEN];
> char *dbuf = NULL;
> bool is_tracepoint = false;
> + struct module *tp_mod = NULL;
> struct tracepoint *tpoint = NULL;
> struct traceprobe_parse_context ctx = {
> .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
> @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
>
> if (is_tracepoint) {
> ctx.flags |= TPARG_FL_TPOINT;
> - tpoint = find_tracepoint(symbol);
> + tpoint = find_tracepoint(symbol, &tp_mod);
> if (!tpoint) {
> trace_probe_log_set_index(1);
> trace_probe_log_err(0, NO_TRACEPOINT);
> @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> goto out;
>
> /* setup a probe */
> - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
> - argc, is_return);
> + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
> + maxactive, argc, is_return);
> if (IS_ERR(tf)) {
> ret = PTR_ERR(tf);
> /* This must return -ENOMEM, else there is a bug */
> @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> goto out; /* We know tf is not allocated */
> }
>
> - if (is_tracepoint)
> - tf->mod = __module_text_address(
> - (unsigned long)tf->tpoint->probestub);
> -
> /* parse arguments */
> for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
> trace_probe_log_set_index(i + 2);
> @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> }
>
> out:
> + if (tp_mod)
> + module_put(tp_mod);
> traceprobe_finish_parse(&ctx);
> trace_probe_log_clear();
> kfree(new_argv);
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Mon, 3 Jun 2024 15:50:55 -0400
Mathieu Desnoyers <[email protected]> wrote:
> Hi Masami,
>
> Why prevent module unload when a fprobe tracepoint is attached to a
> module ? This changes the kernel's behavior significantly just for the
> sake of instrumentation.
>
> As an alternative, LTTng-modules attach/detach to/from modules with the
> coming/going notifiers, so the instrumentation gets removed when a
> module is unloaded rather than preventing its unload by holding a module
> reference count. I would recommend a similar approach for fprobe.
I think one major difference between fprobes and LTTng module attachment,
is that fprobes is an internal mechanism used by other utilities (like BPF).
You could have a program loaded that expects an fprobe to succeed, and may
have undefined behavior if the fprobe suddenly disappears. That is, we
don't know what is depending on that fprobe to simply disable it on module
unload.
-- Steve
On Mon, 3 Jun 2024 15:50:55 -0400
Mathieu Desnoyers <[email protected]> wrote:
> On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote:
> > From: Masami Hiramatsu (Google) <[email protected]>
> >
> > Support raw tracepoint event on module by fprobe events.
> > Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
> > the tracepoints on modules are not handled. Thus if user specified a
> > tracepoint on a module, it shows an error.
> > This adds new for_each_module_tracepoint() API to tracepoint subsystem,
> > and uses it to find tracepoints on modules.
>
> Hi Masami,
>
> Why prevent module unload when a fprobe tracepoint is attached to a
> module ? This changes the kernel's behavior significantly just for the
> sake of instrumentation.
I don't prevent module unloading all the time, just before registering
tracepoint handler (something like booking a ticket :-) ).
See the last hunk of this patch, it puts the module before exiting
__trace_fprobe_create().
>
> As an alternative, LTTng-modules attach/detach to/from modules with the
> coming/going notifiers, so the instrumentation gets removed when a
> module is unloaded rather than preventing its unload by holding a module
> reference count. I would recommend a similar approach for fprobe.
Yes, since tracepoint subsystem provides a notifier API to notify the
tracepoint is gone, fprobe already uses it to find unloading and
unregister the target function. (see __tracepoint_probe_module_cb)
Thank you!
>
> Thanks,
>
> Mathieu
>
>
> >
> > Reported-by: don <[email protected]>
> > Closes: https://lore.kernel.org/all/[email protected]/
> > Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
> > ---
> > Changes in v2:
> > - Fix build errors with CONFIG_MODULES=y.
> > ---
> > kernel/trace/trace_fprobe.c | 46 ++++++++++++++++++++++++++++++++++++-------
> > 1 file changed, 38 insertions(+), 8 deletions(-)
> >
> > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> > index 62e6a8f4aae9..1d8a983e1edc 100644
> > --- a/kernel/trace/trace_fprobe.c
> > +++ b/kernel/trace/trace_fprobe.c
> > @@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
> > const char *event,
> > const char *symbol,
> > struct tracepoint *tpoint,
> > + struct module *mod,
> > int maxactive,
> > int nargs, bool is_return)
> > {
> > @@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
> > tf->fp.entry_handler = fentry_dispatcher;
> >
> > tf->tpoint = tpoint;
> > + tf->mod = mod;
> > tf->fp.nr_maxactive = maxactive;
> >
> > ret = trace_probe_init(&tf->tp, event, group, false, nargs);
> > @@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
> > struct __find_tracepoint_cb_data {
> > const char *tp_name;
> > struct tracepoint *tpoint;
> > + struct module *mod;
> > };
> >
> > +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
> > +{
> > + struct __find_tracepoint_cb_data *data = priv;
> > +
> > + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
> > + data->tpoint = tp;
> > + data->mod = __module_text_address((unsigned long)tp->probestub);
> > + if (!try_module_get(data->mod)) {
> > + data->tpoint = NULL;
> > + data->mod = NULL;
> > + }
> > + }
> > +}
> > +
> > static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> > {
> > struct __find_tracepoint_cb_data *data = priv;
> > @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> > data->tpoint = tp;
> > }
> >
> > -static struct tracepoint *find_tracepoint(const char *tp_name)
> > +/*
> > + * Find a tracepoint from kernel and module. If the tracepoint is in a module,
> > + * this increments the module refcount to prevent unloading until the
> > + * trace_fprobe is registered to the list. After registering the trace_fprobe
> > + * on the trace_fprobe list, the module refcount is decremented because
> > + * tracepoint_probe_module_cb will handle it.
> > + */
> > +static struct tracepoint *find_tracepoint(const char *tp_name,
> > + struct module **tp_mod)
> > {
> > struct __find_tracepoint_cb_data data = {
> > .tp_name = tp_name,
> > + .mod = NULL,
> > };
> >
> > for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
> >
> > + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
> > + for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
> > + *tp_mod = data.mod;
> > + }
> > +
> > return data.tpoint;
> > }
> >
> > @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> > char abuf[MAX_BTF_ARGS_LEN];
> > char *dbuf = NULL;
> > bool is_tracepoint = false;
> > + struct module *tp_mod = NULL;
> > struct tracepoint *tpoint = NULL;
> > struct traceprobe_parse_context ctx = {
> > .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
> > @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> >
> > if (is_tracepoint) {
> > ctx.flags |= TPARG_FL_TPOINT;
> > - tpoint = find_tracepoint(symbol);
> > + tpoint = find_tracepoint(symbol, &tp_mod);
> > if (!tpoint) {
> > trace_probe_log_set_index(1);
> > trace_probe_log_err(0, NO_TRACEPOINT);
> > @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> > goto out;
> >
> > /* setup a probe */
> > - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
> > - argc, is_return);
> > + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
> > + maxactive, argc, is_return);
> > if (IS_ERR(tf)) {
> > ret = PTR_ERR(tf);
> > /* This must return -ENOMEM, else there is a bug */
> > @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> > goto out; /* We know tf is not allocated */
> > }
> >
> > - if (is_tracepoint)
> > - tf->mod = __module_text_address(
> > - (unsigned long)tf->tpoint->probestub);
> > -
> > /* parse arguments */
> > for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
> > trace_probe_log_set_index(i + 2);
> > @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> > }
> >
> > out:
> > + if (tp_mod)
> > + module_put(tp_mod);
> > traceprobe_finish_parse(&ctx);
> > trace_probe_log_clear();
> > kfree(new_argv);
> >
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> https://www.efficios.com
>
--
Masami Hiramatsu (Google) <[email protected]>
On 2024-06-03 19:49, Masami Hiramatsu (Google) wrote:
> On Mon, 3 Jun 2024 15:50:55 -0400
> Mathieu Desnoyers <[email protected]> wrote:
>
>> On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote:
>>> From: Masami Hiramatsu (Google) <[email protected]>
>>>
>>> Support raw tracepoint event on module by fprobe events.
>>> Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
>>> the tracepoints on modules are not handled. Thus if user specified a
>>> tracepoint on a module, it shows an error.
>>> This adds new for_each_module_tracepoint() API to tracepoint subsystem,
>>> and uses it to find tracepoints on modules.
>>
>> Hi Masami,
>>
>> Why prevent module unload when a fprobe tracepoint is attached to a
>> module ? This changes the kernel's behavior significantly just for the
>> sake of instrumentation.
>
> I don't prevent module unloading all the time, just before registering
> tracepoint handler (something like booking a ticket :-) ).
> See the last hunk of this patch, it puts the module before exiting
> __trace_fprobe_create().
So at least this is transient, but I still have concerns, see below.
>>
>> As an alternative, LTTng-modules attach/detach to/from modules with the
>> coming/going notifiers, so the instrumentation gets removed when a
>> module is unloaded rather than preventing its unload by holding a module
>> reference count. I would recommend a similar approach for fprobe.
>
> Yes, since tracepoint subsystem provides a notifier API to notify the
> tracepoint is gone, fprobe already uses it to find unloading and
> unregister the target function. (see __tracepoint_probe_module_cb)
I see.
It looks like there are a few things we could improve there:
1) With your approach, modules need to be already loaded before
attaching an fprobe event to them. This effectively prevents
attaching to any module init code. Is there any way we could allow
this by implementing a module coming notifier in fprobe as well ?
This would require that fprobes are kept around in a data structure
that matches the modules when they are loaded in the coming notifier.
2) Given that the fprobe module going notifier is protected by the
event_mutex, can we use locking rather than reference counting
in fprobe attach to guarantee the target module is not reclaimed
concurrently ? This would remove the transient side-effect of
holding a module reference count which temporarily prevents module
unload.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
On Tue, 4 Jun 2024 08:49:55 +0900
Masami Hiramatsu (Google) <[email protected]> wrote:
> On Mon, 3 Jun 2024 15:50:55 -0400
> Mathieu Desnoyers <[email protected]> wrote:
>
> > On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote:
> > > From: Masami Hiramatsu (Google) <[email protected]>
> > >
> > > Support raw tracepoint event on module by fprobe events.
> > > Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
> > > the tracepoints on modules are not handled. Thus if user specified a
> > > tracepoint on a module, it shows an error.
> > > This adds new for_each_module_tracepoint() API to tracepoint subsystem,
> > > and uses it to find tracepoints on modules.
> >
> > Hi Masami,
> >
> > Why prevent module unload when a fprobe tracepoint is attached to a
> > module ? This changes the kernel's behavior significantly just for the
> > sake of instrumentation.
>
> I don't prevent module unloading all the time, just before registering
> tracepoint handler (something like booking a ticket :-) ).
> See the last hunk of this patch, it puts the module before exiting
> __trace_fprobe_create().
>
> >
> > As an alternative, LTTng-modules attach/detach to/from modules with the
> > coming/going notifiers, so the instrumentation gets removed when a
> > module is unloaded rather than preventing its unload by holding a module
> > reference count. I would recommend a similar approach for fprobe.
>
> Yes, since tracepoint subsystem provides a notifier API to notify the
> tracepoint is gone, fprobe already uses it to find unloading and
> unregister the target function. (see __tracepoint_probe_module_cb)
>
Ah, it only prevents module unloading in __trace_fprobe_create()
> +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
> +{
> + struct __find_tracepoint_cb_data *data = priv;
> +
> + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
> + data->tpoint = tp;
> + data->mod = __module_text_address((unsigned long)tp->probestub);
> + if (!try_module_get(data->mod)) {
Here it gets the module. Should only happen once, as it sets data->tpoint.
> + data->tpoint = NULL;
> + data->mod = NULL;
> + }
> + }
> +}
> +
> static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> {
> struct __find_tracepoint_cb_data *data = priv;
> @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
> data->tpoint = tp;
> }
>
> -static struct tracepoint *find_tracepoint(const char *tp_name)
> +/*
> + * Find a tracepoint from kernel and module. If the tracepoint is in a module,
> + * this increments the module refcount to prevent unloading until the
> + * trace_fprobe is registered to the list. After registering the trace_fprobe
> + * on the trace_fprobe list, the module refcount is decremented because
> + * tracepoint_probe_module_cb will handle it.
> + */
> +static struct tracepoint *find_tracepoint(const char *tp_name,
> + struct module **tp_mod)
> {
> struct __find_tracepoint_cb_data data = {
> .tp_name = tp_name,
> + .mod = NULL,
> };
>
> for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
>
> + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
> + for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
> + *tp_mod = data.mod;
> + }
> +
> return data.tpoint;
> }
>
> @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> char abuf[MAX_BTF_ARGS_LEN];
> char *dbuf = NULL;
> bool is_tracepoint = false;
> + struct module *tp_mod = NULL;
> struct tracepoint *tpoint = NULL;
> struct traceprobe_parse_context ctx = {
> .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
> @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
>
> if (is_tracepoint) {
> ctx.flags |= TPARG_FL_TPOINT;
> - tpoint = find_tracepoint(symbol);
> + tpoint = find_tracepoint(symbol, &tp_mod);
> if (!tpoint) {
> trace_probe_log_set_index(1);
> trace_probe_log_err(0, NO_TRACEPOINT);
> @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> goto out;
>
> /* setup a probe */
> - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
> - argc, is_return);
> + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
> + maxactive, argc, is_return);
> if (IS_ERR(tf)) {
> ret = PTR_ERR(tf);
> /* This must return -ENOMEM, else there is a bug */
> @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> goto out; /* We know tf is not allocated */
> }
>
> - if (is_tracepoint)
> - tf->mod = __module_text_address(
> - (unsigned long)tf->tpoint->probestub);
> -
> /* parse arguments */
> for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
> trace_probe_log_set_index(i + 2);
> @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
> }
>
> out:
> + if (tp_mod)
> + module_put(tp_mod);
And on the way out, it puts it.
-- Steve
> traceprobe_finish_parse(&ctx);
> trace_probe_log_clear();
> kfree(new_argv);
On Sat, 1 Jun 2024 17:22:55 +0900
"Masami Hiramatsu (Google)" <[email protected]> wrote:
> From: Masami Hiramatsu (Google) <[email protected]>
>
> Add a test case for tracepoint events on modules. This checks if it can add
> and remove the events correctly.
>
> Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
Reviewed-by: Steven Rostedt (Google) <[email protected]>
-- Steve
On Tue, 4 Jun 2024 11:02:16 -0400
Mathieu Desnoyers <[email protected]> wrote:
> I see.
>
> It looks like there are a few things we could improve there:
>
> 1) With your approach, modules need to be already loaded before
> attaching an fprobe event to them. This effectively prevents
> attaching to any module init code. Is there any way we could allow
> this by implementing a module coming notifier in fprobe as well ?
> This would require that fprobes are kept around in a data structure
> that matches the modules when they are loaded in the coming notifier.
The above sounds like a nice enhancement, but not something necessary for
this series.
>
> 2) Given that the fprobe module going notifier is protected by the
> event_mutex, can we use locking rather than reference counting
> in fprobe attach to guarantee the target module is not reclaimed
> concurrently ? This would remove the transient side-effect of
> holding a module reference count which temporarily prevents module
> unload.
Why do we care about unloading modules during the transition? Note, module
unload has always been considered a second class citizen, and there's been
talks in the past to even rip it out.
-- Steve
On 2024-06-04 12:34, Steven Rostedt wrote:
> On Tue, 4 Jun 2024 11:02:16 -0400
> Mathieu Desnoyers <[email protected]> wrote:
>
>> I see.
>>
>> It looks like there are a few things we could improve there:
>>
>> 1) With your approach, modules need to be already loaded before
>> attaching an fprobe event to them. This effectively prevents
>> attaching to any module init code. Is there any way we could allow
>> this by implementing a module coming notifier in fprobe as well ?
>> This would require that fprobes are kept around in a data structure
>> that matches the modules when they are loaded in the coming notifier.
>
> The above sounds like a nice enhancement, but not something necessary for
> this series.
IMHO it is nevertheless relevant to discuss the impact of supporting
this kind of use-case on the ABI presented to userspace, at least to
validate that what is exposed today can incrementally be enhanced
towards that goal.
I'm not saying that it needs to be implemented today, but we should
at least give it some thoughts right now to make sure the ABI is a
good fit.
>>
>> 2) Given that the fprobe module going notifier is protected by the
>> event_mutex, can we use locking rather than reference counting
>> in fprobe attach to guarantee the target module is not reclaimed
>> concurrently ? This would remove the transient side-effect of
>> holding a module reference count which temporarily prevents module
>> unload.
>
> Why do we care about unloading modules during the transition? Note, module
> unload has always been considered a second class citizen, and there's been
> talks in the past to even rip it out.
As a general rule I try to ensure tracing has as little impact on the
system behavior so issues that occur without tracing can be reproduced
with instrumentation.
On systems where modules are loaded/unloaded with udev, holding
references on modules can spuriously prevent module unload, which
as a consequence changes the system behavior.
About the relative importance of the various kernel subsystems,
following your reasoning that module unload is considered a
second-class citizen within the kernel, I would argue that tracing
is a third-class citizen and should not needlessly modify the
behavior of classes above it.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com