2022-10-20 00:44:25

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH] tracing: Add trace_trigger kernel command line option

From: "Steven Rostedt (Google)" <[email protected]>

Allow triggers to be enabled at kernel boot up. For example:

trace_trigger="sched_switch.stacktrace if prev_state == 2"

The above will enable the stacktrace trigger on top of the sched_switch
event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
at boot up, a stacktrace will trigger and be recorded in the tracing ring
buffer every time the sched_switch happens where the previous state is
TASK_INTERRUPTIBLE.

As this calls into tracepoint logic during very early boot (before
interrupts are enabled), a check has to be done to see if early boot
interrupts are still disabled, and if so, avoid any call to RCU
synchronization, as that will enable interrupts and cause boot up issues.

Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 19 ++++++
include/linux/tracepoint.h | 4 ++
kernel/trace/trace.c | 3 +-
kernel/trace/trace_events.c | 63 ++++++++++++++++++-
kernel/tracepoint.c | 6 ++
5 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774..ccf91a4bf113 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6257,6 +6257,25 @@
See also Documentation/trace/ftrace.rst "trace options"
section.

+ trace_trigger=[trigger-list]
+ [FTRACE] Add a event trigger on specific events.
+ Set a trigger on top of a specific event, with an optional
+ filter.
+
+ The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+ Where more than one trigger may be specified that are comma deliminated.
+
+ For example:
+
+ trace_trigger="sched_switch.stacktrace if prev_state == 2"
+
+ The above will enable the "stacktrace" trigger on the "sched_switch"
+ event but only trigger it if the "prev_state" of the "sched_switch"
+ event is "2" (TASK_UNINTERUPTIBLE).
+
+ See also "Event triggers" in Documentation/trace/events.rst
+
+
traceoff_on_warning
[FTRACE] enable this option to disable tracing when a
warning is hit. This turns off "tracing_on". Tracing can
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 4b33b95eb8be..a5c6b5772897 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -90,6 +90,10 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
#ifdef CONFIG_TRACEPOINTS
static inline void tracepoint_synchronize_unregister(void)
{
+ /* Early updates do not need synchronization */
+ if (early_boot_irqs_disabled)
+ return;
+
synchronize_srcu(&tracepoint_srcu);
synchronize_rcu();
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 47a44b055a1d..c03fd7037add 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2749,7 +2749,8 @@ void trace_buffered_event_disable(void)
preempt_enable();

/* Wait for all current users to finish */
- synchronize_rcu();
+ if (!early_boot_irqs_disabled)
+ synchronize_rcu();

for_each_tracing_cpu(cpu) {
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 0356cae0cf74..06554939252c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
return file;
}

+#ifdef CONFIG_HIST_TRIGGERS
+#define MAX_BOOT_TRIGGERS 32
+
+static struct boot_triggers {
+ const char *event;
+ char *trigger;
+} bootup_triggers[MAX_BOOT_TRIGGERS];
+
+static char bootup_trigger_buf[COMMAND_LINE_SIZE];
+static int nr_boot_triggers;
+
+static __init int setup_trace_triggers(char *str)
+{
+ char *trigger;
+ char *buf;
+ int i;
+
+ strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
+ ring_buffer_expanded = true;
+ disable_tracing_selftest("running event triggers");
+
+ buf = bootup_trigger_buf;
+ for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
+ trigger = strsep(&buf, ",");
+ if (!trigger)
+ break;
+ bootup_triggers[i].event = strsep(&trigger, ".");
+ bootup_triggers[i].trigger = strsep(&trigger, ".");
+ if (!bootup_triggers[i].trigger)
+ break;
+ }
+
+ nr_boot_triggers = i;
+ return 1;
+}
+__setup("trace_trigger=", setup_trace_triggers);
+#endif
+
/* Add an event to a trace directory */
static int
__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
@@ -2822,12 +2860,32 @@ __trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
struct trace_event_file *file;
+ int ret;
+ int i;

file = trace_create_new_event(call, tr);
if (!file)
return -ENOMEM;

- return event_define_fields(call);
+ ret = event_define_fields(call);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_HIST_TRIGGERS
+ for (i = 0; i < nr_boot_triggers; i++) {
+ if (strcmp(trace_event_name(call), bootup_triggers[i].event))
+ continue;
+ mutex_lock(&event_mutex);
+ ret = trigger_process_regex(file, bootup_triggers[i].trigger);
+ mutex_unlock(&event_mutex);
+ if (ret)
+ pr_err("Failed to register trigger '%s' on event %s\n",
+ bootup_triggers[i].trigger,
+ bootup_triggers[i].event);
+ }
+#endif
+
+ return 0;
}

struct ftrace_module_file_ops;
@@ -3726,6 +3784,8 @@ static __init int event_trace_enable(void)
list_add(&call->list, &ftrace_events);
}

+ register_trigger_cmds();
+
/*
* We need the top trace array to have a working set of trace
* points at early init, before the debug files and directories
@@ -3740,7 +3800,6 @@ static __init int event_trace_enable(void)

register_event_cmds();

- register_trigger_cmds();

return 0;
}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f23144af5743..f6e4ee1e40b3 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -48,6 +48,9 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
{
struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

+ if (early_boot_irqs_disabled)
+ return;
+
/* Keep the latest get_state snapshot. */
snapshot->rcu = get_state_synchronize_rcu();
snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
@@ -58,6 +61,9 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
{
struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];

+ if (early_boot_irqs_disabled)
+ return;
+
if (!snapshot->ongoing)
return;
cond_synchronize_rcu(snapshot->rcu);
--
2.35.1


2022-10-20 08:26:28

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

Hi Steven,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on linus/master v6.1-rc1 next-20221020]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Add-trace_trigger-kernel-command-line-option/20221020-102230
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20221019200137.70343645%40gandalf.local.home
patch subject: [PATCH] tracing: Add trace_trigger kernel command line option
config: i386-randconfig-a001
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/476ab107174fa2ac06ef7171b093235382008c77
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Steven-Rostedt/tracing-Add-trace_trigger-kernel-command-line-option/20221020-102230
git checkout 476ab107174fa2ac06ef7171b093235382008c77
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash kernel/trace/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

kernel/trace/trace_events.c: In function '__trace_early_add_new_event':
>> kernel/trace/trace_events.c:2864:13: warning: unused variable 'i' [-Wunused-variable]
2864 | int i;
| ^


vim +/i +2864 kernel/trace/trace_events.c

2852
2853 /*
2854 * Just create a descriptor for early init. A descriptor is required
2855 * for enabling events at boot. We want to enable events before
2856 * the filesystem is initialized.
2857 */
2858 static int
2859 __trace_early_add_new_event(struct trace_event_call *call,
2860 struct trace_array *tr)
2861 {
2862 struct trace_event_file *file;
2863 int ret;
> 2864 int i;
2865
2866 file = trace_create_new_event(call, tr);
2867 if (!file)
2868 return -ENOMEM;
2869
2870 ret = event_define_fields(call);
2871 if (ret)
2872 return ret;
2873

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (2.48 kB)
config (155.14 kB)
Download all attachments

2022-10-20 08:56:54

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

Hi Steven,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.1-rc1 next-20221020]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/tracing-Add-trace_trigger-kernel-command-line-option/20221020-102230
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20221019200137.70343645%40gandalf.local.home
patch subject: [PATCH] tracing: Add trace_trigger kernel command line option
config: s390-allmodconfig
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/476ab107174fa2ac06ef7171b093235382008c77
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Steven-Rostedt/tracing-Add-trace_trigger-kernel-command-line-option/20221020-102230
git checkout 476ab107174fa2ac06ef7171b093235382008c77
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "early_boot_irqs_disabled" [mm/kasan/kasan_test.ko] undefined!
>> ERROR: modpost: "early_boot_irqs_disabled" [mm/kfence/kfence_test.ko] undefined!
ERROR: modpost: "devm_ioremap_resource" [drivers/dma/qcom/hdma.ko] undefined!
ERROR: modpost: "devm_ioremap_resource" [drivers/dma/fsl-edma.ko] undefined!
ERROR: modpost: "devm_ioremap_resource" [drivers/dma/idma64.ko] undefined!
ERROR: modpost: "iounmap" [drivers/tty/ipwireless/ipwireless.ko] undefined!
ERROR: modpost: "ioremap" [drivers/tty/ipwireless/ipwireless.ko] undefined!
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/char/xillybus/xillybus_of.ko] undefined!
ERROR: modpost: "devm_memremap" [drivers/misc/open-dice.ko] undefined!
ERROR: modpost: "devm_memunmap" [drivers/misc/open-dice.ko] undefined!
WARNING: modpost: suppressed 20 unresolved symbol warnings because there were too many)

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (2.66 kB)
config (123.64 kB)
Download all attachments

2022-10-20 14:43:47

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

On Wed, 19 Oct 2022 20:01:37 -0400
Steven Rostedt <[email protected]> wrote:

> From: "Steven Rostedt (Google)" <[email protected]>
>
> Allow triggers to be enabled at kernel boot up. For example:
>
> trace_trigger="sched_switch.stacktrace if prev_state == 2"
>
> The above will enable the stacktrace trigger on top of the sched_switch
> event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
> at boot up, a stacktrace will trigger and be recorded in the tracing ring
> buffer every time the sched_switch happens where the previous state is
> TASK_INTERRUPTIBLE.
>
> As this calls into tracepoint logic during very early boot (before
> interrupts are enabled), a check has to be done to see if early boot
> interrupts are still disabled, and if so, avoid any call to RCU
> synchronization, as that will enable interrupts and cause boot up issues.

Just out of curiousity, can you do it by boot-time tracer?
(Is it too late for your issue?)

$ cat >> stacktrace.bconf
ftrace.event.sched.sched_switch.actions = "stacktrace if prev_state == 2"
^D
$ bootconfig -a stacktrace.bconf initrd.img

And boot kernel with "bootconfig".
Then,

----------
init-1 [000] d..3. 0.546668: <stack trace>
=> trace_event_raw_event_sched_switch
=> __traceiter_sched_switch
=> __schedule
=> schedule
=> schedule_timeout
=> wait_for_completion_killable
=> __kthread_create_on_node
=> kthread_create_on_node
=> audit_init
=> do_one_initcall
=> kernel_init_freeable
=> kernel_init
=> ret_from_fork
kauditd-57 [007] d..3. 0.546677: <stack trace>
=> trace_event_raw_event_sched_switch
=> __traceiter_sched_switch
=> __schedule
=> schedule
=> schedule_preempt_disabled
=> kthread
=> ret_from_fork
----------

Thank you,

>
> Signed-off-by: Steven Rostedt (Google) <[email protected]>
> ---
> .../admin-guide/kernel-parameters.txt | 19 ++++++
> include/linux/tracepoint.h | 4 ++
> kernel/trace/trace.c | 3 +-
> kernel/trace/trace_events.c | 63 ++++++++++++++++++-
> kernel/tracepoint.c | 6 ++
> 5 files changed, 92 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index a465d5242774..ccf91a4bf113 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6257,6 +6257,25 @@
> See also Documentation/trace/ftrace.rst "trace options"
> section.
>
> + trace_trigger=[trigger-list]
> + [FTRACE] Add a event trigger on specific events.
> + Set a trigger on top of a specific event, with an optional
> + filter.
> +
> + The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
> + Where more than one trigger may be specified that are comma deliminated.
> +
> + For example:
> +
> + trace_trigger="sched_switch.stacktrace if prev_state == 2"
> +
> + The above will enable the "stacktrace" trigger on the "sched_switch"
> + event but only trigger it if the "prev_state" of the "sched_switch"
> + event is "2" (TASK_UNINTERUPTIBLE).
> +
> + See also "Event triggers" in Documentation/trace/events.rst
> +
> +
> traceoff_on_warning
> [FTRACE] enable this option to disable tracing when a
> warning is hit. This turns off "tracing_on". Tracing can
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 4b33b95eb8be..a5c6b5772897 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -90,6 +90,10 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
> #ifdef CONFIG_TRACEPOINTS
> static inline void tracepoint_synchronize_unregister(void)
> {
> + /* Early updates do not need synchronization */
> + if (early_boot_irqs_disabled)
> + return;
> +
> synchronize_srcu(&tracepoint_srcu);
> synchronize_rcu();
> }
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 47a44b055a1d..c03fd7037add 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2749,7 +2749,8 @@ void trace_buffered_event_disable(void)
> preempt_enable();
>
> /* Wait for all current users to finish */
> - synchronize_rcu();
> + if (!early_boot_irqs_disabled)
> + synchronize_rcu();
>
> for_each_tracing_cpu(cpu) {
> free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 0356cae0cf74..06554939252c 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
> return file;
> }
>
> +#ifdef CONFIG_HIST_TRIGGERS
> +#define MAX_BOOT_TRIGGERS 32
> +
> +static struct boot_triggers {
> + const char *event;
> + char *trigger;
> +} bootup_triggers[MAX_BOOT_TRIGGERS];
> +
> +static char bootup_trigger_buf[COMMAND_LINE_SIZE];
> +static int nr_boot_triggers;
> +
> +static __init int setup_trace_triggers(char *str)
> +{
> + char *trigger;
> + char *buf;
> + int i;
> +
> + strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
> + ring_buffer_expanded = true;
> + disable_tracing_selftest("running event triggers");
> +
> + buf = bootup_trigger_buf;
> + for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
> + trigger = strsep(&buf, ",");
> + if (!trigger)
> + break;
> + bootup_triggers[i].event = strsep(&trigger, ".");
> + bootup_triggers[i].trigger = strsep(&trigger, ".");
> + if (!bootup_triggers[i].trigger)
> + break;
> + }
> +
> + nr_boot_triggers = i;
> + return 1;
> +}
> +__setup("trace_trigger=", setup_trace_triggers);
> +#endif
> +
> /* Add an event to a trace directory */
> static int
> __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
> @@ -2822,12 +2860,32 @@ __trace_early_add_new_event(struct trace_event_call *call,
> struct trace_array *tr)
> {
> struct trace_event_file *file;
> + int ret;
> + int i;
>
> file = trace_create_new_event(call, tr);
> if (!file)
> return -ENOMEM;
>
> - return event_define_fields(call);
> + ret = event_define_fields(call);
> + if (ret)
> + return ret;
> +
> +#ifdef CONFIG_HIST_TRIGGERS
> + for (i = 0; i < nr_boot_triggers; i++) {
> + if (strcmp(trace_event_name(call), bootup_triggers[i].event))
> + continue;
> + mutex_lock(&event_mutex);
> + ret = trigger_process_regex(file, bootup_triggers[i].trigger);
> + mutex_unlock(&event_mutex);
> + if (ret)
> + pr_err("Failed to register trigger '%s' on event %s\n",
> + bootup_triggers[i].trigger,
> + bootup_triggers[i].event);
> + }
> +#endif
> +
> + return 0;
> }
>
> struct ftrace_module_file_ops;
> @@ -3726,6 +3784,8 @@ static __init int event_trace_enable(void)
> list_add(&call->list, &ftrace_events);
> }
>
> + register_trigger_cmds();
> +
> /*
> * We need the top trace array to have a working set of trace
> * points at early init, before the debug files and directories
> @@ -3740,7 +3800,6 @@ static __init int event_trace_enable(void)
>
> register_event_cmds();
>
> - register_trigger_cmds();
>
> return 0;
> }
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index f23144af5743..f6e4ee1e40b3 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -48,6 +48,9 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
> {
> struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>
> + if (early_boot_irqs_disabled)
> + return;
> +
> /* Keep the latest get_state snapshot. */
> snapshot->rcu = get_state_synchronize_rcu();
> snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
> @@ -58,6 +61,9 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
> {
> struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>
> + if (early_boot_irqs_disabled)
> + return;
> +
> if (!snapshot->ongoing)
> return;
> cond_synchronize_rcu(snapshot->rcu);
> --
> 2.35.1
>


--
Masami Hiramatsu (Google) <[email protected]>

2022-10-20 15:35:43

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

On Thu, 20 Oct 2022 23:33:18 +0900
Masami Hiramatsu (Google) <[email protected]> wrote:

> On Wed, 19 Oct 2022 20:01:37 -0400
> Steven Rostedt <[email protected]> wrote:
>
> > From: "Steven Rostedt (Google)" <[email protected]>
> >
> > Allow triggers to be enabled at kernel boot up. For example:
> >
> > trace_trigger="sched_switch.stacktrace if prev_state == 2"
> >
> > The above will enable the stacktrace trigger on top of the sched_switch
> > event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
> > at boot up, a stacktrace will trigger and be recorded in the tracing ring
> > buffer every time the sched_switch happens where the previous state is
> > TASK_INTERRUPTIBLE.
> >
> > As this calls into tracepoint logic during very early boot (before
> > interrupts are enabled), a check has to be done to see if early boot
> > interrupts are still disabled, and if so, avoid any call to RCU
> > synchronization, as that will enable interrupts and cause boot up issues.
>
> Just out of curiousity, can you do it by boot-time tracer?
> (Is it too late for your issue?)

Yeah, I'm looking at adding triggers very early. I could even add
"traceoff" trigger too here. I'm guessing that bootconf is done later in
boot up? Especially since I needed to fix RCU calls to get this working.

>
> $ cat >> stacktrace.bconf
> ftrace.event.sched.sched_switch.actions = "stacktrace if prev_state == 2"
> ^D
> $ bootconfig -a stacktrace.bconf initrd.img
>

Not to mention, I'm doing this for Chromebooks where it's easy to update
the command line (on dev devices), but not as easy to modify the kernel.

-- Steve

2022-10-20 15:36:26

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

Hi Steven,

On Wed, Oct 19, 2022 at 8:01 PM Steven Rostedt <[email protected]> wrote:
>
> From: "Steven Rostedt (Google)" <[email protected]>
>
> Allow triggers to be enabled at kernel boot up. For example:
>
> trace_trigger="sched_switch.stacktrace if prev_state == 2"
>
> The above will enable the stacktrace trigger on top of the sched_switch
> event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
> at boot up, a stacktrace will trigger and be recorded in the tracing ring
> buffer every time the sched_switch happens where the previous state is
> TASK_INTERRUPTIBLE.
>
> As this calls into tracepoint logic during very early boot (before
> interrupts are enabled), a check has to be done to see if early boot
> interrupts are still disabled, and if so, avoid any call to RCU
> synchronization, as that will enable interrupts and cause boot up issues.
>
> Signed-off-by: Steven Rostedt (Google) <[email protected]>
> ---

rest_init() -> rcu_scheduler_starting() sets RCU_SCHEDULER_INIT

start_kernel() disables interrupts and does the dance you mentioned,
while setting that early_ flag. start_kernel() is called from arch
code.

From your crash, the trace trigger is happening before the early_ boot
flag is set to true.
rcu_blocking_is_gp() should return true at the point your trigger
fires because RCU_SCHEDULER_INACTIVE is still set.

So I would expect your synchronize_rcu() to not do anything.

There's comments in rcu_blocking_is_gp() like so:
* During early boot, any blocking grace-period wait automatically
* implies a grace period.

So it should be returning straight from here. I wonder where/why it's
re-enabling interrupts for you. What does rcu_blocking_is_gp() return
if you can print that, at the time of the crash?

Cheers,

- J


> .../admin-guide/kernel-parameters.txt | 19 ++++++
> include/linux/tracepoint.h | 4 ++
> kernel/trace/trace.c | 3 +-
> kernel/trace/trace_events.c | 63 ++++++++++++++++++-
> kernel/tracepoint.c | 6 ++
> 5 files changed, 92 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index a465d5242774..ccf91a4bf113 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6257,6 +6257,25 @@
> See also Documentation/trace/ftrace.rst "trace options"
> section.
>
> + trace_trigger=[trigger-list]
> + [FTRACE] Add a event trigger on specific events.
> + Set a trigger on top of a specific event, with an optional
> + filter.
> +
> + The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
> + Where more than one trigger may be specified that are comma deliminated.
> +
> + For example:
> +
> + trace_trigger="sched_switch.stacktrace if prev_state == 2"
> +
> + The above will enable the "stacktrace" trigger on the "sched_switch"
> + event but only trigger it if the "prev_state" of the "sched_switch"
> + event is "2" (TASK_UNINTERUPTIBLE).
> +
> + See also "Event triggers" in Documentation/trace/events.rst
> +
> +
> traceoff_on_warning
> [FTRACE] enable this option to disable tracing when a
> warning is hit. This turns off "tracing_on". Tracing can
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 4b33b95eb8be..a5c6b5772897 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -90,6 +90,10 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
> #ifdef CONFIG_TRACEPOINTS
> static inline void tracepoint_synchronize_unregister(void)
> {
> + /* Early updates do not need synchronization */
> + if (early_boot_irqs_disabled)
> + return;
> +
> synchronize_srcu(&tracepoint_srcu);
> synchronize_rcu();
> }
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 47a44b055a1d..c03fd7037add 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2749,7 +2749,8 @@ void trace_buffered_event_disable(void)
> preempt_enable();
>
> /* Wait for all current users to finish */
> - synchronize_rcu();
> + if (!early_boot_irqs_disabled)
> + synchronize_rcu();
>
> for_each_tracing_cpu(cpu) {
> free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 0356cae0cf74..06554939252c 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
> return file;
> }
>
> +#ifdef CONFIG_HIST_TRIGGERS
> +#define MAX_BOOT_TRIGGERS 32
> +
> +static struct boot_triggers {
> + const char *event;
> + char *trigger;
> +} bootup_triggers[MAX_BOOT_TRIGGERS];
> +
> +static char bootup_trigger_buf[COMMAND_LINE_SIZE];
> +static int nr_boot_triggers;
> +
> +static __init int setup_trace_triggers(char *str)
> +{
> + char *trigger;
> + char *buf;
> + int i;
> +
> + strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
> + ring_buffer_expanded = true;
> + disable_tracing_selftest("running event triggers");
> +
> + buf = bootup_trigger_buf;
> + for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
> + trigger = strsep(&buf, ",");
> + if (!trigger)
> + break;
> + bootup_triggers[i].event = strsep(&trigger, ".");
> + bootup_triggers[i].trigger = strsep(&trigger, ".");
> + if (!bootup_triggers[i].trigger)
> + break;
> + }
> +
> + nr_boot_triggers = i;
> + return 1;
> +}
> +__setup("trace_trigger=", setup_trace_triggers);
> +#endif
> +
> /* Add an event to a trace directory */
> static int
> __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
> @@ -2822,12 +2860,32 @@ __trace_early_add_new_event(struct trace_event_call *call,
> struct trace_array *tr)
> {
> struct trace_event_file *file;
> + int ret;
> + int i;
>
> file = trace_create_new_event(call, tr);
> if (!file)
> return -ENOMEM;
>
> - return event_define_fields(call);
> + ret = event_define_fields(call);
> + if (ret)
> + return ret;
> +
> +#ifdef CONFIG_HIST_TRIGGERS
> + for (i = 0; i < nr_boot_triggers; i++) {
> + if (strcmp(trace_event_name(call), bootup_triggers[i].event))
> + continue;
> + mutex_lock(&event_mutex);
> + ret = trigger_process_regex(file, bootup_triggers[i].trigger);
> + mutex_unlock(&event_mutex);
> + if (ret)
> + pr_err("Failed to register trigger '%s' on event %s\n",
> + bootup_triggers[i].trigger,
> + bootup_triggers[i].event);
> + }
> +#endif
> +
> + return 0;
> }
>
> struct ftrace_module_file_ops;
> @@ -3726,6 +3784,8 @@ static __init int event_trace_enable(void)
> list_add(&call->list, &ftrace_events);
> }
>
> + register_trigger_cmds();
> +
> /*
> * We need the top trace array to have a working set of trace
> * points at early init, before the debug files and directories
> @@ -3740,7 +3800,6 @@ static __init int event_trace_enable(void)
>
> register_event_cmds();
>
> - register_trigger_cmds();
>
> return 0;
> }
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index f23144af5743..f6e4ee1e40b3 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -48,6 +48,9 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
> {
> struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>
> + if (early_boot_irqs_disabled)
> + return;
> +
> /* Keep the latest get_state snapshot. */
> snapshot->rcu = get_state_synchronize_rcu();
> snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
> @@ -58,6 +61,9 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
> {
> struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>
> + if (early_boot_irqs_disabled)
> + return;
> +
> if (!snapshot->ongoing)
> return;
> cond_synchronize_rcu(snapshot->rcu);
> --
> 2.35.1
>

2022-10-20 15:44:55

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

On Thu, Oct 20, 2022 at 10:43 AM Joel Fernandes <[email protected]> wrote:
>
> Hi Steven,
>
> On Wed, Oct 19, 2022 at 8:01 PM Steven Rostedt <[email protected]> wrote:
> >
> > From: "Steven Rostedt (Google)" <[email protected]>
> >
> > Allow triggers to be enabled at kernel boot up. For example:
> >
> > trace_trigger="sched_switch.stacktrace if prev_state == 2"
> >
> > The above will enable the stacktrace trigger on top of the sched_switch
> > event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
> > at boot up, a stacktrace will trigger and be recorded in the tracing ring
> > buffer every time the sched_switch happens where the previous state is
> > TASK_INTERRUPTIBLE.
> >
> > As this calls into tracepoint logic during very early boot (before
> > interrupts are enabled), a check has to be done to see if early boot
> > interrupts are still disabled, and if so, avoid any call to RCU
> > synchronization, as that will enable interrupts and cause boot up issues.
> >
> > Signed-off-by: Steven Rostedt (Google) <[email protected]>
> > ---
>
> rest_init() -> rcu_scheduler_starting() sets RCU_SCHEDULER_INIT
>
> start_kernel() disables interrupts and does the dance you mentioned,
> while setting that early_ flag. start_kernel() is called from arch
> code.
>
> From your crash, the trace trigger is happening before the early_ boot
> flag is set to true.

My apologies. I meant, before early_boot_irqs_disabled is set to false.

2022-10-20 22:27:39

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

On Thu, 20 Oct 2022 15:21:08 -0700
"Paul E. McKenney" <[email protected]> wrote:

> My plan is to push it in after a bit of testing. The Fixes tag should
> make the right -stable stuff happen.

Great, thanks!

I'll resend my patch without the synchronize_rcu() avoidance.

-- Steve