Hi
These are some changes I have accumulated in the last
weeks. Mostly improving the tool's user experience.
- On timerlat top, remove an extra \n that was breaking
the output.
- Replace \t with ' ' on the auto analysis, fixing some
copy & paste.
- On timerlat top, do not print TTY formatting characters when
the output is not sent to a terminal, because they break the
output.
- on timerlat top and hist, add a summary with the overall results.
For instance, the minimum value for all CPUs, the overall average
and the maximum value from all CPUs.
- Add a --warm-up <seconds> option, allowing the workload to run for
<seconds> before starting to collect results.
- Add a --trace-buffer-size option, allowing the user to set the
tracing buffer size. This is mainly useful for reducing the trace
file.
- Make the user-space threads the default choice, also adding the
a -k option, allowing the user to switch to kerne-threads.
Daniel Bristot de Oliveira (8):
rtla/timerlat: Simplify "no value" printing on top
rtla/auto-analysis: Replace \t with spaces
rtla/timerlat: Use pretty formatting only on interactive tty
rtla/timerlat: Add a summary for top mode
rtla/timerlat: Add a summary for hist mode
rtla: Add the --warm-up option
rtla/timerlat: Make user-space threads the default
rtla: Add --trace-buffer-size option
Documentation/tools/rtla/common_options.rst | 7 +
.../tools/rtla/common_timerlat_options.rst | 6 +-
tools/tracing/rtla/src/osnoise_hist.c | 41 ++-
tools/tracing/rtla/src/osnoise_top.c | 41 ++-
tools/tracing/rtla/src/timerlat_aa.c | 109 ++++----
tools/tracing/rtla/src/timerlat_hist.c | 220 ++++++++++++++--
tools/tracing/rtla/src/timerlat_top.c | 236 +++++++++++++++---
tools/tracing/rtla/src/trace.c | 15 ++
tools/tracing/rtla/src/trace.h | 1 +
9 files changed, 566 insertions(+), 110 deletions(-)
--
2.44.0
Instead of printing three times the same output, print it only once,
reducing lines and being sure that all no values have the same length.
It also fixes an extra '\n' when running the with kernel threads, like
here:
=============== %< ==============
Timer Latency
0 00:00:01 | IRQ Timer Latency (us) | Thread Timer Latency (us)
CPU COUNT | cur min avg max | cur min avg max
2 #0 | - - - - | 161 161 161 161
3 #0 | - - - - | 161 161 161 161
8 #1 | 54 54 54 54 | - - - -'\n'
---------------|----------------------------------------|---------------------------------------
ALL #1 e0 | 54 54 54 | 161 161 161
=============== %< ==============
This '\n' should have been removed with the user-space support that
added another '\n' if not running with kernel threads.
Cc: [email protected]
Fixes: cdca4f4e5e8e ("rtla/timerlat_top: Add timerlat user-space support")
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
tools/tracing/rtla/src/timerlat_top.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 8a3fa64319c6..2665e0bb5f1e 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -212,6 +212,8 @@ static void timerlat_top_header(struct osnoise_tool *top)
trace_seq_printf(s, "\n");
}
+static const char *no_value = " -";
+
/*
* timerlat_top_print - prints the output of a given CPU
*/
@@ -239,10 +241,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count);
if (!cpu_data->irq_count) {
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - |");
+ trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value);
} else {
trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor);
trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor);
@@ -251,10 +250,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
}
if (!cpu_data->thread_count) {
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " -\n");
+ trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value);
} else {
trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor);
trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
@@ -271,10 +267,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
trace_seq_printf(s, " |");
if (!cpu_data->user_count) {
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " - ");
- trace_seq_printf(s, " -\n");
+ trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value);
} else {
trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor);
trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor);
--
2.44.0
Like on rtla timerlat top, add an overall summary at the bottom
of timerlat hist. For instance:
# timerlat hist -c 0-1 -d 10s -E 20
# RTLA timerlat histogram
# Time unit is microseconds (us)
# Duration: 0 00:00:10
Index IRQ-000 Thr-000 IRQ-001 Thr-001
6 1 0 0 0
7 1 0 0 0
8 1 0 1 0
9 7 0 0 0
10 16 0 0 0
11 1 0 3 0
15 0 0 3 0
16 0 0 12 0
17 0 0 28 0
18 0 2 26 0
19 1 1 80 1
over: 9973 9998 9848 10000
count: 10001 10001 10001 10001
min: 6 18 8 19
avg: 185 204 95 113
max: 428 450 341 371
ALL: IRQ Thr
count: 20002 20002
min: 6 18
avg: 140 159
max: 428 450
Suggested-by: Juri Lelli <[email protected]>
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
tools/tracing/rtla/src/timerlat_hist.c | 130 ++++++++++++++++++++++++-
1 file changed, 129 insertions(+), 1 deletion(-)
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 8bd51aab6513..b12c6b571dd4 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -401,8 +401,135 @@ timerlat_print_summary(struct timerlat_hist_params *params,
trace_seq_reset(trace->seq);
}
+static void
+timerlat_print_stats_all(struct timerlat_hist_params *params,
+ struct trace_instance *trace,
+ struct timerlat_hist_data *data)
+{
+ struct timerlat_hist_cpu *cpu_data;
+ struct timerlat_hist_cpu sum;
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ memset(&sum, 0, sizeof(sum));
+ sum.min_irq = ~0;
+ sum.min_thread = ~0;
+ sum.min_user = ~0;
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus))
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ cpu_data = &data->hist[cpu];
+
+ sum.irq_count += cpu_data->irq_count;
+ update_min(&sum.min_irq, &cpu_data->min_irq);
+ update_sum(&sum.sum_irq, &cpu_data->sum_irq);
+ update_max(&sum.max_irq, &cpu_data->max_irq);
+
+ sum.thread_count += cpu_data->thread_count;
+ update_min(&sum.min_thread, &cpu_data->min_thread);
+ update_sum(&sum.sum_thread, &cpu_data->sum_thread);
+ update_max(&sum.max_thread, &cpu_data->max_thread);
+
+ sum.user_count += cpu_data->user_count;
+ update_min(&sum.min_user, &cpu_data->min_user);
+ update_sum(&sum.sum_user, &cpu_data->sum_user);
+ update_max(&sum.max_user, &cpu_data->max_user);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "ALL: ");
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, " IRQ");
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, " Thr");
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, " Usr");
+
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ sum.irq_count);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ sum.thread_count);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9d ",
+ sum.user_count);
+
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.min_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.min_thread);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.min_user);
+
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.sum_irq / sum.irq_count);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.sum_thread / sum.thread_count);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.sum_user / sum.user_count);
+
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.max_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.max_thread);
+
+ if (params->user_hist)
+ trace_seq_printf(trace->seq, "%9llu ",
+ sum.max_user);
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
/*
- * timerlat_print_stats - print data for all CPUs
+ * timerlat_print_stats - print data for each CPUs
*/
static void
timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
@@ -485,6 +612,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
trace_seq_reset(trace->seq);
timerlat_print_summary(params, trace, data);
+ timerlat_print_stats_all(params, trace, data);
}
/*
--
2.44.0
After ther -u addition, most of the known users are setting it. And
it makes sense, as it adds more information, and inherits the default
setup for the threads - e.g., cgroups configs.
Thus, if the user-space interface is available, enable -u. Otherwise,
use the in-kernel thread.
Add the -k option to allow the user to request kernel-threads.
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
.../tools/rtla/common_timerlat_options.rst | 6 +++-
tools/tracing/rtla/src/timerlat_hist.c | 31 +++++++++++++++++--
tools/tracing/rtla/src/timerlat_top.c | 31 +++++++++++++++++--
3 files changed, 61 insertions(+), 7 deletions(-)
diff --git a/Documentation/tools/rtla/common_timerlat_options.rst b/Documentation/tools/rtla/common_timerlat_options.rst
index d3255ed70195..090700a6ae9f 100644
--- a/Documentation/tools/rtla/common_timerlat_options.rst
+++ b/Documentation/tools/rtla/common_timerlat_options.rst
@@ -27,12 +27,16 @@
*cyclictest* sets this value to *0* by default, use **--dma-latency** *0* to have
similar results.
+**-k**, **--kernel-threads**
+
+ Use timerlat kernel-space threads, in contrast of **-u**.
+
**-u**, **--user-threads**
Set timerlat to run without a workload, and then dispatches user-space workloads
to wait on the timerlat_fd. Once the workload is awakes, it goes to sleep again
adding so the measurement for the kernel-to-user and user-to-kernel to the tracer
- output.
+ output. **--user-threads** will be used unless the user specify **-k**.
**-U**, **--user-load**
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index da1c353bdac9..6eb6e38d4a05 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -40,6 +40,7 @@ struct timerlat_hist_params {
int no_aa;
int dump_tasks;
int user_workload;
+ int kernel_workload;
int user_hist;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
@@ -628,7 +629,7 @@ static void timerlat_hist_usage(char *usage)
" usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\",
" [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
" [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
- " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]",
+ " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]",
" [--warm-up s]",
"",
" -h/--help: print this menu",
@@ -664,7 +665,8 @@ static void timerlat_hist_usage(char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
- " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
+ " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads",
+ " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
" --warm-up s: let the workload run for s seconds before collecting data",
NULL,
@@ -728,6 +730,7 @@ static struct timerlat_hist_params
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
{"user-threads", no_argument, 0, 'u'},
+ {"kernel-threads", no_argument, 0, 'k'},
{"user-load", no_argument, 0, 'U'},
{"event", required_argument, 0, 'e'},
{"no-irq", no_argument, 0, '0'},
@@ -748,7 +751,7 @@ static struct timerlat_hist_params
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:uU0123456:7:8:9\1\2:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:",
long_options, &option_index);
/* detect the end of the options. */
@@ -831,6 +834,9 @@ static struct timerlat_hist_params
case 'i':
params->stop_us = get_llong_from_str(optarg);
break;
+ case 'k':
+ params->kernel_workload = 1;
+ break;
case 'n':
params->output_divisor = 1;
break;
@@ -942,6 +948,9 @@ static struct timerlat_hist_params
if (!params->stop_us && !params->stop_total_us)
params->no_aa = 1;
+ if (params->kernel_workload && params->user_workload)
+ timerlat_hist_usage("--kernel-threads and --user-threads are mutually exclusive!");
+
return params;
}
@@ -1017,6 +1026,22 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param
auto_house_keeping(¶ms->monitored_cpus);
}
+ /*
+ * If the user did not specify a type of thread, try user-threads first.
+ * Fall back to kernel threads otherwise.
+ */
+ if (!params->kernel_workload && !params->user_workload) {
+ retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd");
+ if (retval) {
+ debug_msg("User-space interface detected, setting user-threads\n");
+ params->user_workload = 1;
+ params->user_hist = 1;
+ } else {
+ debug_msg("User-space interface not detected, setting kernel-threads\n");
+ params->kernel_workload = 1;
+ }
+ }
+
if (params->user_hist) {
retval = osnoise_set_workload(tool->context, 0);
if (retval) {
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 410eb5174913..0acfefe151f7 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -44,6 +44,7 @@ struct timerlat_top_params {
int hk_cpus;
int user_top;
int user_workload;
+ int kernel_workload;
int pretty_output;
int warmup;
cpu_set_t hk_cpu_set;
@@ -445,7 +446,7 @@ static void timerlat_top_usage(char *usage)
"",
" usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\",
" [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
- " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u] [--warm-up s]",
+ " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
@@ -474,7 +475,8 @@ static void timerlat_top_usage(char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
- " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
+ " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads",
+ " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
" --warm-up s: let the workload run for s seconds before collecting data",
NULL,
@@ -536,6 +538,7 @@ static struct timerlat_top_params
{"thread", required_argument, 0, 'T'},
{"trace", optional_argument, 0, 't'},
{"user-threads", no_argument, 0, 'u'},
+ {"kernel-threads", no_argument, 0, 'k'},
{"user-load", no_argument, 0, 'U'},
{"trigger", required_argument, 0, '0'},
{"filter", required_argument, 0, '1'},
@@ -550,7 +553,7 @@ static struct timerlat_top_params
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:uU0:1:2:345:6:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:",
long_options, &option_index);
/* detect the end of the options. */
@@ -635,6 +638,9 @@ static struct timerlat_top_params
case 'i':
params->stop_us = get_llong_from_str(optarg);
break;
+ case 'k':
+ params->kernel_workload = true;
+ break;
case 'n':
params->output_divisor = 1;
break;
@@ -729,6 +735,9 @@ static struct timerlat_top_params
if (params->no_aa && params->aa_only)
timerlat_top_usage("--no-aa and --aa-only are mutually exclusive!");
+ if (params->kernel_workload && params->user_workload)
+ timerlat_top_usage("--kernel-threads and --user-threads are mutually exclusive!");
+
return params;
}
@@ -807,6 +816,22 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *
auto_house_keeping(¶ms->monitored_cpus);
}
+ /*
+ * If the user did not specify a type of thread, try user-threads first.
+ * Fall back to kernel threads otherwise.
+ */
+ if (!params->kernel_workload && !params->user_workload) {
+ retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd");
+ if (retval) {
+ debug_msg("User-space interface detected, setting user-threads\n");
+ params->user_workload = 1;
+ params->user_top = 1;
+ } else {
+ debug_msg("User-space interface not detected, setting kernel-threads\n");
+ params->kernel_workload = 1;
+ }
+ }
+
if (params->user_top) {
retval = osnoise_set_workload(top->context, 0);
if (retval) {
--
2.44.0
timerlat top does some background/font color formatting. While useful
on terminal, it breaks the output on other formats. For example, when
piping the output for pastebin tools, the format strings are printed
as characters. For instance:
[2;37;40m Timer Latency [0;0;0m
0 00:00:01 | IRQ Timer Latency (us) | Thread Timer Latency (us)
[2;30;47mCPU COUNT | cur min avg max | cur min avg max[0;0;0m
0 #1013 | 1 0 1 54 | 5 2 4 57
1 #1013 | 3 0 1 10 | 6 2 4 15
To avoid this problem, do the formatting only if running on a tty,
and in !quiet mode.
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
tools/tracing/rtla/src/timerlat_top.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 2665e0bb5f1e..c9cf90ed4e6d 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -44,6 +44,7 @@ struct timerlat_top_params {
int hk_cpus;
int user_top;
int user_workload;
+ int pretty_output;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -179,19 +180,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
/*
* timerlat_top_header - print the header of the tool output
*/
-static void timerlat_top_header(struct osnoise_tool *top)
+static void timerlat_top_header(struct timerlat_top_params *params, struct osnoise_tool *top)
{
- struct timerlat_top_params *params = top->params;
struct trace_seq *s = top->trace.seq;
char duration[26];
get_duration(top->start_time, duration, sizeof(duration));
- trace_seq_printf(s, "\033[2;37;40m");
+ if (params->pretty_output)
+ trace_seq_printf(s, "\033[2;37;40m");
+
trace_seq_printf(s, " Timer Latency ");
if (params->user_top)
trace_seq_printf(s, " ");
- trace_seq_printf(s, "\033[0;0;0m");
+
+ if (params->pretty_output)
+ trace_seq_printf(s, "\033[0;0;0m");
trace_seq_printf(s, "\n");
trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration,
@@ -204,11 +208,15 @@ static void timerlat_top_header(struct osnoise_tool *top)
}
trace_seq_printf(s, "\n");
- trace_seq_printf(s, "\033[2;30;47m");
+ if (params->pretty_output)
+ trace_seq_printf(s, "\033[2;30;47m");
+
trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
if (params->user_top)
trace_seq_printf(s, " | cur min avg max");
- trace_seq_printf(s, "\033[0;0;0m");
+
+ if (params->pretty_output)
+ trace_seq_printf(s, "\033[0;0;0m");
trace_seq_printf(s, "\n");
}
@@ -305,7 +313,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
if (!params->quiet)
clear_terminal(trace->seq);
- timerlat_top_header(top);
+ timerlat_top_header(params, top);
for (i = 0; i < nr_cpus; i++) {
if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus))
@@ -693,6 +701,9 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *
}
}
+ if (isatty(1) && !params->quiet)
+ params->pretty_output = 1;
+
return 0;
out_err:
--
2.44.0
While the per-cpu values are the results to take into consideration, the
overall system values are also useful.
Add a summary at the bottom of rtla timerlat top showing the overall
results. For instance:
Timer Latency
0 00:00:10 | IRQ Timer Latency (us) | Thread Timer Latency (us)
CPU COUNT | cur min avg max | cur min avg max
0 #10003 | 113 19 150 441 | 134 35 170 459
1 #10003 | 63 8 99 462 | 84 15 119 481
2 #10003 | 3 2 89 396 | 21 8 108 414
3 #10002 | 206 11 210 394 | 223 21 228 415
---------------|----------------------------------------|---------------------------------------
ALL #40011 e0 | 2 137 462 | 8 156 481
Suggested-by: Juri Lelli <[email protected]>
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
tools/tracing/rtla/src/timerlat_top.c | 108 ++++++++++++++++++++++++++
1 file changed, 108 insertions(+)
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index c9cf90ed4e6d..bdcf8ef8f815 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -119,6 +119,37 @@ static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
return NULL;
}
+static void
+timerlat_top_reset_sum(struct timerlat_top_cpu *summary)
+{
+ memset(summary, 0, sizeof(*summary));
+ summary->min_irq = ~0;
+ summary->min_thread = ~0;
+ summary->min_user = ~0;
+}
+
+static void
+timerlat_top_update_sum(struct osnoise_tool *tool, int cpu, struct timerlat_top_cpu *sum)
+{
+ struct timerlat_top_data *data = tool->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+ sum->irq_count += cpu_data->irq_count;
+ update_min(&sum->min_irq, &cpu_data->min_irq);
+ update_sum(&sum->sum_irq, &cpu_data->sum_irq);
+ update_max(&sum->max_irq, &cpu_data->max_irq);
+
+ sum->thread_count += cpu_data->thread_count;
+ update_min(&sum->min_thread, &cpu_data->min_thread);
+ update_sum(&sum->sum_thread, &cpu_data->sum_thread);
+ update_max(&sum->max_thread, &cpu_data->max_thread);
+
+ sum->user_count += cpu_data->user_count;
+ update_min(&sum->min_user, &cpu_data->min_user);
+ update_sum(&sum->sum_user, &cpu_data->sum_user);
+ update_max(&sum->max_user, &cpu_data->max_user);
+}
+
/*
* timerlat_hist_update - record a new timerlat occurent on cpu, updating data
*/
@@ -285,6 +316,77 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu)
}
}
+/*
+ * timerlat_top_print_sum - prints the summary output
+ */
+static void
+timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary)
+{
+ const char *split = "----------------------------------------";
+ struct timerlat_top_params *params = top->params;
+ unsigned long long count = summary->irq_count;
+ int divisor = params->output_divisor;
+ struct trace_seq *s = top->trace.seq;
+ int e = 0;
+
+ if (divisor == 0)
+ return;
+
+ /*
+ * Skip if no data is available: is this cpu offline?
+ */
+ if (!summary->irq_count && !summary->thread_count)
+ return;
+
+ while (count > 999999) {
+ e++;
+ count /= 10;
+ }
+
+ trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split);
+ if (params->user_top)
+ trace_seq_printf(s, "-|%.*s", 39, split);
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "ALL #%-6llu e%d |", count, e);
+
+ if (!summary->irq_count) {
+ trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value);
+ } else {
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "%9llu ", summary->min_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", (summary->sum_irq / summary->irq_count) / divisor);
+ trace_seq_printf(s, "%9llu |", summary->max_irq / divisor);
+ }
+
+ if (!summary->thread_count) {
+ trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value);
+ } else {
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "%9llu ", summary->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (summary->sum_thread / summary->thread_count) / divisor);
+ trace_seq_printf(s, "%9llu", summary->max_thread / divisor);
+ }
+
+ if (!params->user_top) {
+ trace_seq_printf(s, "\n");
+ return;
+ }
+
+ trace_seq_printf(s, " |");
+
+ if (!summary->user_count) {
+ trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value);
+ } else {
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "%9llu ", summary->min_user / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (summary->sum_user / summary->user_count) / divisor);
+ trace_seq_printf(s, "%9llu\n", summary->max_user / divisor);
+ }
+}
+
/*
* clear_terminal - clears the output terminal
*/
@@ -301,6 +403,7 @@ static void
timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top)
{
struct trace_instance *trace = &top->trace;
+ struct timerlat_top_cpu summary;
static int nr_cpus = -1;
int i;
@@ -313,14 +416,19 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
if (!params->quiet)
clear_terminal(trace->seq);
+ timerlat_top_reset_sum(&summary);
+
timerlat_top_header(params, top);
for (i = 0; i < nr_cpus; i++) {
if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus))
continue;
timerlat_top_print(top, i);
+ timerlat_top_update_sum(top, i, &summary);
}
+ timerlat_top_print_sum(top, &summary);
+
trace_seq_do_printf(trace->seq);
trace_seq_reset(trace->seq);
}
--
2.44.0
When copying timerlat auto-analysis from a terminal to some web pages or
chats, the \t are being replaced with a single ' ' or ' ', breaking
the output.
For example:
## CPU 3 hit stop tracing, analyzing it ##
IRQ handler delay: 1.30 us (0.11 %)
IRQ latency: 1.90 us
Timerlat IRQ duration: 3.00 us (0.24 %)
Blocking thread: 1223.16 us (99.00 %)
insync:4048 1223.16 us
IRQ interference 4.93 us (0.40 %)
local_timer:236 4.93 us
------------------------------------------------------------------------
Thread latency: 1235.47 us (100%)
Replace \t with spaces to avoid this problem.
Cc: [email protected]
Fixes: 27e348b221f6 ("rtla/timerlat: Add auto-analysis core")
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
tools/tracing/rtla/src/timerlat_aa.c | 109 ++++++++++++++++-----------
1 file changed, 63 insertions(+), 46 deletions(-)
diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c
index 7093fd5333be..7bd80ee2a5b4 100644
--- a/tools/tracing/rtla/src/timerlat_aa.c
+++ b/tools/tracing/rtla/src/timerlat_aa.c
@@ -16,6 +16,9 @@ enum timelat_state {
TIMERLAT_WAITING_THREAD,
};
+/* Used to fill spaces in the output */
+static const char *spaces = " ";
+
#define MAX_COMM 24
/*
@@ -274,14 +277,17 @@ static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *recor
taa_data->prev_irq_timstamp = start;
trace_seq_reset(taa_data->prev_irqs_seq);
- trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
- "nmi", ns_to_usf(duration));
+ trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n",
+ "nmi",
+ 24, spaces,
+ ns_to_usf(duration));
return 0;
}
taa_data->thread_nmi_sum += duration;
- trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
- "nmi", ns_to_usf(duration));
+ trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n",
+ "nmi",
+ 24, spaces, ns_to_usf(duration));
return 0;
}
@@ -323,8 +329,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor
taa_data->prev_irq_timstamp = start;
trace_seq_reset(taa_data->prev_irqs_seq);
- trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
- desc, vector, ns_to_usf(duration));
+ trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
+ desc, vector,
+ 15, spaces,
+ ns_to_usf(duration));
return 0;
}
@@ -372,8 +380,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor
* IRQ interference.
*/
taa_data->thread_irq_sum += duration;
- trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n",
- desc, vector, ns_to_usf(duration));
+ trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
+ desc, vector,
+ 24, spaces,
+ ns_to_usf(duration));
return 0;
}
@@ -408,8 +418,10 @@ static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *r
taa_data->thread_softirq_sum += duration;
- trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
- softirq_name[vector], vector, ns_to_usf(duration));
+ trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n",
+ softirq_name[vector], vector,
+ 24, spaces,
+ ns_to_usf(duration));
return 0;
}
@@ -452,8 +464,10 @@ static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *re
} else {
taa_data->thread_thread_sum += duration;
- trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
- comm, pid, ns_to_usf(duration));
+ trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n",
+ comm, pid,
+ 15, spaces,
+ ns_to_usf(duration));
}
return 0;
@@ -482,7 +496,8 @@ static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *rec
function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
if (!function)
break;
- trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
+ trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n",
+ 14, spaces, function);
}
}
return 0;
@@ -568,23 +583,24 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
- printf(" Previous IRQ interference: \t\t up to %9.2f us\n",
- ns_to_usf(taa_data->prev_irq_duration));
+ printf(" Previous IRQ interference: %.*s up to %9.2f us\n",
+ 16, spaces,
+ ns_to_usf(taa_data->prev_irq_duration));
}
/*
* The delay that the IRQ suffered before starting.
*/
- printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n",
- (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
- ns_to_usf(taa_data->timer_irq_start_delay),
- ns_to_per(total, taa_data->timer_irq_start_delay));
+ printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces,
+ (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
+ ns_to_usf(taa_data->timer_irq_start_delay),
+ ns_to_per(total, taa_data->timer_irq_start_delay));
/*
* Timerlat IRQ.
*/
- printf(" IRQ latency: \t\t\t\t %9.2f us\n",
- ns_to_usf(taa_data->tlat_irq_latency));
+ printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces,
+ ns_to_usf(taa_data->tlat_irq_latency));
if (irq) {
/*
@@ -595,15 +611,16 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* so it will be displayed, it is the key.
*/
printf(" Blocking thread:\n");
- printf(" %24s:%-9llu\n",
- taa_data->run_thread_comm, taa_data->run_thread_pid);
+ printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm,
+ taa_data->run_thread_pid);
} else {
/*
* The duration of the IRQ handler that handled the timerlat IRQ.
*/
- printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->timer_irq_duration),
- ns_to_per(total, taa_data->timer_irq_duration));
+ printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n",
+ 30, spaces,
+ ns_to_usf(taa_data->timer_irq_duration),
+ ns_to_per(total, taa_data->timer_irq_duration));
/*
* The amount of time that the current thread postponed the scheduler.
@@ -611,13 +628,13 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* Recalling that it is net from NMI/IRQ/Softirq interference, so there
* is no need to compute values here.
*/
- printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->thread_blocking_duration),
- ns_to_per(total, taa_data->thread_blocking_duration));
+ printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces,
+ ns_to_usf(taa_data->thread_blocking_duration),
+ ns_to_per(total, taa_data->thread_blocking_duration));
- printf(" %24s:%-9llu %9.2f us\n",
- taa_data->run_thread_comm, taa_data->run_thread_pid,
- ns_to_usf(taa_data->thread_blocking_duration));
+ printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces,
+ taa_data->run_thread_comm, taa_data->run_thread_pid,
+ 12, spaces, ns_to_usf(taa_data->thread_blocking_duration));
}
/*
@@ -629,9 +646,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* NMIs can happen during the IRQ, so they are always possible.
*/
if (taa_data->thread_nmi_sum)
- printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->thread_nmi_sum),
- ns_to_per(total, taa_data->thread_nmi_sum));
+ printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
+ ns_to_usf(taa_data->thread_nmi_sum),
+ ns_to_per(total, taa_data->thread_nmi_sum));
/*
* If it is an IRQ latency, the other factors can be skipped.
@@ -643,9 +660,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* Prints the interference caused by IRQs to the thread latency.
*/
if (taa_data->thread_irq_sum) {
- printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->thread_irq_sum),
- ns_to_per(total, taa_data->thread_irq_sum));
+ printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces,
+ ns_to_usf(taa_data->thread_irq_sum),
+ ns_to_per(total, taa_data->thread_irq_sum));
trace_seq_do_printf(taa_data->irqs_seq);
}
@@ -654,9 +671,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* Prints the interference caused by Softirqs to the thread latency.
*/
if (taa_data->thread_softirq_sum) {
- printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->thread_softirq_sum),
- ns_to_per(total, taa_data->thread_softirq_sum));
+ printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces,
+ ns_to_usf(taa_data->thread_softirq_sum),
+ ns_to_per(total, taa_data->thread_softirq_sum));
trace_seq_do_printf(taa_data->softirqs_seq);
}
@@ -670,9 +687,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
* timer handling latency.
*/
if (taa_data->thread_thread_sum) {
- printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n",
- ns_to_usf(taa_data->thread_thread_sum),
- ns_to_per(total, taa_data->thread_thread_sum));
+ printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces,
+ ns_to_usf(taa_data->thread_thread_sum),
+ ns_to_per(total, taa_data->thread_thread_sum));
trace_seq_do_printf(taa_data->threads_seq);
}
@@ -682,8 +699,8 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
*/
print_total:
printf("------------------------------------------------------------------------\n");
- printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
- ns_to_usf(total));
+ printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread",
+ 37, spaces, ns_to_usf(total));
}
static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
--
2.44.0
On many cases, the results right after the startup are different
from the rest of the execution, biasing the results. For example,
on osnoise, the scheduler might take some time to adapt to the new
busy-loop workload.
Add the --warm-up <seconds> option, adding a warm-up phase (in
seconds) where the workload is set, but the results are discarded.
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
Documentation/tools/rtla/common_options.rst | 4 ++
tools/tracing/rtla/src/osnoise_hist.c | 30 +++++++++++--
tools/tracing/rtla/src/osnoise_top.c | 29 +++++++++++-
tools/tracing/rtla/src/timerlat_hist.c | 49 ++++++++++++++-------
tools/tracing/rtla/src/timerlat_top.c | 47 ++++++++++++--------
5 files changed, 119 insertions(+), 40 deletions(-)
diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst
index aeb91ff3bd68..a96ea0ed662e 100644
--- a/Documentation/tools/rtla/common_options.rst
+++ b/Documentation/tools/rtla/common_options.rst
@@ -50,6 +50,10 @@
Set a *cgroup* to the tracer's threads. If the **-C** option is passed without arguments, the tracer's thread will inherit **rtla**'s *cgroup*. Otherwise, the threads will be placed on the *cgroup* passed to the option.
+**--warm-up** *s*
+
+ After starting the workload, let it run for *s* seconds before starting collecting the data, allowing the system to warm-up. Statistical data generated during warm-up is discarded.
+
**-h**, **--help**
Print help menu.
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 01870d50942a..c6100ff46a7f 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -36,13 +36,13 @@ struct osnoise_hist_params {
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
-
char no_header;
char no_summary;
char no_index;
char with_zeros;
int bucket_size;
int entries;
+ int warmup;
};
struct osnoise_hist_cpu {
@@ -438,7 +438,7 @@ static void osnoise_hist_usage(char *usage)
" usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
" [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
" [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\",
- " [--no-index] [--with-zeros] [-C[=cgroup_name]]",
+ " [--no-index] [--with-zeros] [-C[=cgroup_name]] [--warm-up]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
@@ -468,6 +468,7 @@ static void osnoise_hist_usage(char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
+ " --warm-up: let the workload run for s seconds before collecting data",
NULL,
};
@@ -531,13 +532,14 @@ static struct osnoise_hist_params
{"with-zeros", no_argument, 0, '3'},
{"trigger", required_argument, 0, '4'},
{"filter", required_argument, 0, '5'},
+ {"warm-up", required_argument, 0, '6'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:",
long_options, &option_index);
/* detect the end of the options. */
@@ -680,6 +682,9 @@ static struct osnoise_hist_params
osnoise_hist_usage("--filter requires a previous -e\n");
}
break;
+ case '6':
+ params->warmup = get_llong_from_str(optarg);
+ break;
default:
osnoise_hist_usage("Invalid option");
}
@@ -899,6 +904,25 @@ int osnoise_hist_main(int argc, char *argv[])
trace_instance_start(&record->trace);
trace_instance_start(trace);
+ if (params->warmup > 0) {
+ debug_msg("Warming up for %d seconds\n", params->warmup);
+ sleep(params->warmup);
+ if (stop_tracing)
+ goto out_hist;
+
+ /*
+ * Clean up the buffer. The osnoise workload do not run
+ * with tracing off to avoid creating a performance penalty
+ * when not needed.
+ */
+ retval = tracefs_instance_file_write(trace->inst, "trace", "");
+ if (retval < 0) {
+ debug_msg("Error cleaning up the buffer");
+ goto out_hist;
+ }
+
+ }
+
tool->start_time = time(NULL);
osnoise_hist_set_signals(params);
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 457360db0767..53a074c1222e 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -40,6 +40,7 @@ struct osnoise_top_params {
int set_sched;
int cgroup;
int hk_cpus;
+ int warmup;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -282,7 +283,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
static const char * const msg[] = {
" [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
" [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
- " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]]",
+ " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]] [--warm-up s]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit",
@@ -307,6 +308,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
" f:prio - use SCHED_FIFO with prio",
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
+ " --warm-up s: let the workload run for s seconds before collecting data",
NULL,
};
@@ -381,13 +383,14 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
{"trace", optional_argument, 0, 't'},
{"trigger", required_argument, 0, '0'},
{"filter", required_argument, 0, '1'},
+ {"warm-up", required_argument, 0, '2'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:",
long_options, &option_index);
/* Detect the end of the options. */
@@ -511,6 +514,9 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
osnoise_top_usage(params, "--filter requires a previous -e\n");
}
break;
+ case '2':
+ params->warmup = get_llong_from_str(optarg);
+ break;
default:
osnoise_top_usage(params, "Invalid option");
}
@@ -732,6 +738,25 @@ int osnoise_top_main(int argc, char **argv)
trace_instance_start(&record->trace);
trace_instance_start(trace);
+ if (params->warmup > 0) {
+ debug_msg("Warming up for %d seconds\n", params->warmup);
+ sleep(params->warmup);
+ if (stop_tracing)
+ goto out_top;
+
+ /*
+ * Clean up the buffer. The osnoise workload do not run
+ * with tracing off to avoid creating a performance penalty
+ * when not needed.
+ */
+ retval = tracefs_instance_file_write(trace->inst, "trace", "");
+ if (retval < 0) {
+ debug_msg("Error cleaning up the buffer");
+ goto out_top;
+ }
+
+ }
+
tool->start_time = time(NULL);
osnoise_top_set_signals(params);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index b12c6b571dd4..da1c353bdac9 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -52,6 +52,7 @@ struct timerlat_hist_params {
char with_zeros;
int bucket_size;
int entries;
+ int warmup;
};
struct timerlat_hist_cpu {
@@ -628,6 +629,7 @@ static void timerlat_hist_usage(char *usage)
" [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
" [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
" [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]",
+ " [--warm-up s]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
@@ -664,6 +666,7 @@ static void timerlat_hist_usage(char *usage)
" in nanoseconds",
" -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
+ " --warm-up s: let the workload run for s seconds before collecting data",
NULL,
};
@@ -738,13 +741,14 @@ static struct timerlat_hist_params
{"dma-latency", required_argument, 0, '8'},
{"no-aa", no_argument, 0, '9'},
{"dump-task", no_argument, 0, '\1'},
+ {"warm-up", required_argument, 0, '\2'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:uU0123456:7:8:9\1",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:uU0123456:7:8:9\1\2:",
long_options, &option_index);
/* detect the end of the options. */
@@ -913,6 +917,9 @@ static struct timerlat_hist_params
case '\1':
params->dump_tasks = 1;
break;
+ case '\2':
+ params->warmup = get_llong_from_str(optarg);
+ break;
default:
timerlat_hist_usage("Invalid option");
}
@@ -1167,22 +1174,6 @@ int timerlat_hist_main(int argc, char *argv[])
}
}
- /*
- * Start the tracers here, after having set all instances.
- *
- * Let the trace instance start first for the case of hitting a stop
- * tracing while enabling other instances. The trace instance is the
- * one with most valuable information.
- */
- if (params->trace_output)
- trace_instance_start(&record->trace);
- if (!params->no_aa)
- trace_instance_start(&aa->trace);
- trace_instance_start(trace);
-
- tool->start_time = time(NULL);
- timerlat_hist_set_signals(params);
-
if (params->user_workload) {
/* rtla asked to stop */
params_u.should_run = 1;
@@ -1202,6 +1193,29 @@ int timerlat_hist_main(int argc, char *argv[])
err_msg("Error creating timerlat user-space threads\n");
}
+ if (params->warmup > 0) {
+ debug_msg("Warming up for %d seconds\n", params->warmup);
+ sleep(params->warmup);
+ if (stop_tracing)
+ goto out_hist;
+ }
+
+ /*
+ * Start the tracers here, after having set all instances.
+ *
+ * Let the trace instance start first for the case of hitting a stop
+ * tracing while enabling other instances. The trace instance is the
+ * one with most valuable information.
+ */
+ if (params->trace_output)
+ trace_instance_start(&record->trace);
+ if (!params->no_aa)
+ trace_instance_start(&aa->trace);
+ trace_instance_start(trace);
+
+ tool->start_time = time(NULL);
+ timerlat_hist_set_signals(params);
+
while (!stop_tracing) {
sleep(params->sleep_time);
@@ -1227,6 +1241,7 @@ int timerlat_hist_main(int argc, char *argv[])
}
}
}
+
if (params->user_workload && !params_u.stopped_running) {
params_u.should_run = 0;
sleep(1);
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index bdcf8ef8f815..410eb5174913 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -45,6 +45,7 @@ struct timerlat_top_params {
int user_top;
int user_workload;
int pretty_output;
+ int warmup;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -444,7 +445,7 @@ static void timerlat_top_usage(char *usage)
"",
" usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\",
" [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\",
- " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]",
+ " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u] [--warm-up s]",
"",
" -h/--help: print this menu",
" -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
@@ -475,6 +476,7 @@ static void timerlat_top_usage(char *usage)
" in nanoseconds",
" -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
+ " --warm-up s: let the workload run for s seconds before collecting data",
NULL,
};
@@ -541,13 +543,14 @@ static struct timerlat_top_params
{"no-aa", no_argument, 0, '3'},
{"dump-tasks", no_argument, 0, '4'},
{"aa-only", required_argument, 0, '5'},
+ {"warm-up", required_argument, 0, '6'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:uU0:1:2:345:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:uU0:1:2:345:6:",
long_options, &option_index);
/* detect the end of the options. */
@@ -704,6 +707,9 @@ static struct timerlat_top_params
case '4':
params->dump_tasks = 1;
break;
+ case '6':
+ params->warmup = get_llong_from_str(optarg);
+ break;
default:
timerlat_top_usage("Invalid option");
}
@@ -971,22 +977,6 @@ int timerlat_top_main(int argc, char *argv[])
}
}
- /*
- * Start the tracers here, after having set all instances.
- *
- * Let the trace instance start first for the case of hitting a stop
- * tracing while enabling other instances. The trace instance is the
- * one with most valuable information.
- */
- if (params->trace_output)
- trace_instance_start(&record->trace);
- if (!params->no_aa && aa != top)
- trace_instance_start(&aa->trace);
- trace_instance_start(trace);
-
- top->start_time = time(NULL);
- timerlat_top_set_signals(params);
-
if (params->user_workload) {
/* rtla asked to stop */
params_u.should_run = 1;
@@ -1006,6 +996,27 @@ int timerlat_top_main(int argc, char *argv[])
err_msg("Error creating timerlat user-space threads\n");
}
+ if (params->warmup > 0) {
+ debug_msg("Warming up for %d seconds\n", params->warmup);
+ sleep(params->warmup);
+ }
+
+ /*
+ * Start the tracers here, after having set all instances.
+ *
+ * Let the trace instance start first for the case of hitting a stop
+ * tracing while enabling other instances. The trace instance is the
+ * one with most valuable information.
+ */
+ if (params->trace_output)
+ trace_instance_start(&record->trace);
+ if (!params->no_aa && aa != top)
+ trace_instance_start(&aa->trace);
+ trace_instance_start(trace);
+
+ top->start_time = time(NULL);
+ timerlat_top_set_signals(params);
+
while (!stop_tracing) {
sleep(params->sleep_time);
--
2.44.0
Add the option allow the users to set a different buffer size for the
trace. For example, in large systems, the user might be interested on
reducing the trace buffer to avoid large tracing files.
The buffer size is specified in kB, and it is only affecting
the tracing instance.
Signed-off-by: Daniel Bristot de Oliveira <[email protected]>
---
Documentation/tools/rtla/common_options.rst | 3 +++
tools/tracing/rtla/src/osnoise_hist.c | 13 ++++++++++++-
tools/tracing/rtla/src/osnoise_top.c | 14 +++++++++++++-
tools/tracing/rtla/src/timerlat_hist.c | 14 +++++++++++++-
tools/tracing/rtla/src/timerlat_top.c | 14 +++++++++++++-
tools/tracing/rtla/src/trace.c | 15 +++++++++++++++
tools/tracing/rtla/src/trace.h | 1 +
7 files changed, 70 insertions(+), 4 deletions(-)
diff --git a/Documentation/tools/rtla/common_options.rst b/Documentation/tools/rtla/common_options.rst
index a96ea0ed662e..7ac7b7581466 100644
--- a/Documentation/tools/rtla/common_options.rst
+++ b/Documentation/tools/rtla/common_options.rst
@@ -54,6 +54,9 @@
After starting the workload, let it run for *s* seconds before starting collecting the data, allowing the system to warm-up. Statistical data generated during warm-up is discarded.
+**--trace-buffer-size** *kB*
+ Set the per-cpu trace buffer size in kB for the tracing output.
+
**-h**, **--help**
Print help menu.
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index c6100ff46a7f..198a17a3ea2e 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -43,6 +43,7 @@ struct osnoise_hist_params {
int bucket_size;
int entries;
int warmup;
+ int buffer_size;
};
struct osnoise_hist_cpu {
@@ -469,6 +470,7 @@ static void osnoise_hist_usage(char *usage)
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
" --warm-up: let the workload run for s seconds before collecting data",
+ " --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
NULL,
};
@@ -533,13 +535,14 @@ static struct osnoise_hist_params
{"trigger", required_argument, 0, '4'},
{"filter", required_argument, 0, '5'},
{"warm-up", required_argument, 0, '6'},
+ {"trace-buffer-size", required_argument, 0, '7'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:7:",
long_options, &option_index);
/* detect the end of the options. */
@@ -685,6 +688,9 @@ static struct osnoise_hist_params
case '6':
params->warmup = get_llong_from_str(optarg);
break;
+ case '7':
+ params->buffer_size = get_llong_from_str(optarg);
+ break;
default:
osnoise_hist_usage("Invalid option");
}
@@ -891,6 +897,11 @@ int osnoise_hist_main(int argc, char *argv[])
goto out_hist;
}
+ if (params->buffer_size > 0) {
+ retval = trace_set_buffer_size(&record->trace, params->buffer_size);
+ if (retval)
+ goto out_hist;
+ }
}
/*
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 53a074c1222e..7e5aab22727d 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -41,6 +41,7 @@ struct osnoise_top_params {
int cgroup;
int hk_cpus;
int warmup;
+ int buffer_size;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -309,6 +310,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
" d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
" in nanoseconds",
" --warm-up s: let the workload run for s seconds before collecting data",
+ " --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
NULL,
};
@@ -384,13 +386,14 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
{"trigger", required_argument, 0, '0'},
{"filter", required_argument, 0, '1'},
{"warm-up", required_argument, 0, '2'},
+ {"trace-buffer-size", required_argument, 0, '3'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:3:",
long_options, &option_index);
/* Detect the end of the options. */
@@ -517,6 +520,9 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
case '2':
params->warmup = get_llong_from_str(optarg);
break;
+ case '3':
+ params->buffer_size = get_llong_from_str(optarg);
+ break;
default:
osnoise_top_usage(params, "Invalid option");
}
@@ -725,6 +731,12 @@ int osnoise_top_main(int argc, char **argv)
if (retval)
goto out_top;
}
+
+ if (params->buffer_size > 0) {
+ retval = trace_set_buffer_size(&record->trace, params->buffer_size);
+ if (retval)
+ goto out_top;
+ }
}
/*
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 6eb6e38d4a05..d4bab86ca1b9 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -54,6 +54,7 @@ struct timerlat_hist_params {
int bucket_size;
int entries;
int warmup;
+ int buffer_size;
};
struct timerlat_hist_cpu {
@@ -669,6 +670,7 @@ static void timerlat_hist_usage(char *usage)
" -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
" --warm-up s: let the workload run for s seconds before collecting data",
+ " --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
NULL,
};
@@ -745,13 +747,14 @@ static struct timerlat_hist_params
{"no-aa", no_argument, 0, '9'},
{"dump-task", no_argument, 0, '\1'},
{"warm-up", required_argument, 0, '\2'},
+ {"trace-buffer-size", required_argument, 0, '\3'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:",
+ c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3",
long_options, &option_index);
/* detect the end of the options. */
@@ -926,6 +929,9 @@ static struct timerlat_hist_params
case '\2':
params->warmup = get_llong_from_str(optarg);
break;
+ case '\3':
+ params->buffer_size = get_llong_from_str(optarg);
+ break;
default:
timerlat_hist_usage("Invalid option");
}
@@ -1179,6 +1185,12 @@ int timerlat_hist_main(int argc, char *argv[])
if (retval)
goto out_hist;
}
+
+ if (params->buffer_size > 0) {
+ retval = trace_set_buffer_size(&record->trace, params->buffer_size);
+ if (retval)
+ goto out_hist;
+ }
}
if (!params->no_aa) {
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 0acfefe151f7..3a23e8d481c6 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -47,6 +47,7 @@ struct timerlat_top_params {
int kernel_workload;
int pretty_output;
int warmup;
+ int buffer_size;
cpu_set_t hk_cpu_set;
struct sched_attr sched_param;
struct trace_events *events;
@@ -479,6 +480,7 @@ static void timerlat_top_usage(char *usage)
" -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads",
" -U/--user-load: enable timerlat for user-defined user-space workload",
" --warm-up s: let the workload run for s seconds before collecting data",
+ " --trace-buffer-size kB: set the per-cpu trace buffer size in kB",
NULL,
};
@@ -547,13 +549,14 @@ static struct timerlat_top_params
{"dump-tasks", no_argument, 0, '4'},
{"aa-only", required_argument, 0, '5'},
{"warm-up", required_argument, 0, '6'},
+ {"trace-buffer-size", required_argument, 0, '7'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
- c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:",
+ c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:7:",
long_options, &option_index);
/* detect the end of the options. */
@@ -716,6 +719,9 @@ static struct timerlat_top_params
case '6':
params->warmup = get_llong_from_str(optarg);
break;
+ case '7':
+ params->buffer_size = get_llong_from_str(optarg);
+ break;
default:
timerlat_top_usage("Invalid option");
}
@@ -973,6 +979,12 @@ int timerlat_top_main(int argc, char *argv[])
if (retval)
goto out_top;
}
+
+ if (params->buffer_size > 0) {
+ retval = trace_set_buffer_size(&record->trace, params->buffer_size);
+ if (retval)
+ goto out_top;
+ }
}
if (!params->no_aa) {
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
index e1ba6d9f4265..170a706248ab 100644
--- a/tools/tracing/rtla/src/trace.c
+++ b/tools/tracing/rtla/src/trace.c
@@ -540,3 +540,18 @@ int trace_is_off(struct trace_instance *tool, struct trace_instance *trace)
return 0;
}
+
+/*
+ * trace_set_buffer_size - set the per-cpu tracing buffer size.
+ */
+int trace_set_buffer_size(struct trace_instance *trace, int size)
+{
+ int retval;
+
+ debug_msg("Setting trace buffer size to %d Kb\n", size);
+ retval = tracefs_instance_set_buffer_size(trace->inst, size, -1);
+ if (retval)
+ err_msg("Error setting trace buffer size\n");
+
+ return retval;
+}
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
index 2e9a89a25615..c7c92dc9a18a 100644
--- a/tools/tracing/rtla/src/trace.h
+++ b/tools/tracing/rtla/src/trace.h
@@ -48,3 +48,4 @@ int trace_events_enable(struct trace_instance *instance,
int trace_event_add_filter(struct trace_events *event, char *filter);
int trace_event_add_trigger(struct trace_events *event, char *trigger);
int trace_is_off(struct trace_instance *tool, struct trace_instance *trace);
+int trace_set_buffer_size(struct trace_instance *trace, int size);
--
2.44.0
On 4/24/24 16:36, Daniel Bristot de Oliveira wrote:
> + retval = trace_set_buffer_size(&record->trace, params->buffer_size);
This function appears on libtracefs from 1.6 up, so I need to update
the minimum required lib version to 1.6. I will send a v2 of this
patch only.
Thanks jkacur for pointing it to me, it happens on fedora 38.
-- Daniel