Hi Steven, Ingo,
Here is a new patch series [1] with more improvements and fixes.
Android systrace viewer heavily depends on the tgid to group tasks. tgid is
also useful for analyzing traces and generating analysis results for groups of
tasks. Also, Michael Sartain said he has a need for it and helped test the
series (I have added him to CC).
Following are the improvements in this series:
* Preserve existing API for existing callers
* Memory leak fix
* Improved trace output code
Looking forward to your thoughts and comments, thanks.
Joel Fernandes (4):
tracing: Remove unused declaration of trace_stop_cmdline_recording
tracing: Add support for recording tgid of tasks
tracing: Add support for display of tgid in trace output
tracing/ftrace: Add support to record and display tgid
include/linux/trace_events.h | 12 +++-
kernel/trace/trace.c | 140 +++++++++++++++++++++++++++++++-------
kernel/trace/trace.h | 8 +++
kernel/trace/trace_events.c | 42 +++++++++++-
kernel/trace/trace_functions.c | 26 +++++++
kernel/trace/trace_output.c | 9 +++
kernel/trace/trace_sched_switch.c | 79 +++++++++++++++++----
7 files changed, 274 insertions(+), 42 deletions(-)
[1] https://www.mail-archive.com/[email protected]/msg1412260.html
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: [email protected]
Cc: Michael Sartain <[email protected]>
--
2.13.0.506.g27d5fe0cd-goog
trace_stop_cmdline_recording declaration isn't in use, remove it.
Cc: [email protected]
Cc: Ingo Molnar <[email protected]>
Cc: Steven Rostedt <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1122f151466f..63deff9cdf2c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1910,8 +1910,6 @@ static void tracing_stop_tr(struct trace_array *tr)
raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
-void trace_stop_cmdline_recording(void);
-
static int trace_save_cmdline(struct task_struct *tsk)
{
unsigned pid, idx;
--
2.13.0.506.g27d5fe0cd-goog
Make function tracer able to record tgid if/when record-tgid is enabled.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace_functions.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a3bddbfd0874..d6bdc38ab273 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -27,6 +27,7 @@ static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs);
static struct tracer_flags func_flags;
+static bool tgid_recorded;
/* Our option */
enum {
@@ -104,6 +105,11 @@ static int function_trace_init(struct trace_array *tr)
put_cpu();
tracing_start_cmdline_record();
+
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ tgid_recorded = true;
+ tracing_start_tgid_record();
+ }
tracing_start_function_trace(tr);
return 0;
}
@@ -112,6 +118,10 @@ static void function_trace_reset(struct trace_array *tr)
{
tracing_stop_function_trace(tr);
tracing_stop_cmdline_record();
+ if (tgid_recorded) {
+ tracing_stop_tgid_record();
+ tgid_recorded = false;
+ }
ftrace_reset_array_ops(tr);
}
@@ -252,6 +262,21 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
return 0;
}
+static int
+func_tracer_flag_changed(struct trace_array *tr, unsigned int mask,
+ int enabled)
+{
+ if (mask == TRACE_ITER_RECORD_TGID) {
+ tgid_recorded = !!enabled;
+ if (enabled)
+ tracing_start_tgid_record();
+ else
+ tracing_stop_tgid_record();
+ }
+
+ return 0;
+}
+
static struct tracer function_trace __tracer_data =
{
.name = "function",
@@ -260,6 +285,7 @@ static struct tracer function_trace __tracer_data =
.start = function_trace_start,
.flags = &func_flags,
.set_flag = func_set_flag,
+ .flag_changed = func_tracer_flag_changed,
.allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
--
2.13.0.506.g27d5fe0cd-goog
Earlier patches introduced ability to record the tgid using the 'record-tgid'
option. Here we read the tgid and output it if the option is enabled.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace.c | 36 ++++++++++++++++++++++--------------
kernel/trace/trace_output.c | 9 +++++++++
2 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 185a09f8b67e..8ce78f44e60d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3325,23 +3325,29 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
seq_puts(m, "#\n");
}
-static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
+static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
+ unsigned int flags)
{
+ bool tgid = !!(flags & TRACE_ITER_RECORD_TGID);
+
print_event_info(buf, m);
- seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
- "# | | | | |\n");
+
+ seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
+ seq_printf(m, "# | | | %s | |\n", tgid ? " | " : "");
}
-static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
+static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
+ unsigned int flags)
{
- print_event_info(buf, m);
- seq_puts(m, "# _-----=> irqs-off\n"
- "# / _----=> need-resched\n"
- "# | / _---=> hardirq/softirq\n"
- "# || / _--=> preempt-depth\n"
- "# ||| / delay\n"
- "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
- "# | | | |||| | |\n");
+ bool tgid = !!(flags & TRACE_ITER_RECORD_TGID);
+
+ seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : "");
+ seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : "");
+ seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : "");
+ seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : "");
+ seq_printf(m, "# %s||| / delay\n", tgid ? " " : "");
+ seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
+ seq_printf(m, "# | | | %s|||| | |\n", tgid ? " | " : "");
}
void
@@ -3657,9 +3663,11 @@ void trace_default_header(struct seq_file *m)
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
- print_func_help_header_irq(iter->trace_buffer, m);
+ print_func_help_header_irq(iter->trace_buffer,
+ m, trace_flags);
else
- print_func_help_header(iter->trace_buffer, m);
+ print_func_help_header(iter->trace_buffer, m,
+ trace_flags);
}
}
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 08f9bab8089e..55c743295874 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -587,6 +587,15 @@ int trace_print_context(struct trace_iterator *iter)
trace_seq_printf(s, "%16s-%-5d [%03d] ",
comm, entry->pid, iter->cpu);
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ unsigned int tgid = trace_find_tgid(entry->pid);
+
+ if (!tgid)
+ trace_seq_printf(s, "(-----) ");
+ else
+ trace_seq_printf(s, "(%5d) ", tgid);
+ }
+
if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
trace_print_lat_fmt(s, entry);
--
2.13.0.506.g27d5fe0cd-goog
In order to support recording of tgid, the following changes are made:
* Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
along with the task's comm at the same time. The API is flexible to
optionally also record for a group of tasks which is useful in recording the
info of multiple tasks (such as when recording previous and next task in a
sched_switch). Along with simplifying the API and the callers, this approach
also has the benefit of not setting trace_cmdline_save before information for
multiple tasks are saved.
* We preserve the old API (tracing_record_cmdline) and create it as a wrapper
around the new one. Along with this we add a tracing_record_tgid function.
* Reuse the existing sched_switch and sched_wakeup probes. Add a new option
'record-tgid' to enable recording of tgid.
When the record-tgid option isn't enabled to begin with, we take care to make sure
that there isn't memory or runtime overhead.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
include/linux/trace_events.h | 12 ++++-
kernel/trace/trace.c | 102 ++++++++++++++++++++++++++++++++++----
kernel/trace/trace.h | 8 +++
kernel/trace/trace_events.c | 42 +++++++++++++++-
kernel/trace/trace_sched_switch.c | 79 +++++++++++++++++++++++------
5 files changed, 217 insertions(+), 26 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..6c9e84b1f8ae 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -151,7 +151,14 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
int type, unsigned long len,
unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
+#define TRACE_RECORD_CMDLINE BIT(0)
+#define TRACE_RECORD_TGID BIT(1)
+
+void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags);
+void tracing_record_taskinfo_single(struct task_struct *task, int flags);
+
+void tracing_record_cmdline(struct task_struct *task);
+void tracing_record_tgid(struct task_struct *task);
int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
@@ -290,6 +297,7 @@ struct trace_subsystem_dir;
enum {
EVENT_FILE_FL_ENABLED_BIT,
EVENT_FILE_FL_RECORDED_CMD_BIT,
+ EVENT_FILE_FL_RECORDED_TGID_BIT,
EVENT_FILE_FL_FILTERED_BIT,
EVENT_FILE_FL_NO_SET_FILTER_BIT,
EVENT_FILE_FL_SOFT_MODE_BIT,
@@ -303,6 +311,7 @@ enum {
* Event file flags:
* ENABLED - The event is enabled
* RECORDED_CMD - The comms should be recorded at sched_switch
+ * RECORDED_TGID - The tgids should be recorded at sched_switch
* FILTERED - The event has a filter attached
* NO_SET_FILTER - Set when filter has error and is to be ignored
* SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
@@ -315,6 +324,7 @@ enum {
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
+ EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT),
EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 63deff9cdf2c..185a09f8b67e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
* tracing is active, only save the comm when a trace event
* occurred.
*/
-static DEFINE_PER_CPU(bool, trace_cmdline_save);
+static DEFINE_PER_CPU(bool, trace_taskinfo_save);
/*
* Kill all tracing for good (never come back).
@@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
static __always_inline void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
- __this_cpu_write(trace_cmdline_save, true);
+ __this_cpu_write(trace_taskinfo_save, true);
/* If this is the temp buffer, we need to commit fully */
if (this_cpu_read(trace_buffered_event) == event) {
@@ -1709,6 +1709,18 @@ void tracing_reset_all_online_cpus(void)
}
}
+static int *tgid_map;
+
+void tracing_alloc_tgid_map(void)
+{
+ if (tgid_map)
+ return;
+
+ tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
+ GFP_KERNEL);
+ WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
+}
+
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
static struct saved_cmdlines_buffer *savedcmd;
/* temporary disable recording */
-static atomic_t trace_record_cmdline_disabled __read_mostly;
+static atomic_t trace_record_taskinfo_disabled __read_mostly;
static inline char *get_saved_cmdlines(int idx)
{
@@ -1990,16 +2002,83 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
-void tracing_record_cmdline(struct task_struct *tsk)
+int trace_find_tgid(int pid)
+{
+ if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
+ return 0;
+
+ return tgid_map[pid];
+}
+
+static int trace_save_tgid(struct task_struct *tsk)
{
- if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
+ if (!tgid_map || tsk->pid <= 0 || tsk->pid > PID_MAX_DEFAULT)
+ return 0;
+
+ tgid_map[tsk->pid] = tsk->tgid;
+ return 1;
+}
+
+/**
+ * tracing_record_taskinfo - record the task information of multiple tasks
+ *
+ * @tasks - list of tasks (array of task_struct pointers)
+ * @len - length of the list of tasks
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags)
+{
+ int i;
+ bool cmdline, tgid;
+
+ tgid = !!(flags & TRACE_RECORD_TGID);
+ cmdline = !!(flags & TRACE_RECORD_CMDLINE);
+
+ if (unlikely(!cmdline && !tgid))
+ return;
+
+ if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
return;
- if (!__this_cpu_read(trace_cmdline_save))
+ if (!__this_cpu_read(trace_taskinfo_save))
return;
- if (trace_save_cmdline(tsk))
- __this_cpu_write(trace_cmdline_save, false);
+ for (i = 0; i < len; i++) {
+ if (cmdline && !trace_save_cmdline(tasks[i]))
+ break;
+ if (tgid && !trace_save_tgid(tasks[i]))
+ break;
+ }
+
+ if (i == len)
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/**
+ * tracing_record_taskinfo_single - record the task info of single task
+ *
+ * @task - task to record
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * - TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo_single(struct task_struct *task, int flags)
+{
+ struct task_struct *tasks[1];
+
+ tasks[0] = task;
+ tracing_record_taskinfo(tasks, 1, flags);
+}
+
+/* Helpers to record a specific task information */
+void tracing_record_cmdline(struct task_struct *task)
+{
+ tracing_record_taskinfo_single(task, TRACE_RECORD_CMDLINE);
+}
+
+void tracing_record_tgid(struct task_struct *task)
+{
+ tracing_record_taskinfo_single(task, TRACE_RECORD_TGID);
}
/*
@@ -3144,7 +3223,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
#endif
if (!iter->snapshot)
- atomic_inc(&trace_record_cmdline_disabled);
+ atomic_inc(&trace_record_taskinfo_disabled);
if (*pos != iter->pos) {
iter->ent = NULL;
@@ -3189,7 +3268,7 @@ static void s_stop(struct seq_file *m, void *p)
#endif
if (!iter->snapshot)
- atomic_dec(&trace_record_cmdline_disabled);
+ atomic_dec(&trace_record_taskinfo_disabled);
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
@@ -4236,6 +4315,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
+ if (mask == TRACE_ITER_RECORD_TGID)
+ trace_event_enable_tgid_record(enabled);
+
if (mask == TRACE_ITER_EVENT_FORK)
trace_event_follow_fork(tr, enabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 39fd77330aab..ecc963e25266 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -635,8 +635,12 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
int trace_graph_entry(struct ftrace_graph_ent *trace);
void set_graph_array(struct trace_array *tr);
+void tracing_alloc_tgid_map(void);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
+void tracing_start_tgid_record(void);
+void tracing_stop_tgid_record(void);
+
int register_tracer(struct tracer *type);
int is_tracing_stopped(void);
@@ -697,6 +701,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern u64 ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1107,6 +1112,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
C(LATENCY_FMT, "latency-format"), \
C(RECORD_CMD, "record-cmd"), \
+ C(RECORD_TGID, "record-tgid"), \
C(OVERWRITE, "overwrite"), \
C(STOP_ON_FREE, "disable_on_free"), \
C(IRQ_INFO, "irq-info"), \
@@ -1423,6 +1429,8 @@ struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name);
extern void trace_event_enable_cmd_record(bool enable);
+extern void trace_event_enable_tgid_record(bool enable);
+
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e7973e10398c..240c6df95ea6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
mutex_unlock(&event_mutex);
}
+void trace_event_enable_tgid_record(bool enable)
+{
+ struct trace_event_file *file;
+ struct trace_array *tr;
+
+ mutex_lock(&event_mutex);
+ do_for_each_event_file(tr, file) {
+ if (!(file->flags & EVENT_FILE_FL_ENABLED))
+ continue;
+
+ if (enable) {
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ } else {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
+ &file->flags);
+ }
+ } while_for_each_event_file();
+ mutex_unlock(&event_mutex);
+}
+
static int __ftrace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable)
{
@@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
tracing_stop_cmdline_record();
clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ }
+
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
@@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
}
if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
+ bool cmd = false, tgid = false;
/* Keep the event disabled, when going to SOFT_MODE. */
if (soft_disable)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+ cmd = true;
tracing_start_cmdline_record();
set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ tgid = true;
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ }
+
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
if (ret) {
- tracing_stop_cmdline_record();
+ if (cmd)
+ tracing_stop_cmdline_record();
+ if (tgid)
+ tracing_stop_tgid_record();
pr_info("event trace: Could not enable event "
"%s\n", trace_event_name(call));
break;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4c896a0101bd..d13f3cd4182a 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -12,27 +12,41 @@
#include "trace.h"
-static int sched_ref;
+#define RECORD_CMDLINE 1
+#define RECORD_TGID 2
+
+static int sched_cmdline_ref;
+static int sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
- if (unlikely(!sched_ref))
- return;
+ int flags = 0;
+ struct task_struct *tasks[2] = { prev, next };
+
+ if (sched_cmdline_ref != 0)
+ flags |= RECORD_CMDLINE;
+
+ if (sched_tgid_ref != 0)
+ flags |= RECORD_TGID;
- tracing_record_cmdline(prev);
- tracing_record_cmdline(next);
+ tracing_record_taskinfo(tasks, 2, flags);
}
static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
- if (unlikely(!sched_ref))
- return;
+ int flags = 0;
+
+ if (sched_cmdline_ref != 0)
+ flags |= RECORD_CMDLINE;
+
+ if (sched_tgid_ref != 0)
+ flags |= RECORD_TGID;
- tracing_record_cmdline(current);
+ tracing_record_taskinfo_single(current, flags);
}
static int tracing_sched_register(void)
@@ -75,28 +89,65 @@ static void tracing_sched_unregister(void)
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}
-static void tracing_start_sched_switch(void)
+static void tracing_start_sched_switch(int ops)
{
+ bool sched_register;
mutex_lock(&sched_register_mutex);
- if (!(sched_ref++))
+ sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref++;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref++;
+ break;
+ }
+
+ if (sched_tgid_ref == 1)
+ tracing_alloc_tgid_map();
+
+ if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
tracing_sched_register();
mutex_unlock(&sched_register_mutex);
}
-static void tracing_stop_sched_switch(void)
+static void tracing_stop_sched_switch(int ops)
{
mutex_lock(&sched_register_mutex);
- if (!(--sched_ref))
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref--;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref--;
+ break;
+ }
+
+ if (!sched_cmdline_ref && !sched_tgid_ref)
tracing_sched_unregister();
mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
{
- tracing_start_sched_switch();
+ tracing_start_sched_switch(RECORD_CMDLINE);
}
void tracing_stop_cmdline_record(void)
{
- tracing_stop_sched_switch();
+ tracing_stop_sched_switch(RECORD_CMDLINE);
+}
+
+void tracing_start_tgid_record(void)
+{
+ tracing_start_sched_switch(RECORD_TGID);
+}
+
+void tracing_stop_tgid_record(void)
+{
+ tracing_stop_sched_switch(RECORD_TGID);
}
--
2.13.0.506.g27d5fe0cd-goog
On Thu, 8 Jun 2017 19:53:24 -0700
Joel Fernandes <[email protected]> wrote:
> trace_stop_cmdline_recording declaration isn't in use, remove it.
OK, I just pulled this one in. No need to send it out again.
-- Steve
>
> Cc: [email protected]
> Cc: Ingo Molnar <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> kernel/trace/trace.c | 2 --
> 1 file changed, 2 deletions(-)
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 1122f151466f..63deff9cdf2c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -1910,8 +1910,6 @@ static void tracing_stop_tr(struct trace_array *tr)
> raw_spin_unlock_irqrestore(&tr->start_lock, flags);
> }
>
> -void trace_stop_cmdline_recording(void);
> -
> static int trace_save_cmdline(struct task_struct *tsk)
> {
> unsigned pid, idx;
On Thu, 8 Jun 2017 19:53:25 -0700
Joel Fernandes <[email protected]> wrote:
> Inorder to support recording of tgid, the following changes are made:
>
> * Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
> along with the task's comm at the same time. The API is flexible to
> optionally also record for a group of tasks which is useful in recording the
> info of multiple tasks (such as when recording previous and next task in a
> sched_switch). Along with simplifying the API and the callers, this approach
> also has the benefit of not setting trace_cmdline_save before information for
> multiple tasks are saved.
Hmm, I'm not so sure I like the above.
> * We preserve the old API (tracing_record_cmdline) and create it as a wrapper
> around the new one. Along with this we add a tracing_record_tgid function.
> * Reuse the existing sched_switch and sched_wakeup probes Add a new option
> 'record-tgid' to enable recording of tgid
>
> When record-tgid option isn't enabled to begin with, we take care to make sure
> that there's isn't memory or runtime overhead.
>
> Cc: [email protected]
> Cc: Steven Rostedt <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Tested-by: Michael Sartain <[email protected]>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> include/linux/trace_events.h | 12 ++++-
> kernel/trace/trace.c | 102 ++++++++++++++++++++++++++++++++++----
> kernel/trace/trace.h | 8 +++
> kernel/trace/trace_events.c | 42 +++++++++++++++-
> kernel/trace/trace_sched_switch.c | 79 +++++++++++++++++++++++------
> 5 files changed, 217 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index a556805eff8a..6c9e84b1f8ae 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -151,7 +151,14 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
> int type, unsigned long len,
> unsigned long flags, int pc);
>
> -void tracing_record_cmdline(struct task_struct *tsk);
> +#define TRACE_RECORD_CMDLINE BIT(0)
> +#define TRACE_RECORD_TGID BIT(1)
> +
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags);
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags);
> +
> +void tracing_record_cmdline(struct task_struct *task);
> +void tracing_record_tgid(struct task_struct *task);
>
> int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
>
> @@ -290,6 +297,7 @@ struct trace_subsystem_dir;
> enum {
> EVENT_FILE_FL_ENABLED_BIT,
> EVENT_FILE_FL_RECORDED_CMD_BIT,
> + EVENT_FILE_FL_RECORDED_TGID_BIT,
> EVENT_FILE_FL_FILTERED_BIT,
> EVENT_FILE_FL_NO_SET_FILTER_BIT,
> EVENT_FILE_FL_SOFT_MODE_BIT,
> @@ -303,6 +311,7 @@ enum {
> * Event file flags:
> * ENABLED - The event is enabled
> * RECORDED_CMD - The comms should be recorded at sched_switch
> + * RECORDED_TGID - The tgids should be recorded at sched_switch
> * FILTERED - The event has a filter attached
> * NO_SET_FILTER - Set when filter has error and is to be ignored
> * SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
> @@ -315,6 +324,7 @@ enum {
> enum {
> EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
> EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
> + EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
> EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT),
> EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
> EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 63deff9cdf2c..185a09f8b67e 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
> * tracing is active, only save the comm when a trace event
> * occurred.
> */
> -static DEFINE_PER_CPU(bool, trace_cmdline_save);
> +static DEFINE_PER_CPU(bool, trace_taskinfo_save);
>
> /*
> * Kill all tracing for good (never come back).
> @@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
> static __always_inline void
> __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
> {
> - __this_cpu_write(trace_cmdline_save, true);
> + __this_cpu_write(trace_taskinfo_save, true);
>
> /* If this is the temp buffer, we need to commit fully */
> if (this_cpu_read(trace_buffered_event) == event) {
> @@ -1709,6 +1709,18 @@ void tracing_reset_all_online_cpus(void)
> }
> }
>
> +static int *tgid_map;
> +
> +void tracing_alloc_tgid_map(void)
> +{
> + if (tgid_map)
> + return;
> +
> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> + GFP_KERNEL);
> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
> +}
> +
> #define SAVED_CMDLINES_DEFAULT 128
> #define NO_CMDLINE_MAP UINT_MAX
> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
> static struct saved_cmdlines_buffer *savedcmd;
>
> /* temporary disable recording */
> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>
> static inline char *get_saved_cmdlines(int idx)
> {
> @@ -1990,16 +2002,83 @@ void trace_find_cmdline(int pid, char comm[])
> preempt_enable();
> }
>
> -void tracing_record_cmdline(struct task_struct *tsk)
> +int trace_find_tgid(int pid)
> +{
> + if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
> + return 0;
> +
> + return tgid_map[pid];
> +}
> +
> +static int trace_save_tgid(struct task_struct *tsk)
> {
> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> + if (!tgid_map || tsk->pid <= 0 || tsk->pid > PID_MAX_DEFAULT)
> + return 0;
> +
> + tgid_map[tsk->pid] = tsk->tgid;
> + return 1;
> +}
> +
> +/**
> + * tracing_record_taskinfo - record the task information of multiple tasks
> + *
> + * @tasks - list of tasks (array of task_struct pointers)
> + * @len - length of the list of tasks
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags)
> +{
> + int i;
> + bool cmdline, tgid;
> +
> + tgid = !!(flags & TRACE_RECORD_TGID);
> + cmdline = !!(flags & TRACE_RECORD_CMDLINE);
> +
> + if (unlikely(!cmdline && !tgid))
> + return;
This would be better to move this above the assignment of tgid and
cmdline and just have:
if (unlikely(!(flags & (TRACE_RECORD_TGID | TRACE_RECORD_CMDLINE))))
return;
Also, no need for the !! in the assignments. They are already marked as
boolean. A simple:
tgid = flags & TRACE_RECORD_TGID;
would suffice.
> +
> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
> return;
>
> - if (!__this_cpu_read(trace_cmdline_save))
> + if (!__this_cpu_read(trace_taskinfo_save))
> return;
You can move the assignments after the above too. gcc *should*
optimize it anyway. But I like to have all exit conditions first before
we add other code, which includes assignments.
>
> - if (trace_save_cmdline(tsk))
> - __this_cpu_write(trace_cmdline_save, false);
> + for (i = 0; i < len; i++) {
> + if (cmdline && !trace_save_cmdline(tasks[i]))
> + break;
> + if (tgid && !trace_save_tgid(tasks[i]))
> + break;
> + }
I really do not like this loop in the hot path. I say keep this as a
single code as default, and add a tracing_record_taskinfo_multi(), as
that's the exception (I only see one user compared to 3 users of
single).
You can add a helper function that is static __always_inline that can
be used by both. But please, no loops for the single case. Actually,
since there's only one multi user case, and it's always 2, I would say
nuke the loop for that one and have a:
tracing_record_taskinfo_sched_switch()
that takes a prev and a next, and updates both.
I don't envision more than 2, and we don't need this to be robust for a
use case that doesn't exist by sacrificing performance.
> +
> + if (i == len)
> + __this_cpu_write(trace_taskinfo_save, false);
> +}
> +
> +/**
> + * tracing_record_taskinfo_single - record the task info of single task
> + *
> + * @task - task to record
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * - TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags)
Like I said above. Nuke this. Have single be the default, and a special
case for sched_switch.
> +{
> + struct task_struct *tasks[1];
> +
> + tasks[0] = task;
> + tracing_record_taskinfo(tasks, 1, flags);
> +}
> +
> +/* Helpers to record a specific task information */
> +void tracing_record_cmdline(struct task_struct *task)
> +{
> + tracing_record_taskinfo_single(task, TRACE_RECORD_CMDLINE);
> +}
> +
> +void tracing_record_tgid(struct task_struct *task)
> +{
> + tracing_record_taskinfo_single(task, TRACE_RECORD_TGID);
> }
>
> /*
> @@ -3144,7 +3223,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
> #endif
>
> if (!iter->snapshot)
> - atomic_inc(&trace_record_cmdline_disabled);
> + atomic_inc(&trace_record_taskinfo_disabled);
>
> if (*pos != iter->pos) {
> iter->ent = NULL;
> @@ -3189,7 +3268,7 @@ static void s_stop(struct seq_file *m, void *p)
> #endif
>
> if (!iter->snapshot)
> - atomic_dec(&trace_record_cmdline_disabled);
> + atomic_dec(&trace_record_taskinfo_disabled);
>
> trace_access_unlock(iter->cpu_file);
> trace_event_read_unlock();
> @@ -4236,6 +4315,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
> if (mask == TRACE_ITER_RECORD_CMD)
> trace_event_enable_cmd_record(enabled);
>
> + if (mask == TRACE_ITER_RECORD_TGID)
> + trace_event_enable_tgid_record(enabled);
> +
> if (mask == TRACE_ITER_EVENT_FORK)
> trace_event_follow_fork(tr, enabled);
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 39fd77330aab..ecc963e25266 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -635,8 +635,12 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
> int trace_graph_entry(struct ftrace_graph_ent *trace);
> void set_graph_array(struct trace_array *tr);
>
> +void tracing_alloc_tgid_map(void);
> void tracing_start_cmdline_record(void);
> void tracing_stop_cmdline_record(void);
> +void tracing_start_tgid_record(void);
> +void tracing_stop_tgid_record(void);
> +
> int register_tracer(struct tracer *type);
> int is_tracing_stopped(void);
>
> @@ -697,6 +701,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
> extern u64 ftrace_now(int cpu);
>
> extern void trace_find_cmdline(int pid, char comm[]);
> +extern int trace_find_tgid(int pid);
> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -1107,6 +1112,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
> C(LATENCY_FMT, "latency-format"), \
> C(RECORD_CMD, "record-cmd"), \
> + C(RECORD_TGID, "record-tgid"), \
> C(OVERWRITE, "overwrite"), \
> C(STOP_ON_FREE, "disable_on_free"), \
> C(IRQ_INFO, "irq-info"), \
> @@ -1423,6 +1429,8 @@ struct ftrace_event_field *
> trace_find_event_field(struct trace_event_call *call, char *name);
>
> extern void trace_event_enable_cmd_record(bool enable);
> +extern void trace_event_enable_tgid_record(bool enable);
> +
> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
> extern int event_trace_del_tracer(struct trace_array *tr);
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index e7973e10398c..240c6df95ea6 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> mutex_unlock(&event_mutex);
> }
>
> +void trace_event_enable_tgid_record(bool enable)
> +{
> + struct trace_event_file *file;
> + struct trace_array *tr;
> +
> + mutex_lock(&event_mutex);
> + do_for_each_event_file(tr, file) {
> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> + continue;
> +
> + if (enable) {
> + tracing_start_tgid_record();
> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + } else {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
> + &file->flags);
> + }
> + } while_for_each_event_file();
> + mutex_unlock(&event_mutex);
> +}
> +
> static int __ftrace_event_enable_disable(struct trace_event_file *file,
> int enable, int soft_disable)
> {
> @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> tracing_stop_cmdline_record();
> clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> + }
> +
> call->class->reg(call, TRACE_REG_UNREGISTER, file);
> }
> /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
> @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> }
>
> if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
> + bool cmd = false, tgid = false;
>
> /* Keep the event disabled, when going to SOFT_MODE. */
> if (soft_disable)
> set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
>
> if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
> + cmd = true;
> tracing_start_cmdline_record();
> set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> + tgid = true;
> + tracing_start_tgid_record();
> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + }
> +
> ret = call->class->reg(call, TRACE_REG_REGISTER, file);
> if (ret) {
> - tracing_stop_cmdline_record();
> + if (cmd)
> + tracing_stop_cmdline_record();
> + if (tgid)
> + tracing_stop_tgid_record();
> pr_info("event trace: Could not enable event "
> "%s\n", trace_event_name(call));
> break;
> diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
> index 4c896a0101bd..d13f3cd4182a 100644
> --- a/kernel/trace/trace_sched_switch.c
> +++ b/kernel/trace/trace_sched_switch.c
> @@ -12,27 +12,41 @@
>
> #include "trace.h"
>
> -static int sched_ref;
> +#define RECORD_CMDLINE 1
> +#define RECORD_TGID 2
> +
> +static int sched_cmdline_ref;
> +static int sched_tgid_ref;
> static DEFINE_MUTEX(sched_register_mutex);
>
> static void
> probe_sched_switch(void *ignore, bool preempt,
> struct task_struct *prev, struct task_struct *next)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags = 0;
> + struct task_struct *tasks[2] = { prev, next };
> +
> + if (sched_cmdline_ref != 0)
> + flags |= RECORD_CMDLINE;
> +
> + if (sched_tgid_ref != 0)
> + flags |= RECORD_TGID;
flags = (RECORD_CMDLINE * !!(sched_cmdline_ref)) +
(RECORD_TGID * !!(sched_tgid_ref));
if (!flags)
return;
>
> - tracing_record_cmdline(prev);
> - tracing_record_cmdline(next);
> + tracing_record_taskinfo(tasks, 2, flags);
tracing_record_taskinfo_sched_switch(prev, next, flags);
> }
>
> static void
> probe_sched_wakeup(void *ignore, struct task_struct *wakee)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags = 0;
> +
> + if (sched_cmdline_ref != 0)
> + flags |= RECORD_CMDLINE;
> +
> + if (sched_tgid_ref != 0)
> + flags |= RECORD_TGID;
Same thing here.
>
> - tracing_record_cmdline(current);
> + tracing_record_taskinfo_single(current, flags);
> }
>
> static int tracing_sched_register(void)
> @@ -75,28 +89,65 @@ static void tracing_sched_unregister(void)
> unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
> }
>
> -static void tracing_start_sched_switch(void)
> +static void tracing_start_sched_switch(int ops)
> {
> + bool sched_register;
> mutex_lock(&sched_register_mutex);
> - if (!(sched_ref++))
> + sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref++;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref++;
> + break;
> + }
> +
> + if (sched_tgid_ref == 1)
> + tracing_alloc_tgid_map();
Since tgid_map is never freed, just do:
if (!tgid_map)
tracing_alloc_tgid_map();
Then if it fails to allocate this time, there's a chance that it will
allocate another time.
-- Steve
> +
> + if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
> tracing_sched_register();
> mutex_unlock(&sched_register_mutex);
> }
>
> -static void tracing_stop_sched_switch(void)
> +static void tracing_stop_sched_switch(int ops)
> {
> mutex_lock(&sched_register_mutex);
> - if (!(--sched_ref))
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref--;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref--;
> + break;
> + }
> +
> + if (!sched_cmdline_ref && !sched_tgid_ref)
> tracing_sched_unregister();
> mutex_unlock(&sched_register_mutex);
> }
>
> void tracing_start_cmdline_record(void)
> {
> - tracing_start_sched_switch();
> + tracing_start_sched_switch(RECORD_CMDLINE);
> }
>
> void tracing_stop_cmdline_record(void)
> {
> - tracing_stop_sched_switch();
> + tracing_stop_sched_switch(RECORD_CMDLINE);
> +}
> +
> +void tracing_start_tgid_record(void)
> +{
> + tracing_start_sched_switch(RECORD_TGID);
> +}
> +
> +void tracing_stop_tgid_record(void)
> +{
> + tracing_stop_sched_switch(RECORD_TGID);
> }
On Tue, Jun 13, 2017 at 11:52 AM, Steven Rostedt <[email protected]> wrote:
[..]
>> +/**
>> + * tracing_record_taskinfo - record the task information of multiple tasks
>> + *
>> + * @tasks - list of tasks (array of task_struct pointers)
>> + * @len - length of the list of tasks
>> + * @flags - TRACE_RECORD_CMDLINE for recording comm
>> + * TRACE_RECORD_TGID for recording tgid
>> + */
>> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags)
>> +{
>> + int i;
>> + bool cmdline, tgid;
>> +
>> + tgid = !!(flags & TRACE_RECORD_TGID);
>> + cmdline = !!(flags & TRACE_RECORD_CMDLINE);
>> +
>> + if (unlikely(!cmdline && !tgid))
>> + return;
>
> This would be better to move this above the assignment of tgid and
> cmdline and just have:
>
> if (unlikely(!(flags & (TRACE_RECORD_TGID | TRACE_RECORD_CMDLINE))))
> return;
Ok will fix.
>
> Also, no need for the !! in the assignments. They are already marked as
> boolean. A simple:
>
> tgid = flags & TRACE_RECORD_TGID;
>
> would suffice.
Got it, will fix, thanks.
>> +
>> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
>> return;
>>
>> - if (!__this_cpu_read(trace_cmdline_save))
>> + if (!__this_cpu_read(trace_taskinfo_save))
>> return;
>
> You can move the assignments after the above too. gcc *should*
> optimize it anyway. But I like to have all exit conditions first before
> we add other code, which includes assignments.
Ok, I agree.
>>
>> - if (trace_save_cmdline(tsk))
>> - __this_cpu_write(trace_cmdline_save, false);
>> + for (i = 0; i < len; i++) {
>> + if (cmdline && !trace_save_cmdline(tasks[i]))
>> + break;
>> + if (tgid && !trace_save_tgid(tasks[i]))
>> + break;
>> + }
>
> I really do not like this loop in the hot path. I say keep this as a
> single code as default, and add a tracing_record_taskinfo_multi(), as
> that's the exception (I only see one user compared to 3 users of
> single).
> You can add a helper function that is static __always_inline that can
> be used by both. But please, no loops for the single case. Actually,
> since there's only one multi user case, and it's always 2, I would say
> nuke the loop for that one and have a:
>
> tracing_record_taskinfo_sched_switch()
>
> that takes a prev and a next, and updates both.
>
> I don't envision more than 2, and we don't need this to be robust for a
> use case that doesn't exist by sacrificing performance.
Ok, I agree its better to just handle the simple case first. I will
work on doing that.
>> {
>> - if (unlikely(!sched_ref))
>> - return;
>> + int flags = 0;
>> + struct task_struct *tasks[2] = { prev, next };
>> +
>> + if (sched_cmdline_ref != 0)
>> + flags |= RECORD_CMDLINE;
>> +
>> + if (sched_tgid_ref != 0)
>> + flags |= RECORD_TGID;
>
> flags = (RECORD_CMDLINE * !!(sched_cmdline_ref)) +
> (RECORD_TGID * !!(sched_tgid_ref));
>
> if (!flags)
> return;
That looks much better, I will do that.
>> - tracing_record_cmdline(prev);
>> - tracing_record_cmdline(next);
>> + tracing_record_taskinfo(tasks, 2, flags);
>
> tracing_record_taskinfo_sched_switch(prev, next, flags);
Yep.
[..]
>> -static void tracing_start_sched_switch(void)
>> +static void tracing_start_sched_switch(int ops)
>> {
>> + bool sched_register;
>> mutex_lock(&sched_register_mutex);
>> - if (!(sched_ref++))
>> + sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
>> +
>> + switch (ops) {
>> + case RECORD_CMDLINE:
>> + sched_cmdline_ref++;
>> + break;
>> +
>> + case RECORD_TGID:
>> + sched_tgid_ref++;
>> + break;
>> + }
>> +
>> + if (sched_tgid_ref == 1)
>> + tracing_alloc_tgid_map();
>
> Since tgid_map is never freed, just do:
>
> if (!tgid_map)
> tracing_alloc_tgid_map();
>
> Then if it fails to allocate this time, there's a chance that it will
> allocate another time.
I still need to check for the sched_tgid_ref flag though because in
the case where record-tgid is not enabled, then it would still try to
allocate and that's what I wanted to avoid. I can change it to
(sched_tgid_ref > 0 && !tgid_map) which should take care of the
scenario you mentioned. Thanks for the idea.
Thanks for the review. I'll get to work and send out new and better
patches soon!
Regards,
Joel