Hi Steven,
Following your comments in [1], I reworked the patches. I agree it's much
cleaner now. Please check them out and thanks.
Android systrace viewer heavily depends on the tgid to group tasks. tgid is
also useful for analyzing traces and generating analysis results for groups of
tasks. Michael Sartain also said he has a need for it and helped test the
series (I have added him to CC).
[1] https://lkml.org/lkml/2017/6/13/754
Joel Fernandes (3):
tracing: Add support for recording tgid of tasks
tracing: Add support for display of tgid in trace output
tracing/ftrace: Add support to record and display tgid
include/linux/trace_events.h | 13 +++-
kernel/trace/trace.c | 142 +++++++++++++++++++++++++++++++-------
kernel/trace/trace.h | 9 +++
kernel/trace/trace_events.c | 42 ++++++++++-
kernel/trace/trace_functions.c | 26 +++++++
kernel/trace/trace_output.c | 9 +++
kernel/trace/trace_sched_switch.c | 75 ++++++++++++++++----
7 files changed, 276 insertions(+), 40 deletions(-)
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: [email protected]
Cc: Michael Sartain <[email protected]>
--
2.13.1.611.g7e3b11ae1-goog
In order to support recording of tgid, the following changes are made:
* Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
along with the task's comm at the same time. This has the benefit of not
setting trace_cmdline_save before all the information for a task is saved.
* Add a new API tracing_record_taskinfo_sched_switch to record task information
for 2 tasks at a time (previous and next) and use it from sched_switch probe.
* Preserve the old API (tracing_record_cmdline) and create it as a wrapper
around the new one so that existing callers aren't affected.
* Reuse the existing sched_switch and sched_wakeup probes to record tgid
information and add a new option 'record-tgid' to enable recording of tgid
When the record-tgid option isn't enabled to begin with, we take care to make
sure that there isn't memory or runtime overhead.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
include/linux/trace_events.h | 13 ++++-
kernel/trace/trace.c | 106 ++++++++++++++++++++++++++++++++++----
kernel/trace/trace.h | 9 ++++
kernel/trace/trace_events.c | 42 ++++++++++++++-
kernel/trace/trace_sched_switch.c | 75 ++++++++++++++++++++++-----
5 files changed, 219 insertions(+), 26 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index a556805eff8a..f73cedfa2e0b 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -151,7 +151,15 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
int type, unsigned long len,
unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
+#define TRACE_RECORD_CMDLINE BIT(0)
+#define TRACE_RECORD_TGID BIT(1)
+
+void tracing_record_taskinfo(struct task_struct *task, int flags);
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+ struct task_struct *next, int flags);
+
+void tracing_record_cmdline(struct task_struct *task);
+void tracing_record_tgid(struct task_struct *task);
int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
@@ -290,6 +298,7 @@ struct trace_subsystem_dir;
enum {
EVENT_FILE_FL_ENABLED_BIT,
EVENT_FILE_FL_RECORDED_CMD_BIT,
+ EVENT_FILE_FL_RECORDED_TGID_BIT,
EVENT_FILE_FL_FILTERED_BIT,
EVENT_FILE_FL_NO_SET_FILTER_BIT,
EVENT_FILE_FL_SOFT_MODE_BIT,
@@ -303,6 +312,7 @@ enum {
* Event file flags:
* ENABLED - The event is enabled
* RECORDED_CMD - The comms should be recorded at sched_switch
+ * RECORDED_TGID - The tgids should be recorded at sched_switch
* FILTERED - The event has a filter attached
* NO_SET_FILTER - Set when filter has error and is to be ignored
* SOFT_MODE - The event is enabled/disabled by SOFT_DISABLED
@@ -315,6 +325,7 @@ enum {
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
EVENT_FILE_FL_RECORDED_CMD = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
+ EVENT_FILE_FL_RECORDED_TGID = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
EVENT_FILE_FL_FILTERED = (1 << EVENT_FILE_FL_FILTERED_BIT),
EVENT_FILE_FL_NO_SET_FILTER = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
EVENT_FILE_FL_SOFT_MODE = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1122f151466f..6dec7bef63c7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
* tracing is active, only save the comm when a trace event
* occurred.
*/
-static DEFINE_PER_CPU(bool, trace_cmdline_save);
+static DEFINE_PER_CPU(bool, trace_taskinfo_save);
/*
* Kill all tracing for good (never come back).
@@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
static __always_inline void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
- __this_cpu_write(trace_cmdline_save, true);
+ __this_cpu_write(trace_taskinfo_save, true);
/* If this is the temp buffer, we need to commit fully */
if (this_cpu_read(trace_buffered_event) == event) {
@@ -1709,6 +1709,18 @@ void tracing_reset_all_online_cpus(void)
}
}
+int *tgid_map;
+
+void tracing_alloc_tgid_map(void)
+{
+ if (tgid_map)
+ return;
+
+ tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
+ GFP_KERNEL);
+ WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
+}
+
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
static struct saved_cmdlines_buffer *savedcmd;
/* temporary disable recording */
-static atomic_t trace_record_cmdline_disabled __read_mostly;
+static atomic_t trace_record_taskinfo_disabled __read_mostly;
static inline char *get_saved_cmdlines(int idx)
{
@@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
preempt_enable();
}
-void tracing_record_cmdline(struct task_struct *tsk)
+int trace_find_tgid(int pid)
+{
+ if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
+ return 0;
+
+ return tgid_map[pid];
+}
+
+static int trace_save_tgid(struct task_struct *tsk)
+{
+ if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+ return 0;
+
+ tgid_map[tsk->pid] = tsk->tgid;
+ return 1;
+}
+
+static bool tracing_record_taskinfo_skip(int flags)
+{
+ if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
+ return true;
+ if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
+ return true;
+ if (!__this_cpu_read(trace_taskinfo_save))
+ return true;
+ return false;
+}
+
+/**
+ * tracing_record_taskinfo - record the task info of a task
+ *
+ * @task - task to record
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * - TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo(struct task_struct *task, int flags)
+{
+ if (tracing_record_taskinfo_skip(flags))
+ return;
+ if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
+ return;
+ if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
+ return;
+
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/**
+ * tracing_record_taskinfo_sched_switch - record task info for sched_switch
+ *
+ * @prev - previous task during sched_switch
+ * @next - next task during sched_switch
+ * @flags - TRACE_RECORD_CMDLINE for recording comm
+ * TRACE_RECORD_TGID for recording tgid
+ */
+void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
+ struct task_struct *next, int flags)
{
- if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
+ if (tracing_record_taskinfo_skip(flags))
+ return;
+
+ if ((flags & TRACE_RECORD_CMDLINE) &&
+ (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
return;
- if (!__this_cpu_read(trace_cmdline_save))
+ if ((flags & TRACE_RECORD_TGID) &&
+ (!trace_save_tgid(prev) || !trace_save_tgid(next)))
return;
- if (trace_save_cmdline(tsk))
- __this_cpu_write(trace_cmdline_save, false);
+ __this_cpu_write(trace_taskinfo_save, false);
+}
+
+/* Helpers to record a specific task information */
+void tracing_record_cmdline(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
+}
+
+void tracing_record_tgid(struct task_struct *task)
+{
+ tracing_record_taskinfo(task, TRACE_RECORD_TGID);
}
/*
@@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
#endif
if (!iter->snapshot)
- atomic_inc(&trace_record_cmdline_disabled);
+ atomic_inc(&trace_record_taskinfo_disabled);
if (*pos != iter->pos) {
iter->ent = NULL;
@@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
#endif
if (!iter->snapshot)
- atomic_dec(&trace_record_cmdline_disabled);
+ atomic_dec(&trace_record_taskinfo_disabled);
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
@@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
+ if (mask == TRACE_ITER_RECORD_TGID)
+ trace_event_enable_tgid_record(enabled);
+
if (mask == TRACE_ITER_EVENT_FORK)
trace_event_follow_fork(tr, enabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 39fd77330aab..90318b016fd9 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
int trace_graph_entry(struct ftrace_graph_ent *trace);
void set_graph_array(struct trace_array *tr);
+extern int *tgid_map;
+void tracing_alloc_tgid_map(void);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
+void tracing_start_tgid_record(void);
+void tracing_stop_tgid_record(void);
+
int register_tracer(struct tracer *type);
int is_tracing_stopped(void);
@@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
extern u64 ftrace_now(int cpu);
extern void trace_find_cmdline(int pid, char comm[]);
+extern int trace_find_tgid(int pid);
extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
C(LATENCY_FMT, "latency-format"), \
C(RECORD_CMD, "record-cmd"), \
+ C(RECORD_TGID, "record-tgid"), \
C(OVERWRITE, "overwrite"), \
C(STOP_ON_FREE, "disable_on_free"), \
C(IRQ_INFO, "irq-info"), \
@@ -1423,6 +1430,8 @@ struct ftrace_event_field *
trace_find_event_field(struct trace_event_call *call, char *name);
extern void trace_event_enable_cmd_record(bool enable);
+extern void trace_event_enable_tgid_record(bool enable);
+
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e7973e10398c..240c6df95ea6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
mutex_unlock(&event_mutex);
}
+void trace_event_enable_tgid_record(bool enable)
+{
+ struct trace_event_file *file;
+ struct trace_array *tr;
+
+ mutex_lock(&event_mutex);
+ do_for_each_event_file(tr, file) {
+ if (!(file->flags & EVENT_FILE_FL_ENABLED))
+ continue;
+
+ if (enable) {
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ } else {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
+ &file->flags);
+ }
+ } while_for_each_event_file();
+ mutex_unlock(&event_mutex);
+}
+
static int __ftrace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable)
{
@@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
tracing_stop_cmdline_record();
clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
+ tracing_stop_tgid_record();
+ clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ }
+
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
@@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
}
if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
+ bool cmd = false, tgid = false;
/* Keep the event disabled, when going to SOFT_MODE. */
if (soft_disable)
set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
+ cmd = true;
tracing_start_cmdline_record();
set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
+
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ tgid = true;
+ tracing_start_tgid_record();
+ set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
+ }
+
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
if (ret) {
- tracing_stop_cmdline_record();
+ if (cmd)
+ tracing_stop_cmdline_record();
+ if (tgid)
+ tracing_stop_tgid_record();
pr_info("event trace: Could not enable event "
"%s\n", trace_event_name(call));
break;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 4c896a0101bd..75a0c3accf2a 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -12,27 +12,38 @@
#include "trace.h"
-static int sched_ref;
+#define RECORD_CMDLINE 1
+#define RECORD_TGID 2
+
+static int sched_cmdline_ref;
+static int sched_tgid_ref;
static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
struct task_struct *prev, struct task_struct *next)
{
- if (unlikely(!sched_ref))
- return;
+ int flags;
+
+ flags = (RECORD_TGID * !!sched_tgid_ref) +
+ (RECORD_CMDLINE * !!sched_cmdline_ref);
- tracing_record_cmdline(prev);
- tracing_record_cmdline(next);
+ if (!flags)
+ return;
+ tracing_record_taskinfo_sched_switch(prev, next, flags);
}
static void
probe_sched_wakeup(void *ignore, struct task_struct *wakee)
{
- if (unlikely(!sched_ref))
- return;
+ int flags;
+
+ flags = (RECORD_TGID * !!sched_tgid_ref) +
+ (RECORD_CMDLINE * !!sched_cmdline_ref);
- tracing_record_cmdline(current);
+ if (!flags)
+ return;
+ tracing_record_taskinfo(current, flags);
}
static int tracing_sched_register(void)
@@ -75,28 +86,64 @@ static void tracing_sched_unregister(void)
unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
}
-static void tracing_start_sched_switch(void)
+static void tracing_start_sched_switch(int ops)
{
+ bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
mutex_lock(&sched_register_mutex);
- if (!(sched_ref++))
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref++;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref++;
+ break;
+ }
+
+ if (sched_tgid_ref > 0 && !tgid_map)
+ tracing_alloc_tgid_map();
+
+ if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
tracing_sched_register();
mutex_unlock(&sched_register_mutex);
}
-static void tracing_stop_sched_switch(void)
+static void tracing_stop_sched_switch(int ops)
{
mutex_lock(&sched_register_mutex);
- if (!(--sched_ref))
+
+ switch (ops) {
+ case RECORD_CMDLINE:
+ sched_cmdline_ref--;
+ break;
+
+ case RECORD_TGID:
+ sched_tgid_ref--;
+ break;
+ }
+
+ if (!sched_cmdline_ref && !sched_tgid_ref)
tracing_sched_unregister();
mutex_unlock(&sched_register_mutex);
}
void tracing_start_cmdline_record(void)
{
- tracing_start_sched_switch();
+ tracing_start_sched_switch(RECORD_CMDLINE);
}
void tracing_stop_cmdline_record(void)
{
- tracing_stop_sched_switch();
+ tracing_stop_sched_switch(RECORD_CMDLINE);
+}
+
+void tracing_start_tgid_record(void)
+{
+ tracing_start_sched_switch(RECORD_TGID);
+}
+
+void tracing_stop_tgid_record(void)
+{
+ tracing_stop_sched_switch(RECORD_TGID);
}
--
2.13.1.611.g7e3b11ae1-goog
Earlier patches introduced ability to record the tgid using the 'record-tgid'
option. Here we read the tgid and output it if the option is enabled.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace.c | 36 ++++++++++++++++++++++--------------
kernel/trace/trace_output.c | 9 +++++++++
2 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6dec7bef63c7..65878211f26f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3331,23 +3331,29 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
seq_puts(m, "#\n");
}
-static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
+static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
+ unsigned int flags)
{
+ bool tgid = flags & TRACE_ITER_RECORD_TGID;
+
print_event_info(buf, m);
- seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
- "# | | | | |\n");
+
+ seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
+ seq_printf(m, "# | | | %s | |\n", tgid ? " | " : "");
}
-static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
+static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
+ unsigned int flags)
{
- print_event_info(buf, m);
- seq_puts(m, "# _-----=> irqs-off\n"
- "# / _----=> need-resched\n"
- "# | / _---=> hardirq/softirq\n"
- "# || / _--=> preempt-depth\n"
- "# ||| / delay\n"
- "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
- "# | | | |||| | |\n");
+ bool tgid = flags & TRACE_ITER_RECORD_TGID;
+
+ seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : "");
+ seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : "");
+ seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : "");
+ seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : "");
+ seq_printf(m, "# %s||| / delay\n", tgid ? " " : "");
+ seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
+ seq_printf(m, "# | | | %s|||| | |\n", tgid ? " | " : "");
}
void
@@ -3663,9 +3669,11 @@ void trace_default_header(struct seq_file *m)
} else {
if (!(trace_flags & TRACE_ITER_VERBOSE)) {
if (trace_flags & TRACE_ITER_IRQ_INFO)
- print_func_help_header_irq(iter->trace_buffer, m);
+ print_func_help_header_irq(iter->trace_buffer,
+ m, trace_flags);
else
- print_func_help_header(iter->trace_buffer, m);
+ print_func_help_header(iter->trace_buffer, m,
+ trace_flags);
}
}
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 08f9bab8089e..55c743295874 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -587,6 +587,15 @@ int trace_print_context(struct trace_iterator *iter)
trace_seq_printf(s, "%16s-%-5d [%03d] ",
comm, entry->pid, iter->cpu);
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ unsigned int tgid = trace_find_tgid(entry->pid);
+
+ if (!tgid)
+ trace_seq_printf(s, "(-----) ");
+ else
+ trace_seq_printf(s, "(%5d) ", tgid);
+ }
+
if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
trace_print_lat_fmt(s, entry);
--
2.13.1.611.g7e3b11ae1-goog
Make function tracer able to record tgid if/when record-tgid is enabled.
Cc: [email protected]
Cc: Steven Rostedt <[email protected]>
Cc: Ingo Molnar <[email protected]>
Tested-by: Michael Sartain <[email protected]>
Signed-off-by: Joel Fernandes <[email protected]>
---
kernel/trace/trace_functions.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index a3bddbfd0874..d6bdc38ab273 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -27,6 +27,7 @@ static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs);
static struct tracer_flags func_flags;
+static bool tgid_recorded;
/* Our option */
enum {
@@ -104,6 +105,11 @@ static int function_trace_init(struct trace_array *tr)
put_cpu();
tracing_start_cmdline_record();
+
+ if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
+ tgid_recorded = true;
+ tracing_start_tgid_record();
+ }
tracing_start_function_trace(tr);
return 0;
}
@@ -112,6 +118,10 @@ static void function_trace_reset(struct trace_array *tr)
{
tracing_stop_function_trace(tr);
tracing_stop_cmdline_record();
+ if (tgid_recorded) {
+ tracing_stop_tgid_record();
+ tgid_recorded = false;
+ }
ftrace_reset_array_ops(tr);
}
@@ -252,6 +262,21 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
return 0;
}
+static int
+func_tracer_flag_changed(struct trace_array *tr, unsigned int mask,
+ int enabled)
+{
+ if (mask == TRACE_ITER_RECORD_TGID) {
+ tgid_recorded = !!enabled;
+ if (enabled)
+ tracing_start_tgid_record();
+ else
+ tracing_stop_tgid_record();
+ }
+
+ return 0;
+}
+
static struct tracer function_trace __tracer_data =
{
.name = "function",
@@ -260,6 +285,7 @@ static struct tracer function_trace __tracer_data =
.start = function_trace_start,
.flags = &func_flags,
.set_flag = func_set_flag,
+ .flag_changed = func_tracer_flag_changed,
.allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function,
--
2.13.1.611.g7e3b11ae1-goog
On Sun, 25 Jun 2017 22:38:42 -0700
Joel Fernandes <[email protected]> wrote:
> +int *tgid_map;
> +
> +void tracing_alloc_tgid_map(void)
> +{
> + if (tgid_map)
> + return;
> +
> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> + GFP_KERNEL);
> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
This needs to return a value. If it fails to allocate, then we must not
set the bit to do the recording. More below about why.
> +}
> +
> #define SAVED_CMDLINES_DEFAULT 128
> #define NO_CMDLINE_MAP UINT_MAX
> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
> static struct saved_cmdlines_buffer *savedcmd;
>
> /* temporary disable recording */
> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>
> static inline char *get_saved_cmdlines(int idx)
> {
> @@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
> preempt_enable();
> }
>
> -void tracing_record_cmdline(struct task_struct *tsk)
> +int trace_find_tgid(int pid)
> +{
> + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
> + return 0;
> +
> + return tgid_map[pid];
> +}
> +
> +static int trace_save_tgid(struct task_struct *tsk)
> +{
> + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
> + return 0;
If we failed to allocate, this will always return 0.
> +
> + tgid_map[tsk->pid] = tsk->tgid;
> + return 1;
> +}
> +
> +static bool tracing_record_taskinfo_skip(int flags)
> +{
> + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
> + return true;
> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
> + return true;
> + if (!__this_cpu_read(trace_taskinfo_save))
> + return true;
> + return false;
> +}
> +
> +/**
> + * tracing_record_taskinfo - record the task info of a task
> + *
> + * @task - task to record
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * - TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo(struct task_struct *task, int flags)
> +{
> + if (tracing_record_taskinfo_skip(flags))
> + return;
> + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
> + return;
> + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
With a failed allocation, this will return here.
> + return;
> +
> + __this_cpu_write(trace_taskinfo_save, false);
The above will never be cleared, and this callback will constantly be
called, slowing down the tracer.
> +}
> +
> +/**
> + * tracing_record_taskinfo_sched_switch - record task info for sched_switch
> + *
> + * @prev - previous task during sched_switch
> + * @next - next task during sched_switch
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + * TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
> + struct task_struct *next, int flags)
> {
> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> + if (tracing_record_taskinfo_skip(flags))
> + return;
> +
> + if ((flags & TRACE_RECORD_CMDLINE) &&
> + (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
> return;
>
> - if (!__this_cpu_read(trace_cmdline_save))
> + if ((flags & TRACE_RECORD_TGID) &&
> + (!trace_save_tgid(prev) || !trace_save_tgid(next)))
On failed allocation, this will return here.
> return;
>
> - if (trace_save_cmdline(tsk))
> - __this_cpu_write(trace_cmdline_save, false);
> + __this_cpu_write(trace_taskinfo_save, false);
This will not be cleared.
> +}
> +
> +/* Helpers to record a specific task information */
> +void tracing_record_cmdline(struct task_struct *task)
> +{
> + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
> +}
> +
> +void tracing_record_tgid(struct task_struct *task)
> +{
> + tracing_record_taskinfo(task, TRACE_RECORD_TGID);
> }
>
> /*
> @@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
> #endif
>
> if (!iter->snapshot)
> - atomic_inc(&trace_record_cmdline_disabled);
> + atomic_inc(&trace_record_taskinfo_disabled);
>
> if (*pos != iter->pos) {
> iter->ent = NULL;
> @@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
> #endif
>
> if (!iter->snapshot)
> - atomic_dec(&trace_record_cmdline_disabled);
> + atomic_dec(&trace_record_taskinfo_disabled);
>
> trace_access_unlock(iter->cpu_file);
> trace_event_read_unlock();
> @@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
> if (mask == TRACE_ITER_RECORD_CMD)
> trace_event_enable_cmd_record(enabled);
>
> + if (mask == TRACE_ITER_RECORD_TGID)
> + trace_event_enable_tgid_record(enabled);
> +
> if (mask == TRACE_ITER_EVENT_FORK)
> trace_event_follow_fork(tr, enabled);
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 39fd77330aab..90318b016fd9 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
> int trace_graph_entry(struct ftrace_graph_ent *trace);
> void set_graph_array(struct trace_array *tr);
>
> +extern int *tgid_map;
> +void tracing_alloc_tgid_map(void);
> void tracing_start_cmdline_record(void);
> void tracing_stop_cmdline_record(void);
> +void tracing_start_tgid_record(void);
> +void tracing_stop_tgid_record(void);
> +
> int register_tracer(struct tracer *type);
> int is_tracing_stopped(void);
>
> @@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
> extern u64 ftrace_now(int cpu);
>
> extern void trace_find_cmdline(int pid, char comm[]);
> +extern int trace_find_tgid(int pid);
> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
> C(LATENCY_FMT, "latency-format"), \
> C(RECORD_CMD, "record-cmd"), \
> + C(RECORD_TGID, "record-tgid"), \
> C(OVERWRITE, "overwrite"), \
> C(STOP_ON_FREE, "disable_on_free"), \
> C(IRQ_INFO, "irq-info"), \
> @@ -1423,6 +1430,8 @@ struct ftrace_event_field *
> trace_find_event_field(struct trace_event_call *call, char *name);
>
> extern void trace_event_enable_cmd_record(bool enable);
> +extern void trace_event_enable_tgid_record(bool enable);
> +
> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
> extern int event_trace_del_tracer(struct trace_array *tr);
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index e7973e10398c..240c6df95ea6 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> mutex_unlock(&event_mutex);
> }
>
> +void trace_event_enable_tgid_record(bool enable)
This should return a value.
> +{
> + struct trace_event_file *file;
> + struct trace_array *tr;
> +
> + mutex_lock(&event_mutex);
> + do_for_each_event_file(tr, file) {
> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> + continue;
> +
> + if (enable) {
> + tracing_start_tgid_record();
If we fail to start, the bit should not be set, and we should return
failure. Note, it can only fail on the first try, as once it is
allocated, you don't need to worry about it failing. Thus, if it fails,
break out of the loop now and return failure.
> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + } else {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
> + &file->flags);
> + }
> + } while_for_each_event_file();
> + mutex_unlock(&event_mutex);
> +}
> +
> static int __ftrace_event_enable_disable(struct trace_event_file *file,
> int enable, int soft_disable)
> {
> @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> tracing_stop_cmdline_record();
> clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
> + tracing_stop_tgid_record();
> + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> + }
> +
> call->class->reg(call, TRACE_REG_UNREGISTER, file);
> }
> /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
> @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
> }
>
> if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
> + bool cmd = false, tgid = false;
>
> /* Keep the event disabled, when going to SOFT_MODE. */
> if (soft_disable)
> set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
>
> if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
> + cmd = true;
> tracing_start_cmdline_record();
> set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> }
> +
> + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> + tgid = true;
> + tracing_start_tgid_record();
> + set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> + }
> +
> ret = call->class->reg(call, TRACE_REG_REGISTER, file);
> if (ret) {
> - tracing_stop_cmdline_record();
> + if (cmd)
> + tracing_stop_cmdline_record();
> + if (tgid)
> + tracing_stop_tgid_record();
> pr_info("event trace: Could not enable event "
> "%s\n", trace_event_name(call));
> break;
> diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
> index 4c896a0101bd..75a0c3accf2a 100644
> --- a/kernel/trace/trace_sched_switch.c
> +++ b/kernel/trace/trace_sched_switch.c
> @@ -12,27 +12,38 @@
>
> #include "trace.h"
>
> -static int sched_ref;
> +#define RECORD_CMDLINE 1
> +#define RECORD_TGID 2
> +
> +static int sched_cmdline_ref;
> +static int sched_tgid_ref;
> static DEFINE_MUTEX(sched_register_mutex);
>
> static void
> probe_sched_switch(void *ignore, bool preempt,
> struct task_struct *prev, struct task_struct *next)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags;
> +
> + flags = (RECORD_TGID * !!sched_tgid_ref) +
> + (RECORD_CMDLINE * !!sched_cmdline_ref);
>
> - tracing_record_cmdline(prev);
> - tracing_record_cmdline(next);
> + if (!flags)
> + return;
> + tracing_record_taskinfo_sched_switch(prev, next, flags);
> }
>
> static void
> probe_sched_wakeup(void *ignore, struct task_struct *wakee)
> {
> - if (unlikely(!sched_ref))
> - return;
> + int flags;
> +
> + flags = (RECORD_TGID * !!sched_tgid_ref) +
> + (RECORD_CMDLINE * !!sched_cmdline_ref);
>
> - tracing_record_cmdline(current);
> + if (!flags)
> + return;
> + tracing_record_taskinfo(current, flags);
> }
>
> static int tracing_sched_register(void)
> @@ -75,28 +86,64 @@ static void tracing_sched_unregister(void)
> unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
> }
>
> -static void tracing_start_sched_switch(void)
> +static void tracing_start_sched_switch(int ops)
> {
> + bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
> mutex_lock(&sched_register_mutex);
> - if (!(sched_ref++))
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref++;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref++;
> + break;
> + }
> +
> + if (sched_tgid_ref > 0 && !tgid_map)
> + tracing_alloc_tgid_map();
Should have this check for failure as well, and clean up if there is one.
-- Steve
> +
> + if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
> tracing_sched_register();
> mutex_unlock(&sched_register_mutex);
> }
>
> -static void tracing_stop_sched_switch(void)
> +static void tracing_stop_sched_switch(int ops)
> {
> mutex_lock(&sched_register_mutex);
> - if (!(--sched_ref))
> +
> + switch (ops) {
> + case RECORD_CMDLINE:
> + sched_cmdline_ref--;
> + break;
> +
> + case RECORD_TGID:
> + sched_tgid_ref--;
> + break;
> + }
> +
> + if (!sched_cmdline_ref && !sched_tgid_ref)
> tracing_sched_unregister();
> mutex_unlock(&sched_register_mutex);
> }
>
> void tracing_start_cmdline_record(void)
> {
> - tracing_start_sched_switch();
> + tracing_start_sched_switch(RECORD_CMDLINE);
> }
>
> void tracing_stop_cmdline_record(void)
> {
> - tracing_stop_sched_switch();
> + tracing_stop_sched_switch(RECORD_CMDLINE);
> +}
> +
> +void tracing_start_tgid_record(void)
> +{
> + tracing_start_sched_switch(RECORD_TGID);
> +}
> +
> +void tracing_stop_tgid_record(void)
> +{
> + tracing_stop_sched_switch(RECORD_TGID);
> }
On Mon, Jun 26, 2017 at 9:52 AM, Steven Rostedt <[email protected]> wrote:
> On Sun, 25 Jun 2017 22:38:42 -0700
> Joel Fernandes <[email protected]> wrote:
>
>> +int *tgid_map;
>> +
>> +void tracing_alloc_tgid_map(void)
>> +{
>> + if (tgid_map)
>> + return;
>> +
>> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
>> + GFP_KERNEL);
>> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
>
> This needs to return a value. If it fails to allocate, then we must not
> set the bit to do the recording. More below about why.
>
>> +}
>> +
>> #define SAVED_CMDLINES_DEFAULT 128
>> #define NO_CMDLINE_MAP UINT_MAX
>> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
>> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
>> static struct saved_cmdlines_buffer *savedcmd;
>>
>> /* temporary disable recording */
>> -static atomic_t trace_record_cmdline_disabled __read_mostly;
>> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>>
>> static inline char *get_saved_cmdlines(int idx)
>> {
>> @@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
>> preempt_enable();
>> }
>>
>> -void tracing_record_cmdline(struct task_struct *tsk)
>> +int trace_find_tgid(int pid)
>> +{
>> + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
>> + return 0;
>> +
>> + return tgid_map[pid];
>> +}
>> +
>> +static int trace_save_tgid(struct task_struct *tsk)
>> +{
>> + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
>> + return 0;
>
> If we failed to allocate, this will always return 0.
>
>> +
>> + tgid_map[tsk->pid] = tsk->tgid;
>> + return 1;
>> +}
>> +
>> +static bool tracing_record_taskinfo_skip(int flags)
>> +{
>> + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
>> + return true;
>> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
>> + return true;
>> + if (!__this_cpu_read(trace_taskinfo_save))
>> + return true;
>> + return false;
>> +}
>> +
>> +/**
>> + * tracing_record_taskinfo - record the task info of a task
>> + *
>> + * @task - task to record
>> + * @flags - TRACE_RECORD_CMDLINE for recording comm
>> + * - TRACE_RECORD_TGID for recording tgid
>> + */
>> +void tracing_record_taskinfo(struct task_struct *task, int flags)
>> +{
>> + if (tracing_record_taskinfo_skip(flags))
>> + return;
>> + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
>> + return;
>> + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
>
> With a failed allocation, this will return here.
>
>> + return;
>> +
>> + __this_cpu_write(trace_taskinfo_save, false);
>
> The above will never be cleared, and this callback will constantly be
> called, slowing down the tracer.
>
>
>> +}
>> +
>> +/**
>> + * tracing_record_taskinfo_sched_switch - record task info for sched_switch
>> + *
>> + * @prev - previous task during sched_switch
>> + * @next - next task during sched_switch
>> + * @flags - TRACE_RECORD_CMDLINE for recording comm
>> + * TRACE_RECORD_TGID for recording tgid
>> + */
>> +void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
>> + struct task_struct *next, int flags)
>> {
>> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
>> + if (tracing_record_taskinfo_skip(flags))
>> + return;
>> +
>> + if ((flags & TRACE_RECORD_CMDLINE) &&
>> + (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
>> return;
>>
>> - if (!__this_cpu_read(trace_cmdline_save))
>> + if ((flags & TRACE_RECORD_TGID) &&
>> + (!trace_save_tgid(prev) || !trace_save_tgid(next)))
>
> On failed allocation, this will return here.
>
>> return;
>>
>> - if (trace_save_cmdline(tsk))
>> - __this_cpu_write(trace_cmdline_save, false);
>> + __this_cpu_write(trace_taskinfo_save, false);
>
> This will not be cleared.
>
>> +}
>> +
>> +/* Helpers to record a specific task information */
>> +void tracing_record_cmdline(struct task_struct *task)
>> +{
>> + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
>> +}
>> +
>> +void tracing_record_tgid(struct task_struct *task)
>> +{
>> + tracing_record_taskinfo(task, TRACE_RECORD_TGID);
>> }
>>
>> /*
>> @@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
>> #endif
>>
>> if (!iter->snapshot)
>> - atomic_inc(&trace_record_cmdline_disabled);
>> + atomic_inc(&trace_record_taskinfo_disabled);
>>
>> if (*pos != iter->pos) {
>> iter->ent = NULL;
>> @@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
>> #endif
>>
>> if (!iter->snapshot)
>> - atomic_dec(&trace_record_cmdline_disabled);
>> + atomic_dec(&trace_record_taskinfo_disabled);
>>
>> trace_access_unlock(iter->cpu_file);
>> trace_event_read_unlock();
>> @@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
>> if (mask == TRACE_ITER_RECORD_CMD)
>> trace_event_enable_cmd_record(enabled);
>>
>> + if (mask == TRACE_ITER_RECORD_TGID)
>> + trace_event_enable_tgid_record(enabled);
>> +
>> if (mask == TRACE_ITER_EVENT_FORK)
>> trace_event_follow_fork(tr, enabled);
>>
>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>> index 39fd77330aab..90318b016fd9 100644
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
>> int trace_graph_entry(struct ftrace_graph_ent *trace);
>> void set_graph_array(struct trace_array *tr);
>>
>> +extern int *tgid_map;
>> +void tracing_alloc_tgid_map(void);
>> void tracing_start_cmdline_record(void);
>> void tracing_stop_cmdline_record(void);
>> +void tracing_start_tgid_record(void);
>> +void tracing_stop_tgid_record(void);
>> +
>> int register_tracer(struct tracer *type);
>> int is_tracing_stopped(void);
>>
>> @@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
>> extern u64 ftrace_now(int cpu);
>>
>> extern void trace_find_cmdline(int pid, char comm[]);
>> +extern int trace_find_tgid(int pid);
>> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>>
>> #ifdef CONFIG_DYNAMIC_FTRACE
>> @@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
>> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
>> C(LATENCY_FMT, "latency-format"), \
>> C(RECORD_CMD, "record-cmd"), \
>> + C(RECORD_TGID, "record-tgid"), \
>> C(OVERWRITE, "overwrite"), \
>> C(STOP_ON_FREE, "disable_on_free"), \
>> C(IRQ_INFO, "irq-info"), \
>> @@ -1423,6 +1430,8 @@ struct ftrace_event_field *
>> trace_find_event_field(struct trace_event_call *call, char *name);
>>
>> extern void trace_event_enable_cmd_record(bool enable);
>> +extern void trace_event_enable_tgid_record(bool enable);
>> +
>> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
>> extern int event_trace_del_tracer(struct trace_array *tr);
>>
>> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
>> index e7973e10398c..240c6df95ea6 100644
>> --- a/kernel/trace/trace_events.c
>> +++ b/kernel/trace/trace_events.c
>> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
>> mutex_unlock(&event_mutex);
>> }
>>
>> +void trace_event_enable_tgid_record(bool enable)
>
> This should return a value.
>
>> +{
>> + struct trace_event_file *file;
>> + struct trace_array *tr;
>> +
>> + mutex_lock(&event_mutex);
>> + do_for_each_event_file(tr, file) {
>> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
>> + continue;
>> +
>> + if (enable) {
>> + tracing_start_tgid_record();
>
> If we fail to start, the bit should not be set, and we should return
> failure. Note, it can only fail on the first try, as once it is
> allocated, you don't need to worry about it failing. Thus, if it fails,
> break out of the loop now and return failure.
>
That seems OK to me; I think it's a valid point.
I think that I should do it in the second call to
tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
error out if the allocation fails.
While going through this code again, I noticed another potential issue in
__ftrace_event_enable_disable
On Mon, 26 Jun 2017 12:02:12 -0700
Joel Fernandes <[email protected]> wrote:
> On Mon, Jun 26, 2017 at 9:52 AM, Steven Rostedt <[email protected]> wrote:
> > On Sun, 25 Jun 2017 22:38:42 -0700
> > Joel Fernandes <[email protected]> wrote:
> >
> >> +int *tgid_map;
> >> +
> >> +void tracing_alloc_tgid_map(void)
> >> +{
> >> + if (tgid_map)
> >> + return;
> >> +
> >> + tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> >> + GFP_KERNEL);
> >> + WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
> >
> > This needs to return a value. If it fails to allocate, then we must not
> > set the bit to do the recording. More below about why.
> >
> >> +}
> >> +
> >> #define SAVED_CMDLINES_DEFAULT 128
> >> #define NO_CMDLINE_MAP UINT_MAX
> >> static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> >> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
> >> static struct saved_cmdlines_buffer *savedcmd;
> >>
> >> /* temporary disable recording */
> >> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> >> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
> >>
> >> static inline char *get_saved_cmdlines(int idx)
> >> {
> >> @@ -1992,16 +2004,87 @@ void trace_find_cmdline(int pid, char comm[])
> >> preempt_enable();
> >> }
> >>
> >> -void tracing_record_cmdline(struct task_struct *tsk)
> >> +int trace_find_tgid(int pid)
> >> +{
> >> + if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
> >> + return 0;
> >> +
> >> + return tgid_map[pid];
> >> +}
> >> +
> >> +static int trace_save_tgid(struct task_struct *tsk)
> >> +{
> >> + if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
> >> + return 0;
> >
> > If we failed to allocate, this will always return 0.
> >
> >> +
> >> + tgid_map[tsk->pid] = tsk->tgid;
> >> + return 1;
> >> +}
> >> +
> >> +static bool tracing_record_taskinfo_skip(int flags)
> >> +{
> >> + if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
> >> + return true;
> >> + if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
> >> + return true;
> >> + if (!__this_cpu_read(trace_taskinfo_save))
> >> + return true;
> >> + return false;
> >> +}
> >> +
> >> +/**
> >> + * tracing_record_taskinfo - record the task info of a task
> >> + *
> >> + * @task - task to record
> >> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> >> + * - TRACE_RECORD_TGID for recording tgid
> >> + */
> >> +void tracing_record_taskinfo(struct task_struct *task, int flags)
> >> +{
> >> + if (tracing_record_taskinfo_skip(flags))
> >> + return;
> >> + if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
> >> + return;
> >> + if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
> >
> > With a failed allocation, this will return here.
> >
> >> + return;
> >> +
> >> + __this_cpu_write(trace_taskinfo_save, false);
> >
> > The above will never be cleared, and this callback will constantly be
> > called, slowing down the tracer.
> >
> >
> >> +}
> >> +
> >> +/**
> >> + * tracing_record_taskinfo_sched_switch - record task info for sched_switch
> >> + *
> >> + * @prev - previous task during sched_switch
> >> + * @next - next task during sched_switch
> >> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> >> + * TRACE_RECORD_TGID for recording tgid
> >> + */
> >> +void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
> >> + struct task_struct *next, int flags)
> >> {
> >> - if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> >> + if (tracing_record_taskinfo_skip(flags))
> >> + return;
> >> +
> >> + if ((flags & TRACE_RECORD_CMDLINE) &&
> >> + (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
> >> return;
> >>
> >> - if (!__this_cpu_read(trace_cmdline_save))
> >> + if ((flags & TRACE_RECORD_TGID) &&
> >> + (!trace_save_tgid(prev) || !trace_save_tgid(next)))
> >
> > On failed allocation, this will return here.
> >
> >> return;
> >>
> >> - if (trace_save_cmdline(tsk))
> >> - __this_cpu_write(trace_cmdline_save, false);
> >> + __this_cpu_write(trace_taskinfo_save, false);
> >
> > This will not be cleared.
> >
> >> +}
> >> +
> >> +/* Helpers to record a specific task information */
> >> +void tracing_record_cmdline(struct task_struct *task)
> >> +{
> >> + tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
> >> +}
> >> +
> >> +void tracing_record_tgid(struct task_struct *task)
> >> +{
> >> + tracing_record_taskinfo(task, TRACE_RECORD_TGID);
> >> }
> >>
> >> /*
> >> @@ -3146,7 +3229,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
> >> #endif
> >>
> >> if (!iter->snapshot)
> >> - atomic_inc(&trace_record_cmdline_disabled);
> >> + atomic_inc(&trace_record_taskinfo_disabled);
> >>
> >> if (*pos != iter->pos) {
> >> iter->ent = NULL;
> >> @@ -3191,7 +3274,7 @@ static void s_stop(struct seq_file *m, void *p)
> >> #endif
> >>
> >> if (!iter->snapshot)
> >> - atomic_dec(&trace_record_cmdline_disabled);
> >> + atomic_dec(&trace_record_taskinfo_disabled);
> >>
> >> trace_access_unlock(iter->cpu_file);
> >> trace_event_read_unlock();
> >> @@ -4238,6 +4321,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
> >> if (mask == TRACE_ITER_RECORD_CMD)
> >> trace_event_enable_cmd_record(enabled);
> >>
> >> + if (mask == TRACE_ITER_RECORD_TGID)
> >> + trace_event_enable_tgid_record(enabled);
> >> +
> >> if (mask == TRACE_ITER_EVENT_FORK)
> >> trace_event_follow_fork(tr, enabled);
> >>
> >> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> >> index 39fd77330aab..90318b016fd9 100644
> >> --- a/kernel/trace/trace.h
> >> +++ b/kernel/trace/trace.h
> >> @@ -635,8 +635,13 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
> >> int trace_graph_entry(struct ftrace_graph_ent *trace);
> >> void set_graph_array(struct trace_array *tr);
> >>
> >> +extern int *tgid_map;
> >> +void tracing_alloc_tgid_map(void);
> >> void tracing_start_cmdline_record(void);
> >> void tracing_stop_cmdline_record(void);
> >> +void tracing_start_tgid_record(void);
> >> +void tracing_stop_tgid_record(void);
> >> +
> >> int register_tracer(struct tracer *type);
> >> int is_tracing_stopped(void);
> >>
> >> @@ -697,6 +702,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
> >> extern u64 ftrace_now(int cpu);
> >>
> >> extern void trace_find_cmdline(int pid, char comm[]);
> >> +extern int trace_find_tgid(int pid);
> >> extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
> >>
> >> #ifdef CONFIG_DYNAMIC_FTRACE
> >> @@ -1107,6 +1113,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
> >> C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
> >> C(LATENCY_FMT, "latency-format"), \
> >> C(RECORD_CMD, "record-cmd"), \
> >> + C(RECORD_TGID, "record-tgid"), \
> >> C(OVERWRITE, "overwrite"), \
> >> C(STOP_ON_FREE, "disable_on_free"), \
> >> C(IRQ_INFO, "irq-info"), \
> >> @@ -1423,6 +1430,8 @@ struct ftrace_event_field *
> >> trace_find_event_field(struct trace_event_call *call, char *name);
> >>
> >> extern void trace_event_enable_cmd_record(bool enable);
> >> +extern void trace_event_enable_tgid_record(bool enable);
> >> +
> >> extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
> >> extern int event_trace_del_tracer(struct trace_array *tr);
> >>
> >> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> >> index e7973e10398c..240c6df95ea6 100644
> >> --- a/kernel/trace/trace_events.c
> >> +++ b/kernel/trace/trace_events.c
> >> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> >> mutex_unlock(&event_mutex);
> >> }
> >>
> >> +void trace_event_enable_tgid_record(bool enable)
> >
> > This should return a value.
> >
> >> +{
> >> + struct trace_event_file *file;
> >> + struct trace_array *tr;
> >> +
> >> + mutex_lock(&event_mutex);
> >> + do_for_each_event_file(tr, file) {
> >> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> >> + continue;
> >> +
> >> + if (enable) {
> >> + tracing_start_tgid_record();
> >
> > If we fail to start, the bit should not be set, and we should return
> > failure. Note, it can only fail on the first try, as once it is
> > allocated, you don't need to worry about it failing. Thus, if it fails,
> > break out of the loop now and return failure.
> >
>
> That seems Ok with me to do, I think a valid point.
>
> I think that I should do it in the second call to
> tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
> error out if the allocation fails.
>
> While going this code I again, I noticed another potential issue in
> __ftrace_event_enable_disable
set_tracer
On Mon, Jun 26, 2017 at 12:02 PM, Joel Fernandes <[email protected]> wrote:
> On Mon, Jun 26, 2017 at 9:52 AM, Steven Rostedt <[email protected]> wrote:
>> On Sun, 25 Jun 2017 22:38:42 -0700
[..]
>>
>>> +{
>>> + struct trace_event_file *file;
>>> + struct trace_array *tr;
>>> +
>>> + mutex_lock(&event_mutex);
>>> + do_for_each_event_file(tr, file) {
>>> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
>>> + continue;
>>> +
>>> + if (enable) {
>>> + tracing_start_tgid_record();
>>
>> If we fail to start, the bit should not be set, and we should return
>> failure. Note, it can only fail on the first try, as once it is
>> allocated, you don't need to worry about it failing. Thus, if it fails,
>> break out of the loop now and return failure.
>>
>
> That seems Ok with me to do, I think a valid point.
>
> I think that I should do it in the second call to
> tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
> error out if the allocation fails.
>
> While going this code I again, I noticed another potential issue in
> __ftrace_event_enable_disable
Sorry I hit send too soon!
I was saying, While going through this code again I noticed another
potential issue in __ftrace_event_enable_disable in existing code:
Here we do:
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
But in the error handling path we only do a
tracing_stop_cmdline_record() and not clear the
EVENT_FILE_FL_RECORDED_CMD_BIT flag. Is that not an issue? I am
guessing because enabling of the event itself will fail, that's Ok.
But just to keep it consistent, I am wondering whether we should just
clear the bit here.
Thanks,
Joel
On Mon, 26 Jun 2017 12:02:12 -0700
Joel Fernandes <[email protected]> wrote:
> >> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> >> index e7973e10398c..240c6df95ea6 100644
> >> --- a/kernel/trace/trace_events.c
> >> +++ b/kernel/trace/trace_events.c
> >> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
> >> mutex_unlock(&event_mutex);
> >> }
> >>
> >> +void trace_event_enable_tgid_record(bool enable)
> >
> > This should return a value.
> >
> >> +{
> >> + struct trace_event_file *file;
> >> + struct trace_array *tr;
> >> +
> >> + mutex_lock(&event_mutex);
> >> + do_for_each_event_file(tr, file) {
> >> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
> >> + continue;
> >> +
> >> + if (enable) {
> >> + tracing_start_tgid_record();
> >
> > If we fail to start, the bit should not be set, and we should return
> > failure. Note, it can only fail on the first try, as once it is
> > allocated, you don't need to worry about it failing. Thus, if it fails,
> > break out of the loop now and return failure.
> >
>
> That seems Ok with me to do, I think a valid point.
>
> I think that I should do it in the second call to
> tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
> error out if the allocation fails.
>
> While going this code I again, I noticed another potential issue in
> __ftrace_event_enable_disable
Thinking about this more: just allocate the array as soon as the option
is enabled, regardless of whether a trace event is set. That will make it a
lot simpler. If it fails to allocate, you can simply bail out with
-ENOMEM, and the setting of the option will return that. Then we can
even remove the WARN_ONCE() at allocation failure, as the user will
know what happened.
-- Steve
On Mon, Jun 26, 2017 at 12:12 PM, Steven Rostedt <[email protected]> wrote:
> On Mon, 26 Jun 2017 12:02:12 -0700
> Joel Fernandes <[email protected]> wrote:
>
>> >> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
>> >> index e7973e10398c..240c6df95ea6 100644
>> >> --- a/kernel/trace/trace_events.c
>> >> +++ b/kernel/trace/trace_events.c
>> >> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
>> >> mutex_unlock(&event_mutex);
>> >> }
>> >>
>> >> +void trace_event_enable_tgid_record(bool enable)
>> >
>> > This should return a value.
>> >
>> >> +{
>> >> + struct trace_event_file *file;
>> >> + struct trace_array *tr;
>> >> +
>> >> + mutex_lock(&event_mutex);
>> >> + do_for_each_event_file(tr, file) {
>> >> + if (!(file->flags & EVENT_FILE_FL_ENABLED))
>> >> + continue;
>> >> +
>> >> + if (enable) {
>> >> + tracing_start_tgid_record();
>> >
>> > If we fail to start, the bit should not be set, and we should return
>> > failure. Note, it can only fail on the first try, as once it is
>> > allocated, you don't need to worry about it failing. Thus, if it fails,
>> > break out of the loop now and return failure.
>> >
>>
>> That seems Ok with me to do, I think a valid point.
>>
>> I think that I should do it in the second call to
>> tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
>> error out if the allocation fails.
>>
>> While going this code I again, I noticed another potential issue in
>> __ftrace_event_enable_disable
>
> Thinking about this more. Just allocate the array as soon as the option
> is enabled, regardless if an trace event is set. That will make it a
> lot simpler. If it fails to allocate, you can simply bail out with
> -ENOMEM, and the setting of the option will return that. Then we can
> even remove the WARN_ONCE() at allocation failure, as the user will
> know what happened.
Agreed!
Thanks,
Joel
On Mon, 26 Jun 2017 12:10:04 -0700
Joel Fernandes <[email protected]> wrote:
> > That seems Ok with me to do, I think a valid point.
> >
> > I think that I should do it in the second call to
> > tracing_start_tgid_record too then (__ftrace_event_enable_disable) to
> > error out if the allocation fails.
> >
> > While going this code I again, I noticed another potential issue in
> > __ftrace_event_enable_disable
>
> Sorry I hit send too soon!
>
> I was saying, While going through this code again I noticed another
> potential issue in __ftrace_event_enable_disable in existing code:
>
> Here we do:
> ret = call->class->reg(call, TRACE_REG_REGISTER, file);
>
> But in the error handling path we only do a
> tracing_stop_cmdline_record() and not clear the
> EVENT_FILE_FL_RECORDED_CMD_BIT flag. Is that not an issue? I am
> guessing because enabling of the event itself will fail, that's Ok.
> But just to keep it consistent, I am thinking if we should just clear
> the bit here.
Yeah, we should probably add some better error handling for failed
setting of events. But the tgid-map is low hanging fruit to get
right ;-)
-- Steve
Hi Steven,
Please drop this patch (3/3) for now because the new allocation
rearrangement we made to (1/3) will not work with this patch because
flag_changed is called before the allocation failure case. Also TGIDs
for event tracing is the main use case for my series, the function
tracer isn't the main use case so I can post a proper version of this
patch later once we get the main pieces in.
1/3 and 2/3 are still good to go; following are the patchwork links to them:
1/3: https://patchwork.kernel.org/patch/9810685/
2/3: https://patchwork.kernel.org/patch/9808641/
In case it's confusing, please let me know and I can just send you both
of these patches again.
Thanks,
Joel
On Sun, Jun 25, 2017 at 10:38 PM, Joel Fernandes <[email protected]> wrote:
> Make function tracer able to record tgid if/when record-tgid is enabled.
>
> Cc: [email protected]
> Cc: Steven Rostedt <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Tested-by: Michael Sartain <[email protected]>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> kernel/trace/trace_functions.c | 26 ++++++++++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
> index a3bddbfd0874..d6bdc38ab273 100644
> --- a/kernel/trace/trace_functions.c
> +++ b/kernel/trace/trace_functions.c
> @@ -27,6 +27,7 @@ static void
> function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
> struct ftrace_ops *op, struct pt_regs *pt_regs);
> static struct tracer_flags func_flags;
> +static bool tgid_recorded;
>
> /* Our option */
> enum {
> @@ -104,6 +105,11 @@ static int function_trace_init(struct trace_array *tr)
> put_cpu();
>
> tracing_start_cmdline_record();
> +
> + if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> + tgid_recorded = true;
> + tracing_start_tgid_record();
> + }
> tracing_start_function_trace(tr);
> return 0;
> }
> @@ -112,6 +118,10 @@ static void function_trace_reset(struct trace_array *tr)
> {
> tracing_stop_function_trace(tr);
> tracing_stop_cmdline_record();
> + if (tgid_recorded) {
> + tracing_stop_tgid_record();
> + tgid_recorded = false;
> + }
> ftrace_reset_array_ops(tr);
> }
>
> @@ -252,6 +262,21 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
> return 0;
> }
>
> +static int
> +func_tracer_flag_changed(struct trace_array *tr, unsigned int mask,
> + int enabled)
> +{
> + if (mask == TRACE_ITER_RECORD_TGID) {
> + tgid_recorded = !!enabled;
> + if (enabled)
> + tracing_start_tgid_record();
> + else
> + tracing_stop_tgid_record();
> + }
> +
> + return 0;
> +}
> +
> static struct tracer function_trace __tracer_data =
> {
> .name = "function",
> @@ -260,6 +285,7 @@ static struct tracer function_trace __tracer_data =
> .start = function_trace_start,
> .flags = &func_flags,
> .set_flag = func_set_flag,
> + .flag_changed = func_tracer_flag_changed,
> .allow_instances = true,
> #ifdef CONFIG_FTRACE_SELFTEST
> .selftest = trace_selftest_startup_function,
> --
> 2.13.1.611.g7e3b11ae1-goog
>