Date: Tue, 13 Jun 2017 14:52:27 -0400
From: Steven Rostedt
To: Joel Fernandes
Cc: linux-kernel@vger.kernel.org, mikesart@gmail.com, kernel-team@android.com, Ingo Molnar
Subject: Re: [PATCH RFC v3 2/4] tracing: Add support for recording tgid of tasks
Message-ID: <20170613145227.6cc9d1d7@gandalf.local.home>
In-Reply-To: <20170609025327.9508-3-joelaf@google.com>
References: <20170609025327.9508-1-joelaf@google.com> <20170609025327.9508-3-joelaf@google.com>

On Thu, 8 Jun 2017 19:53:25 -0700
Joel Fernandes wrote:

> In order to support recording of tgid, the following changes are made:
>
> * Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
> along with the task's comm at the same time. The API is flexible to
> optionally also record for a group of tasks, which is useful in recording the
> info of multiple tasks (such as when recording previous and next task in a
> sched_switch). Along with simplifying the API and the callers, this approach
> also has the benefit of not setting trace_cmdline_save before information for
> multiple tasks is saved.

Hmm, I'm not so sure I like the above.

> * We preserve the old API (tracing_record_cmdline) and create it as a wrapper
> around the new one. Along with this we add a tracing_record_tgid function.
> * Reuse the existing sched_switch and sched_wakeup probes. Add a new option
> 'record-tgid' to enable recording of tgid.
>
> When the record-tgid option isn't enabled to begin with, we take care to make sure
> that there isn't memory or runtime overhead.
>
> Cc: kernel-team@android.com
> Cc: Steven Rostedt
> Cc: Ingo Molnar
> Tested-by: Michael Sartain
> Signed-off-by: Joel Fernandes
> ---
>  include/linux/trace_events.h      |  12 ++++-
>  kernel/trace/trace.c              | 102 ++++++++++++++++++++++++++++++++++----
>  kernel/trace/trace.h              |   8 +++
>  kernel/trace/trace_events.c       |  42 +++++++++++++++-
>  kernel/trace/trace_sched_switch.c |  79 +++++++++++++++++++++++------
>  5 files changed, 217 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index a556805eff8a..6c9e84b1f8ae 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -151,7 +151,14 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
>                                  int type, unsigned long len,
>                                  unsigned long flags, int pc);
>
> -void tracing_record_cmdline(struct task_struct *tsk);
> +#define TRACE_RECORD_CMDLINE    BIT(0)
> +#define TRACE_RECORD_TGID       BIT(1)
> +
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags);
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags);
> +
> +void tracing_record_cmdline(struct task_struct *task);
> +void tracing_record_tgid(struct task_struct *task);
>
>  int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
>
> @@ -290,6 +297,7 @@ struct trace_subsystem_dir;
>  enum {
>          EVENT_FILE_FL_ENABLED_BIT,
>          EVENT_FILE_FL_RECORDED_CMD_BIT,
> +        EVENT_FILE_FL_RECORDED_TGID_BIT,
>          EVENT_FILE_FL_FILTERED_BIT,
>          EVENT_FILE_FL_NO_SET_FILTER_BIT,
>          EVENT_FILE_FL_SOFT_MODE_BIT,
> @@ -303,6 +311,7 @@ enum {
>   * Event file flags:
>   *  ENABLED       - The event is enabled
>   *  RECORDED_CMD  - The comms should be recorded at sched_switch
> + *  RECORDED_TGID - The tgids should be recorded at sched_switch
>   *  FILTERED      - The event has a filter attached
>   *  NO_SET_FILTER - Set when filter has error and is to be ignored
>   *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
> @@ -315,6 +324,7 @@ enum {
>  enum {
>          EVENT_FILE_FL_ENABLED         = (1 << EVENT_FILE_FL_ENABLED_BIT),
>          EVENT_FILE_FL_RECORDED_CMD    = (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
> +        EVENT_FILE_FL_RECORDED_TGID   = (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
>          EVENT_FILE_FL_FILTERED        = (1 << EVENT_FILE_FL_FILTERED_BIT),
>          EVENT_FILE_FL_NO_SET_FILTER   = (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
>          EVENT_FILE_FL_SOFT_MODE       = (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 63deff9cdf2c..185a09f8b67e 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
>   * tracing is active, only save the comm when a trace event
>   * occurred.
>   */
> -static DEFINE_PER_CPU(bool, trace_cmdline_save);
> +static DEFINE_PER_CPU(bool, trace_taskinfo_save);
>
>  /*
>   * Kill all tracing for good (never come back).
> @@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
>  static __always_inline void
>  __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
>  {
> -        __this_cpu_write(trace_cmdline_save, true);
> +        __this_cpu_write(trace_taskinfo_save, true);
>
>          /* If this is the temp buffer, we need to commit fully */
>          if (this_cpu_read(trace_buffered_event) == event) {
> @@ -1709,6 +1709,18 @@ void tracing_reset_all_online_cpus(void)
>          }
>  }
>
> +static int *tgid_map;
> +
> +void tracing_alloc_tgid_map(void)
> +{
> +        if (tgid_map)
> +                return;
> +
> +        tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> +                           GFP_KERNEL);
> +        WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
> +}
> +
>  #define SAVED_CMDLINES_DEFAULT 128
>  #define NO_CMDLINE_MAP UINT_MAX
>  static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
>  static struct saved_cmdlines_buffer *savedcmd;
>
>  /* temporary disable recording */
> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>
>  static inline char *get_saved_cmdlines(int idx)
>  {
> @@ -1990,16 +2002,83 @@ void trace_find_cmdline(int pid, char comm[])
>          preempt_enable();
>  }
>
> -void tracing_record_cmdline(struct task_struct *tsk)
> +int trace_find_tgid(int pid)
> +{
> +        if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
> +                return 0;
> +
> +        return tgid_map[pid];
> +}
> +
> +static int trace_save_tgid(struct task_struct *tsk)
>  {
> -        if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> +        if (!tgid_map || tsk->pid <= 0 || tsk->pid > PID_MAX_DEFAULT)
> +                return 0;
> +
> +        tgid_map[tsk->pid] = tsk->tgid;
> +        return 1;
> +}
> +
> +/**
> + * tracing_record_taskinfo - record the task information of multiple tasks
> + *
> + * @tasks - list of tasks (array of task_struct pointers)
> + * @len   - length of the list of tasks
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + *          TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags)
> +{
> +        int i;
> +        bool cmdline, tgid;
> +
> +        tgid = !!(flags & TRACE_RECORD_TGID);
> +        cmdline = !!(flags & TRACE_RECORD_CMDLINE);
> +
> +        if (unlikely(!cmdline && !tgid))
> +                return;

It would be better to move this check above the assignments of tgid and
cmdline, and just have:

	if (unlikely(!(flags & (TRACE_RECORD_TGID | TRACE_RECORD_CMDLINE))))
		return;

Also, no need for the !! in the assignments. They are already marked as
boolean. A simple:

	tgid = flags & TRACE_RECORD_TGID;

would suffice.

> +
> +        if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
>                  return;
>
> -        if (!__this_cpu_read(trace_cmdline_save))
> +        if (!__this_cpu_read(trace_taskinfo_save))
>                  return;

You can move the assignments after the above too. gcc *should* optimize
it anyway. But I like to have all the exit conditions first, before any
other code, and that includes assignments.

>
> -        if (trace_save_cmdline(tsk))
> -                __this_cpu_write(trace_cmdline_save, false);
> +        for (i = 0; i < len; i++) {
> +                if (cmdline && !trace_save_cmdline(tasks[i]))
> +                        break;
> +                if (tgid && !trace_save_tgid(tasks[i]))
> +                        break;
> +        }

I really do not like this loop in the hot path. I say keep the
single-task case as the default code, and add a
tracing_record_taskinfo_multi(), as that's the exception (I only see one
user of it compared to 3 users of the single case). You can add a helper
function that is static __always_inline that can be used by both.
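That is, keep the single-task path dead simple, something along these
lines (completely untested, and the inline helper's name is just a
placeholder for whatever you want to call it):

	/* Untested sketch -- helper name is made up */
	static __always_inline bool __tracing_record_taskinfo(struct task_struct *task,
							       int flags)
	{
		if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
			return false;

		if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
			return false;

		return true;
	}

	void tracing_record_taskinfo(struct task_struct *task, int flags)
	{
		/* Exit conditions first, no loop in the common path */
		if (unlikely(!(flags & (TRACE_RECORD_TGID | TRACE_RECORD_CMDLINE))))
			return;

		if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
			return;

		if (!__this_cpu_read(trace_taskinfo_save))
			return;

		if (__tracing_record_taskinfo(task, flags))
			__this_cpu_write(trace_taskinfo_save, false);
	}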
But please, no loops for the single case. Actually, since there's only
one multi-task user, and it always handles exactly 2 tasks, I would say
nuke the loop for that one too and have a:

	tracing_record_taskinfo_sched_switch()

that takes a prev and a next, and updates both. I don't envision ever
needing more than 2, and we don't need to make this robust for a use
case that doesn't exist at the cost of performance.

> +
> +        if (i == len)
> +                __this_cpu_write(trace_taskinfo_save, false);
> +}
> +
> +/**
> + * tracing_record_taskinfo_single - record the task info of single task
> + *
> + * @task  - task to record
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + *        - TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags)

Like I said above. Nuke this. Have single be the default, and add a
special case for sched_switch.

> +{
> +        struct task_struct *tasks[1];
> +
> +        tasks[0] = task;
> +        tracing_record_taskinfo(tasks, 1, flags);
> +}
> +
> +/* Helpers to record a specific task information */
> +void tracing_record_cmdline(struct task_struct *task)
> +{
> +        tracing_record_taskinfo_single(task, TRACE_RECORD_CMDLINE);
> +}
> +
> +void tracing_record_tgid(struct task_struct *task)
> +{
> +        tracing_record_taskinfo_single(task, TRACE_RECORD_TGID);
>  }
>
>  /*
> @@ -3144,7 +3223,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
>  #endif
>
>          if (!iter->snapshot)
> -                atomic_inc(&trace_record_cmdline_disabled);
> +                atomic_inc(&trace_record_taskinfo_disabled);
>
>          if (*pos != iter->pos) {
>                  iter->ent = NULL;
> @@ -3189,7 +3268,7 @@ static void s_stop(struct seq_file *m, void *p)
>  #endif
>
>          if (!iter->snapshot)
> -                atomic_dec(&trace_record_cmdline_disabled);
> +                atomic_dec(&trace_record_taskinfo_disabled);
>
>          trace_access_unlock(iter->cpu_file);
>          trace_event_read_unlock();
> @@ -4236,6 +4315,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
>          if (mask == TRACE_ITER_RECORD_CMD)
>                  trace_event_enable_cmd_record(enabled);
>
> +        if (mask == TRACE_ITER_RECORD_TGID)
> +                trace_event_enable_tgid_record(enabled);
> +
>          if (mask == TRACE_ITER_EVENT_FORK)
>                  trace_event_follow_fork(tr, enabled);
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 39fd77330aab..ecc963e25266 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -635,8 +635,12 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
>  int trace_graph_entry(struct ftrace_graph_ent *trace);
>  void set_graph_array(struct trace_array *tr);
>
> +void tracing_alloc_tgid_map(void);
>  void tracing_start_cmdline_record(void);
>  void tracing_stop_cmdline_record(void);
> +void tracing_start_tgid_record(void);
> +void tracing_stop_tgid_record(void);
> +
>  int register_tracer(struct tracer *type);
>  int is_tracing_stopped(void);
>
> @@ -697,6 +701,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
>  extern u64 ftrace_now(int cpu);
>
>  extern void trace_find_cmdline(int pid, char comm[]);
> +extern int trace_find_tgid(int pid);
>  extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>
>  #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -1107,6 +1112,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
>          C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
>          C(LATENCY_FMT, "latency-format"), \
>          C(RECORD_CMD, "record-cmd"), \
> +        C(RECORD_TGID, "record-tgid"), \
>          C(OVERWRITE, "overwrite"), \
>          C(STOP_ON_FREE, "disable_on_free"), \
>          C(IRQ_INFO, "irq-info"), \
> @@ -1423,6 +1429,8 @@ struct ftrace_event_field *
>  trace_find_event_field(struct trace_event_call *call, char *name);
>
>  extern void trace_event_enable_cmd_record(bool enable);
> +extern void trace_event_enable_tgid_record(bool enable);
> +
>  extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
>  extern int event_trace_del_tracer(struct trace_array *tr);
>
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index e7973e10398c..240c6df95ea6 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
>          mutex_unlock(&event_mutex);
>  }
>
> +void trace_event_enable_tgid_record(bool enable)
> +{
> +        struct trace_event_file *file;
> +        struct trace_array *tr;
> +
> +        mutex_lock(&event_mutex);
> +        do_for_each_event_file(tr, file) {
> +                if (!(file->flags & EVENT_FILE_FL_ENABLED))
> +                        continue;
> +
> +                if (enable) {
> +                        tracing_start_tgid_record();
> +                        set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> +                } else {
> +                        tracing_stop_tgid_record();
> +                        clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
> +                                  &file->flags);
> +                }
> +        } while_for_each_event_file();
> +        mutex_unlock(&event_mutex);
> +}
> +
>  static int __ftrace_event_enable_disable(struct trace_event_file *file,
>                                           int enable, int soft_disable)
>  {
> @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
>                          tracing_stop_cmdline_record();
>                          clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
>                  }
> +
> +                if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
> +                        tracing_stop_tgid_record();
> +                        clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> +                }
> +
>                  call->class->reg(call, TRACE_REG_UNREGISTER, file);
>          }
>          /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
> @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
>          }
>
>          if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
> +                bool cmd = false, tgid = false;
>
>                  /* Keep the event disabled, when going to SOFT_MODE. */
>                  if (soft_disable)
>                          set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
>
>                  if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
> +                        cmd = true;
>                          tracing_start_cmdline_record();
>                          set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
>                  }
> +
> +                if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> +                        tgid = true;
> +                        tracing_start_tgid_record();
> +                        set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> +                }
> +
>                  ret = call->class->reg(call, TRACE_REG_REGISTER, file);
>                  if (ret) {
> -                        tracing_stop_cmdline_record();
> +                        if (cmd)
> +                                tracing_stop_cmdline_record();
> +                        if (tgid)
> +                                tracing_stop_tgid_record();
>                          pr_info("event trace: Could not enable event "
>                                  "%s\n", trace_event_name(call));
>                          break;
> diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
> index 4c896a0101bd..d13f3cd4182a 100644
> --- a/kernel/trace/trace_sched_switch.c
> +++ b/kernel/trace/trace_sched_switch.c
> @@ -12,27 +12,41 @@
>
>  #include "trace.h"
>
> -static int sched_ref;
> +#define RECORD_CMDLINE 1
> +#define RECORD_TGID    2
> +
> +static int sched_cmdline_ref;
> +static int sched_tgid_ref;
>  static DEFINE_MUTEX(sched_register_mutex);
>
>  static void
>  probe_sched_switch(void *ignore, bool preempt,
>                     struct task_struct *prev, struct task_struct *next)
>  {
> -        if (unlikely(!sched_ref))
> -                return;
> +        int flags = 0;
> +        struct task_struct *tasks[2] = { prev, next };
> +
> +        if (sched_cmdline_ref != 0)
> +                flags |= RECORD_CMDLINE;
> +
> +        if (sched_tgid_ref != 0)
> +                flags |= RECORD_TGID;

	flags = (RECORD_CMDLINE * !!(sched_cmdline_ref)) +
		(RECORD_TGID * !!(sched_tgid_ref));

	if (!flags)
		return;

>
> -        tracing_record_cmdline(prev);
> -        tracing_record_cmdline(next);
> +        tracing_record_taskinfo(tasks, 2, flags);

	tracing_record_taskinfo_sched_switch(prev, next, flags);

> }
>
>  static void
>  probe_sched_wakeup(void *ignore, struct task_struct *wakee)
>  {
> -        if (unlikely(!sched_ref))
> -                return;
> +        int flags = 0;
> +
> +        if (sched_cmdline_ref != 0)
> +                flags |= RECORD_CMDLINE;
> +
> +        if (sched_tgid_ref != 0)
> +                flags |= RECORD_TGID;

Same thing here.

>
> -        tracing_record_cmdline(current);
> +        tracing_record_taskinfo_single(current, flags);
> }
>
>  static int tracing_sched_register(void)
> @@ -75,28 +89,65 @@ static void tracing_sched_unregister(void)
>          unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
>  }
>
> -static void tracing_start_sched_switch(void)
> +static void tracing_start_sched_switch(int ops)
>  {
> +        bool sched_register;
>          mutex_lock(&sched_register_mutex);
> -        if (!(sched_ref++))
> +        sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
> +
> +        switch (ops) {
> +        case RECORD_CMDLINE:
> +                sched_cmdline_ref++;
> +                break;
> +
> +        case RECORD_TGID:
> +                sched_tgid_ref++;
> +                break;
> +        }
> +
> +        if (sched_tgid_ref == 1)
> +                tracing_alloc_tgid_map();

Since tgid_map is never freed, just do:

	if (!tgid_map)
		tracing_alloc_tgid_map();

Then if it fails to allocate this time, there's a chance that it will
allocate another time.
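One way to get that retry behavior, since tracing_alloc_tgid_map() in
this patch already returns early when the map was allocated, is to just
call it unconditionally from the RECORD_TGID case (untested sketch):

	switch (ops) {
	case RECORD_CMDLINE:
		sched_cmdline_ref++;
		break;

	case RECORD_TGID:
		sched_tgid_ref++;
		/*
		 * Sketch only: tracing_alloc_tgid_map() bails out if the
		 * map already exists, so a failed allocation gets retried
		 * the next time tgid recording is enabled.
		 */
		tracing_alloc_tgid_map();
		break;
	}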
-- Steve

> +
> +        if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
>                  tracing_sched_register();
>          mutex_unlock(&sched_register_mutex);
>  }
>
> -static void tracing_stop_sched_switch(void)
> +static void tracing_stop_sched_switch(int ops)
>  {
>          mutex_lock(&sched_register_mutex);
> -        if (!(--sched_ref))
> +
> +        switch (ops) {
> +        case RECORD_CMDLINE:
> +                sched_cmdline_ref--;
> +                break;
> +
> +        case RECORD_TGID:
> +                sched_tgid_ref--;
> +                break;
> +        }
> +
> +        if (!sched_cmdline_ref && !sched_tgid_ref)
>                  tracing_sched_unregister();
>          mutex_unlock(&sched_register_mutex);
>  }
>
>  void tracing_start_cmdline_record(void)
>  {
> -        tracing_start_sched_switch();
> +        tracing_start_sched_switch(RECORD_CMDLINE);
>  }
>
>  void tracing_stop_cmdline_record(void)
>  {
> -        tracing_stop_sched_switch();
> +        tracing_stop_sched_switch(RECORD_CMDLINE);
> +}
> +
> +void tracing_start_tgid_record(void)
> +{
> +        tracing_start_sched_switch(RECORD_TGID);
> +}
> +
> +void tracing_stop_tgid_record(void)
> +{
> +        tracing_stop_sched_switch(RECORD_TGID);
>  }