Date: Wed, 31 Dec 2008 05:59:57 +0100
From: Frederic Weisbecker
To: Lai Jiangshan
Cc: Ingo Molnar, Steven Rostedt, Linux Kernel Mailing List
Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record
Message-ID: <20081231045956.GC28472@nowhere>
References: <495ADF45.2030203@cn.fujitsu.com>
In-Reply-To: <495ADF45.2030203@cn.fujitsu.com>
User-Agent: Mutt/1.5.18 (2008-05-17)

Hi Lai.

On Wed, Dec 31, 2008 at 10:56:05AM +0800, Lai Jiangshan wrote:
> 
> Impact: save on memory for tracing
> 
> Current tracers typically use a struct (like struct ftrace_entry,
> struct ctx_switch_entry, struct special_entry, etc.) to record a binary
> event. These structs can each record only their own kind of event.
> A new kind of tracer needs a new struct, and a lot of code to handle it.
> 
> So we need a generic binary record for events. This infrastructure
> is for that purpose.
> 
> Signed-off-by: Lai Jiangshan
> ---
>  include/linux/ftrace.h       |    3 +
>  kernel/trace/Kconfig         |    6 ++
>  kernel/trace/Makefile        |    1 
>  kernel/trace/trace.c         |   57 ++++++++++++++++++++++++++++
>  kernel/trace/trace.h         |   12 +++++
>  kernel/trace/trace_bprintk.c |   87 +++++++++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_output.c  |   72 +++++++++++++++++++++++++++++++++++
>  7 files changed, 238 insertions(+)
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 677432b..0ec4752 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -181,6 +181,9 @@ extern int ftrace_make_nop(struct module *mod,
>   */
>  extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>  
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
> +#endif
>  
>  /* May be defined in arch */
>  extern int ftrace_arch_read_dyn_info(char *buf, int size);
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index e2a4ff6..887bd89 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -77,6 +77,12 @@ config FUNCTION_GRAPH_TRACER
>  	  This is done by setting the current return address on the current
>  	  task structure into a stack of calls.
> 
> +config TRACE_BPRINTK
> +	bool "Binary printk for tracing"
> +	default y
> +	depends on TRACING
> +	select BINARY_PRINTF
> +
>  config IRQSOFF_TRACER
>  	bool "Interrupts-off Latency Tracer"
>  	default n
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 31cd5fb..8ca3952 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -21,6 +21,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
>  obj-$(CONFIG_TRACING) += trace.o
>  obj-$(CONFIG_TRACING) += trace_output.o
>  obj-$(CONFIG_TRACING) += trace_stat.o
> +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index b789c01..6e5c9df 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3027,6 +3027,63 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
>  }
>  EXPORT_SYMBOL_GPL(__ftrace_printk);
>  
> +/**
> + * trace_vbprintk - write binary msg to tracing buffer
> + *
> + * Caller must ensure @fmt stays valid while the msg is in the tracing buffer.
> + */
> +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
> +{
> +	static DEFINE_SPINLOCK(trace_buf_lock);
> +	static u32 trace_buf[TRACE_BUF_SIZE];
> +
> +	struct ring_buffer_event *event;
> +	struct trace_array *tr = &global_trace;
> +	struct trace_array_cpu *data;
> +	struct bprintk_entry *entry;
> +	unsigned long flags, irq_flags;
> +	int cpu, len = 0, size, pc;
> +
> +	if (tracing_disabled || !trace_bprintk_enable)
> +		return 0;
> +
> +	pc = preempt_count();
> +	preempt_disable_notrace();
> +	cpu = raw_smp_processor_id();
> +	data = tr->data[cpu];
> +
> +	if (unlikely(atomic_read(&data->disabled)))
> +		goto out;
> +
> +	spin_lock_irqsave(&trace_buf_lock, flags);
> +	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> +
> +	if (len > TRACE_BUF_SIZE || len < 0)
> +		goto out_unlock;
> +
> +	size = sizeof(*entry) + sizeof(u32) * len;
> +	event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
> +	if (!event)
> +		goto out_unlock;
> +	entry = ring_buffer_event_data(event);
> +	tracing_generic_entry_update(&entry->ent, flags, pc);
> +	entry->ent.type = TRACE_BPRINTK;
> +	entry->ip = ip;
> +	entry->fmt = fmt;
> +
> +	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
> +	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&trace_buf_lock, flags);
> +
> +out:
> +	preempt_enable_notrace();
> +
> +	return len;
> +}
> +EXPORT_SYMBOL_GPL(trace_vbprintk);
> +
>  static int trace_panic_handler(struct notifier_block *this,
>  			       unsigned long event, void *unused)
>  {
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index a8b624c..6885d13 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -18,6 +18,7 @@ enum trace_type {
>  	TRACE_WAKE,
>  	TRACE_STACK,
>  	TRACE_PRINT,
> +	TRACE_BPRINTK,
>  	TRACE_SPECIAL,
>  	TRACE_MMIO_RW,
>  	TRACE_MMIO_MAP,
> @@ -120,6 +121,16 @@ struct print_entry {
>  	char buf[];
>  };
>  
> +struct bprintk_entry {
> +	struct trace_entry ent;
> +	unsigned long ip;
> +	const char *fmt;
> +	u32 buf[];
> +};
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_bprintk_enable;
> +#endif
> +
>  #define TRACE_OLD_SIZE	88
>  
>  struct trace_field_cont {
> @@ -262,6 +273,7 @@ extern void __ftrace_bad_type(void);
>  		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
>  		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>  		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
> +		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>  		IF_ASSIGN(var, ent, struct special_entry, 0);		\
>  		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
>  			  TRACE_MMIO_RW);				\
> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
> new file mode 100644
> index 0000000..1f8e532
> --- /dev/null
> +++ b/kernel/trace/trace_bprintk.c
> @@ -0,0 +1,87 @@
> +/*
> + * trace binary printk
> + *
> + * Copyright (C) 2008 Lai Jiangshan
> + *
> + */
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +#include
> +
> +#include "trace.h"
> +
> +/* binary printk basic */
> +static DEFINE_MUTEX(btrace_mutex);

Looks like you only need a mutex here to protect a counter. Wouldn't it
be better to make this counter atomic and then drop the mutex? (Quick
sketch at the end of this mail.)

> +static int btrace_metadata_count;
> +
> +static inline void lock_btrace(void)
> +{
> +	mutex_lock(&btrace_mutex);
> +}
> +
> +static inline void unlock_btrace(void)
> +{
> +	mutex_unlock(&btrace_mutex);
> +}
> +
> +static void get_btrace_metadata(void)
> +{
> +	lock_btrace();
> +	btrace_metadata_count++;
> +	unlock_btrace();
> +}
> +
> +static void put_btrace_metadata(void)
> +{
> +	lock_btrace();
> +	btrace_metadata_count--;
> +	unlock_btrace();
> +}
> +
> +/* events tracer */
> +int trace_bprintk_enable;
> +
> +static void start_bprintk_trace(struct trace_array *tr)
> +{
> +	get_btrace_metadata();
> +	tracing_reset_online_cpus(tr);
> +	trace_bprintk_enable = 1;
> +}
> +
> +static void stop_bprintk_trace(struct trace_array *tr)
> +{
> +	trace_bprintk_enable = 0;
> +	tracing_reset_online_cpus(tr);
> +	put_btrace_metadata();
> +}
> +
> +static int init_bprintk_trace(struct trace_array *tr)
> +{
> +	start_bprintk_trace(tr);
> +	return 0;
> +}
> +
> +static struct tracer bprintk_trace __read_mostly =
> +{
> +	.name		= "events",
> +	.init		= init_bprintk_trace,
> +	.reset		= stop_bprintk_trace,
> +	.start		= start_bprintk_trace,
> +	.stop		= stop_bprintk_trace,
> +};
> +
> +static __init int init_bprintk(void)
> +{
> +	return register_tracer(&bprintk_trace);
> +}
> +
> +device_initcall(init_bprintk);
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index df0c25c..ccc9fa7 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>  	return len;
>  }
>  
> +static int
> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
> +{
> +	int len = (PAGE_SIZE - 1) - s->len;
> +	int ret;
> +
> +	if (!len)
> +		return 0;
> +
> +	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
> +
> +	/* If we can't write it all, don't bother writing anything */
> +	if (ret >= len)
> +		return 0;
> +
> +	s->len += ret;
> +
> +	return len;
> +}
> +
>  /**
>   * trace_seq_puts - trace sequence printing of simple string
>   * @s: trace sequence descriptor
> @@ -800,6 +820,57 @@ static struct trace_event trace_print_event = {
>  	.binary		= trace_nop_print,
>  };
>  
> +/* TRACE_BPRINTK */
> +static int
> +trace_bprintk_print(struct trace_seq *s, struct trace_entry *entry, int flags)
> +{
> +	struct bprintk_entry *field;
> +
> +	trace_assign_type(field, entry);
> +
> +	if (!seq_print_ip_sym(s, field->ip, flags))
> +		goto partial;
> +
> +	if (!trace_seq_puts(s, ": "))
> +		goto partial;
> +
> +	if (!trace_seq_bprintf(s, field->fmt, field->buf))
> +		goto partial;
> +
> +	return 0;
> +
> + partial:
> +	return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static int
> +trace_bprintk_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
> +{
> +	struct bprintk_entry *field;
> +
> +	trace_assign_type(field, entry);
> +
> +	if (!trace_seq_printf(s, ": %lx : ", field->ip))
> +		goto partial;
> +
> +	if (!trace_seq_bprintf(s, field->fmt, field->buf))
> +		goto partial;
> +
> +	return 0;
> +
> + partial:
> +	return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static struct trace_event trace_bprintk_event = {
> +	.type		= TRACE_BPRINTK,
> +	.trace		= trace_bprintk_print,
> +	.latency_trace	= trace_bprintk_print,
> +	.raw		= trace_bprintk_raw,
> +	.hex		= trace_nop_print,
> +	.binary		= trace_nop_print,
> +};
> +
>  static struct trace_event *events[] __initdata = {
>  	&trace_fn_event,
>  	&trace_ctx_event,
> @@ -808,6 +879,7 @@ static struct trace_event *events[] __initdata = {
>  	&trace_stack_event,
>  	&trace_user_stack_event,
>  	&trace_print_event,
> +	&trace_bprintk_event,
>  	NULL
>  };
> 

Just out of curiosity: why do you need such binary tracing? Do you need
it because string output is really too slow for your needs?

Thanks.
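
PS: about the atomic counter suggestion above, here is a quick sketch of
what I mean. It is untested and only illustrative; it assumes the counter
really is a plain reference count with no other state to protect, and that
asm/atomic.h is pulled in by the existing includes:

	/* Reference count on the bprintk metadata; updates need no mutex. */
	static atomic_t btrace_metadata_count = ATOMIC_INIT(0);

	static void get_btrace_metadata(void)
	{
		atomic_inc(&btrace_metadata_count);
	}

	static void put_btrace_metadata(void)
	{
		atomic_dec(&btrace_metadata_count);
	}

With something like that, btrace_mutex and the lock_btrace()/unlock_btrace()
helpers could go away entirely, unless a later patch in the series needs to
hold a lock across more than the counter update.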