2008-12-31 03:00:27

by Lai Jiangshan

Subject: [PATCH 2/5] ftrace: infrastructure for supporting binary record


Impact: save on memory for tracing

Current tracers typically use a struct (like struct ftrace_entry,
struct ctx_switch_entry, struct special_entry, etc.) to record a binary
event. Each of these structs can only record its own kind of event, and
a new kind of tracer needs a new struct and a lot of code to handle it.

So we need a generic binary record for events. This infrastructure
is for this purpose.

Signed-off-by: Lai Jiangshan <[email protected]>
---
include/linux/ftrace.h | 3 +
kernel/trace/Kconfig | 6 ++
kernel/trace/Makefile | 1
kernel/trace/trace.c | 57 ++++++++++++++++++++++++++++
kernel/trace/trace.h | 12 +++++
kernel/trace/trace_bprintk.c | 87 +++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_output.c | 72 +++++++++++++++++++++++++++++++++++
7 files changed, 238 insertions(+)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 677432b..0ec4752 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -181,6 +181,9 @@ extern int ftrace_make_nop(struct module *mod,
*/
extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);

+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+#endif

/* May be defined in arch */
extern int ftrace_arch_read_dyn_info(char *buf, int size);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..887bd89 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -77,6 +77,12 @@ config FUNCTION_GRAPH_TRACER
This is done by setting the current return address on the current
task structure into a stack of calls.

+config TRACE_BPRINTK
+ bool "Binary printk for tracing"
+ default y
+ depends on TRACING
+ select BINARY_PRINTF
+
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 31cd5fb..8ca3952 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
+obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b789c01..6e5c9df 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3027,6 +3027,63 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...)
}
EXPORT_SYMBOL_GPL(__ftrace_printk);

+/**
+ * trace_vbprintk - write binary msg to tracing buffer
+ *
+ * Caller must ensure @fmt stays valid while the message is in the tracing buffer.
+ */
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
+{
+ static DEFINE_SPINLOCK(trace_buf_lock);
+ static u32 trace_buf[TRACE_BUF_SIZE];
+
+ struct ring_buffer_event *event;
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ struct bprintk_entry *entry;
+ unsigned long flags, irq_flags;
+ int cpu, len = 0, size, pc;
+
+ if (tracing_disabled || !trace_bprintk_enable)
+ return 0;
+
+ pc = preempt_count();
+ preempt_disable_notrace();
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+
+ if (unlikely(atomic_read(&data->disabled)))
+ goto out;
+
+ spin_lock_irqsave(&trace_buf_lock, flags);
+ len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+ if (len > TRACE_BUF_SIZE || len < 0)
+ goto out_unlock;
+
+ size = sizeof(*entry) + sizeof(u32) * len;
+ event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags);
+ if (!event)
+ goto out_unlock;
+ entry = ring_buffer_event_data(event);
+ tracing_generic_entry_update(&entry->ent, flags, pc);
+ entry->ent.type = TRACE_BPRINTK;
+ entry->ip = ip;
+ entry->fmt = fmt;
+
+ memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+ ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+out_unlock:
+ spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+out:
+ preempt_enable_notrace();
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(trace_vbprintk);
+
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a8b624c..6885d13 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -18,6 +18,7 @@ enum trace_type {
TRACE_WAKE,
TRACE_STACK,
TRACE_PRINT,
+ TRACE_BPRINTK,
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
@@ -120,6 +121,16 @@ struct print_entry {
char buf[];
};

+struct bprintk_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ const char *fmt;
+ u32 buf[];
+};
+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_bprintk_enable;
+#endif
+
#define TRACE_OLD_SIZE 88

struct trace_field_cont {
@@ -262,6 +273,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
+ IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
IF_ASSIGN(var, ent, struct special_entry, 0); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
new file mode 100644
index 0000000..1f8e532
--- /dev/null
+++ b/kernel/trace/trace_bprintk.c
@@ -0,0 +1,87 @@
+/*
+ * trace binary printk
+ *
+ * Copyright (C) 2008 Lai Jiangshan <[email protected]>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/marker.h>
+#include <linux/uaccess.h>
+
+#include "trace.h"
+
+/* binary printk basic */
+static DEFINE_MUTEX(btrace_mutex);
+static int btrace_metadata_count;
+
+static inline void lock_btrace(void)
+{
+ mutex_lock(&btrace_mutex);
+}
+
+static inline void unlock_btrace(void)
+{
+ mutex_unlock(&btrace_mutex);
+}
+
+static void get_btrace_metadata(void)
+{
+ lock_btrace();
+ btrace_metadata_count++;
+ unlock_btrace();
+}
+
+static void put_btrace_metadata(void)
+{
+ lock_btrace();
+ btrace_metadata_count--;
+ unlock_btrace();
+}
+
+/* events tracer */
+int trace_bprintk_enable;
+
+static void start_bprintk_trace(struct trace_array *tr)
+{
+ get_btrace_metadata();
+ tracing_reset_online_cpus(tr);
+ trace_bprintk_enable = 1;
+}
+
+static void stop_bprintk_trace(struct trace_array *tr)
+{
+ trace_bprintk_enable = 0;
+ tracing_reset_online_cpus(tr);
+ put_btrace_metadata();
+}
+
+static int init_bprintk_trace(struct trace_array *tr)
+{
+ start_bprintk_trace(tr);
+ return 0;
+}
+
+static struct tracer bprintk_trace __read_mostly =
+{
+ .name = "events",
+ .init = init_bprintk_trace,
+ .reset = stop_bprintk_trace,
+ .start = start_bprintk_trace,
+ .stop = stop_bprintk_trace,
+};
+
+static __init int init_bprintk(void)
+{
+ return register_tracer(&bprintk_trace);
+}
+
+device_initcall(init_bprintk);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index df0c25c..ccc9fa7 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
return len;
}

+static int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ int ret;
+
+ if (!len)
+ return 0;
+
+ ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
+
+ /* If we can't write it all, don't bother writing anything */
+ if (ret >= len)
+ return 0;
+
+ s->len += ret;
+
+ return len;
+}
+
/**
* trace_seq_puts - trace sequence printing of simple string
* @s: trace sequence descriptor
@@ -800,6 +820,57 @@ static struct trace_event trace_print_event = {
.binary = trace_nop_print,
};

+/* TRACE_BPRINTK */
+static int
+trace_bprintk_print(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+ struct bprintk_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!seq_print_ip_sym(s, field->ip, flags))
+ goto partial;
+
+ if (!trace_seq_puts(s, ": "))
+ goto partial;
+
+ if (!trace_seq_bprintf(s, field->fmt, field->buf))
+ goto partial;
+
+ return 0;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static int
+trace_bprintk_raw(struct trace_seq *s, struct trace_entry *entry, int flags)
+{
+ struct bprintk_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!trace_seq_printf(s, ": %lx : ", field->ip))
+ goto partial;
+
+ if (!trace_seq_bprintf(s, field->fmt, field->buf))
+ goto partial;
+
+ return 0;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_bprintk_event = {
+ .type = TRACE_BPRINTK,
+ .trace = trace_bprintk_print,
+ .latency_trace = trace_bprintk_print,
+ .raw = trace_bprintk_raw,
+ .hex = trace_nop_print,
+ .binary = trace_nop_print,
+};
+
static struct trace_event *events[] __initdata = {
&trace_fn_event,
&trace_ctx_event,
@@ -808,6 +879,7 @@ static struct trace_event *events[] __initdata = {
&trace_stack_event,
&trace_user_stack_event,
&trace_print_event,
+ &trace_bprintk_event,
NULL
};






2008-12-31 05:00:19

by Frederic Weisbecker

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

Hi Lai.

On Wed, Dec 31, 2008 at 10:56:05AM +0800, Lai Jiangshan wrote:
> [... full patch quoted; trimmed here to the relevant hunk of
> kernel/trace/trace_bprintk.c ...]
>
> +/* binary printk basic */
> +static DEFINE_MUTEX(btrace_mutex);

Looks like you only need the mutex to protect a counter.
Wouldn't it be better to make the counter atomic and drop
the mutex?
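Something like this minimal sketch, assuming the counter is the only
state the mutex protects:

static atomic_t btrace_metadata_count = ATOMIC_INIT(0);

static void get_btrace_metadata(void)
{
	atomic_inc(&btrace_metadata_count);
}

static void put_btrace_metadata(void)
{
	atomic_dec(&btrace_metadata_count);
}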

> [... remainder of patch snipped ...]


Just out of curiosity, why do you need such binary tracing?
Do you need it because string output is really too slow for your needs?

Thanks.

2009-01-02 22:24:58

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

On Wed, Dec 31, 2008 at 2:59 AM, Frederic Weisbecker <[email protected]> wrote:
> Hi Lai.
>
> On Wed, Dec 31, 2008 at 10:56:05AM +0800, Lai Jiangshan wrote:
>> [... full patch and inline review snipped ...]
>
> Just out of curiosity, why do you need such binary tracing?
> Do you need it because string output is really too slow for your needs?

warning: I haven't looked at the patch details

But I would love to use something like this to provide the exact
contents the userspace blktrace utilities want.

- Arnaldo

2009-01-05 02:09:18

by Lai Jiangshan

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record


>>
>> Just out of curiosity, why do you need such binary tracing?
>> Do you need it because string output is really too slow for your needs?

Hi, Frederic Weisbecker,

We have
1) lots of kinds of event sources (FUNCTION trace, TRACE_CTX, tracepoints, markers ...)
2) a generic and mature event log buffer - trace/ring_buffer.c
3) a generic and mature trace framework - trace/trace.c, trace/trace_output.c

But we don't have a generic event log format: we record different
events with different record formats (TRACE_FN, TRACE_CTX, TRACE_GRAPH_RET,
TRACE_SPECIAL, ...), with a different struct for each format.

There are too many different formats, so we need a generic event log format.
This patch provides one.
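
For reference, the round trip through the new entry looks roughly like
this (condensed from trace_vbprintk() and trace_seq_bprintf() in the
patch; vbin_printf()/bstr_printf() are the lib/vsprintf.c helpers that
CONFIG_BINARY_PRINTF provides):

	/* record side: pack the raw varargs, not a formatted string */
	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
	entry->fmt = fmt;	/* only the pointer is saved; fmt must stay valid */
	memcpy(entry->buf, trace_buf, sizeof(u32) * len);

	/* read side: expand the packed values with the saved format;
	 * 'space' is the room left in the output sequence buffer */
	bstr_printf(s->buffer + s->len, space, entry->fmt, entry->buf);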

Actually, we can use:
	__ftrace_bprintk(0, "# %ld %ld %ld\n", arg1, arg2, arg3)
instead of TRACE_SPECIAL.
(Maybe I should remove the "unsigned long ip" arg from __ftrace_bprintk()
and trace_vbprintk(); users who need the call address can just use "%pS".)
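
For comparison, the TRACE_SPECIAL path needs a dedicated entry struct
(roughly the existing definition below) plus its own output code, and it
can only ever carry three unsigned longs:

struct special_entry {
	struct trace_entry	ent;
	unsigned long		arg1;
	unsigned long		arg2;
	unsigned long		arg3;
};

The single __ftrace_bprintk() call above can record any number and mix
of arguments through the one bprintk_entry format.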

And binary record is a little slower than string output.

Lai.


Arnaldo Carvalho de Melo wrote:
>
> warning: I haven't looked at the patch details
>
> But I would love to use something like this to provide the exact
> contents the userspace blktrace utilities want.
>

2009-01-06 11:32:59

by Frederic Weisbecker

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

2009/1/5 Lai Jiangshan <[email protected]>:
>
>>>
>>> Just out of curiosity, why do you need such binary tracing?
>>> Do you need it because string output is really too slow for your needs?
>
> Hi, Frederic Weisbecker,
>
> We have
> 1) lots of kinds of event sources (FUNCTION trace, TRACE_CTX, tracepoints, markers ...)
> 2) a generic and mature event log buffer - trace/ring_buffer.c
> 3) a generic and mature trace framework - trace/trace.c, trace/trace_output.c
>
> But we don't have a generic event log format: we record different
> events with different record formats (TRACE_FN, TRACE_CTX, TRACE_GRAPH_RET,
> TRACE_SPECIAL, ...), with a different struct for each format.


I understand better now. So it acts like an optimized ftrace_printk.
Instead of inserting formatted ftrace_printk entries into the ring buffer,
we insert the binary data along with the format and let the tracer decide
what to do with it, providing a fully generic entry transport.

That looks like a good idea.

2009-01-19 19:28:31

by Frederic Weisbecker

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

2009/1/2 Arnaldo Carvalho de Melo <[email protected]>:
> On Wed, Dec 31, 2008 at 2:59 AM, Frederic Weisbecker <[email protected]> wrote:
>> Hi Lai.
>>
>> [... full patch and inline review snipped ...]
>>
>> Just out of curiosity, why do you need such binary tracing?
>> Do you need it because string output is really too slow for your needs?
>
> warning: I haven't looked at the patch details
>
> But I would love to use something like this to provide the exact
> contents the userspace blktrace utilities want.
>
> - Arnaldo
>


Hi Arnaldo,

Since you talked about binary records for the blk tracer, I just recalled
this patch.
Are you sure this infrastructure would cover your needs?

From your traced site, trace_vbprintk will carry your arbitrarily typed
data in a binary-contiguous way.
But actually, its purpose is more about transporting the binary data so
that it can finally be printed as formatted string output, as usual.

You can compare it with ftrace_printk, since the result is the same.
The difference is in the transport:
ftrace_printk will carry your data already formatted, whereas
trace_vbprintk will carry it as binary values and format it
at the last moment. In most cases, trace_vbprintk should be logically
more lightweight.
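
Roughly, the hot-path difference is just which vsprintf helper runs at
record time (a sketch; both helpers live in lib/vsprintf.c):

	/* ftrace_printk: the whole string is formatted while recording */
	len = vsnprintf(buf, TRACE_BUF_SIZE, fmt, args);

	/* trace_vbprintk: only the raw argument values are packed here;
	 * the formatting is deferred until the trace file is read */
	len = vbin_printf(bin_buf, TRACE_BUF_SIZE, fmt, args);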


But if you have some predefined typed data to export as binary, I
don't think that can really help you.
You would prefer to define your own type and then export it as binary
values with your own output helper function.

Hmm?

Steven, Ingo, what are your thoughts about this patch?

2009-01-19 20:25:48

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

Em Mon, Jan 19, 2009 at 08:28:03PM +0100, Frédéric Weisbecker escreveu:
> 2009/1/2 Arnaldo Carvalho de Melo <[email protected]>:
> >
> > warning: I haven't looked at the patch details
> >
> > But I would love to use something like this to provide the exact
> > contents the userspace blktrace utilities want.
> >
> > - Arnaldo
> >
>
>
> Hi Arnaldo,

> Since you talked about binary records for the blk tracer, I just recalled
> this patch. Are you sure this infrastructure would cover your needs?

Nope, now that I look at it, it definitely isn't what I need. See? The
warning was valid after all :-)

What I want and will experiment now is to almost dump the contents of
the ring buffer as-is to userspace. I want that so that I can use
blkparse to validate the buffer produced by the ring buffer +
blkFtrace routines.

So probably it will be a matter of using trace_iter to signal that, and
when it gets to my print_line routine I just put together the initial
trace format expected by blkparse + the ones I'm collecting at
tracepoint time.

> >From your traced site, trace_vbprintk will carry your random typed
> datas in a binary-contiguous way.
> But actually, its purpose is more about holding binary data transport
> to finally print them in a formatted string
> output, as usual.
>
> You can compare it with ftrace_printk, since the result is the same.
> The difference is in the transport.
> ftrace_printk will carry your data as already formatted whereas
> trace_vbprintk will carry them as binary values and format them
> in the last moment. On most cases, trace_vbprintk would be logically
> more lightweight.

Understood, but I already do this, and then leave it to the last minute
to look at the blktrace types:

+static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
+{
+ struct trace_seq *s = &iter->seq;
+ struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
+ const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
+ int ret;
+
+ switch (what) {
+ case __BLK_TA_ISSUE: ret = blk_log_generic(iter, t, "D"); break;
+ case __BLK_TA_INSERT: ret = blk_log_generic(iter, t, "I"); break;
+ case __BLK_TA_QUEUE: ret = blk_log_generic(iter, t, "Q"); break;
+ case __BLK_TA_BOUNCE: ret = blk_log_generic(iter, t, "B"); break;
+ case __BLK_TA_BACKMERGE: ret = blk_log_generic(iter, t, "M"); break;
+ case __BLK_TA_FRONTMERGE: ret = blk_log_generic(iter, t, "F"); break;
+ case __BLK_TA_GETRQ: ret = blk_log_generic(iter, t, "G"); break;
+ case __BLK_TA_SLEEPRQ: ret = blk_log_generic(iter, t, "S"); break;
+ case __BLK_TA_REQUEUE: ret = blk_log_with_err(iter, t, "R"); break;
+ case __BLK_TA_COMPLETE: ret = blk_log_with_err(iter, t, "C"); break;
+ case __BLK_TA_PLUG: ret = blk_log_plug(iter, t); break;
+ case __BLK_TA_UNPLUG_IO: ret = blk_log_unplug(iter, t, "U"); break;
+ case __BLK_TA_UNPLUG_TIMER: ret = blk_log_unplug(iter, t, "UT"); break;
+ case __BLK_TA_REMAP: ret = blk_log_remap(iter, t); break;
+ case __BLK_TA_SPLIT: ret = blk_log_split(iter, t, "X"); break;

I did this trying to get as close to the existing blktrace record format
as possible, to minimize the current patch.

I guess that the right thing to do is to create a new blktrace record
format, that is:

struct blk_io_trace {
	struct trace_entry ent;
	<what is not in struct trace_entry already>
};

And rebuild the userspace blkparse utility.

But I'm still trying to figure out how to best use the ever-improving
ftrace infrastructure.

For instance, now I think that I need to register one event for each of
the case __BLK_TA_ lines above, like kernel/trace/trace_branch.c already
does.

If I do that I can also provide a trace_event->binary that would use
trace_seq_putmem.

> > But if you have some predefined typed data to export as binary, I
> > don't think that can really help you.
> > You would prefer to define your own type and then export it as binary
> > values with your own output helper function.

Agreed

> Hmm?
>
> Steven, Ingo, what are your thoughts about this patch?

2009-01-19 21:08:58

by Frederic Weisbecker

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

On Mon, Jan 19, 2009 at 06:25:23PM -0200, Arnaldo Carvalho de Melo wrote:
> Em Mon, Jan 19, 2009 at 08:28:03PM +0100, Frédéric Weisbecker escreveu:
> > [... earlier exchange snipped ...]
> >
> > Hi Arnaldo,
>
> > Since you talked about binary records for the blk tracer, I just recalled
> > this patch. Are you sure this infrastructure would cover your needs?
>
> Nope, now that I look at it, it definitely isn't what I need. See? The
> warning was valid after all :-)
>
> What I want and will experiment now is to almost dump the contents of
> the ring buffer as-is to userspace. I want that so that I can use
> blkparse to validate the buffer produced by the ring buffer +
> blkFtrace routines.
>
> So probably it will be a matter of using trace_iter to signal that, and
> when it gets to my print_line routine I just put together the initial
> trace format expected by blkparse + the ones I'm collecting at
> tracepoint time.


So you would like two different trace files? Or print both bin and formatted
output together in the same file?
I'm not sure I understand :-s


> > From your traced site, trace_vbprintk will carry your arbitrarily typed
> > data in a binary-contiguous way.
> > But actually, its purpose is more about transporting the binary data so
> > that it can finally be printed as formatted string output, as usual.
> >
> > You can compare it with ftrace_printk, since the result is the same.
> > The difference is in the transport:
> > ftrace_printk will carry your data already formatted, whereas
> > trace_vbprintk will carry it as binary values and format it
> > at the last moment. In most cases, trace_vbprintk should be logically
> > more lightweight.
>
> Understood, but I already do this, and then leave it to the last minute
> to look at the blktrace types:
>
> +static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
> +{
> + struct trace_seq *s = &iter->seq;
> + struct blk_io_trace *t = (struct blk_io_trace *)iter->ent;
> + const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1);
> + int ret;
> +
> + switch (what) {
> + case __BLK_TA_ISSUE: ret = blk_log_generic(iter, t, "D"); break;
> + case __BLK_TA_INSERT: ret = blk_log_generic(iter, t, "I"); break;
> + case __BLK_TA_QUEUE: ret = blk_log_generic(iter, t, "Q"); break;
> + case __BLK_TA_BOUNCE: ret = blk_log_generic(iter, t, "B"); break;
> + case __BLK_TA_BACKMERGE: ret = blk_log_generic(iter, t, "M"); break;
> + case __BLK_TA_FRONTMERGE: ret = blk_log_generic(iter, t, "F"); break;
> + case __BLK_TA_GETRQ: ret = blk_log_generic(iter, t, "G"); break;
> + case __BLK_TA_SLEEPRQ: ret = blk_log_generic(iter, t, "S"); break;
> + case __BLK_TA_REQUEUE: ret = blk_log_with_err(iter, t, "R"); break;
> + case __BLK_TA_COMPLETE: ret = blk_log_with_err(iter, t, "C"); break;
> + case __BLK_TA_PLUG: ret = blk_log_plug(iter, t); break;
> + case __BLK_TA_UNPLUG_IO: ret = blk_log_unplug(iter, t, "U"); break;
> + case __BLK_TA_UNPLUG_TIMER: ret = blk_log_unplug(iter, t, "UT"); break;
> + case __BLK_TA_REMAP: ret = blk_log_remap(iter, t); break;
> + case __BLK_TA_SPLIT: ret = blk_log_split(iter, t, "X"); break;
>
> I did this trying to get as close to the existing blktrace record format
> as possible, to minimize the current patch.

Ok.

> I guess that the right thing to do is to create a new blktrace record
> format, that is:
>
> struct blk_io_trace {
> 	struct trace_entry ent;
> 	<what is not in struct trace_entry already>
> };
>
> And rebuild the userspace blkparse utility.
>
> But I'm still trying to figure out how to best use the ever-improving
> ftrace infrastructure.
>
> For instance, now I think that I need to register one event for each of
> the case __BLK_TA_ lines above, like kernel/trace/trace_branch.c already
> does.


Right, but I fear it would make the event type list in trace.h very large.
But, actually, it is expected to host as many tracers as needed... So...

Probably it can be done that way, or another approach would be to
implement a way to have subtype events for a tracer.
This way, one tracer could dynamically register as many private types as
needed, with their event callbacks, without worrying about making
trace.h too fat.
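
(A purely hypothetical sketch of what such an interface could look like;
nothing of the sort exists today:)

	/* hypothetical: give each tracer its own event-type space, so
	 * private types never have to be added to enum trace_type;
	 * ->type would be allocated dynamically instead of hardcoded */
	int register_tracer_event(struct tracer *tracer,
				  struct trace_event *event);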

Steven, what would you prefer?

> If I do that I can also provide a trace_event->binary that would use
> trace_seq_putmem.


Yes, I discovered it recently, so all is actually ready for binary
output :-)


> > But if you have some predefined typed data to export as binary, I
> > don't think that can really help you.
> > You would prefer to define your own type and then export it as binary
> > values with your own output helper function.
>
> Agreed
>
> > Hmm?
> >
> > Steven, Ingo, what are your thoughts about this patch?

2009-01-19 21:37:50

by Arnaldo Carvalho de Melo

Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

Em Mon, Jan 19, 2009 at 10:08:40PM +0100, Frederic Weisbecker escreveu:
> On Mon, Jan 19, 2009 at 06:25:23PM -0200, Arnaldo Carvalho de Melo wrote:
> > Em Mon, Jan 19, 2009 at 08:28:03PM +0100, Frédéric Weisbecker escreveu:
> > > [... earlier exchange snipped ...]
> > > Hi Arnaldo,
> >
> > > Since you talked about binary records for the blk tracer, I just recalled
> > > this patch. Are you sure this infrastructure would cover your needs?
> >
> > Nope, now that I look at it, it definitely isn't what I need. See? The
> > warning was valid after all :-)
> >
> > What I want and will experiment now is to almost dump the contents of
> > the ring buffer as-is to userspace. I want that so that I can use
> > blkparse to validate the buffer produced by the ring buffer +
> > blkFtrace routines.
> >
> > So probably it will be a matter of using trace_iter to signal that, and
> > when it gets to my print_line routine I just put together the initial
> > trace format expected by blkparse + the ones I'm collecting at
> > tracepoint time.
>
>
> So you would like two different trace files? Or print both binary and
> formatted output together in the same file?
> I'm not sure I understand :-s

Output depends on iter_flags; most of what is needed is there, but I
guess there are too many ways to achieve the same result, and some
keywords are already used for other purposes. For instance,
TRACE_ITER_BIN -> print_bin_fmt forces all traces to first have pid,
cpu and timestamp (all using trace_seq_putmem), then whatever the
tracer wants to put after that (if it registered a trace_event _and_ it
has a ->binary() emitter). I wanted it to just call ->binary(), where I
would emulate exactly the old blktrace format; then we would be able to
just ask for binary traces, collect them through /d/tracing/trace_pipe
and pipe them into blkparse for validating/debugging the ring_buffer +
ftrace + blkFtrace code.
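
For reference, the print_bin_fmt path described here looks roughly like
this (a simplified sketch, not the exact source; s = &iter->seq and
entry = iter->ent):

	/* fixed header first: pid, cpu, timestamp, all as raw binary */
	SEQ_PUT_FIELD_RET(s, entry->pid);
	SEQ_PUT_FIELD_RET(s, iter->cpu);
	SEQ_PUT_FIELD_RET(s, iter->ts);

	/* then the tracer's own emitter, if it registered a trace_event
	 * with a ->binary() callback */
	event = ftrace_find_event(entry->type);
	if (event && event->binary)
		return event->binary(iter, 0);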

I'll try to get a brain dump in the form of working code later
today/early tomorrow, now I'm being preempted by my wife to do those
social things like having dinner and going to the movie theater 8)

Regards,

- Arnaldo

2009-01-19 23:30:25

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

On Mon, Jan 19, 2009 at 07:37:21PM -0200, Arnaldo Carvalho de Melo wrote:
> On Mon, Jan 19, 2009 at 10:08:40PM +0100, Frederic Weisbecker wrote:
> > On Mon, Jan 19, 2009 at 06:25:23PM -0200, Arnaldo Carvalho de Melo wrote:
> > > On Mon, Jan 19, 2009 at 08:28:03PM +0100, Frédéric Weisbecker wrote:
> > > > 2009/1/2 Arnaldo Carvalho de Melo <[email protected]>:
> > > > >
> > > > > warning: I haven't looked at the patch details
> > > > >
> > > > > But I would love to use something like this to provide the exact
> > > > > contents the userspace blktrace utilities want.
> > > > >
> > > > > - Arnaldo
> > > > >
> > > >
> > > >
> > > > Hi Arnaldo,
> > >
> > > > Since you talked about binary records for the blk tracer, I just recalled
> > > > this patch. Are you sure this infrastructure would cover your needs?
> > >
> > > Nope, now that I look at it, it definitely isn't what I need. See? The
> > > warning was valid after all :-)
> > >
> > > What I want and will experiment with now is to dump the contents of
> > > the ring buffer almost as-is to userspace. I want that so that I can
> > > use blkparse to validate the buffer produced by the ring buffer +
> > > blkFtrace routines.
> > >
> > > So probably it will be a matter of using trace_iter to signal that, and
> > > when it gets to my print_line routine I just put together the initial
> > > trace format expected by blkparse + the fields I'm collecting at
> > > tracepoint time.
> >
> >
> > So you would like two different trace files? Or print both bin and formatted
> > output together in the same file?
> > I'm not sure I understand :-s
>
> Output depends on iter_flags; most of what is needed is there, but I
> guess there are too many ways to achieve the same result, and some
> keywords are already used for other purposes. For instance,
> TRACE_ITER_BIN -> print_bin_fmt forces all traces to first have pid,
> cpu and timestamp (all using trace_seq_putmem), then whatever the
> tracer wants to put after that (if it registered a trace_event _and_ it
> has a ->binary() emitter). I wanted it to just call ->binary(), where I
> would emulate exactly the old blktrace format; then we would be able to
> just ask for binary traces, collect them through /d/tracing/trace_pipe
> and pipe them into blkparse for validating/debugging the ring_buffer +
> ftrace + blkFtrace code.


Oh I see. I think it just needs a new flag in struct trace_event to tell
whether we want the headers or not, and then a check on that flag in
trace.c to decide whether we print the cpu/time/pid or let the tracer do
the whole job itself.

I will submit a patch in the next few days to add it.
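
A minimal sketch of the idea (the flag name is an assumption, nothing is
settled yet):

	struct trace_event {
		int			type;
		trace_print_func	trace;
		trace_print_func	latency_trace;
		trace_print_func	raw;
		trace_print_func	hex;
		trace_print_func	binary;
		int			skip_context;	/* hypothetical: tracer emits its own cpu/time/pid */
	};

	/* in trace.c, before printing the standard headers: */
	if (event && event->skip_context)
		return event->trace(iter, flags);	/* the tracer does the whole line */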


> I'll try to get a brain dump in the form of working code later
> today/early tomorrow, now I'm being preempted by my wife to do those
> social things like having dinner and going to the movie theater 8)


Heh, have a good evening! :-)


> Regards,
>
> - Arnaldo

2009-03-02 16:28:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record



On Sat, 28 Feb 2009, Frederic Weisbecker wrote:

> From: Lai Jiangshan <[email protected]>
>
> Impact: save on memory for tracing
>
> Current tracers are typically using a struct(like struct ftrace_entry,
> struct ctx_switch_entry, struct special_entr etc...)to record a binary
> event. These structs can only record a their own kind of events.
> A new kind of tracer need a new struct and a lot of code too handle it.
>
> So we need a generic binary record for events. This infrastructure
> is for this purpose.
>
> [[email protected]: rebase against latest -tip]
>
> Signed-off-by: Lai Jiangshan <[email protected]>
> Signed-off-by: Frederic Weisbecker <[email protected]>
> ---
> include/linux/ftrace.h | 3 +
> kernel/trace/Kconfig | 6 +++
> kernel/trace/Makefile | 1 +
> kernel/trace/trace.c | 55 ++++++++++++++++++++++++++
> kernel/trace/trace.h | 12 ++++++
> kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_output.c | 76 ++++++++++++++++++++++++++++++++++++
> 7 files changed, 240 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 847bb3c..8ac2fac 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
> */
> extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
> +#endif
>
> /* May be defined in arch */
> extern int ftrace_arch_read_dyn_info(char *buf, int size);
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 02bc5d6..2e84fc7 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
> This is done by setting the current return address on the current
> task structure into a stack of calls.
>
> +config TRACE_BPRINTK
> + bool "Binary printk for tracing"
> + default y
> + depends on TRACING
> + select BINARY_PRINTF
> +
> config IRQSOFF_TRACER
> bool "Interrupts-off Latency Tracer"
> default n
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index c931fe0..dba6157 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
> obj-$(CONFIG_TRACING) += trace_clock.o
> obj-$(CONFIG_TRACING) += trace_output.o
> obj-$(CONFIG_TRACING) += trace_stat.o
> +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
> obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
> obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
> obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index c5e39cd..3a3ae19 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3572,6 +3572,61 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
> }
> EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>
> +/**
> + * trace_vbprintk - write binary msg to tracing buffer
> + *
> + * Caller must insure @fmt are valid when msg is in tracing buffer.
> + */
> +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
> +{
> + static DEFINE_SPINLOCK(trace_buf_lock);
> + static u32 trace_buf[TRACE_BUF_SIZE];
> +
> + struct ring_buffer_event *event;
> + struct trace_array *tr = &global_trace;
> + struct trace_array_cpu *data;
> + struct bprintk_entry *entry;
> + unsigned long flags;
> + int cpu, len = 0, size, pc;
> +
> + if (tracing_disabled || !trace_bprintk_enable)
> + return 0;
> +

If this is called in the scheduler (we might want a ftrace_printk there),
then the preempt_enable might cause a recursive crash.

If a ftrace_printk is in schedule() but before we clear NEED_RESCHED, the
preempt_enable below will cause schedule to be called again. Then we would
again execute this trace and call schedule again, over and over till we
corrupt the stack and crash the system.
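
The hazard, in essence (simplified from the real preempt_enable()
macros):

	dec_preempt_count();
	barrier();
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
		preempt_schedule();	/* -> schedule() -> our ftrace_printk -> ... */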

That is what ftrace_preempt_disable() is for.

It's defined in kernel/trace/trace.h:
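
(roughly:)

	static inline int ftrace_preempt_disable(void)
	{
		int resched;

		resched = need_resched();
		preempt_disable_notrace();

		return resched;
	}

	static inline void ftrace_preempt_enable(int resched)
	{
		if (resched)
			preempt_enable_no_resched_notrace();
		else
			preempt_enable_notrace();
	}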

The way you use it is:

int resched;
[...]

resched = ftrace_preempt_disable();
[...]

ftrace_preempt_enable(resched);

This will safely disable preemption and enable it correctly.
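
Applied to trace_vbprintk above, the prologue and epilogue would become
something like:

	int resched;
	[...]
	pc = preempt_count();
	resched = ftrace_preempt_disable();	/* was: preempt_disable_notrace() */
	[...]
	out:
	ftrace_preempt_enable(resched);	/* was: preempt_enable_notrace() */
	return len;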


> + pc = preempt_count();
> + preempt_disable_notrace();
> + cpu = raw_smp_processor_id();
> + data = tr->data[cpu];
> +
> + if (unlikely(atomic_read(&data->disabled)))
> + goto out;
> +
> + spin_lock_irqsave(&trace_buf_lock, flags);
> + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> +
> + if (len > TRACE_BUF_SIZE || len < 0)
> + goto out_unlock;
> +
> + size = sizeof(*entry) + sizeof(u32) * len;
> + event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
> + if (!event)
> + goto out_unlock;
> + entry = ring_buffer_event_data(event);
> + entry->ip = ip;
> + entry->fmt = fmt;
> +
> + memcpy(entry->buf, trace_buf, sizeof(u32) * len);
> + ring_buffer_unlock_commit(tr->buffer, event);
> +
> +out_unlock:
> + spin_unlock_irqrestore(&trace_buf_lock, flags);
> +
> +out:
> + preempt_enable_notrace();
> +
> + return len;
> +}
> +EXPORT_SYMBOL_GPL(trace_vbprintk);
> +
> static int trace_panic_handler(struct notifier_block *this,
> unsigned long event, void *unused)
> {
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index f6fa0b9..92703b1 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -20,6 +20,7 @@ enum trace_type {
> TRACE_WAKE,
> TRACE_STACK,
> TRACE_PRINT,
> + TRACE_BPRINTK,
> TRACE_SPECIAL,
> TRACE_MMIO_RW,
> TRACE_MMIO_MAP,
> @@ -124,6 +125,16 @@ struct print_entry {
> char buf[];
> };
>
> +struct bprintk_entry {
> + struct trace_entry ent;
> + unsigned long ip;
> + const char *fmt;
> + u32 buf[];
> +};
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_bprintk_enable;
> +#endif
> +
> #define TRACE_OLD_SIZE 88
>
> struct trace_field_cont {
> @@ -284,6 +295,7 @@ extern void __ftrace_bad_type(void);
> IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
> IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
> IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
> + IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
> IF_ASSIGN(var, ent, struct special_entry, 0); \
> IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
> TRACE_MMIO_RW); \
> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
> new file mode 100644
> index 0000000..1f8e532
> --- /dev/null
> +++ b/kernel/trace/trace_bprintk.c
> @@ -0,0 +1,87 @@
> +/*
> + * trace binary printk
> + *
> + * Copyright (C) 2008 Lai Jiangshan <[email protected]>
> + *
> + */
> +#include <linux/kernel.h>
> +#include <linux/ftrace.h>
> +#include <linux/string.h>
> +#include <linux/ctype.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/module.h>
> +#include <linux/seq_file.h>
> +#include <linux/fs.h>
> +#include <linux/marker.h>
> +#include <linux/uaccess.h>
> +
> +#include "trace.h"
> +
> +/* binary printk basic */
> +static DEFINE_MUTEX(btrace_mutex);
> +static int btrace_metadata_count;
> +
> +static inline void lock_btrace(void)
> +{
> + mutex_lock(&btrace_mutex);
> +}
> +
> +static inline void unlock_btrace(void)
> +{
> + mutex_unlock(&btrace_mutex);
> +}

Is there a reason to add these wrappers around the mutex?

-- Steve
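
For comparison, without the wrappers the two helpers quoted below would
reduce to plain mutex calls:

	static void get_btrace_metadata(void)
	{
		mutex_lock(&btrace_mutex);
		btrace_metadata_count++;
		mutex_unlock(&btrace_mutex);
	}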

> +
> +static void get_btrace_metadata(void)
> +{
> + lock_btrace();
> + btrace_metadata_count++;
> + unlock_btrace();
> +}
> +
> +static void put_btrace_metadata(void)
> +{
> + lock_btrace();
> + btrace_metadata_count--;
> + unlock_btrace();
> +}
> +
> +/* events tracer */
> +int trace_bprintk_enable;
> +
> +static void start_bprintk_trace(struct trace_array *tr)
> +{
> + get_btrace_metadata();
> + tracing_reset_online_cpus(tr);
> + trace_bprintk_enable = 1;
> +}
> +
> +static void stop_bprintk_trace(struct trace_array *tr)
> +{
> + trace_bprintk_enable = 0;
> + tracing_reset_online_cpus(tr);
> + put_btrace_metadata();
> +}
> +
> +static int init_bprintk_trace(struct trace_array *tr)
> +{
> + start_bprintk_trace(tr);
> + return 0;
> +}
> +
> +static struct tracer bprintk_trace __read_mostly =
> +{
> + .name = "events",
> + .init = init_bprintk_trace,
> + .reset = stop_bprintk_trace,
> + .start = start_bprintk_trace,
> + .stop = stop_bprintk_trace,
> +};
> +
> +static __init int init_bprintk(void)
> +{
> + return register_tracer(&bprintk_trace);
> +}
> +
> +device_initcall(init_bprintk);
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index 9fc8150..7897b5d 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
> return len;
> }
>
> +static int
> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
> +{
> + int len = (PAGE_SIZE - 1) - s->len;
> + int ret;
> +
> + if (!len)
> + return 0;
> +
> + ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
> +
> + /* If we can't write it all, don't bother writing anything */
> + if (ret >= len)
> + return 0;
> +
> + s->len += ret;
> +
> + return len;
> +}
> +
> /**
> * trace_seq_puts - trace sequence printing of simple string
> * @s: trace sequence descriptor
> @@ -887,6 +907,61 @@ static struct trace_event trace_print_event = {
> .raw = trace_print_raw,
> };
>
> +/* TRACE_BPRINTK */
> +static enum print_line_t
> +trace_bprintk_print(struct trace_iterator *iter, int flags)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct bprintk_entry *field;
> +
> + trace_assign_type(field, entry);
> +
> + if (!seq_print_ip_sym(s, field->ip, flags))
> + goto partial;
> +
> + if (!trace_seq_puts(s, ": "))
> + goto partial;
> +
> + if (!trace_seq_bprintf(s, field->fmt, field->buf))
> + goto partial;
> +
> + return TRACE_TYPE_HANDLED;
> +
> + partial:
> + return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static enum print_line_t
> +trace_bprintk_raw(struct trace_iterator *iter, int flags)
> +{
> + struct trace_entry *entry = iter->ent;
> + struct trace_seq *s = &iter->seq;
> + struct bprintk_entry *field;
> +
> + trace_assign_type(field, entry);
> +
> + if (!trace_seq_printf(s, ": %lx : ", field->ip))
> + goto partial;
> +
> + if (!trace_seq_bprintf(s, field->fmt, field->buf))
> + goto partial;
> +
> + return TRACE_TYPE_HANDLED;
> +
> + partial:
> + return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static struct trace_event trace_bprintk_event = {
> + .type = TRACE_BPRINTK,
> + .trace = trace_bprintk_print,
> + .latency_trace = trace_bprintk_print,
> + .raw = trace_bprintk_raw,
> + .hex = trace_nop_print,
> + .binary = trace_nop_print,
> +};
> +
> static struct trace_event *events[] __initdata = {
> &trace_fn_event,
> &trace_ctx_event,
> @@ -895,6 +970,7 @@ static struct trace_event *events[] __initdata = {
> &trace_stack_event,
> &trace_user_stack_event,
> &trace_print_event,
> + &trace_bprintk_event,
> NULL
> };
>
> --
> 1.6.1
>
>
>

2009-03-02 17:39:19

by Frederic Weisbecker

[permalink] [raw]
Subject: Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record

2009/3/2 Steven Rostedt <[email protected]>:
>
>
> On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
>
>> From: Lai Jiangshan <[email protected]>
>>
>> Impact: save on memory for tracing
>>
>> Current tracers are typically using a struct(like struct ftrace_entry,
>> struct ctx_switch_entry, struct special_entr etc...)to record a binary
>> event. These structs can only record a their own kind of events.
>> A new kind of tracer need a new struct and a lot of code too handle it.
>>
>> So we need a generic binary record for events. This infrastructure
>> is for this purpose.
>>
>> [[email protected]: rebase against latest -tip]
>>
>> Signed-off-by: Lai Jiangshan <[email protected]>
>> Signed-off-by: Frederic Weisbecker <[email protected]>
>> ---
>> include/linux/ftrace.h | 3 +
>> kernel/trace/Kconfig | 6 +++
>> kernel/trace/Makefile | 1 +
>> kernel/trace/trace.c | 55 ++++++++++++++++++++++++++
>> kernel/trace/trace.h | 12 ++++++
>> kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++++++++++
>> kernel/trace/trace_output.c | 76 ++++++++++++++++++++++++++++++++++++
>> 7 files changed, 240 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
>> index 847bb3c..8ac2fac 100644
>> --- a/include/linux/ftrace.h
>> +++ b/include/linux/ftrace.h
>> @@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
>> */
>> extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>>
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
>> +#endif
>>
>> /* May be defined in arch */
>> extern int ftrace_arch_read_dyn_info(char *buf, int size);
>> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
>> index 02bc5d6..2e84fc7 100644
>> --- a/kernel/trace/Kconfig
>> +++ b/kernel/trace/Kconfig
>> @@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
>> This is done by setting the current return address on the current
>> task structure into a stack of calls.
>>
>> +config TRACE_BPRINTK
>> + bool "Binary printk for tracing"
>> + default y
>> + depends on TRACING
>> + select BINARY_PRINTF
>> +
>> config IRQSOFF_TRACER
>> bool "Interrupts-off Latency Tracer"
>> default n
>> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
>> index c931fe0..dba6157 100644
>> --- a/kernel/trace/Makefile
>> +++ b/kernel/trace/Makefile
>> @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
>> obj-$(CONFIG_TRACING) += trace_clock.o
>> obj-$(CONFIG_TRACING) += trace_output.o
>> obj-$(CONFIG_TRACING) += trace_stat.o
>> +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
>> obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>> obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>> obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index c5e39cd..3a3ae19 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -3572,6 +3572,61 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>> }
>> EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>>
>> +/**
>> + * trace_vbprintk - write binary msg to tracing buffer
>> + *
>> + * Caller must insure @fmt are valid when msg is in tracing buffer.
>> + */
>> +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
>> +{
>> + static DEFINE_SPINLOCK(trace_buf_lock);
>> + static u32 trace_buf[TRACE_BUF_SIZE];
>> +
>> + struct ring_buffer_event *event;
>> + struct trace_array *tr = &global_trace;
>> + struct trace_array_cpu *data;
>> + struct bprintk_entry *entry;
>> + unsigned long flags;
>> + int cpu, len = 0, size, pc;
>> +
>> + if (tracing_disabled || !trace_bprintk_enable)
>> + return 0;
>> +
>
> If this is called in the scheduler (we might want a ftrace_printk there),
> then the preempt_enable might cause a recursive crash.
>
> If a ftrace_printk is in schedule() but before we clear NEED_RESCHED, the
> preempt_enable below will cause schedule to be called again. Then we would
> again execute this trace and call schedule again, over and over till we
> corrupt the stack and crash the system.


Oh, right!


> That is what ftrace_preempt_disable() is for.
>
> It's defined in kernel/trace/trace.h:
>
> The way you use it is:
>
> int resched;
> [...]
>
> resched = ftrace_preempt_disable();
> [...]
>
> ftrace_preempt_enable(resched);
>
> This will safely disable preemption and enable it correctly.
>


Ok, will fix it.


>> + pc = preempt_count();
>> + preempt_disable_notrace();
>> + cpu = raw_smp_processor_id();
>> + data = tr->data[cpu];
>> +
>> + if (unlikely(atomic_read(&data->disabled)))
>> + goto out;
>> +
>> + spin_lock_irqsave(&trace_buf_lock, flags);
>> + len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
>> +
>> + if (len > TRACE_BUF_SIZE || len < 0)
>> + goto out_unlock;
>> +
>> + size = sizeof(*entry) + sizeof(u32) * len;
>> + event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
>> + if (!event)
>> + goto out_unlock;
>> + entry = ring_buffer_event_data(event);
>> + entry->ip = ip;
>> + entry->fmt = fmt;
>> +
>> + memcpy(entry->buf, trace_buf, sizeof(u32) * len);
>> + ring_buffer_unlock_commit(tr->buffer, event);
>> +
>> +out_unlock:
>> + spin_unlock_irqrestore(&trace_buf_lock, flags);
>> +
>> +out:
>> + preempt_enable_notrace();
>> +
>> + return len;
>> +}
>> +EXPORT_SYMBOL_GPL(trace_vbprintk);
>> +
>> static int trace_panic_handler(struct notifier_block *this,
>> unsigned long event, void *unused)
>> {
>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>> index f6fa0b9..92703b1 100644
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -20,6 +20,7 @@ enum trace_type {
>> TRACE_WAKE,
>> TRACE_STACK,
>> TRACE_PRINT,
>> + TRACE_BPRINTK,
>> TRACE_SPECIAL,
>> TRACE_MMIO_RW,
>> TRACE_MMIO_MAP,
>> @@ -124,6 +125,16 @@ struct print_entry {
>> char buf[];
>> };
>>
>> +struct bprintk_entry {
>> + struct trace_entry ent;
>> + unsigned long ip;
>> + const char *fmt;
>> + u32 buf[];
>> +};
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +extern int trace_bprintk_enable;
>> +#endif
>> +
>> #define TRACE_OLD_SIZE 88
>>
>> struct trace_field_cont {
>> @@ -284,6 +295,7 @@ extern void __ftrace_bad_type(void);
>> IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
>> IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>> IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
>> + IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>> IF_ASSIGN(var, ent, struct special_entry, 0); \
>> IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
>> TRACE_MMIO_RW); \
>> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
>> new file mode 100644
>> index 0000000..1f8e532
>> --- /dev/null
>> +++ b/kernel/trace/trace_bprintk.c
>> @@ -0,0 +1,87 @@
>> +/*
>> + * trace binary printk
>> + *
>> + * Copyright (C) 2008 Lai Jiangshan <[email protected]>
>> + *
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/ftrace.h>
>> +#include <linux/string.h>
>> +#include <linux/ctype.h>
>> +#include <linux/list.h>
>> +#include <linux/mutex.h>
>> +#include <linux/slab.h>
>> +#include <linux/module.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/fs.h>
>> +#include <linux/marker.h>
>> +#include <linux/uaccess.h>
>> +
>> +#include "trace.h"
>> +
>> +/* binary printk basic */
>> +static DEFINE_MUTEX(btrace_mutex);
>> +static int btrace_metadata_count;
>> +
>> +static inline void lock_btrace(void)
>> +{
>> + mutex_lock(&btrace_mutex);
>> +}
>> +
>> +static inline void unlock_btrace(void)
>> +{
>> + mutex_unlock(&btrace_mutex);
>> +}
>
> Is there a reason to add these wrappers around the mutex?
>
> -- Steve

Removed at the end of the series :-)

Thanks!
