2010-04-26 20:04:18

by Steven Rostedt

[permalink] [raw]
Subject: [PATCH 04/10][RFC] tracing: Remove per event trace registering

From: Steven Rostedt <[email protected]>

This patch removes the register functions of TRACE_EVENT() to enable
and disable tracepoints. The registering of a event is now down
directly in the trace_events.c file. The tracepoint_probe_register()
is now called directly.

The prototypes are no longer type checked, but this should not be
an issue since the tracepoints are created automatically by the
macros. If a prototype is incorrect in the TRACE_EVENT() macro, then
other macros will catch it.

The trace_event_class structure now holds the probes to be called
by the callbacks. This removes needing to have each event have
a separate pointer for the probe.

To handle kprobes and syscalls, since they register probes in a
different manner, a "reg" field is added to the ftrace_event_class
structure. If the "reg" field is assigned, then it will be called for
enabling and disabling of the probe for either ftrace or perf. To let
the reg function know what is happening, a new enum (trace_reg) is
created that has the type of control that is needed.

With this new rework, the 82 kernel events and 616 syscall events
has their footprint dramatically lowered:

text data bss dec hex filename
5788186 1337252 9351592 16477030 fb6b66 vmlinux.orig
5792282 1333796 9351592 16477670 fb6de6 vmlinux.class
5793448 1333780 9351592 16478820 fb7264 vmlinux.tracepoint
5796926 1337748 9351592 16486266 fb8f7a vmlinux.data
5774316 1306580 9351592 16432488 fabd68 vmlinux.regs

The size went from 16477030 to 16432488, that's a total of 44K
in savings. With tracepoints being continuously added, this is
critical that the footprint becomes minimal.

Signed-off-by: Steven Rostedt <[email protected]>
---
include/linux/ftrace_event.h | 17 +++++--
include/linux/syscalls.h | 29 ++---------
include/linux/tracepoint.h | 12 ++++-
include/trace/ftrace.h | 110 +++++----------------------------------
kernel/trace/trace_event_perf.c | 15 ++++-
kernel/trace/trace_events.c | 26 +++++++---
kernel/trace/trace_kprobe.c | 34 +++++++++---
kernel/trace/trace_syscalls.c | 56 +++++++++++++++++++-
8 files changed, 151 insertions(+), 148 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 496eea8..dd0051e 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -113,8 +113,21 @@ void tracing_record_cmdline(struct task_struct *tsk);

struct event_filter;

+enum trace_reg {
+ TRACE_REG_REGISTER,
+ TRACE_REG_UNREGISTER,
+ TRACE_REG_PERF_REGISTER,
+ TRACE_REG_PERF_UNREGISTER,
+};
+
+struct ftrace_event_call;
+
struct ftrace_event_class {
char *system;
+ void *probe;
+ void *perf_probe;
+ int (*reg)(struct ftrace_event_call *event,
+ enum trace_reg type);
};

struct ftrace_event_call {
@@ -124,8 +137,6 @@ struct ftrace_event_call {
struct dentry *dir;
struct trace_event *event;
int enabled;
- int (*regfunc)(struct ftrace_event_call *);
- void (*unregfunc)(struct ftrace_event_call *);
int id;
const char *print_fmt;
int (*raw_init)(struct ftrace_event_call *);
@@ -137,8 +148,6 @@ struct ftrace_event_call {
void *data;

int perf_refcount;
- int (*perf_event_enable)(struct ftrace_event_call *);
- void (*perf_event_disable)(struct ftrace_event_call *);
};

#define PERF_MAX_TRACE_SIZE 2048
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index ac5791d..e3348c4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -103,22 +103,6 @@ struct perf_event_attr;
#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)

-#ifdef CONFIG_PERF_EVENTS
-
-#define TRACE_SYS_ENTER_PERF_INIT(sname) \
- .perf_event_enable = perf_sysenter_enable, \
- .perf_event_disable = perf_sysenter_disable,
-
-#define TRACE_SYS_EXIT_PERF_INIT(sname) \
- .perf_event_enable = perf_sysexit_enable, \
- .perf_event_disable = perf_sysexit_disable,
-#else
-#define TRACE_SYS_ENTER_PERF(sname)
-#define TRACE_SYS_ENTER_PERF_INIT(sname)
-#define TRACE_SYS_EXIT_PERF(sname)
-#define TRACE_SYS_EXIT_PERF_INIT(sname)
-#endif /* CONFIG_PERF_EVENTS */
-
#ifdef CONFIG_FTRACE_SYSCALLS
#define __SC_STR_ADECL1(t, a) #a
#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__)
@@ -134,7 +118,8 @@ struct perf_event_attr;
#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__)
#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__)

-extern struct ftrace_event_class event_class_syscalls;
+extern struct ftrace_event_class event_class_syscall_enter;
+extern struct ftrace_event_class event_class_syscall_exit;

#define SYSCALL_TRACE_ENTER_EVENT(sname) \
static const struct syscall_metadata __syscall_meta_##sname; \
@@ -148,14 +133,11 @@ extern struct ftrace_event_class event_class_syscalls;
__attribute__((section("_ftrace_events"))) \
event_enter_##sname = { \
.name = "sys_enter"#sname, \
- .class = &event_class_syscalls, \
+ .class = &event_class_syscall_enter, \
.event = &enter_syscall_print_##sname, \
.raw_init = init_syscall_trace, \
.define_fields = syscall_enter_define_fields, \
- .regfunc = reg_event_syscall_enter, \
- .unregfunc = unreg_event_syscall_enter, \
.data = (void *)&__syscall_meta_##sname,\
- TRACE_SYS_ENTER_PERF_INIT(sname) \
}

#define SYSCALL_TRACE_EXIT_EVENT(sname) \
@@ -170,14 +152,11 @@ extern struct ftrace_event_class event_class_syscalls;
__attribute__((section("_ftrace_events"))) \
event_exit_##sname = { \
.name = "sys_exit"#sname, \
- .class = &event_class_syscalls, \
+ .class = &event_class_syscall_exit, \
.event = &exit_syscall_print_##sname, \
.raw_init = init_syscall_trace, \
.define_fields = syscall_exit_define_fields, \
- .regfunc = reg_event_syscall_exit, \
- .unregfunc = unreg_event_syscall_exit, \
.data = (void *)&__syscall_meta_##sname,\
- TRACE_SYS_EXIT_PERF_INIT(sname) \
}

#define SYSCALL_METADATA(sname, nb) \
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c04988a..5876b77 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -173,13 +173,21 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
{ } \
static inline void trace_##name(proto) \
{ } \
+ static inline int register_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline int unregister_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ } \
static inline int \
- register_trace_##name(void (*probe)(proto), void *data) \
+ register_trace_##name##_data(void (*probe)(proto), void *data) \
{ \
return -ENOSYS; \
} \
static inline int \
- unregister_trace_##name(void (*probe)(proto), void *data) \
+ unregister_trace_##name##_data(void (*probe)(proto), void *data) \
{ \
return -ENOSYS; \
}
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0921a8f..62fe622 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -381,53 +381,6 @@ static inline notrace int ftrace_get_offsets_##call( \

#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)

-#ifdef CONFIG_PERF_EVENTS
-
-/*
- * Generate the functions needed for tracepoint perf_event support.
- *
- * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
- *
- * static int ftrace_profile_enable_<call>(void)
- * {
- * return register_trace_<call>(ftrace_profile_<call>);
- * }
- *
- * static void ftrace_profile_disable_<call>(void)
- * {
- * unregister_trace_<call>(ftrace_profile_<call>);
- * }
- *
- */
-
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
-
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, name, proto, args) \
- \
-static void perf_trace_##name(proto); \
- \
-static notrace int \
-perf_trace_enable_##name(struct ftrace_event_call *unused) \
-{ \
- return register_trace_##name(perf_trace_##name); \
-} \
- \
-static notrace void \
-perf_trace_disable_##name(struct ftrace_event_call *unused) \
-{ \
- unregister_trace_##name(perf_trace_##name); \
-}
-
-#undef DEFINE_EVENT_PRINT
-#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
- DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
-
-#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-
-#endif /* CONFIG_PERF_EVENTS */
-
/*
* Stage 4 of the trace events.
*
@@ -468,16 +421,6 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
* event, irq_flags, pc);
* }
*
- * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
- * {
- * return register_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
- * {
- * unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
* static struct trace_event ftrace_event_type_<call> = {
* .trace = ftrace_raw_output_<call>, <-- stage 2
* };
@@ -504,11 +447,15 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \

#ifdef CONFIG_PERF_EVENTS

+#define _TRACE_PERF_PROTO(call, proto) \
+ static notrace void \
+ perf_trace_##call(proto, struct ftrace_event_call *event);
+
#define _TRACE_PERF_INIT(call) \
- .perf_event_enable = perf_trace_enable_##call, \
- .perf_event_disable = perf_trace_disable_##call,
+ .perf_probe = perf_trace_##call,

#else
+#define _TRACE_PERF_PROTO(call, proto)
#define _TRACE_PERF_INIT(call)
#endif /* CONFIG_PERF_EVENTS */

@@ -542,8 +489,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
\
static notrace void \
-ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
- proto) \
+ftrace_raw_event_##call(proto, \
+ struct ftrace_event_call *event_call) \
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ring_buffer_event *event; \
@@ -578,23 +525,6 @@ ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
\
-static notrace void ftrace_raw_event_##call(proto) \
-{ \
- ftrace_raw_event_id_##template(&event_##call, args); \
-} \
- \
-static notrace int \
-ftrace_raw_reg_event_##call(struct ftrace_event_call *unused) \
-{ \
- return register_trace_##call(ftrace_raw_event_##call); \
-} \
- \
-static notrace void \
-ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused) \
-{ \
- unregister_trace_##call(ftrace_raw_event_##call); \
-} \
- \
static struct trace_event ftrace_event_type_##call = { \
.trace = ftrace_raw_output_##call, \
};
@@ -618,9 +548,12 @@ static struct trace_event ftrace_event_type_##call = { \

#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+_TRACE_PERF_PROTO(call, PARAMS(proto)); \
static const char print_fmt_##call[] = print; \
static struct ftrace_event_class __used event_class_##call = { \
- .system = __stringify(TRACE_SYSTEM) \
+ .system = __stringify(TRACE_SYSTEM), \
+ .probe = ftrace_raw_event_##call, \
+ _TRACE_PERF_INIT(call) \
}

#undef DEFINE_EVENT
@@ -633,11 +566,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.class = &event_class_##template, \
.event = &ftrace_event_type_##call, \
.raw_init = trace_event_raw_init, \
- .regfunc = ftrace_raw_reg_event_##call, \
- .unregfunc = ftrace_raw_unreg_event_##call, \
.print_fmt = print_fmt_##template, \
.define_fields = ftrace_define_fields_##template, \
- _TRACE_PERF_INIT(call) \
}

#undef DEFINE_EVENT_PRINT
@@ -651,12 +581,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.name = #call, \
.class = &event_class_##template, \
.event = &ftrace_event_type_##call, \
- .raw_init = trace_event_raw_init, \
- .regfunc = ftrace_raw_reg_event_##call, \
- .unregfunc = ftrace_raw_unreg_event_##call, \
.print_fmt = print_fmt_##call, \
- .define_fields = ftrace_define_fields_##template, \
- _TRACE_PERF_INIT(call) \
}

#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -756,8 +681,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
static notrace void \
-perf_trace_templ_##call(struct ftrace_event_call *event_call, \
- proto) \
+perf_trace_##call(proto, struct ftrace_event_call *event_call) \
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_raw_##call *entry; \
@@ -792,13 +716,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
}

#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
-static notrace void perf_trace_##call(proto) \
-{ \
- struct ftrace_event_call *event_call = &event_##call; \
- \
- perf_trace_templ_##template(event_call, args); \
-}
+#define DEFINE_EVENT(template, call, proto, args)

#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 81f691e..95df5a7 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -44,7 +44,12 @@ static int perf_trace_event_enable(struct ftrace_event_call *event)
rcu_assign_pointer(perf_trace_buf_nmi, buf);
}

- ret = event->perf_event_enable(event);
+ if (event->class->reg)
+ ret = event->class->reg(event, TRACE_REG_PERF_REGISTER);
+ else
+ ret = tracepoint_probe_register(event->name,
+ event->class->perf_probe,
+ event);
if (!ret) {
total_ref_count++;
return 0;
@@ -70,7 +75,8 @@ int perf_trace_enable(int event_id)

mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
- if (event->id == event_id && event->perf_event_enable &&
+ if (event->id == event_id &&
+ event->class && event->class->perf_probe &&
try_module_get(event->mod)) {
ret = perf_trace_event_enable(event);
break;
@@ -88,7 +94,10 @@ static void perf_trace_event_disable(struct ftrace_event_call *event)
if (--event->perf_refcount > 0)
return;

- event->perf_event_disable(event);
+ if (event->class->reg)
+ event->class->reg(event, TRACE_REG_PERF_UNREGISTER);
+ else
+ tracepoint_probe_unregister(event->name, event->class->perf_probe, event);

if (!--total_ref_count) {
buf = perf_trace_buf;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f6893cc..f84cfcb 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -126,13 +126,23 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
if (call->enabled) {
call->enabled = 0;
tracing_stop_cmdline_record();
- call->unregfunc(call);
+ if (call->class->reg)
+ call->class->reg(call, TRACE_REG_UNREGISTER);
+ else
+ tracepoint_probe_unregister(call->name,
+ call->class->probe,
+ call);
}
break;
case 1:
if (!call->enabled) {
tracing_start_cmdline_record();
- ret = call->regfunc(call);
+ if (call->class->reg)
+ ret = call->class->reg(call, TRACE_REG_REGISTER);
+ else
+ ret = tracepoint_probe_register(call->name,
+ call->class->probe,
+ call);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
@@ -170,7 +180,8 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {

- if (!call->name || !call->regfunc)
+ if (!call->name || !call->class ||
+ (!call->class->probe && !call->class->reg))
continue;

if (match &&
@@ -296,7 +307,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
- if (call->regfunc)
+ if (call->class && (call->class->probe || call->class->reg))
return call;
}

@@ -449,7 +460,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,

mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->name || !call->regfunc)
+ if (!call->name || !call->class ||
+ (!call->class->probe && !call->class->reg))
continue;

if (system && strcmp(call->class->system, system) != 0)
@@ -934,11 +946,11 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return -1;
}

- if (call->regfunc)
+ if (call->class->probe || call->class->reg)
trace_create_file("enable", 0644, call->dir, call,
enable);

- if (call->id && call->perf_event_enable)
+ if (call->id && (call->class->perf_probe || call->class->reg))
trace_create_file("id", 0444, call->dir, call,
id);

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index eda220b..f8af21a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -202,6 +202,7 @@ struct trace_probe {
unsigned long nhit;
unsigned int flags; /* For TP_FLAG_* */
const char *symbol; /* symbol name */
+ struct ftrace_event_class class;
struct ftrace_event_call call;
struct trace_event event;
unsigned int nr_args;
@@ -323,6 +324,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
goto error;
}

+ tp->call.class = &tp->class;
tp->call.name = kstrdup(event, GFP_KERNEL);
if (!tp->call.name)
goto error;
@@ -332,8 +334,8 @@ static struct trace_probe *alloc_trace_probe(const char *group,
goto error;
}

- tp->call.class->system = kstrdup(group, GFP_KERNEL);
- if (!tp->call.class->system)
+ tp->class.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->class.system)
goto error;

INIT_LIST_HEAD(&tp->list);
@@ -1302,6 +1304,26 @@ static void probe_perf_disable(struct ftrace_event_call *call)
}
#endif /* CONFIG_PERF_EVENTS */

+static __kprobes
+int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
+{
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return probe_event_enable(event);
+ case TRACE_REG_UNREGISTER:
+ probe_event_disable(event);
+ return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return probe_perf_enable(event);
+ case TRACE_REG_PERF_UNREGISTER:
+ probe_perf_disable(event);
+ return 0;
+#endif
+ }
+ return 0;
+}

static __kprobes
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
@@ -1355,13 +1377,7 @@ static int register_probe_event(struct trace_probe *tp)
return -ENODEV;
}
call->enabled = 0;
- call->regfunc = probe_event_enable;
- call->unregfunc = probe_event_disable;
-
-#ifdef CONFIG_PERF_EVENTS
- call->perf_event_enable = probe_perf_enable;
- call->perf_event_disable = probe_perf_disable;
-#endif
+ call->class->reg = kprobe_register;
call->data = tp;
ret = trace_add_event_call(call);
if (ret) {
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 31fc95a..c92934d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -14,8 +14,19 @@ static int sys_refcount_exit;
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);

-struct ftrace_event_class event_class_syscalls = {
- .system = "syscalls"
+static int syscall_enter_register(struct ftrace_event_call *event,
+ enum trace_reg type);
+static int syscall_exit_register(struct ftrace_event_call *event,
+ enum trace_reg type);
+
+struct ftrace_event_class event_class_syscall_enter = {
+ .system = "syscalls",
+ .reg = syscall_enter_register
+};
+
+struct ftrace_event_class event_class_syscall_exit = {
+ .system = "syscalls",
+ .reg = syscall_exit_register
};

extern unsigned long __start_syscalls_metadata[];
@@ -586,3 +597,44 @@ void perf_sysexit_disable(struct ftrace_event_call *call)

#endif /* CONFIG_PERF_EVENTS */

+static int syscall_enter_register(struct ftrace_event_call *event,
+ enum trace_reg type)
+{
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return reg_event_syscall_enter(event);
+ case TRACE_REG_UNREGISTER:
+ unreg_event_syscall_enter(event);
+ return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return perf_sysenter_enable(event);
+ case TRACE_REG_PERF_UNREGISTER:
+ perf_sysenter_disable(event);
+ return 0;
+#endif
+ }
+ return 0;
+}
+
+static int syscall_exit_register(struct ftrace_event_call *event,
+ enum trace_reg type)
+{
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return reg_event_syscall_exit(event);
+ case TRACE_REG_UNREGISTER:
+ unreg_event_syscall_exit(event);
+ return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return perf_sysexit_enable(event);
+ case TRACE_REG_PERF_UNREGISTER:
+ perf_sysexit_disable(event);
+ return 0;
+#endif
+ }
+ return 0;
+}
--
1.7.0


2010-04-28 20:45:05

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> From: Steven Rostedt <[email protected]>
>
> This patch removes the register functions of TRACE_EVENT() to enable
> and disable tracepoints. The registering of a event is now down
> directly in the trace_events.c file. The tracepoint_probe_register()
> is now called directly.
>
> The prototypes are no longer type checked, but this should not be
> an issue since the tracepoints are created automatically by the
> macros. If a prototype is incorrect in the TRACE_EVENT() macro, then
> other macros will catch it.
>
> The trace_event_class structure now holds the probes to be called
> by the callbacks. This removes needing to have each event have
> a separate pointer for the probe.
>
> To handle kprobes and syscalls, since they register probes in a
> different manner, a "reg" field is added to the ftrace_event_class
> structure. If the "reg" field is assigned, then it will be called for
> enabling and disabling of the probe for either ftrace or perf. To let
> the reg function know what is happening, a new enum (trace_reg) is
> created that has the type of control that is needed.
>
> With this new rework, the 82 kernel events and 616 syscall events
> has their footprint dramatically lowered:
>
> text data bss dec hex filename
> 5788186 1337252 9351592 16477030 fb6b66 vmlinux.orig
> 5792282 1333796 9351592 16477670 fb6de6 vmlinux.class
> 5793448 1333780 9351592 16478820 fb7264 vmlinux.tracepoint
> 5796926 1337748 9351592 16486266 fb8f7a vmlinux.data
> 5774316 1306580 9351592 16432488 fabd68 vmlinux.regs
>
> The size went from 16477030 to 16432488, that's a total of 44K
> in savings. With tracepoints being continuously added, this is
> critical that the footprint becomes minimal.

Have you tried doing a BUILD_BUG_ON() on __typeof__() mismatch between
the type of the callback generated by TRACE_EVENT() and the expected
type ? This might help catching tricky preprocessor macro errors early.

Thanks,

Mathieu

>
> Signed-off-by: Steven Rostedt <[email protected]>
> ---
> include/linux/ftrace_event.h | 17 +++++--
> include/linux/syscalls.h | 29 ++---------
> include/linux/tracepoint.h | 12 ++++-
> include/trace/ftrace.h | 110 +++++----------------------------------
> kernel/trace/trace_event_perf.c | 15 ++++-
> kernel/trace/trace_events.c | 26 +++++++---
> kernel/trace/trace_kprobe.c | 34 +++++++++---
> kernel/trace/trace_syscalls.c | 56 +++++++++++++++++++-
> 8 files changed, 151 insertions(+), 148 deletions(-)
>
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 496eea8..dd0051e 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -113,8 +113,21 @@ void tracing_record_cmdline(struct task_struct *tsk);
>
> struct event_filter;
>
> +enum trace_reg {
> + TRACE_REG_REGISTER,
> + TRACE_REG_UNREGISTER,
> + TRACE_REG_PERF_REGISTER,
> + TRACE_REG_PERF_UNREGISTER,
> +};
> +
> +struct ftrace_event_call;
> +
> struct ftrace_event_class {
> char *system;
> + void *probe;
> + void *perf_probe;
> + int (*reg)(struct ftrace_event_call *event,
> + enum trace_reg type);
> };
>
> struct ftrace_event_call {
> @@ -124,8 +137,6 @@ struct ftrace_event_call {
> struct dentry *dir;
> struct trace_event *event;
> int enabled;
> - int (*regfunc)(struct ftrace_event_call *);
> - void (*unregfunc)(struct ftrace_event_call *);
> int id;
> const char *print_fmt;
> int (*raw_init)(struct ftrace_event_call *);
> @@ -137,8 +148,6 @@ struct ftrace_event_call {
> void *data;
>
> int perf_refcount;
> - int (*perf_event_enable)(struct ftrace_event_call *);
> - void (*perf_event_disable)(struct ftrace_event_call *);
> };
>
> #define PERF_MAX_TRACE_SIZE 2048
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index ac5791d..e3348c4 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -103,22 +103,6 @@ struct perf_event_attr;
> #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
> #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
>
> -#ifdef CONFIG_PERF_EVENTS
> -
> -#define TRACE_SYS_ENTER_PERF_INIT(sname) \
> - .perf_event_enable = perf_sysenter_enable, \
> - .perf_event_disable = perf_sysenter_disable,
> -
> -#define TRACE_SYS_EXIT_PERF_INIT(sname) \
> - .perf_event_enable = perf_sysexit_enable, \
> - .perf_event_disable = perf_sysexit_disable,
> -#else
> -#define TRACE_SYS_ENTER_PERF(sname)
> -#define TRACE_SYS_ENTER_PERF_INIT(sname)
> -#define TRACE_SYS_EXIT_PERF(sname)
> -#define TRACE_SYS_EXIT_PERF_INIT(sname)
> -#endif /* CONFIG_PERF_EVENTS */
> -
> #ifdef CONFIG_FTRACE_SYSCALLS
> #define __SC_STR_ADECL1(t, a) #a
> #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__)
> @@ -134,7 +118,8 @@ struct perf_event_attr;
> #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__)
> #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__)
>
> -extern struct ftrace_event_class event_class_syscalls;
> +extern struct ftrace_event_class event_class_syscall_enter;
> +extern struct ftrace_event_class event_class_syscall_exit;
>
> #define SYSCALL_TRACE_ENTER_EVENT(sname) \
> static const struct syscall_metadata __syscall_meta_##sname; \
> @@ -148,14 +133,11 @@ extern struct ftrace_event_class event_class_syscalls;
> __attribute__((section("_ftrace_events"))) \
> event_enter_##sname = { \
> .name = "sys_enter"#sname, \
> - .class = &event_class_syscalls, \
> + .class = &event_class_syscall_enter, \
> .event = &enter_syscall_print_##sname, \
> .raw_init = init_syscall_trace, \
> .define_fields = syscall_enter_define_fields, \
> - .regfunc = reg_event_syscall_enter, \
> - .unregfunc = unreg_event_syscall_enter, \
> .data = (void *)&__syscall_meta_##sname,\
> - TRACE_SYS_ENTER_PERF_INIT(sname) \
> }
>
> #define SYSCALL_TRACE_EXIT_EVENT(sname) \
> @@ -170,14 +152,11 @@ extern struct ftrace_event_class event_class_syscalls;
> __attribute__((section("_ftrace_events"))) \
> event_exit_##sname = { \
> .name = "sys_exit"#sname, \
> - .class = &event_class_syscalls, \
> + .class = &event_class_syscall_exit, \
> .event = &exit_syscall_print_##sname, \
> .raw_init = init_syscall_trace, \
> .define_fields = syscall_exit_define_fields, \
> - .regfunc = reg_event_syscall_exit, \
> - .unregfunc = unreg_event_syscall_exit, \
> .data = (void *)&__syscall_meta_##sname,\
> - TRACE_SYS_EXIT_PERF_INIT(sname) \
> }
>
> #define SYSCALL_METADATA(sname, nb) \
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index c04988a..5876b77 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -173,13 +173,21 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
> { } \
> static inline void trace_##name(proto) \
> { } \
> + static inline int register_trace_##name(void (*probe)(proto)) \
> + { \
> + return -ENOSYS; \
> + } \
> + static inline int unregister_trace_##name(void (*probe)(proto)) \
> + { \
> + return -ENOSYS; \
> + } \
> static inline int \
> - register_trace_##name(void (*probe)(proto), void *data) \
> + register_trace_##name##_data(void (*probe)(proto), void *data) \
> { \
> return -ENOSYS; \
> } \
> static inline int \
> - unregister_trace_##name(void (*probe)(proto), void *data) \
> + unregister_trace_##name##_data(void (*probe)(proto), void *data) \
> { \
> return -ENOSYS; \
> }
> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
> index 0921a8f..62fe622 100644
> --- a/include/trace/ftrace.h
> +++ b/include/trace/ftrace.h
> @@ -381,53 +381,6 @@ static inline notrace int ftrace_get_offsets_##call( \
>
> #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
>
> -#ifdef CONFIG_PERF_EVENTS
> -
> -/*
> - * Generate the functions needed for tracepoint perf_event support.
> - *
> - * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
> - *
> - * static int ftrace_profile_enable_<call>(void)
> - * {
> - * return register_trace_<call>(ftrace_profile_<call>);
> - * }
> - *
> - * static void ftrace_profile_disable_<call>(void)
> - * {
> - * unregister_trace_<call>(ftrace_profile_<call>);
> - * }
> - *
> - */
> -
> -#undef DECLARE_EVENT_CLASS
> -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
> -
> -#undef DEFINE_EVENT
> -#define DEFINE_EVENT(template, name, proto, args) \
> - \
> -static void perf_trace_##name(proto); \
> - \
> -static notrace int \
> -perf_trace_enable_##name(struct ftrace_event_call *unused) \
> -{ \
> - return register_trace_##name(perf_trace_##name); \
> -} \
> - \
> -static notrace void \
> -perf_trace_disable_##name(struct ftrace_event_call *unused) \
> -{ \
> - unregister_trace_##name(perf_trace_##name); \
> -}
> -
> -#undef DEFINE_EVENT_PRINT
> -#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
> - DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
> -
> -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
> -
> -#endif /* CONFIG_PERF_EVENTS */
> -
> /*
> * Stage 4 of the trace events.
> *
> @@ -468,16 +421,6 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
> * event, irq_flags, pc);
> * }
> *
> - * static int ftrace_raw_reg_event_<call>(struct ftrace_event_call *unused)
> - * {
> - * return register_trace_<call>(ftrace_raw_event_<call>);
> - * }
> - *
> - * static void ftrace_unreg_event_<call>(struct ftrace_event_call *unused)
> - * {
> - * unregister_trace_<call>(ftrace_raw_event_<call>);
> - * }
> - *
> * static struct trace_event ftrace_event_type_<call> = {
> * .trace = ftrace_raw_output_<call>, <-- stage 2
> * };
> @@ -504,11 +447,15 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
>
> #ifdef CONFIG_PERF_EVENTS
>
> +#define _TRACE_PERF_PROTO(call, proto) \
> + static notrace void \
> + perf_trace_##call(proto, struct ftrace_event_call *event);
> +
> #define _TRACE_PERF_INIT(call) \
> - .perf_event_enable = perf_trace_enable_##call, \
> - .perf_event_disable = perf_trace_disable_##call,
> + .perf_probe = perf_trace_##call,
>
> #else
> +#define _TRACE_PERF_PROTO(call, proto)
> #define _TRACE_PERF_INIT(call)
> #endif /* CONFIG_PERF_EVENTS */
>
> @@ -542,8 +489,8 @@ perf_trace_disable_##name(struct ftrace_event_call *unused) \
> #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> \
> static notrace void \
> -ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
> - proto) \
> +ftrace_raw_event_##call(proto, \
> + struct ftrace_event_call *event_call) \
> { \
> struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
> struct ring_buffer_event *event; \
> @@ -578,23 +525,6 @@ ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \
> #undef DEFINE_EVENT
> #define DEFINE_EVENT(template, call, proto, args) \
> \
> -static notrace void ftrace_raw_event_##call(proto) \
> -{ \
> - ftrace_raw_event_id_##template(&event_##call, args); \
> -} \
> - \
> -static notrace int \
> -ftrace_raw_reg_event_##call(struct ftrace_event_call *unused) \
> -{ \
> - return register_trace_##call(ftrace_raw_event_##call); \
> -} \
> - \
> -static notrace void \
> -ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused) \
> -{ \
> - unregister_trace_##call(ftrace_raw_event_##call); \
> -} \
> - \
> static struct trace_event ftrace_event_type_##call = { \
> .trace = ftrace_raw_output_##call, \
> };
> @@ -618,9 +548,12 @@ static struct trace_event ftrace_event_type_##call = { \
>
> #undef DECLARE_EVENT_CLASS
> #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> +_TRACE_PERF_PROTO(call, PARAMS(proto)); \
> static const char print_fmt_##call[] = print; \
> static struct ftrace_event_class __used event_class_##call = { \
> - .system = __stringify(TRACE_SYSTEM) \
> + .system = __stringify(TRACE_SYSTEM), \
> + .probe = ftrace_raw_event_##call, \
> + _TRACE_PERF_INIT(call) \
> }
>
> #undef DEFINE_EVENT
> @@ -633,11 +566,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
> .class = &event_class_##template, \
> .event = &ftrace_event_type_##call, \
> .raw_init = trace_event_raw_init, \
> - .regfunc = ftrace_raw_reg_event_##call, \
> - .unregfunc = ftrace_raw_unreg_event_##call, \
> .print_fmt = print_fmt_##template, \
> .define_fields = ftrace_define_fields_##template, \
> - _TRACE_PERF_INIT(call) \
> }
>
> #undef DEFINE_EVENT_PRINT
> @@ -651,12 +581,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
> .name = #call, \
> .class = &event_class_##template, \
> .event = &ftrace_event_type_##call, \
> - .raw_init = trace_event_raw_init, \
> - .regfunc = ftrace_raw_reg_event_##call, \
> - .unregfunc = ftrace_raw_unreg_event_##call, \
> .print_fmt = print_fmt_##call, \
> - .define_fields = ftrace_define_fields_##template, \
> - _TRACE_PERF_INIT(call) \
> }
>
> #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
> @@ -756,8 +681,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
> #undef DECLARE_EVENT_CLASS
> #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> static notrace void \
> -perf_trace_templ_##call(struct ftrace_event_call *event_call, \
> - proto) \
> +perf_trace_##call(proto, struct ftrace_event_call *event_call) \
> { \
> struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
> struct ftrace_raw_##call *entry; \
> @@ -792,13 +716,7 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
> }
>
> #undef DEFINE_EVENT
> -#define DEFINE_EVENT(template, call, proto, args) \
> -static notrace void perf_trace_##call(proto) \
> -{ \
> - struct ftrace_event_call *event_call = &event_##call; \
> - \
> - perf_trace_templ_##template(event_call, args); \
> -}
> +#define DEFINE_EVENT(template, call, proto, args)
>
> #undef DEFINE_EVENT_PRINT
> #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index 81f691e..95df5a7 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -44,7 +44,12 @@ static int perf_trace_event_enable(struct ftrace_event_call *event)
> rcu_assign_pointer(perf_trace_buf_nmi, buf);
> }
>
> - ret = event->perf_event_enable(event);
> + if (event->class->reg)
> + ret = event->class->reg(event, TRACE_REG_PERF_REGISTER);
> + else
> + ret = tracepoint_probe_register(event->name,
> + event->class->perf_probe,
> + event);
> if (!ret) {
> total_ref_count++;
> return 0;
> @@ -70,7 +75,8 @@ int perf_trace_enable(int event_id)
>
> mutex_lock(&event_mutex);
> list_for_each_entry(event, &ftrace_events, list) {
> - if (event->id == event_id && event->perf_event_enable &&
> + if (event->id == event_id &&
> + event->class && event->class->perf_probe &&
> try_module_get(event->mod)) {
> ret = perf_trace_event_enable(event);
> break;
> @@ -88,7 +94,10 @@ static void perf_trace_event_disable(struct ftrace_event_call *event)
> if (--event->perf_refcount > 0)
> return;
>
> - event->perf_event_disable(event);
> + if (event->class->reg)
> + event->class->reg(event, TRACE_REG_PERF_UNREGISTER);
> + else
> + tracepoint_probe_unregister(event->name, event->class->perf_probe, event);
>
> if (!--total_ref_count) {
> buf = perf_trace_buf;
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index f6893cc..f84cfcb 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -126,13 +126,23 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
> if (call->enabled) {
> call->enabled = 0;
> tracing_stop_cmdline_record();
> - call->unregfunc(call);
> + if (call->class->reg)
> + call->class->reg(call, TRACE_REG_UNREGISTER);
> + else
> + tracepoint_probe_unregister(call->name,
> + call->class->probe,
> + call);
> }
> break;
> case 1:
> if (!call->enabled) {
> tracing_start_cmdline_record();
> - ret = call->regfunc(call);
> + if (call->class->reg)
> + ret = call->class->reg(call, TRACE_REG_REGISTER);
> + else
> + ret = tracepoint_probe_register(call->name,
> + call->class->probe,
> + call);
> if (ret) {
> tracing_stop_cmdline_record();
> pr_info("event trace: Could not enable event "
> @@ -170,7 +180,8 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
> mutex_lock(&event_mutex);
> list_for_each_entry(call, &ftrace_events, list) {
>
> - if (!call->name || !call->regfunc)
> + if (!call->name || !call->class ||
> + (!call->class->probe && !call->class->reg))
> continue;
>
> if (match &&
> @@ -296,7 +307,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
> * The ftrace subsystem is for showing formats only.
> * They can not be enabled or disabled via the event files.
> */
> - if (call->regfunc)
> + if (call->class && (call->class->probe || call->class->reg))
> return call;
> }
>
> @@ -449,7 +460,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
>
> mutex_lock(&event_mutex);
> list_for_each_entry(call, &ftrace_events, list) {
> - if (!call->name || !call->regfunc)
> + if (!call->name || !call->class ||
> + (!call->class->probe && !call->class->reg))
> continue;
>
> if (system && strcmp(call->class->system, system) != 0)
> @@ -934,11 +946,11 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
> return -1;
> }
>
> - if (call->regfunc)
> + if (call->class->probe || call->class->reg)
> trace_create_file("enable", 0644, call->dir, call,
> enable);
>
> - if (call->id && call->perf_event_enable)
> + if (call->id && (call->class->perf_probe || call->class->reg))
> trace_create_file("id", 0444, call->dir, call,
> id);
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index eda220b..f8af21a 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -202,6 +202,7 @@ struct trace_probe {
> unsigned long nhit;
> unsigned int flags; /* For TP_FLAG_* */
> const char *symbol; /* symbol name */
> + struct ftrace_event_class class;
> struct ftrace_event_call call;
> struct trace_event event;
> unsigned int nr_args;
> @@ -323,6 +324,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
> goto error;
> }
>
> + tp->call.class = &tp->class;
> tp->call.name = kstrdup(event, GFP_KERNEL);
> if (!tp->call.name)
> goto error;
> @@ -332,8 +334,8 @@ static struct trace_probe *alloc_trace_probe(const char *group,
> goto error;
> }
>
> - tp->call.class->system = kstrdup(group, GFP_KERNEL);
> - if (!tp->call.class->system)
> + tp->class.system = kstrdup(group, GFP_KERNEL);
> + if (!tp->class.system)
> goto error;
>
> INIT_LIST_HEAD(&tp->list);
> @@ -1302,6 +1304,26 @@ static void probe_perf_disable(struct ftrace_event_call *call)
> }
> #endif /* CONFIG_PERF_EVENTS */
>
> +static __kprobes
> +int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
> +{
> + switch (type) {
> + case TRACE_REG_REGISTER:
> + return probe_event_enable(event);
> + case TRACE_REG_UNREGISTER:
> + probe_event_disable(event);
> + return 0;
> +
> +#ifdef CONFIG_PERF_EVENTS
> + case TRACE_REG_PERF_REGISTER:
> + return probe_perf_enable(event);
> + case TRACE_REG_PERF_UNREGISTER:
> + probe_perf_disable(event);
> + return 0;
> +#endif
> + }
> + return 0;
> +}
>
> static __kprobes
> int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
> @@ -1355,13 +1377,7 @@ static int register_probe_event(struct trace_probe *tp)
> return -ENODEV;
> }
> call->enabled = 0;
> - call->regfunc = probe_event_enable;
> - call->unregfunc = probe_event_disable;
> -
> -#ifdef CONFIG_PERF_EVENTS
> - call->perf_event_enable = probe_perf_enable;
> - call->perf_event_disable = probe_perf_disable;
> -#endif
> + call->class->reg = kprobe_register;
> call->data = tp;
> ret = trace_add_event_call(call);
> if (ret) {
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 31fc95a..c92934d 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -14,8 +14,19 @@ static int sys_refcount_exit;
> static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
> static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
>
> -struct ftrace_event_class event_class_syscalls = {
> - .system = "syscalls"
> +static int syscall_enter_register(struct ftrace_event_call *event,
> + enum trace_reg type);
> +static int syscall_exit_register(struct ftrace_event_call *event,
> + enum trace_reg type);
> +
> +struct ftrace_event_class event_class_syscall_enter = {
> + .system = "syscalls",
> + .reg = syscall_enter_register
> +};
> +
> +struct ftrace_event_class event_class_syscall_exit = {
> + .system = "syscalls",
> + .reg = syscall_exit_register
> };
>
> extern unsigned long __start_syscalls_metadata[];
> @@ -586,3 +597,44 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
>
> #endif /* CONFIG_PERF_EVENTS */
>
> +static int syscall_enter_register(struct ftrace_event_call *event,
> + enum trace_reg type)
> +{
> + switch (type) {
> + case TRACE_REG_REGISTER:
> + return reg_event_syscall_enter(event);
> + case TRACE_REG_UNREGISTER:
> + unreg_event_syscall_enter(event);
> + return 0;
> +
> +#ifdef CONFIG_PERF_EVENTS
> + case TRACE_REG_PERF_REGISTER:
> + return perf_sysenter_enable(event);
> + case TRACE_REG_PERF_UNREGISTER:
> + perf_sysenter_disable(event);
> + return 0;
> +#endif
> + }
> + return 0;
> +}
> +
> +static int syscall_exit_register(struct ftrace_event_call *event,
> + enum trace_reg type)
> +{
> + switch (type) {
> + case TRACE_REG_REGISTER:
> + return reg_event_syscall_exit(event);
> + case TRACE_REG_UNREGISTER:
> + unreg_event_syscall_exit(event);
> + return 0;
> +
> +#ifdef CONFIG_PERF_EVENTS
> + case TRACE_REG_PERF_REGISTER:
> + return perf_sysexit_enable(event);
> + case TRACE_REG_PERF_UNREGISTER:
> + perf_sysexit_disable(event);
> + return 0;
> +#endif
> + }
> + return 0;
> +}
> --
> 1.7.0
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-29 00:00:22

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Wed, 2010-04-28 at 16:44 -0400, Mathieu Desnoyers wrote:
> * Steven Rostedt ([email protected]) wrote:
> > From: Steven Rostedt <[email protected]>
> >
> > This patch removes the register functions of TRACE_EVENT() to enable
> > and disable tracepoints. The registering of a event is now down
> > directly in the trace_events.c file. The tracepoint_probe_register()
> > is now called directly.
> >
> > The prototypes are no longer type checked, but this should not be
> > an issue since the tracepoints are created automatically by the
> > macros. If a prototype is incorrect in the TRACE_EVENT() macro, then
> > other macros will catch it.
> >
> > The trace_event_class structure now holds the probes to be called
> > by the callbacks. This removes needing to have each event have
> > a separate pointer for the probe.
> >
> > To handle kprobes and syscalls, since they register probes in a
> > different manner, a "reg" field is added to the ftrace_event_class
> > structure. If the "reg" field is assigned, then it will be called for
> > enabling and disabling of the probe for either ftrace or perf. To let
> > the reg function know what is happening, a new enum (trace_reg) is
> > created that has the type of control that is needed.
> >
> > With this new rework, the 82 kernel events and 616 syscall events
> > has their footprint dramatically lowered:
> >
> > text data bss dec hex filename
> > 5788186 1337252 9351592 16477030 fb6b66 vmlinux.orig
> > 5792282 1333796 9351592 16477670 fb6de6 vmlinux.class
> > 5793448 1333780 9351592 16478820 fb7264 vmlinux.tracepoint
> > 5796926 1337748 9351592 16486266 fb8f7a vmlinux.data
> > 5774316 1306580 9351592 16432488 fabd68 vmlinux.regs
> >
> > The size went from 16477030 to 16432488, that's a total of 44K
> > in savings. With tracepoints being continuously added, this is
> > critical that the footprint becomes minimal.
>
> Have you tried doing a BUILD_BUG_ON() on __typeof__() mismatch between
> the type of the callback generated by TRACE_EVENT() and the expected
> type ? This might help catching tricky preprocessor macro errors early.

Well, we could, but if it is broken once, it is broken everywhere.

-- Steve

2010-04-29 00:05:32

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Wed, 2010-04-28 at 16:44 -0400, Mathieu Desnoyers wrote:
> > * Steven Rostedt ([email protected]) wrote:
> > > From: Steven Rostedt <[email protected]>
> > >
> > > This patch removes the register functions of TRACE_EVENT() to enable
> > > and disable tracepoints. The registering of a event is now down
> > > directly in the trace_events.c file. The tracepoint_probe_register()
> > > is now called directly.
> > >
> > > The prototypes are no longer type checked, but this should not be
> > > an issue since the tracepoints are created automatically by the
> > > macros. If a prototype is incorrect in the TRACE_EVENT() macro, then
> > > other macros will catch it.
> > >
> > > The trace_event_class structure now holds the probes to be called
> > > by the callbacks. This removes needing to have each event have
> > > a separate pointer for the probe.
> > >
> > > To handle kprobes and syscalls, since they register probes in a
> > > different manner, a "reg" field is added to the ftrace_event_class
> > > structure. If the "reg" field is assigned, then it will be called for
> > > enabling and disabling of the probe for either ftrace or perf. To let
> > > the reg function know what is happening, a new enum (trace_reg) is
> > > created that has the type of control that is needed.
> > >
> > > With this new rework, the 82 kernel events and 616 syscall events
> > > has their footprint dramatically lowered:
> > >
> > > text data bss dec hex filename
> > > 5788186 1337252 9351592 16477030 fb6b66 vmlinux.orig
> > > 5792282 1333796 9351592 16477670 fb6de6 vmlinux.class
> > > 5793448 1333780 9351592 16478820 fb7264 vmlinux.tracepoint
> > > 5796926 1337748 9351592 16486266 fb8f7a vmlinux.data
> > > 5774316 1306580 9351592 16432488 fabd68 vmlinux.regs
> > >
> > > The size went from 16477030 to 16432488, that's a total of 44K
> > > in savings. With tracepoints being continuously added, this is
> > > critical that the footprint becomes minimal.
> >
> > Have you tried doing a BUILD_BUG_ON() on __typeof__() mismatch between
> > the type of the callback generated by TRACE_EVENT() and the expected
> > type ? This might help catching tricky preprocessor macro errors early.
>
> Well, we could, but if it is broken once, it is broken everywhere.

I fear about "subtly" broken things, where trace data could end up being
incorrectly typed and/or corrupted. I think this BUILD_BUG_ON() will
become very useful.

Thanks,

Mathieu

>
> -- Steve
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-29 00:20:26

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Wed, 2010-04-28 at 20:05 -0400, Mathieu Desnoyers wrote:
> * Steven Rostedt ([email protected]) wrote:

> > > Have you tried doing a BUILD_BUG_ON() on __typeof__() mismatch between
> > > the type of the callback generated by TRACE_EVENT() and the expected
> > > type ? This might help catching tricky preprocessor macro errors early.
> >
> > Well, we could, but if it is broken once, it is broken everywhere.
>
> I fear about "subtly" broken things, where trace data could end up being
> incorrectly typed and/or corrupted. I think this BUILD_BUG_ON() will
> become very useful.

Actually, I'm not sure what you want to check. What is not checked is
the prototype that is created, to the prototype that is passed to the
tracepoint_probe_register. Other parts are still checked. If you
mis-match the args with the parameters, there are still places that the
compiler will flag it. There really is not much less protection here
than there was before.

Instead of calling register_trace_##name that is created for each
tracepoint, we now call the tracepoint_probe_register() directly in the
C file with the generated probe.

Both the probe and the tracepoint are created from the same data. I'm
not seeing where you want to add this check.

-- Steve

2010-04-30 16:51:19

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Thu, 2010-04-29 at 09:36 -0400, Mathieu Desnoyers wrote:
>
> > > Instead of calling register_trace_##name that is created for each
> > > tracepoint, we now call the tracepoint_probe_register() directly in the
> > > C file with the generated probe.
> > >
> > > Both the probe and the tracepoint are created from the same data. I'm
> > > not seeing where you want to add this check.
> >
> > So if they are created from the same data, we can expect this test to
> > always pass, which is good (and expected).
> >
> > I'd add this extra check before casting the callback to (void *) before
> > it is passed to tracepoint_probe_register(). Let's just call this
> > internal preprocessing macro integrity check. As long as it does not add
> > a runtime cost, there is no reason not to put this extra check.
>
> The problem is, the cast is now performed in a C file for all events.
> There's no way to know what to cast it to there. This is out of the
> automation of the macro.
>
> We use to have the cast check by creating code that would create the
> "register_trace_##call", and the typecheck was doing by passing the data
> to this function. But we removed this code out of the per event, it was
> adding lots of text footprint, and moved it to one single function that
> handles all events. It is just expected that the callback created
> matches the function it was done.
>
> If you are overly paranoid, we could create a special function that
> tests that the callback format that is created matches the tracepoint
> that is created, and make it so GCC sees that nothing calls it and
> removes it at final link. But I still see this as a waste.
>
>
> The tracepoint is created in include/linux/tracepoint.h:
>
> #define TRACE_EVENT(name, proto, args, struct, assign, print) \
> DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

Can we add something like this to DECLARE_TRACE ? (not convinced it is
valid though)

static inline void check_trace_##name(cb_type)
{
BUILD_BUG_ON(!__same_type(void (*probe)(TP_PROTO(proto), void *data),
cb_type));
}

>
> The callback is created in include/trace/ftrace.h:
>
> #undef TRACE_EVENT
> #define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
> DECLARE_EVENT_CLASS(name, \
> PARAMS(proto), \
> PARAMS(args), \
> PARAMS(tstruct), \
> PARAMS(assign), \
> PARAMS(print)); \
> DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
>
> #undef DECLARE_EVENT_CLASS
> #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> \
> static notrace void \
> ftrace_raw_event_##call(proto, \
> struct ftrace_event_call *event_call) \
> [...]
>

Either within this callback, or in a dummy static function after, we
could add:

check_trace_##call(ftrace_raw_event_##call);

So.. you are the preprocessor expert, do you think this could fly ? ;)

Thanks,

Mathieu

>
> Thus the "proto" field of the TRACE_EVENT() is used to make both the
> tracepoint and the callback. We add the struct ftrace_event_call
> *event_call which is the data we pass to the callback.
>
> Now, where this gets called is in kernel/trace/trace_events.c:
>
> tracepoint_probe_register(call->name,
> call->class->probe,
> call);
>
> This is where we lose the typecheck. So my question is... where do you
> want to put in a check?
>
> -- Steve
>
>
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-30 17:09:49

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Thu, 2010-04-29 at 10:55 -0400, Mathieu Desnoyers wrote:
> > * Steven Rostedt ([email protected]) wrote:
>
> > >
> > > The tracepoint is created in include/linux/tracepoint.h:
> > >
> > > #define TRACE_EVENT(name, proto, args, struct, assign, print) \
> > > DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
> >
> > Can we add something like this to DECLARE_TRACE ? (not convinced it is
> > valid though)
> >
> > static inline void check_trace_##name(cb_type)
> > {
> > BUILD_BUG_ON(!__same_type(void (*probe)(TP_PROTO(proto), void *data),
> > cb_type));
> > }
> >
>
> We could add it, but I'm not sure it would add any more protection. If
> for some strange reason the prototype got out of sync, would would
> prevent the cb_type from getting out of sync with it too, and not cause
> this to fail, but still have the same bug.
>
> Honestly, I find this a bit too paranoid. Again, the callback and the
> tracepoint are made with the same data. I find it hard to think that it
> would break somehow. Yes, perhaps it will break if you modify ftrace.h,
> but then if you are doing that, you should know better than to break
> things :-)

How can you be sure that the "void *data" type will match the type at
the same position in the generated callback ?

Honestly, I don't think kernel programmers write bug-free code. And I
include myself when I say that. So the best we can do, on top of code
review, is to use all the verification and debugging tools available to
minimize the amount of undetected bugs. Rather than try to find out the
cause of subtly broken tracepoint callbacks with their runtime
side-effects, I strongly prefer to let the compiler find this out as
early as possible.

I also don't trust that these complex TRACE_EVENT() preprocessor macros
will never ever have bugs. That's just doomed to happen one day or
another. Again, call me paranoid if you like, but I think adding this
type checking is justified.

I am providing the type check implementation in a separate email. It
will need to be extended to support the extra data parameter you plan to
add.

Thanks,

Mathieu

>
>
> > >
> > > The callback is created in include/trace/ftrace.h:
> > >
> > > #undef TRACE_EVENT
> > > #define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
> > > DECLARE_EVENT_CLASS(name, \
> > > PARAMS(proto), \
> > > PARAMS(args), \
> > > PARAMS(tstruct), \
> > > PARAMS(assign), \
> > > PARAMS(print)); \
> > > DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
> > >
> > > #undef DECLARE_EVENT_CLASS
> > > #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> > > \
> > > static notrace void \
> > > ftrace_raw_event_##call(proto, \
> > > struct ftrace_event_call *event_call) \
> > > [...]
> > >
> >
> > Either within this callback, or in a dummy static function after, we
> > could add:
> >
> > check_trace_##call(ftrace_raw_event_##call);
> >
> > So.. you are the preprocessor expert, do you think this could fly ? ;)
>
>
>
> Sure, the static function you did could be added, and hope that gcc is
> smart enough to get rid of it (add __unused to it). But what are we
> really checking here? If CPP works?
>
> -- Steve
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-30 17:20:52

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Thu, 2010-04-29 at 10:55 -0400, Mathieu Desnoyers wrote:
> * Steven Rostedt ([email protected]) wrote:

> >
> > The tracepoint is created in include/linux/tracepoint.h:
> >
> > #define TRACE_EVENT(name, proto, args, struct, assign, print) \
> > DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
>
> Can we add something like this to DECLARE_TRACE ? (not convinced it is
> valid though)
>
> static inline void check_trace_##name(cb_type)
> {
> BUILD_BUG_ON(!__same_type(void (*probe)(TP_PROTO(proto), void *data),
> cb_type));
> }
>

We could add it, but I'm not sure it would add any more protection. If
for some strange reason the prototype got out of sync, would would
prevent the cb_type from getting out of sync with it too, and not cause
this to fail, but still have the same bug.

Honestly, I find this a bit too paranoid. Again, the callback and the
tracepoint are made with the same data. I find it hard to think that it
would break somehow. Yes, perhaps it will break if you modify ftrace.h,
but then if you are doing that, you should know better than to break
things :-)


> >
> > The callback is created in include/trace/ftrace.h:
> >
> > #undef TRACE_EVENT
> > #define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
> > DECLARE_EVENT_CLASS(name, \
> > PARAMS(proto), \
> > PARAMS(args), \
> > PARAMS(tstruct), \
> > PARAMS(assign), \
> > PARAMS(print)); \
> > DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
> >
> > #undef DECLARE_EVENT_CLASS
> > #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> > \
> > static notrace void \
> > ftrace_raw_event_##call(proto, \
> > struct ftrace_event_call *event_call) \
> > [...]
> >
>
> Either within this callback, or in a dummy static function after, we
> could add:
>
> check_trace_##call(ftrace_raw_event_##call);
>
> So.. you are the preprocessor expert, do you think this could fly ? ;)



Sure, the static function you did could be added, and hope that gcc is
smart enough to get rid of it (add __unused to it). But what are we
really checking here? If CPP works?

-- Steve

2010-04-30 18:17:24

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Fri, 2010-04-30 at 13:09 -0400, Mathieu Desnoyers wrote:

> How can you be sure that the "void *data" type will match the type at
> the same position in the generated callback ?


We do it all the time in the kernel with no type checking. Just look at
all the users of file->private.


>
> Honestly, I don't think kernel programmers write bug-free code. And I
> include myself when I say that. So the best we can do, on top of code
> review, is to use all the verification and debugging tools available to
> minimize the amount of undetected bugs. Rather than try to find out the
> cause of subtly broken tracepoint callbacks with their runtime
> side-effects, I strongly prefer to let the compiler find this out as
> early as possible.

If it is possible sure, but that's the point. Where do you add the
check? The typecast is in the C code that is constant for all trace
events.

>
> I also don't trust that these complex TRACE_EVENT() preprocessor macros

Thanks for your vote of confidence.

> will never ever have bugs. That's just doomed to happen one day or
> another. Again, call me paranoid if you like, but I think adding this
> type checking is justified.

Where do you add the typecheck?? As I said before, if the TRACE_EVENT()
macros are broken, then so will the typecheck, and it will not catch the
errors.

Sure the event macros can have bugs, but if it does then it will have
bugs for all. Because it is automated. If there is a bug, it wont be
because of a missed type being passed in, it would be because of one of
the extra macros we have that processes the same type incorrectly.

>
> I am providing the type check implementation in a separate email. It
> will need to be extended to support the extra data parameter you plan to
> add.

I saw the patch, but how does it help?

I use "proto" to make the tracepoint and the callback, so I can add
somewhere this "check_trace_callback_type_##name(proto)", but if the
macros break somehow, that means proto changed between two references of
it, but what keeps proto from breaking at both callback creation and the
typecheck.

Basically, you are saying that somehow the argument "proto" can change
between two uses of it. I don't really see that happening, and I'm not
paranoid enough to think that's an issue. Adding checks that don't
really check anything, honestly I find a waste, and just more confusion
in the macros.

-- Steve


> >
> >
> > > >
> > > > The callback is created in include/trace/ftrace.h:
> > > >
> > > > #undef TRACE_EVENT
> > > > #define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
> > > > DECLARE_EVENT_CLASS(name, \
> > > > PARAMS(proto), \
> > > > PARAMS(args), \
> > > > PARAMS(tstruct), \
> > > > PARAMS(assign), \
> > > > PARAMS(print)); \
> > > > DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
> > > >
> > > > #undef DECLARE_EVENT_CLASS
> > > > #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> > > > \
> > > > static notrace void \
> > > > ftrace_raw_event_##call(proto, \
> > > > struct ftrace_event_call *event_call) \
> > > > [...]
> > > >
> > >
> > > Either within this callback, or in a dummy static function after, we
> > > could add:
> > >
> > > check_trace_##call(ftrace_raw_event_##call);
> > >
> > > So.. you are the preprocessor expert, do you think this could fly ? ;)
> >
> >
> >
> > Sure, the static function you did could be added, and hope that gcc is
> > smart enough to get rid of it (add __unused to it). But what are we
> > really checking here? If CPP works?
> >
> > -- Steve
> >
> >
>

2010-04-30 17:21:20

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Thu, 2010-04-29 at 09:36 -0400, Mathieu Desnoyers wrote:

> > Instead of calling register_trace_##name that is created for each
> > tracepoint, we now call the tracepoint_probe_register() directly in the
> > C file with the generated probe.
> >
> > Both the probe and the tracepoint are created from the same data. I'm
> > not seeing where you want to add this check.
>
> So if they are created from the same data, we can expect this test to
> always pass, which is good (and expected).
>
> I'd add this extra check before casting the callback to (void *) before
> it is passed to tracepoint_probe_register(). Let's just call this
> internal preprocessing macro integrity check. As long as it does not add
> a runtime cost, there is no reason not to put this extra check.

The problem is, the cast is now performed in a C file for all events.
There's no way to know what to cast it to there. This is out of the
automation of the macro.

We use to have the cast check by creating code that would create the
"register_trace_##call", and the typecheck was doing by passing the data
to this function. But we removed this code out of the per event, it was
adding lots of text footprint, and moved it to one single function that
handles all events. It is just expected that the callback created
matches the function it was done.

If you are overly paranoid, we could create a special function that
tests that the callback format that is created matches the tracepoint
that is created, and make it so GCC sees that nothing calls it and
removes it at final link. But I still see this as a waste.


The tracepoint is created in include/linux/tracepoint.h:

#define TRACE_EVENT(name, proto, args, struct, assign, print) \
DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))

The callback is created in include/trace/ftrace.h:

#undef TRACE_EVENT
#define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
DECLARE_EVENT_CLASS(name, \
PARAMS(proto), \
PARAMS(args), \
PARAMS(tstruct), \
PARAMS(assign), \
PARAMS(print)); \
DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));

#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
\
static notrace void \
ftrace_raw_event_##call(proto, \
struct ftrace_event_call *event_call) \
[...]


Thus the "proto" field of the TRACE_EVENT() is used to make both the
tracepoint and the callback. We add the struct ftrace_event_call
*event_call which is the data we pass to the callback.

Now, where this gets called is in kernel/trace/trace_events.c:

tracepoint_probe_register(call->name,
call->class->probe,
call);

This is where we lose the typecheck. So my question is... where do you
want to put in a check?

-- Steve



2010-04-30 19:06:29

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Fri, 2010-04-30 at 13:09 -0400, Mathieu Desnoyers wrote:
>
> > How can you be sure that the "void *data" type will match the type at
> > the same position in the generated callback ?
>
>
> We do it all the time in the kernel with no type checking. Just look at
> all the users of file->private.
>
>
> >
> > Honestly, I don't think kernel programmers write bug-free code. And I
> > include myself when I say that. So the best we can do, on top of code
> > review, is to use all the verification and debugging tools available to
> > minimize the amount of undetected bugs. Rather than try to find out the
> > cause of subtly broken tracepoint callbacks with their runtime
> > side-effects, I strongly prefer to let the compiler find this out as
> > early as possible.
>
> If it is possible sure, but that's the point. Where do you add the
> check? The typecast is in the C code that is constant for all trace
> events.

You can add the call to the static inline type check directly within the
generated probe function, right after the local variable declarations.

>
> >
> > I also don't trust that these complex TRACE_EVENT() preprocessor macros
>
> Thanks for your vote of confidence.

Please don't take this personally. As I said above, I include myself in
the list of people I don't trust to write entirely bug-free code. I'm
just saying that we should not overlook a possibility to detect more
bugs automatically when we have one, especially if this results in no
object code change.

>
> > will never ever have bugs. That's just doomed to happen one day or
> > another. Again, call me paranoid if you like, but I think adding this
> > type checking is justified.
>
> Where do you add the typecheck?? As I said before, if the TRACE_EVENT()
> macros are broken, then so will the typecheck, and it will not catch the
> errors.
>
> Sure the event macros can have bugs, but if it does then it will have
> bugs for all. Because it is automated. If there is a bug, it wont be
> because of a missed type being passed in, it would be because of one of
> the extra macros we have that processes the same type incorrectly.
>
> >
> > I am providing the type check implementation in a separate email. It
> > will need to be extended to support the extra data parameter you plan to
> > add.
>
> I saw the patch, but how does it help?
>
> I use "proto" to make the tracepoint and the callback, so I can add
> somewhere this "check_trace_callback_type_##name(proto)", but if the
> macros break somehow, that means proto changed between two references of
> it, but what keeps proto from breaking at both callback creation and the
> typecheck.
>
> Basically, you are saying that somehow the argument "proto" can change
> between two uses of it. I don't really see that happening, and I'm not
> paranoid enough to think that's an issue. Adding checks that don't
> really check anything, honestly I find a waste, and just more confusion
> in the macros.

In the TRACE_EVENT() case, without the extra "void *data" argument,
it is indeed checking that the "proto" of the callback you create is
that same as the "proto" expected by the tracepoint call. However, given
that you plan on adding other parameters besides "proto", then the added
type-checking makes more and more sense.

Thanks,

Mathieu

>
> -- Steve
>
>
> > >
> > >
> > > > >
> > > > > The callback is created in include/trace/ftrace.h:
> > > > >
> > > > > #undef TRACE_EVENT
> > > > > #define TRACE_EVENT(name, proto, args, tstuct, assign, print) \
> > > > > DECLARE_EVENT_CLASS(name, \
> > > > > PARAMS(proto), \
> > > > > PARAMS(args), \
> > > > > PARAMS(tstruct), \
> > > > > PARAMS(assign), \
> > > > > PARAMS(print)); \
> > > > > DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
> > > > >
> > > > > #undef DECLARE_EVENT_CLASS
> > > > > #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
> > > > > \
> > > > > static notrace void \
> > > > > ftrace_raw_event_##call(proto, \
> > > > > struct ftrace_event_call *event_call) \
> > > > > [...]
> > > > >
> > > >
> > > > Either within this callback, or in a dummy static function after, we
> > > > could add:
> > > >
> > > > check_trace_##call(ftrace_raw_event_##call);
> > > >
> > > > So.. you are the preprocessor expert, do you think this could fly ? ;)
> > >
> > >
> > >
> > > Sure, the static function you did could be added, and hope that gcc is
> > > smart enough to get rid of it (add __unused to it). But what are we
> > > really checking here? If CPP works?
> > >
> > > -- Steve
> > >
> > >
> >
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-30 19:48:15

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Fri, 2010-04-30 at 15:06 -0400, Mathieu Desnoyers wrote:

> > If it is possible sure, but that's the point. Where do you add the
> > check? The typecast is in the C code that is constant for all trace
> > events.
>
> You can add the call to the static inline type check directly within the
> generated probe function, right after the local variable declarations.

Well, one thing, the callback is not going to be the same as the
DECLARE_TRACE() because the prototype ends with "void *data", and the
function being called actually uses the type of that data.

We now will have:

DEFINE_TRACE(mytracepoint, int myarg, myarg);

void mycallback(int myarg, struct mystuct *mydata);

register_trace_mytracepoint_data(mycallback, mydata)

There's no place in DEFINE_TRACE to be able to test the type of data
that is being passed back. I could make the calling function be:

void mycallback(int myarg, void *data)
{
struct mystruct *mydata = data;
[...]

Because the data is defined uniquely by the caller that registers a
callback. Each function can register its own data type.

>
> >
> > >
> > > I also don't trust that these complex TRACE_EVENT() preprocessor macros
> >
> > Thanks for your vote of confidence.
>
> Please don't take this personally. As I said above, I include myself in
> the list of people I don't trust to write entirely bug-free code. I'm
> just saying that we should not overlook a possibility to detect more
> bugs automatically when we have one, especially if this results in no
> object code change.

The point being is that this is not about buggy code, but the fact that
the same data is being used in two places, you want to test to make sure
it is the same. I don't see how this helps.



>
> >
> > > will never ever have bugs. That's just doomed to happen one day or
> > > another. Again, call me paranoid if you like, but I think adding this
> > > type checking is justified.
> >
> > Where do you add the typecheck?? As I said before, if the TRACE_EVENT()
> > macros are broken, then so will the typecheck, and it will not catch the
> > errors.
> >
> > Sure the event macros can have bugs, but if it does then it will have
> > bugs for all. Because it is automated. If there is a bug, it wont be
> > because of a missed type being passed in, it would be because of one of
> > the extra macros we have that processes the same type incorrectly.
> >
> > >
> > > I am providing the type check implementation in a separate email. It
> > > will need to be extended to support the extra data parameter you plan to
> > > add.
> >
> > I saw the patch, but how does it help?
> >
> > I use "proto" to make the tracepoint and the callback, so I can add
> > somewhere this "check_trace_callback_type_##name(proto)", but if the
> > macros break somehow, that means proto changed between two references of
> > it, but what keeps proto from breaking at both callback creation and the
> > typecheck.
> >
> > Basically, you are saying that somehow the argument "proto" can change
> > between two uses of it. I don't really see that happening, and I'm not
> > paranoid enough to think that's an issue. Adding checks that don't
> > really check anything, honestly I find a waste, and just more confusion
> > in the macros.
>
> In the TRACE_EVENT() case, without the extra "void *data" argument,
> it is indeed checking that the "proto" of the callback you create is
> that same as the "proto" expected by the tracepoint call. However, given
> that you plan on adding other parameters besides "proto", then the added
> type-checking makes more and more sense.

But you can not test it! That's my point.

The first part of proto will be the same, and that's all we can test.
But the data parameter that the DECLARE_TRACE() is going to create will
be void *. Which means we can't test it. This is something that C lacks,
and we could test it in C++ if we did this with templates. The only way
to test it is at runtime with a magic number in the data field.

This is the same as the file->private data. You can't test it at build
time.

Let me explain this again:

DECLARE_TRACE(name, proto, args);

Will call the function like:

callback(args, data);

The callback will be at best:

int callback(proto, void *data);


because the data being passed in is not defined yet. It is defined at
the point of the registering of the callback. You can have two callbacks
registered to the same tracepoint with two different types as the data
field.

So what is it that this check is testing?

-- Steve

2010-04-30 20:07:56

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Fri, 2010-04-30 at 15:06 -0400, Mathieu Desnoyers wrote:
>
> > > If it is possible sure, but that's the point. Where do you add the
> > > check? The typecast is in the C code that is constant for all trace
> > > events.
> >
> > You can add the call to the static inline type check directly within the
> > generated probe function, right after the local variable declarations.
>
> Well, one thing, the callback is not going to be the same as the
> DECLARE_TRACE() because the prototype ends with "void *data", and the
> function being called actually uses the type of that data.
>
> We now will have:
>
> DEFINE_TRACE(mytracepoint, int myarg, myarg);
>
> void mycallback(int myarg, struct mystuct *mydata);
>
> register_trace_mytracepoint_data(mycallback, mydata)
>
> There's no place in DEFINE_TRACE to be able to test the type of data
> that is being passed back. I could make the calling function be:
>
> void mycallback(int myarg, void *data)
> {
> struct mystruct *mydata = data;
> [...]
>
> Because the data is defined uniquely by the caller that registers a
> callback. Each function can register its own data type.

Yep. There would need to be a cast from void * to struct mystruct *
at the beginning of the callback as you propose here. I prefer this cast
to be explicit (as proposed here) rather than hidden within the entire
function call (void *) cast.

>
> >
> > >
> > > >
> > > > I also don't trust that these complex TRACE_EVENT() preprocessor macros
> > >
> > > Thanks for your vote of confidence.
> >
> > Please don't take this personally. As I said above, I include myself in
> > the list of people I don't trust to write entirely bug-free code. I'm
> > just saying that we should not overlook a possibility to detect more
> > bugs automatically when we have one, especially if this results in no
> > object code change.
>
> The point being is that this is not about buggy code, but the fact that
> the same data is being used in two places, you want to test to make sure
> it is the same. I don't see how this helps.

See my comment above about specifically casting the void *data parameter
rather than relying on casting of the whole callback function pointer
type to void *.

>
>
>
> >
> > >
> > > > will never ever have bugs. That's just doomed to happen one day or
> > > > another. Again, call me paranoid if you like, but I think adding this
> > > > type checking is justified.
> > >
> > > Where do you add the typecheck?? As I said before, if the TRACE_EVENT()
> > > macros are broken, then so will the typecheck, and it will not catch the
> > > errors.
> > >
> > > Sure the event macros can have bugs, but if it does then it will have
> > > bugs for all. Because it is automated. If there is a bug, it wont be
> > > because of a missed type being passed in, it would be because of one of
> > > the extra macros we have that processes the same type incorrectly.
> > >
> > > >
> > > > I am providing the type check implementation in a separate email. It
> > > > will need to be extended to support the extra data parameter you plan to
> > > > add.
> > >
> > > I saw the patch, but how does it help?
> > >
> > > I use "proto" to make the tracepoint and the callback, so I can add
> > > somewhere this "check_trace_callback_type_##name(proto)", but if the
> > > macros break somehow, that means proto changed between two references of
> > > it, but what keeps proto from breaking at both callback creation and the
> > > typecheck.
> > >
> > > Basically, you are saying that somehow the argument "proto" can change
> > > between two uses of it. I don't really see that happening, and I'm not
> > > paranoid enough to think that's an issue. Adding checks that don't
> > > really check anything, honestly I find a waste, and just more confusion
> > > in the macros.
> >
> > In the TRACE_EVENT() case, without the extra "void *data" argument,
> > it is indeed checking that the "proto" of the callback you create is
> > that same as the "proto" expected by the tracepoint call. However, given
> > that you plan on adding other parameters besides "proto", then the added
> > type-checking makes more and more sense.
>
> But you can not test it! That's my point.
>
> The first part of proto will be the same, and that's all we can test.
> But the data parameter that the DECLARE_TRACE() is going to create will
> be void *. Which means we can't test it. This is something that C lacks,
> and we could test it in C++ if we did this with templates. The only way
> to test it is at runtime with a magic number in the data field.
>
> This is the same as the file->private data. You can't test it at build
> time.
>
> Let me explain this again:
>
> DECLARE_TRACE(name, proto, args);
>
> Will call the function like:
>
> callback(args, data);
>
> The callback will be at best:
>
> int callback(proto, void *data);
>
>
> because the data being passed in is not defined yet. It is defined at
> the point of the registering of the callback. You can have two callbacks
> registered to the same tracepoint with two different types as the data
> field.
>
> So what is it that this check is testing?

It's making sure that TRACE_EVENT() creates callbacks with the following
signature:

void callback(proto, void *data)

rather than

void callback(proto, struct somestruct *data)

and forces the cast to be done within the callback rather than casting
the whole function pointer type to void *, assuming types to match. I
prefer to leave the cast outside of the tracepoint infrastructure, so we
do not obfuscate the fact that an explicit type cast is needed there.

Thanks,

Mathieu

>
> -- Steve
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com

2010-04-30 20:14:48

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

On Fri, 2010-04-30 at 16:07 -0400, Mathieu Desnoyers wrote:
> * Steven Rostedt ([email protected]) wrote:
> > On Fri, 2010-04-30 at 15:06 -0400, Mathieu Desnoyers wrote:
> >
> > > > If it is possible sure, but that's the point. Where do you add the
> > > > check? The typecast is in the C code that is constant for all trace
> > > > events.
> > >
> > > You can add the call to the static inline type check directly within the
> > > generated probe function, right after the local variable declarations.
> >
> > Well, one thing, the callback is not going to be the same as the
> > DECLARE_TRACE() because the prototype ends with "void *data", and the
> > function being called actually uses the type of that data.
> >
> > We now will have:
> >
> > DEFINE_TRACE(mytracepoint, int myarg, myarg);
> >
> > void mycallback(int myarg, struct mystuct *mydata);
> >
> > register_trace_mytracepoint_data(mycallback, mydata)
> >
> > There's no place in DEFINE_TRACE to be able to test the type of data
> > that is being passed back. I could make the calling function be:
> >
> > void mycallback(int myarg, void *data)
> > {
> > struct mystruct *mydata = data;
> > [...]
> >
> > Because the data is defined uniquely by the caller that registers a
> > callback. Each function can register its own data type.
>
> Yep. There would need to be a cast from void * to struct mystruct *
> at the beginning of the callback as you propose here. I prefer this cast
> to be explicit (as proposed here) rather than hidden within the entire
> function call (void *) cast.
>

OK, so you prefer that, I don't, but I also don't care, so I could
easily change it.


> > Let me explain this again:
> >
> > DECLARE_TRACE(name, proto, args);
> >
> > Will call the function like:
> >
> > callback(args, data);
> >
> > The callback will be at best:
> >
> > int callback(proto, void *data);
> >
> >
> > because the data being passed in is not defined yet. It is defined at
> > the point of the registering of the callback. You can have two callbacks
> > registered to the same tracepoint with two different types as the data
> > field.
> >
> > So what is it that this check is testing?
>
> It's making sure that TRACE_EVENT() creates callbacks with the following
> signature:
>
> void callback(proto, void *data)
>
> rather than
>
> void callback(proto, struct somestruct *data)
>
> and forces the cast to be done within the callback rather than casting
> the whole function pointer type to void *, assuming types to match. I
> prefer to leave the cast outside of the tracepoint infrastructure, so we
> do not obfuscate the fact that an explicit type cast is needed there.

Fine, but I hardly see it as obfuscation. But my question again, even if
we do change this. What is this test testing? To me, it is checking that
CPP works.

-- Steve

2010-04-30 21:02:26

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 04/10][RFC] tracing: Remove per event trace registering

* Steven Rostedt ([email protected]) wrote:
> On Fri, 2010-04-30 at 16:07 -0400, Mathieu Desnoyers wrote:
> > * Steven Rostedt ([email protected]) wrote:
> > > On Fri, 2010-04-30 at 15:06 -0400, Mathieu Desnoyers wrote:
> > >
> > > > > If it is possible sure, but that's the point. Where do you add the
> > > > > check? The typecast is in the C code that is constant for all trace
> > > > > events.
> > > >
> > > > You can add the call to the static inline type check directly within the
> > > > generated probe function, right after the local variable declarations.
> > >
> > > Well, one thing, the callback is not going to be the same as the
> > > DECLARE_TRACE() because the prototype ends with "void *data", and the
> > > function being called actually uses the type of that data.
> > >
> > > We now will have:
> > >
> > > DEFINE_TRACE(mytracepoint, int myarg, myarg);
> > >
> > > void mycallback(int myarg, struct mystuct *mydata);
> > >
> > > register_trace_mytracepoint_data(mycallback, mydata)
> > >
> > > There's no place in DEFINE_TRACE to be able to test the type of data
> > > that is being passed back. I could make the calling function be:
> > >
> > > void mycallback(int myarg, void *data)
> > > {
> > > struct mystruct *mydata = data;
> > > [...]
> > >
> > > Because the data is defined uniquely by the caller that registers a
> > > callback. Each function can register its own data type.
> >
> > Yep. There would need to be a cast from void * to struct mystruct *
> > at the beginning of the callback as you propose here. I prefer this cast
> > to be explicit (as proposed here) rather than hidden within the entire
> > function call (void *) cast.
> >
>
> OK, so you prefer that, I don't, but I also don't care, so I could
> easily change it.
>
>
> > > Let me explain this again:
> > >
> > > DECLARE_TRACE(name, proto, args);
> > >
> > > Will call the function like:
> > >
> > > callback(args, data);
> > >
> > > The callback will be at best:
> > >
> > > int callback(proto, void *data);
> > >
> > >
> > > because the data being passed in is not defined yet. It is defined at
> > > the point of the registering of the callback. You can have two callbacks
> > > registered to the same tracepoint with two different types as the data
> > > field.
> > >
> > > So what is it that this check is testing?
> >
> > It's making sure that TRACE_EVENT() creates callbacks with the following
> > signature:
> >
> > void callback(proto, void *data)
> >
> > rather than
> >
> > void callback(proto, struct somestruct *data)
> >
> > and forces the cast to be done within the callback rather than casting
> > the whole function pointer type to void *, assuming types to match. I
> > prefer to leave the cast outside of the tracepoint infrastructure, so we
> > do not obfuscate the fact that an explicit type cast is needed there.
>
> Fine, but I hardly see it as obfuscation. But my question again, even if
> we do change this. What is this test testing? To me, it is checking that
> CPP works.

It's checking that the macros generated compatible call/callback
prototypes, yes. It comes down to using the compiler type-checking to
double-check that the macros are fine.

Thanks,

Mathieu

>
> -- Steve
>
>

--
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com