Hi Frederic,
This series fixes bugs and upgrades the kprobe-based event tracer to a
dynamic event tracer on ftrace/perf tools. It also enhances the tracer
output format to show each argument name and the event name on each entry.
With this series, users can add trace events dynamically on ftrace
and use those events with the perf tools as shown below.
(Step.1) Define new events under new group
$ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
> /debug/tracing/kprobe_events
$ echo r:mygroup/myretprobe do_sys_open rv >> /debug/tracing/kprobe_events
$ cat /debug/tracing/kprobe_events
p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
r:myretprobe do_sys_open+0 rv=rv
(You can see that each argument has its name.)
(Step.2) Perf shows new events
$ perf list
...
mygroup:myretprobe [Tracepoint event]
mygroup:myprobe [Tracepoint event]
...
(Step.3) Record events with perf
$ perf record -f -e mygroup:myprobe:record -F 1 -a ls
...
[ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
(Step.4) Perf trace shows the result
$ perf trace
version = 0.5
perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=810d3f7 flags=98800 mode=1
perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=98800 mode=bff7450c
perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
(Step.5) You can also use return probes.
$ perf record -f -e mygroup:myretprobe:record -F 1 -a ls
...
[ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
$ perf trace
version = 0.5
perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=b
perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=c
perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=d
perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=c
TODO:
- Implement perf kprobe command to help defining new probes.
Thank you,
---
Masami Hiramatsu (7):
tracing/kprobes: Support custom subsystem for each kprobe event
tracing/kprobes: Show event name in trace output
tracing/kprobes: Add argument name support
tracing/kprobes: Add event profiling support
tracing/kprobes: Cleanup kprobe tracer code.
tracing/kprobes: Fix probe offset to be unsigned
x86/ptrace: Fix regs_get_argument_nth() to add correct offset
Documentation/trace/kprobetrace.txt | 65 +++---
arch/x86/kernel/ptrace.c | 2
kernel/trace/trace_kprobe.c | 369 +++++++++++++++++++++++------------
3 files changed, 274 insertions(+), 162 deletions(-)
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Fix regs_get_argument_nth() to add the offset in bytes correctly. Because
offsetof() returns a byte offset, the offset must be added to a char *
pointer instead of an unsigned long * pointer.
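For illustration only (a minimal userspace sketch with a hypothetical struct,
not the real pt_regs), the difference between the two kinds of pointer
arithmetic is:

	#include <stdio.h>
	#include <stddef.h>

	struct regs { unsigned long ax, bx, cx, dx; };

	int main(void)
	{
		struct regs r = { 1, 2, 3, 4 };
		int off = offsetof(struct regs, cx);	/* byte offset, e.g. 16 on 64-bit */

		/* Wrong: unsigned long * arithmetic scales 'off' by
		 * sizeof(unsigned long), reading far past the intended field:
		 *	*((unsigned long *)&r + off)
		 */

		/* Right: char * arithmetic advances by exactly 'off' bytes. */
		unsigned long val = *(unsigned long *)((char *)&r + off);

		printf("%lu\n", val);	/* prints 3 (r.cx) */
		return 0;
	}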
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
arch/x86/kernel/ptrace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a33a17d..caffb68 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -150,7 +150,7 @@ static const int arg_offs_table[] = {
unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
{
if (n < ARRAY_SIZE(arg_offs_table))
- return *((unsigned long *)regs + arg_offs_table[n]);
+ return *(unsigned long *)((char *)regs + arg_offs_table[n]);
else {
/*
* The typical case: arg n is on the stack.
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Prohibit users from specifying a negative offset from a symbol.
Since kprobe.offset is an unsigned int, the offset must always be a
positive value.
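For illustration only (not part of the patch): because the offset field is
unsigned, a negative offset would silently wrap around to a huge positive
value instead of probing before the symbol, e.g.:

	#include <stdio.h>

	int main(void)
	{
		/* what a hypothetical "SYMBOL-16" would end up storing */
		unsigned int offset = (unsigned int)-16;

		printf("%u\n", offset);	/* prints 4294967280 (32-bit), not -16 */
		return 0;
	}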
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
Documentation/trace/kprobetrace.txt | 14 +++++++-------
kernel/trace/trace_kprobe.c | 19 +++++++------------
2 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 3de7517..db55318 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -25,15 +25,15 @@ probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter.
Synopsis of kprobe_events
-------------------------
- p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
- EVENT : Event name. If omitted, the event name is generated
- based on SYMBOL+offs or MEMADDR.
- SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted.
- MEMADDR : Address where the probe is inserted.
+ EVENT : Event name. If omitted, the event name is generated
+ based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+ MEMADDR : Address where the probe is inserted.
- FETCHARGS : Arguments. Each probe can have up to 128 args.
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
%REG : Fetch register REG
sN : Fetch Nth entry of stack (N >= 0)
sa : Fetch stack address.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index f4ec3fc..8491525 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -210,7 +210,7 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
return tp->symbol ? tp->symbol : "unknown";
}
-static __kprobes long probe_offset(struct trace_probe *tp)
+static __kprobes unsigned int probe_offset(struct trace_probe *tp)
{
return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
}
@@ -380,7 +380,7 @@ end:
}
/* Split symbol and offset. */
-static int split_symbol_offset(char *symbol, long *offset)
+static int split_symbol_offset(char *symbol, unsigned long *offset)
{
char *tmp;
int ret;
@@ -389,16 +389,11 @@ static int split_symbol_offset(char *symbol, long *offset)
return -EINVAL;
tmp = strchr(symbol, '+');
- if (!tmp)
- tmp = strchr(symbol, '-');
-
if (tmp) {
/* skip sign because strict_strtol doesn't accept '+' */
- ret = strict_strtol(tmp + 1, 0, offset);
+ ret = strict_strtoul(tmp + 1, 0, offset);
if (ret)
return ret;
- if (*tmp == '-')
- *offset = -(*offset);
*tmp = '\0';
} else
*offset = 0;
@@ -520,7 +515,7 @@ static int create_trace_probe(int argc, char **argv)
{
/*
* Argument syntax:
- * - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS]
+ * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS]
* - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
* Fetch args:
* aN : fetch Nth of function argument. (N:0-)
@@ -539,7 +534,7 @@ static int create_trace_probe(int argc, char **argv)
int i, ret = 0;
int is_return = 0;
char *symbol = NULL, *event = NULL;
- long offset = 0;
+ unsigned long offset = 0;
void *addr = NULL;
if (argc < 2)
@@ -605,7 +600,7 @@ static int create_trace_probe(int argc, char **argv)
if (tp->symbol) {
kp->symbol_name = tp->symbol;
- kp->offset = offset;
+ kp->offset = (unsigned int)offset;
} else
kp->addr = addr;
@@ -675,7 +670,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, ":%s", tp->call.name);
if (tp->symbol)
- seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp));
+ seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp));
else
seq_printf(m, " 0x%p", probe_address(tp));
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Simplify struct trace_probe by removing a union and some redundant wrappers,
and clean up the create_trace_probe() function.
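The union can go away because a kretprobe already embeds a kprobe, so keeping
only the kretprobe in struct trace_probe still gives access to a usable kprobe
through tp->rp.kp. A simplified sketch (not the actual kernel definitions):

	struct kprobe {
		void *addr;
		const char *symbol_name;
		unsigned int offset;
		/* ... */
	};

	struct kretprobe {
		struct kprobe kp;	/* the embedded kprobe */
		void *handler;
		/* ... */
	};

	struct trace_probe {
		struct kretprobe rp;	/* use rp.kp for the plain kprobe case */
		/* ... */
	};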
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
kernel/trace/trace_kprobe.c | 81 ++++++++++++++++++-------------------------
1 files changed, 34 insertions(+), 47 deletions(-)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8491525..902a148 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -180,10 +180,7 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
struct trace_probe {
struct list_head list;
- union {
- struct kprobe kp;
- struct kretprobe rp;
- };
+ struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long nhit;
const char *symbol; /* symbol name */
struct ftrace_event_call call;
@@ -202,7 +199,7 @@ static int kretprobe_trace_func(struct kretprobe_instance *ri,
static __kprobes int probe_is_return(struct trace_probe *tp)
{
- return (tp->rp.handler == kretprobe_trace_func);
+ return tp->rp.handler != NULL;
}
static __kprobes const char *probe_symbol(struct trace_probe *tp)
@@ -210,16 +207,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
return tp->symbol ? tp->symbol : "unknown";
}
-static __kprobes unsigned int probe_offset(struct trace_probe *tp)
-{
- return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
-}
-
-static __kprobes void *probe_address(struct trace_probe *tp)
-{
- return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr;
-}
-
static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
@@ -269,8 +256,14 @@ static void unregister_probe_event(struct trace_probe *tp);
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
-static struct trace_probe *alloc_trace_probe(const char *symbol,
- const char *event, int nargs)
+/*
+ * Allocate new trace_probe and initialize it (including kprobes).
+ */
+static struct trace_probe *alloc_trace_probe(const char *event,
+ void *addr,
+ const char *symbol,
+ unsigned long offs,
+ int nargs, int is_return)
{
struct trace_probe *tp;
@@ -282,7 +275,16 @@ static struct trace_probe *alloc_trace_probe(const char *symbol,
tp->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tp->symbol)
goto error;
- }
+ tp->rp.kp.symbol_name = tp->symbol;
+ tp->rp.kp.offset = offs;
+ } else
+ tp->rp.kp.addr = addr;
+
+ if (is_return)
+ tp->rp.handler = kretprobe_trace_func;
+ else
+ tp->rp.kp.pre_handler = kprobe_trace_func;
+
if (!event)
goto error;
tp->call.name = kstrdup(event, GFP_KERNEL);
@@ -327,7 +329,7 @@ static void __unregister_trace_probe(struct trace_probe *tp)
if (probe_is_return(tp))
unregister_kretprobe(&tp->rp);
else
- unregister_kprobe(&tp->kp);
+ unregister_kprobe(&tp->rp.kp);
}
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
@@ -349,14 +351,14 @@ static int register_trace_probe(struct trace_probe *tp)
if (probe_is_return(tp))
ret = register_kretprobe(&tp->rp);
else
- ret = register_kprobe(&tp->kp);
+ ret = register_kprobe(&tp->rp.kp);
if (ret) {
pr_warning("Could not insert probe(%d)\n", ret);
if (ret == -EILSEQ) {
pr_warning("Probing address(0x%p) is not an "
"instruction boundary.\n",
- probe_address(tp));
+ tp->rp.kp.addr);
ret = -EINVAL;
}
goto end;
@@ -530,12 +532,12 @@ static int create_trace_probe(int argc, char **argv)
* +|-offs(ARG) : fetch memory at ARG +|- offs address.
*/
struct trace_probe *tp;
- struct kprobe *kp;
int i, ret = 0;
int is_return = 0;
char *symbol = NULL, *event = NULL;
unsigned long offset = 0;
void *addr = NULL;
+ char buf[MAX_EVENT_NAME_LEN];
if (argc < 2)
return -EINVAL;
@@ -577,33 +579,18 @@ static int create_trace_probe(int argc, char **argv)
/* setup a probe */
if (!event) {
/* Make a new event name */
- char buf[MAX_EVENT_NAME_LEN];
if (symbol)
snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
is_return ? 'r' : 'p', symbol, offset);
else
snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
is_return ? 'r' : 'p', addr);
- tp = alloc_trace_probe(symbol, buf, argc);
- } else
- tp = alloc_trace_probe(symbol, event, argc);
+ event = buf;
+ }
+ tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return);
if (IS_ERR(tp))
return PTR_ERR(tp);
- if (is_return) {
- kp = &tp->rp.kp;
- tp->rp.handler = kretprobe_trace_func;
- } else {
- kp = &tp->kp;
- tp->kp.pre_handler = kprobe_trace_func;
- }
-
- if (tp->symbol) {
- kp->symbol_name = tp->symbol;
- kp->offset = (unsigned int)offset;
- } else
- kp->addr = addr;
-
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
@@ -670,9 +657,9 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, ":%s", tp->call.name);
if (tp->symbol)
- seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp));
+ seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
else
- seq_printf(m, " 0x%p", probe_address(tp));
+ seq_printf(m, " 0x%p", tp->rp.kp.addr);
for (i = 0; i < tp->nr_args; i++) {
ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
@@ -783,7 +770,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
struct trace_probe *tp = v;
seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
- probe_is_return(tp) ? tp->rp.kp.nmissed : tp->kp.nmissed);
+ tp->rp.kp.nmissed);
return 0;
}
@@ -811,7 +798,7 @@ static const struct file_operations kprobe_profile_ops = {
/* Kprobe handler */
static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
- struct trace_probe *tp = container_of(kp, struct trace_probe, kp);
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
struct ring_buffer_event *event;
int size, i, pc;
@@ -864,7 +851,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
- entry->func = (unsigned long)probe_address(tp);
+ entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i], regs);
@@ -943,7 +930,7 @@ static int probe_event_enable(struct ftrace_event_call *call)
if (probe_is_return(tp))
return enable_kretprobe(&tp->rp);
else
- return enable_kprobe(&tp->kp);
+ return enable_kprobe(&tp->rp.kp);
}
static void probe_event_disable(struct ftrace_event_call *call)
@@ -953,7 +940,7 @@ static void probe_event_disable(struct ftrace_event_call *call)
if (probe_is_return(tp))
disable_kretprobe(&tp->rp);
else
- disable_kprobe(&tp->kp);
+ disable_kprobe(&tp->rp.kp);
}
static int probe_event_raw_init(struct ftrace_event_call *event_call)
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Add probe_profile_enable()/probe_profile_disable() to support the perf tool
when CONFIG_EVENT_PROFILE=y.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
Documentation/trace/kprobetrace.txt | 4 +
kernel/trace/trace_kprobe.c | 110 ++++++++++++++++++++++++++++++++++-
2 files changed, 111 insertions(+), 3 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index db55318..8f882eb 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -62,13 +62,15 @@ enabled:
You can enable/disable the probe by writing 1 or 0 on it.
format:
- It shows the format of this probe event. It also shows aliases of arguments
+ This shows the format of this probe event. It also shows aliases of arguments
which you specified to kprobe_events.
filter:
You can write filtering rules of this event. And you can use both of aliase
names and field names for describing filters.
+id:
+ This shows the id of this probe event.
Event Profiling
---------------
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 902a148..21ffb5e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -28,6 +28,7 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/ptrace.h>
+#include <linux/perf_counter.h>
#include "trace.h"
#include "trace_output.h"
@@ -280,6 +281,7 @@ static struct trace_probe *alloc_trace_probe(const char *event,
} else
tp->rp.kp.addr = addr;
+ /* Set handler here for checking whether this probe is return or not. */
if (is_return)
tp->rp.handler = kretprobe_trace_func;
else
@@ -927,10 +929,13 @@ static int probe_event_enable(struct ftrace_event_call *call)
{
struct trace_probe *tp = (struct trace_probe *)call->data;
- if (probe_is_return(tp))
+ if (probe_is_return(tp)) {
+ tp->rp.handler = kretprobe_trace_func;
return enable_kretprobe(&tp->rp);
- else
+ } else {
+ tp->rp.kp.pre_handler = kprobe_trace_func;
return enable_kprobe(&tp->rp.kp);
+ }
}
static void probe_event_disable(struct ftrace_event_call *call)
@@ -1103,6 +1108,101 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
"func, ret_ip");
}
+#ifdef CONFIG_EVENT_PROFILE
+
+/* Kprobe profile handler */
+static __kprobes int kprobe_profile_func(struct kprobe *kp,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kprobe_trace_entry *entry;
+ int size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->ip = (unsigned long)kp->addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i], regs);
+ perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+/* Kretprobe profile handler */
+static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kretprobe_trace_entry *entry;
+ int size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kretprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->ret_ip = (unsigned long)ri->ret_addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i], regs);
+ perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+static int probe_profile_enable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ if (atomic_inc_return(&call->profile_count))
+ return 0;
+
+ if (probe_is_return(tp)) {
+ tp->rp.handler = kretprobe_profile_func;
+ return enable_kretprobe(&tp->rp);
+ } else {
+ tp->rp.kp.pre_handler = kprobe_profile_func;
+ return enable_kprobe(&tp->rp.kp);
+ }
+}
+
+static void probe_profile_disable(struct ftrace_event_call *call)
+{
+ if (atomic_add_negative(-1, &call->profile_count))
+ probe_event_disable(call);
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
static int register_probe_event(struct trace_probe *tp)
{
struct ftrace_event_call *call = &tp->call;
@@ -1128,6 +1228,12 @@ static int register_probe_event(struct trace_probe *tp)
call->enabled = 1;
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
+
+#ifdef CONFIG_EVENT_PROFILE
+ atomic_set(&call->profile_count, -1);
+ call->profile_enable = probe_profile_enable;
+ call->profile_disable = probe_profile_disable;
+#endif
call->data = tp;
ret = trace_add_event_call(call);
if (ret) {
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Add argument name assignment support and remove the "alias" lines from the
format file. This allows users to assign a unique name to each argument.
For example,
$ echo p do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > kprobe_events
This assigns dfd, filename, flags, and mode to the 1st through 4th arguments
respectively. The trace buffer shows those names too:
<...>-1439 [000] 1200885.933147: do_sys_open+0x0/0xdf: dfd=ffffff9c filename=bfa898ac flags=8000 mode=0
This helps users know what each value means.
Users can also filter events by these names. Note that you can no longer
filter by argN.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
Documentation/trace/kprobetrace.txt | 46 +++++--------
kernel/trace/trace_kprobe.c | 128 ++++++++++++++++++-----------------
2 files changed, 84 insertions(+), 90 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 8f882eb..55a8034 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,7 +42,8 @@ Synopsis of kprobe_events
aN : Fetch function argument. (N >= 0)(*)
rv : Fetch return value.(**)
ra : Fetch return address.(**)
- +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address.(***)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
(*) aN may not correct on asmlinkaged functions and at the middle of
function body.
@@ -62,12 +63,10 @@ enabled:
You can enable/disable the probe by writing 1 or 0 on it.
format:
- This shows the format of this probe event. It also shows aliases of arguments
- which you specified to kprobe_events.
+ This shows the format of this probe event.
filter:
- You can write filtering rules of this event. And you can use both of aliase
- names and field names for describing filters.
+ You can write filtering rules of this event.
id:
This shows the id of this probe event.
@@ -85,10 +84,11 @@ Usage examples
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open a0 a1 a2 a3 > /sys/kernel/debug/tracing/kprobe_events
+ echo p:myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event.
+1st to 4th arguments as "myprobe" event. As this example shows, users can
+choose a more familiar name for each argument.
echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events
@@ -99,7 +99,7 @@ recording return value and return address as "myretprobe" event.
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
name: myprobe
-ID: 23
+ID: 75
format:
field:unsigned short common_type; offset:0; size:2;
field:unsigned char common_flags; offset:2; size:1;
@@ -109,21 +109,15 @@ format:
field: unsigned long ip; offset:16;tsize:8;
field: int nargs; offset:24;tsize:4;
- field: unsigned long arg0; offset:32;tsize:8;
- field: unsigned long arg1; offset:40;tsize:8;
- field: unsigned long arg2; offset:48;tsize:8;
- field: unsigned long arg3; offset:56;tsize:8;
+ field: unsigned long dfd; offset:32;tsize:8;
+ field: unsigned long filename; offset:40;tsize:8;
+ field: unsigned long flags; offset:48;tsize:8;
+ field: unsigned long mode; offset:56;tsize:8;
- alias: a0; original: arg0;
- alias: a1; original: arg1;
- alias: a2; original: arg2;
- alias: a3; original: arg3;
+print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode
-print fmt: "%lx: 0x%lx 0x%lx 0x%lx 0x%lx", ip, arg0, arg1, arg2, arg3
-
- You can see that the event has 4 arguments and alias expressions
-corresponding to it.
+ You can see that the event has 4 arguments as expressions you specified.
echo > /sys/kernel/debug/tracing/kprobe_events
@@ -135,12 +129,12 @@ corresponding to it.
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
- <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: 0x3 0x7fffd1ec4440 0x8000 0x0
- <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: 0xfffffffffffffffe 0xffffffff81367a3a
- <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: 0xffffff9c 0x40413c 0x8000 0x1b6
- <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
- <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: 0xffffff9c 0x4041c6 0x98800 0x10
- <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
+ <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
Each line shows when the kernel hits a probe, and <- SYMBOL means kernel
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 21ffb5e..6b88acd 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -176,9 +176,14 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
}
/**
- * kprobe_trace_core
+ * Kprobe tracer core functions
*/
+struct probe_arg {
+ struct fetch_func fetch;
+ const char *name;
+};
+
struct trace_probe {
struct list_head list;
struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -187,12 +192,12 @@ struct trace_probe {
struct ftrace_event_call call;
struct trace_event event;
unsigned int nr_args;
- struct fetch_func args[];
+ struct probe_arg args[];
};
#define SIZEOF_TRACE_PROBE(n) \
(offsetof(struct trace_probe, args) + \
- (sizeof(struct fetch_func) * (n)))
+ (sizeof(struct probe_arg) * (n)))
static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_trace_func(struct kretprobe_instance *ri,
@@ -301,15 +306,21 @@ error:
return ERR_PTR(-ENOMEM);
}
+static void free_probe_arg(struct probe_arg *arg)
+{
+ if (arg->fetch.func == fetch_symbol)
+ free_symbol_cache(arg->fetch.data);
+ else if (arg->fetch.func == fetch_indirect)
+ free_indirect_fetch_data(arg->fetch.data);
+ kfree(arg->name);
+}
+
static void free_trace_probe(struct trace_probe *tp)
{
int i;
for (i = 0; i < tp->nr_args; i++)
- if (tp->args[i].func == fetch_symbol)
- free_symbol_cache(tp->args[i].data);
- else if (tp->args[i].func == fetch_indirect)
- free_indirect_fetch_data(tp->args[i].data);
+ free_probe_arg(&tp->args[i]);
kfree(tp->call.name);
kfree(tp->symbol);
@@ -532,11 +543,13 @@ static int create_trace_probe(int argc, char **argv)
* %REG : fetch register REG
* Indirect memory fetch:
* +|-offs(ARG) : fetch memory at ARG +|- offs address.
+ * Alias name of args:
+ * NAME=FETCHARG : set NAME as alias of FETCHARG.
*/
struct trace_probe *tp;
int i, ret = 0;
int is_return = 0;
- char *symbol = NULL, *event = NULL;
+ char *symbol = NULL, *event = NULL, *arg = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -596,12 +609,21 @@ static int create_trace_probe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- if (strlen(argv[i]) > MAX_ARGSTR_LEN) {
- pr_info("Argument%d(%s) is too long.\n", i, argv[i]);
+ /* Parse argument name */
+ arg = strchr(argv[i], '=');
+ if (arg)
+ *arg++ = '\0';
+ else
+ arg = argv[i];
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+
+ /* Parse fetch argument */
+ if (strlen(arg) > MAX_ARGSTR_LEN) {
+ pr_info("Argument%d(%s) is too long.\n", i, arg);
ret = -ENOSPC;
goto error;
}
- ret = parse_probe_arg(argv[i], &tp->args[i], is_return);
+ ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
if (ret)
goto error;
}
@@ -664,12 +686,12 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, " 0x%p", tp->rp.kp.addr);
for (i = 0; i < tp->nr_args; i++) {
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+ ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
if (ret < 0) {
pr_warning("Argument%d decoding error(%d).\n", i, ret);
return ret;
}
- seq_printf(m, " %s", buf);
+ seq_printf(m, " %s=%s", tp->args[i].name, buf);
}
seq_printf(m, "\n");
return 0;
@@ -823,7 +845,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
if (!filter_current_check_discard(call, entry, event))
trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
@@ -856,7 +878,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
if (!filter_current_check_discard(call, entry, event))
trace_nowake_buffer_unlock_commit(event, irq_flags, pc);
@@ -870,9 +892,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
{
struct kprobe_trace_entry *field;
struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
int i;
field = (struct kprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -881,7 +907,8 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
goto partial;
for (i = 0; i < field->nargs; i++)
- if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -897,9 +924,13 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
{
struct kretprobe_trace_entry *field;
struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
int i;
field = (struct kretprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -914,7 +945,8 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
goto partial;
for (i = 0; i < field->nargs; i++)
- if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -970,7 +1002,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kprobe_trace_entry field;
- char buf[MAX_ARGSTR_LEN + 1];
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
@@ -979,16 +1010,9 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ip, "ip", 0);
DEFINE_FIELD(int, nargs, "nargs", 1);
- for (i = 0; i < tp->nr_args; i++) {
- /* Set argN as a field */
- sprintf(buf, "arg%d", i);
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- /* Set argument string as an alias field */
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- }
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
return 0;
}
@@ -996,7 +1020,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kretprobe_trace_entry field;
- char buf[MAX_ARGSTR_LEN + 1];
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
@@ -1006,16 +1029,9 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, func, "func", 0);
DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
DEFINE_FIELD(int, nargs, "nargs", 1);
- for (i = 0; i < tp->nr_args; i++) {
- /* Set argN as a field */
- sprintf(buf, "arg%d", i);
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- /* Set argument string as an alias field */
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- }
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
return 0;
}
@@ -1023,31 +1039,21 @@ static int __probe_event_show_format(struct trace_seq *s,
struct trace_probe *tp, const char *fmt,
const char *arg)
{
- int i, ret;
- char buf[MAX_ARGSTR_LEN + 1];
+ int i;
- /* Show aliases */
- for (i = 0; i < tp->nr_args; i++) {
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n",
- buf, i))
- return 0;
- }
/* Show format */
if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
return 0;
for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_puts(s, " 0x%lx"))
+ if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
return 0;
if (!trace_seq_printf(s, "\", %s", arg))
return 0;
for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, ", arg%d", i))
+ if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
return 0;
return trace_seq_puts(s, "\n");
@@ -1069,17 +1075,14 @@ static int kprobe_event_show_format(struct ftrace_event_call *call,
{
struct kprobe_trace_entry field __attribute__((unused));
int ret, i;
- char buf[8];
struct trace_probe *tp = (struct trace_probe *)call->data;
SHOW_FIELD(unsigned long, ip, "ip");
SHOW_FIELD(int, nargs, "nargs");
/* Show fields */
- for (i = 0; i < tp->nr_args; i++) {
- sprintf(buf, "arg%d", i);
- SHOW_FIELD(unsigned long, args[i], buf);
- }
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
return __probe_event_show_format(s, tp, "%lx:", "ip");
@@ -1090,7 +1093,6 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
{
struct kretprobe_trace_entry field __attribute__((unused));
int ret, i;
- char buf[8];
struct trace_probe *tp = (struct trace_probe *)call->data;
SHOW_FIELD(unsigned long, func, "func");
@@ -1098,10 +1100,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(int, nargs, "nargs");
/* Show fields */
- for (i = 0; i < tp->nr_args; i++) {
- sprintf(buf, "arg%d", i);
- SHOW_FIELD(unsigned long, args[i], buf);
- }
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
return __probe_event_show_format(s, tp, "%lx <- %lx:",
@@ -1138,7 +1138,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
} while (0);
return 0;
@@ -1173,7 +1173,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
} while (0);
return 0;
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Show the event name in the tracing/trace output. This also makes the kprobe
event format consistent with other tracepoint events.
Before patching:
<...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: ...
<...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: ...
After patching:
<...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) ...
<...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) ...
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
Documentation/trace/kprobetrace.txt | 16 ++++++++--------
kernel/trace/trace_kprobe.c | 16 +++++++++++-----
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 55a8034..ed152a1 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -114,7 +114,7 @@ format:
field: unsigned long flags; offset:48;tsize:8;
field: unsigned long mode; offset:56;tsize:8;
-print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
You can see that the event has 4 arguments as expressions you specified.
@@ -129,15 +129,15 @@ print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->fi
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
- <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
- <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a
- <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6
- <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
- <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10
- <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) rv=fffffffffffffffe ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a
- Each line shows when the kernel hits a probe, and <- SYMBOL means kernel
+ Each line shows when the kernel hits an event, and <- SYMBOL means kernel
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6b88acd..486c229 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -900,10 +900,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
event = ftrace_find_event(field->ent.type);
tp = container_of(event, struct trace_probe, event);
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
- if (!trace_seq_puts(s, ":"))
+ if (!trace_seq_puts(s, ")"))
goto partial;
for (i = 0; i < field->nargs; i++)
@@ -932,6 +935,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
event = ftrace_find_event(field->ent.type);
tp = container_of(event, struct trace_probe, event);
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -941,7 +947,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
goto partial;
- if (!trace_seq_puts(s, ":"))
+ if (!trace_seq_puts(s, ")"))
goto partial;
for (i = 0; i < field->nargs; i++)
@@ -1085,7 +1091,7 @@ static int kprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
- return __probe_event_show_format(s, tp, "%lx:", "ip");
+ return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
}
static int kretprobe_event_show_format(struct ftrace_event_call *call,
@@ -1104,8 +1110,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
- return __probe_event_show_format(s, tp, "%lx <- %lx:",
- "func, ret_ip");
+ return __probe_event_show_format(s, tp, "(%lx <- %lx)",
+ "REC->func, REC->ret_ip");
}
#ifdef CONFIG_EVENT_PROFILE
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Support specifying a custom subsystem (group) for each kprobe event.
This allows users to create a new group for controlling several probes
at once, or to add events to existing groups as additional tracepoints.
New synopsis:
p[:[subsys/]event-name] KADDR|KSYM[+offs] [ARGS]
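For example (as shown in the cover letter), probes can be defined under a
new group and then show up as mygroup:myprobe / mygroup:myretprobe in
perf list:
$ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > /sys/kernel/debug/tracing/kprobe_events
$ echo r:mygroup/myretprobe do_sys_open rv >> /sys/kernel/debug/tracing/kprobe_events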
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
---
Documentation/trace/kprobetrace.txt | 5 +++--
kernel/trace/trace_kprobe.c | 33 +++++++++++++++++++++++++++------
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ed152a1..af9ae49 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -25,9 +25,10 @@ probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter.
Synopsis of kprobe_events
-------------------------
- p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ GRP : Group name. If omitted, use "kprobes" for it.
EVENT : Event name. If omitted, the event name is generated
based on SYMBOL+offs or MEMADDR.
SYMBOL[+offs] : Symbol+offset where the probe is inserted.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 486c229..cb3627b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -36,6 +36,7 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
+#define KPROBE_EVENT_SYSTEM "kprobes"
/* currently, trace_kprobe only supports X86. */
@@ -265,7 +266,8 @@ static LIST_HEAD(probe_list);
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
-static struct trace_probe *alloc_trace_probe(const char *event,
+static struct trace_probe *alloc_trace_probe(const char *group,
+ const char *event,
void *addr,
const char *symbol,
unsigned long offs,
@@ -298,9 +300,16 @@ static struct trace_probe *alloc_trace_probe(const char *event,
if (!tp->call.name)
goto error;
+ if (!group)
+ goto error;
+ tp->call.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->call.system)
+ goto error;
+
INIT_LIST_HEAD(&tp->list);
return tp;
error:
+ kfree(tp->call.name);
kfree(tp->symbol);
kfree(tp);
return ERR_PTR(-ENOMEM);
@@ -322,6 +331,7 @@ static void free_trace_probe(struct trace_probe *tp)
for (i = 0; i < tp->nr_args; i++)
free_probe_arg(&tp->args[i]);
+ kfree(tp->call.system);
kfree(tp->call.name);
kfree(tp->symbol);
kfree(tp);
@@ -530,8 +540,8 @@ static int create_trace_probe(int argc, char **argv)
{
/*
* Argument syntax:
- * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS]
- * - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
+ * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
+ * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
* aN : fetch Nth of function argument. (N:0-)
* rv : fetch return value
@@ -549,7 +559,7 @@ static int create_trace_probe(int argc, char **argv)
struct trace_probe *tp;
int i, ret = 0;
int is_return = 0;
- char *symbol = NULL, *event = NULL, *arg = NULL;
+ char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -566,6 +576,15 @@ static int create_trace_probe(int argc, char **argv)
if (argv[0][1] == ':') {
event = &argv[0][2];
+ if (strchr(event, '/')) {
+ group = event;
+ event = strchr(group, '/') + 1;
+ event[-1] = '\0';
+ if (strlen(group) == 0) {
+ pr_info("Group name is not specified\n");
+ return -EINVAL;
+ }
+ }
if (strlen(event) == 0) {
pr_info("Event name is not specifiled\n");
return -EINVAL;
@@ -592,6 +611,8 @@ static int create_trace_probe(int argc, char **argv)
argc -= 2; argv += 2;
/* setup a probe */
+ if (!group)
+ group = KPROBE_EVENT_SYSTEM;
if (!event) {
/* Make a new event name */
if (symbol)
@@ -602,7 +623,8 @@ static int create_trace_probe(int argc, char **argv)
is_return ? 'r' : 'p', addr);
event = buf;
}
- tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return);
+ tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+ is_return);
if (IS_ERR(tp))
return PTR_ERR(tp);
@@ -1215,7 +1237,6 @@ static int register_probe_event(struct trace_probe *tp)
int ret;
/* Initialize ftrace_event_call */
- call->system = "kprobes";
if (probe_is_return(tp)) {
tp->event.trace = print_kretprobe_event;
call->raw_init = probe_event_raw_init;
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Thu, Sep 10, 2009 at 07:52:58PM -0400, Masami Hiramatsu wrote:
> Hi Frederic,
>
> This series fixes bugs and upgrades the kprobe-based event tracer to a
> dynamic event tracer on ftrace/perf tools. It also enhances the tracer
> output format to show each argument name and the event name on each entry.
>
> With this series, users can add trace events dynamically on ftrace
> and use those events with the perf tools as shown below.
>
> (Step.1) Define new events under new group
>
> $ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
> > /debug/tracing/kprobe_events
> $ echo r:mygroup/myretprobe do_sys_open rv >> /debug/tracing/kprobe_events
> $ cat /debug/tracing/kprobe_events
> p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
> r:myretprobe do_sys_open+0 rv=rv
>
> (You can see that each argument has its name.)
>
>
> (Step.2) Perf shows new events
>
> $ perf list
> ...
> mygroup:myretprobe [Tracepoint event]
> mygroup:myprobe [Tracepoint event]
> ...
>
>
> (Step.3) Record events with perf
>
> $ perf record -f -e mygroup:myprobe:record -F 1 -a ls
> ...
> [ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
>
>
> (Step.4) Perf trace shows the result
>
> $ perf trace
> version = 0.5
> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=810d3f7 flags=98800 mode=1
> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=98800 mode=bff7450c
> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
Nice!
Maybe another step for the todo-list that would be nice: defining the format
for a type, like it's done for ftrace events.
>
>
> (Step.5) You can also use return probes.
>
> $ perf record -f -e mygroup:myretprobe:record -F 1 -a ls
> ...
> [ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
> $ perf trace
> version = 0.5
> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=b
> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=c
> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=d
> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c <- c04b0b7d) rv=c
>
>
> TODO:
> - Implement perf kprobe command to help defining new probes.
Yeah!
I wonder what could be the best workflow to use it.
Imagine the following steps:
- perf kprobe = define kprobes using C expression
- perf record -e our_kprobes
- perf trace
That's way too much.
It's especially sad to be forced to define a kprobe, then
get back its name, use it with record, and eventually
unsheathe perf trace.
I guess we should offer both the low level, very granular
but uninviting method "kprobe + record + trace" and an all
in one quick approach.
And that could be chosen from perf kprobe:
Low level:
perf kprobe --define-only [-p|-r] [probe_name] -a1 [arg1] -a2 [arg2] \
--format="%s %...."
perf record -e kprobes:probe_name
perf trace
Quick:
perf kprobe -p probe_name -a1 ..... cmdline| -a
And after the profiled task is finished, it could launch perf trace
by itself (or wait for a Ctrl + C if -a/wide profiling)
Well, it's just brainstorming; having the low level method first
would already be a very nice thing.
I'm really looking forward to seeing this C expression-like kprobe creation
tool.
It seems powerful enough to replace printk + kernel rebuild.
No need anymore to write some printk to debug, worrying,
sweating, feeling guilty because we know we'll need yet another
printk() after the reboot, and we even already know where while
it is compiling.
We would build less kernels, then drink less coffee, becoming
less nervous, more friendly. Everyone will offer flowers in
the street, the icebergs will grow back and white bears will...
And eventually we'll be inspired enough to write perf love,
the more than expected tool to post process ftrace "love" events.
Thanks,
Frederic.
On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
> ---
>
> arch/x86/kernel/ptrace.c | 2 +-
> 1 files changed, 1 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index a33a17d..caffb68 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -150,7 +150,7 @@ static const int arg_offs_table[] = {
> unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
> {
> if (n < ARRAY_SIZE(arg_offs_table))
> - return *((unsigned long *)regs + arg_offs_table[n]);
> + return *(unsigned long *)((char *)regs + arg_offs_table[n]);
That definitely looks like a bug.
Acked-by: Steven Rostedt <[email protected]>
-- Steve
> else {
> /*
> * The typical case: arg n is on the stack.
>
>
On Fri, 2009-09-11 at 03:33 +0200, Frederic Weisbecker wrote:
> It seems powerful enough to replace printk + kernel rebuild.
>
> No need anymore to write some printk to debug, worrying,
> sweating, feeling guilty because we know we'll need yet another
> printk() after the reboot, and we even already know where while
> it is compiling.
>
> We would build less kernels, then drink less coffee, becoming
> less nervous, more friendly. Everyone will offer flowers in
> the street, the icebergs will grow back and white bears will...
>
> And eventually we'll be inspired enough to write perf love,
> the more than expected tool to post process ftrace "love" events.
OK Frederic,
Time for you to take a holiday! ;-)
-- Steve
On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
> Simplify trace_probe to remove an union, and remove some redundant wrappers.
> And also, cleanup create_trace_probe() function.
>
One single checkpatch issue in this one (whitespace).. The rest of the
patch set appears to be clean tho ..
Daniel
On Thu, Sep 10, 2009 at 07:33:31PM -0700, Daniel Walker wrote:
> On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
> > Simplify trace_probe to remove an union, and remove some redundant wrappers.
> > And also, cleanup create_trace_probe() function.
> >
>
>
> One single checkpatch issue in this one (whitespace).. The rest of the
> patch set appears to be clean tho ..
>
> Daniel
>
Yep, no problem, I've fixed it while applying it.
Thanks.
On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
> +#ifdef CONFIG_EVENT_PROFILE
> +
> +/* Kprobe profile handler */
> +static __kprobes int kprobe_profile_func(struct kprobe *kp,
> + struct pt_regs *regs)
> +{
> + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
> + struct ftrace_event_call *call = &tp->call;
> + struct kprobe_trace_entry *entry;
> + int size, i, pc;
> + unsigned long irq_flags;
> +
> + local_save_flags(irq_flags);
> + pc = preempt_count();
> +
> + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
Note that the end result must be u64 aligned for the perf ring buffer.
And this is a bit tricky.
What is inserted in the perf ring buffer is:
raw_trace + (u32)raw_trace_size
So we must ensure that sizeof(raw_trace) + sizeof(u32)
is well u64 aligned.
We don't insert the trace_size ourselves though; this is done
from kernel/perf_counter.c
But we need to handle the size of the size (sorry) in the final
alignment.
To sum up: sizeof(raw_trace) doesn't need to (and shouldn't) be u64
aligned, but sizeof(raw_trace) + sizeof(u32) must be.
Given this aligned size, we then subtract sizeof(u32) from it
to get the needed size of the raw entry.
This result gives you the size of char raw_data[], which
is also the same size passed in perf_tpcounter_event().
See?
That's why we have this in trace/ftrace.h:
__data_size = "the real entry data size"
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), sizeof(u64));
__entry_size -= sizeof(u32);
do {
char raw_data[__entry_size];
...
perf_tpcounter_event(event_call->id, __addr, __count, entry,
__entry_size);
...
} while (0);
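Applied to the kprobe handler quoted above, a rough sketch of that arithmetic (illustration only, reusing the names from the quoted handler; none of this is code from the series itself) could be:

	/* Size the raw entry so that entry size + the u32 size header
	 * written by perf_counter stays u64 aligned. */
	size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);

	do {
		char raw_data[size];

		entry = (struct kprobe_trace_entry *)raw_data;
		/* also initialize the common entry header (entry->ent) here */
		entry->nargs = tp->nr_args;
		entry->ip = (unsigned long)kp->addr;
		/* fill entry->args[0..nargs-1] from regs, as the trace handler does */
		perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
	} while (0);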
> +static int probe_profile_enable(struct ftrace_event_call *call)
> +{
> + struct trace_probe *tp = (struct trace_probe *)call->data;
> +
> + if (atomic_inc_return(&call->profile_count))
> + return 0;
> +
> + if (probe_is_return(tp)) {
> + tp->rp.handler = kretprobe_profile_func;
> + return enable_kretprobe(&tp->rp);
> + } else {
> + tp->rp.kp.pre_handler = kprobe_profile_func;
> + return enable_kprobe(&tp->rp.kp);
> + }
> +}
Maybe I misunderstood, but it seems that concurrent use of
ftrace and perf would really mess up the result, as one would
overwrite the handler of the other.
It's hard to imagine someone using both at the same
time on the same probe, but still...
Is it possible to have two kprobes having the exact same
properties? (pointing to the same address, having the same
probe handlers, etc...)
Another solution would be to allow kprobes to have multiple
handlers.
On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
> @@ -870,9 +892,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
> {
> struct kprobe_trace_entry *field;
> struct trace_seq *s = &iter->seq;
> + struct trace_event *event;
> + struct trace_probe *tp;
> int i;
>
> field = (struct kprobe_trace_entry *)iter->ent;
> + event = ftrace_find_event(field->ent.type);
> + tp = container_of(event, struct trace_probe, event);
Can this function be called while the data is in the ring buffer but the
probe has been unregistered? If so, would the result of ftrace_find_event be
NULL?
-- Steve
>
> if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
> goto partial;
> @@ -881,7 +907,8 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
> goto partial;
>
> for (i = 0; i < field->nargs; i++)
> - if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
> + if (!trace_seq_printf(s, " %s=%lx",
> + tp->args[i].name, field->args[i]))
> goto partial;
>
> if (!trace_seq_puts(s, "\n"))
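Just to sketch the guard Steven is asking about above (hypothetical and not part of the series; it assumes returning TRACE_TYPE_UNHANDLED is an acceptable result when the event can no longer be resolved):

	field = (struct kprobe_trace_entry *)iter->ent;
	event = ftrace_find_event(field->ent.type);
	if (!event)
		/* The probe (and its event) may already be unregistered. */
		return TRACE_TYPE_UNHANDLED;
	tp = container_of(event, struct trace_probe, event);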
On Thu, Sep 10, 2009 at 07:52:58PM -0400, Masami Hiramatsu wrote:
> Hi Frederic,
>
> This series fixes bugs and upgrades kprobe-based event tracer
> as a dynamic event tracer on ftrace/perf tools. This also enhances
> tracer output format to show each argument name and event name on
> each entry.
>
> With this series, users can add trace events dynamically on ftrace
> and use those events with perf tools as below.
>
> (Step.1) Define new events under new group
>
> $ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
> > /debug/tracing/kprobes_events
> $ echo r:mygroup/myretprobe do_sys_open rv >> /debug/tracing/kprobes_events
> $ cat /debug/tracing/kprobes_events
> p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
> r:myretprobe do_sys_open+0 rv=rv
I've tested the above example and it works very well,
so I've applied this set, and the previous pending patches
in:
git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git \
tracing/kprobes
Thanks.
Frederic Weisbecker wrote:
> On Thu, Sep 10, 2009 at 07:52:58PM -0400, Masami Hiramatsu wrote:
>> Hi Frederic,
>>
>> This series fixes bugs and upgrades kprobe-based event tracer
>> as a dynamic event tracer on ftrace/perf tools. This also enhances
>> tracer output format to show each argument name and event name on
>> each entry.
>>
>> With this series, users can add trace events dynamically on ftrace
>> and use those events with perf tools as below.
>>
>> (Step.1) Define new events under new group
>>
>> $ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
>> > /debug/tracing/kprobes_events
>> $ echo r:mygroup/myretprobe do_sys_open rv>> /debug/tracing/kprobes_events
>> $ cat /debug/tracing/kprobes_events
>> p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
>> r:myretprobe do_sys_open+0 rv=rv
>>
>> (You can see that each argument has its name.)
>>
>>
>> (Step.2) Perf shows new events
>>
>> $ perf list
>> ...
>> mygroup:myretprobe [Tracepoint event]
>> mygroup:myprobe [Tracepoint event]
>> ...
>>
>>
>> (Step.3) Record events with perf
>>
>> $ perf record -f -e mygroup:myprobe:record -F 1 -a ls
>> ...
>> [ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
>>
>>
>> (Step.4) Perf trace shows the result
>>
>> $ perf trace
>> version = 0.5
>> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=810d3f7 flags=98800 mode=1
>> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
>> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=98800 mode=bff7450c
>> perf-1405 [000] 0.000000: myprobe: (c04b0a5c) dfd=ffffff9c filename=bff7650c flags=8000 mode=1b6
>
>
> Nice!
>
> May be another step in the todo-list that would be nice: define the format
> for a type. Like it's done from ftrace events.
Thanks!
BTW, I'm not sure what the type means. Each event already has its own
event ID and event_call. Could you tell me which part of ftrace I should
refer to?
>>
>>
>> (Step.5) You can also use return probes.
>>
>> $ perf record -f -e mygroup:myretprobe:record -F 1 -a ls
>> ...
>> [ perf record: Captured and wrote 0.081 MB perf.data (~3544 samples) ]
>> $ perf trace
>> version = 0.5
>> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c<- c04b0b7d) rv=b
>> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c<- c04b0b7d) rv=c
>> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c<- c04b0b7d) rv=d
>> perf-1408 [000] 0.000000: myretprobe: (c04b0a5c<- c04b0b7d) rv=c
>>
>>
>> TODO:
>> - Implement perf kprobe command to help defining new probes.
>
>
>
> Yeah!
>
> I wonder what could be the best workflow to use it.
>
> Imagine the following steps:
>
> - perf kprobe = define kprobes using C expression
> - perf record -e our_kprobes
> - perf trace
>
> That's way too much.
> Especially it's sad to be forced to define a kprobe, then
> get back its name, use it with record, and eventually
> unsheathe perf trace.
>
> I guess we should choose between the low level, very granular
> but uninviting method "kprobe + record + trace" and also an all
> in one quick approach.
>
> And that could be chosen from perf kprobe:
>
> Low level:
>
> perf kprobe --define-only [-p|-r] [probe_name] -a1 [arg1] -a2 [arg2] \
> --format="%s %...."
>
> perf record -e kprobes:probe_name
> perf trace
>
> Quick:
>
> perf kprobe -p probe_name -a1 ..... cmdline| -a
>
> And after the profiled task is finished, it could launch perf trace
> by itself (or wait for a Ctrl + C if -a/wide profiling)
Another thought: expand the record subcommand.
perf record -E "p|r:probe_name,place,arg1,arg2..."
perf trace
And have kprobe accept multiple definitions:
perf kprobe -E "p|r:probe_name,place,arg1,arg2..." -E ...
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Steven Rostedt wrote:
> On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
>
>> @@ -870,9 +892,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
>> {
>> struct kprobe_trace_entry *field;
>> struct trace_seq *s =&iter->seq;
>> + struct trace_event *event;
>> + struct trace_probe *tp;
>> int i;
>>
>> field = (struct kprobe_trace_entry *)iter->ent;
>> + event = ftrace_find_event(field->ent.type);
>> + tp = container_of(event, struct trace_probe, event);
>
> Can this function be called the data is in the ring buffer, but the
> probe has been unregistered? If so, the result of ftrace_find_event be
> NULL?
Hmm, it will depend on the ftrace implementation. Before releasing a
trace_probe, the kprobe tracer tries to unregister the event call.
If it correctly takes a mutex or some rw_lock around both
unregistering and printing, it will be safe.
Unfortunately, it seems it does not :-(.
In trace_events.c,
1054 static void __trace_remove_event_call(struct ftrace_event_call *call)
1055 {
1056 ftrace_event_enable_disable(call, 0);
1057 if (call->event)
1058 __unregister_ftrace_event(call->event);
What we need to do is call unregister_ftrace_event() instead of
__unregister_ftrace_event().
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Frederic Weisbecker wrote:
>> +static int probe_profile_enable(struct ftrace_event_call *call)
>> +{
>> + struct trace_probe *tp = (struct trace_probe *)call->data;
>> +
>> + if (atomic_inc_return(&call->profile_count))
>> + return 0;
>> +
>> + if (probe_is_return(tp)) {
>> + tp->rp.handler = kretprobe_profile_func;
>> + return enable_kretprobe(&tp->rp);
>> + } else {
>> + tp->rp.kp.pre_handler = kprobe_profile_func;
>> + return enable_kprobe(&tp->rp.kp);
>> + }
>> +}
>
>
>
> May be I misunderstood but it seems that concurrent uses of
> ftrace and perf would really mess up the result, as one would
> overwrite the handler of the other.
>
> Even though it's hard to imagine one using both at the same
> time on the same probe, but still...
Oops, that's my misunderstanding. I thought those were enabled
exclusively of each other.
> Is it possible to have two kprobes having the exact same
> properties? (pointing to the same address, having the same
> probe handlers, etc...)
>
> Another solution would be to allow kprobes to have multiple
> handlers.
It would be possible to have multiple kprobes on the same point, but I think
it's a waste of memory and time in this case.
I'd like to have a dispatcher function and flags internally :)
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Masami Hiramatsu wrote:
> Steven Rostedt wrote:
>> On Thu, 2009-09-10 at 19:53 -0400, Masami Hiramatsu wrote:
>>
>>> @@ -870,9 +892,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
>>> {
>>> struct kprobe_trace_entry *field;
>>> struct trace_seq *s =&iter->seq;
>>> + struct trace_event *event;
>>> + struct trace_probe *tp;
>>> int i;
>>>
>>> field = (struct kprobe_trace_entry *)iter->ent;
>>> + event = ftrace_find_event(field->ent.type);
>>> + tp = container_of(event, struct trace_probe, event);
>>
>> Can this function be called the data is in the ring buffer, but the
>> probe has been unregistered? If so, the result of ftrace_find_event be
>> NULL?
>
> Hmm, it will depend on ftrace implementation. Before releasing
> trace_probe, kprobe tracer tries to unregister event call.
> If it's correctly locking mutex or some rw_lock for both of
> unregistering and printing, it will be safe.
>
> Unfortunately, it seems not :-(.
>
> In trace_events.c,
> 1054 static void __trace_remove_event_call(struct ftrace_event_call *call)
> 1055 {
> 1056 ftrace_event_enable_disable(call, 0);
> 1057 if (call->event)
> 1058 __unregister_ftrace_event(call->event);
>
> What we need to do is calling unregister_ftrace_event() instead of
> __unregister_ftrace_event.
Aah, no. The caller of trace_remove_event_call() should hold
trace_event_mutex before calling!
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Frederic Weisbecker <[email protected]> writes:
> [...] I'm really looking forward seeing this C expression-like
> kprobe creation tool. It seems powerful enough to replace printk +
> kernel rebuild. No need anymore to write some printk to debug,
> worrying, [...]
To a large extent, systemtap had delivered this already some years
ago, including the cushy ponies dancing in the sunlight. While such
low-level machinery is fine, some of our experience indicates that it
is dramatically easier to use if high-level, symbolic, debugging data
is used to compute probe locations and variable names/types/locations.
It is also too easy to stress the low-level machinery beyond its
humble origins, in this case meaning putting probes in all kinds of
tender spots that go "ouch". The kprobes robustness patches coming in
are great and will benefit all of our efforts, but it will be a while
until the kernel can survive a fuzz/crashme type stress test on that
subsystem. So expect ongoing effort there.
- FChE
On Fri, Sep 11, 2009 at 03:03:35PM -0400, Frank Ch. Eigler wrote:
> Frederic Weisbecker <[email protected]> writes:
>
> > [...] I'm really looking forward seeing this C expression-like
> > kprobe creation tool. It seems powerful enough to replace printk +
> > kernel rebuild. No need anymore to write some printk to debug,
> > worrying, [...]
>
> To a large extent, systemtap had delivered this already some years
> ago, including the cushy ponies dancing in the sunlight. While such
> low-level machinery is fine, some of our experience indicates that it
> is dramatically easier to use if high-level, symbolic, debugging data
> is used to compute probe locations and variable names/types/locations.
No, systemtap has for years been failing to deliver this in a way that
could be usefully integrated into the kernel. Masami's patches are
exactly the kind of low-level functionality we absolutely need in the
kernel tree so that we can build more useful higher-level tools on top
of this.
On Fri, Sep 11, 2009 at 03:03:35PM -0400, Frank Ch. Eigler wrote:
> Frederic Weisbecker <[email protected]> writes:
>
> > [...] I'm really looking forward seeing this C expression-like
> > kprobe creation tool. It seems powerful enough to replace printk +
> > kernel rebuild. No need anymore to write some printk to debug,
> > worrying, [...]
>
> To a large extent, systemtap had delivered this already some years
> ago, including the cushy ponies dancing in the sunlight. While such
> low-level machinery is fine, some of our experience indicates that it
> is dramatically easier to use if high-level, symbolic, debugging data
> is used to compute probe locations and variable names/types/locations.
>
> It is also too easy to stress the low-level machinery beyond its
> humble origins, in this case meaning putting probes in all kinds of
> tender spots that go "ouch". The kprobes robustness patches coming in
> are great and will benefit all of our efforts, but it will be awhile
> until the kernel can survive a fuzz/crashme type stress test on that
> subsystem. So expect ongoing effort there.
Fully agreed! The more corner recursion cases I see, the more I think
we'll never fix every potential case. But yeah, it's worth trying to
fix all of those that are reported/anticipated; the more such cases
are covered, the more usable it becomes.
Frederic Weisbecker wrote:
> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
>> +#ifdef CONFIG_EVENT_PROFILE
>> +
>> +/* Kprobe profile handler */
>> +static __kprobes int kprobe_profile_func(struct kprobe *kp,
>> + struct pt_regs *regs)
>> +{
>> + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
>> + struct ftrace_event_call *call =&tp->call;
>> + struct kprobe_trace_entry *entry;
>> + int size, i, pc;
>> + unsigned long irq_flags;
>> +
>> + local_save_flags(irq_flags);
>> + pc = preempt_count();
>> +
>> + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
>
>
>
> Note that the end-result must be u64 aligned for perf ring buffer.
> And this is a bit tricky.
> What is inserted in the perf ring buffer is:
>
> raw_trace + (u32)raw_trace_size
>
> So we must ensure that sizeof(raw_trace) + sizeof(u32)
> is well u64 aligned.
>
> We don't insert the trace_size ourself though, this is done
> from kernel/perf_counter.c
>
> But we need to handle the size of the size (sorry) in the final
> alignment.
> To sum-up: sizeof(raw_trace) doesn't need (shouldn't) to be u64
> aligned but sizeof(raw_trace) + sizeof(u32) must be.
>
> Given this aligned size, we then substract it by sizeof(u32)
> to have the needed size of the raw entry.
>
> This result gives you the size of char raw_data[], which
> is also the same size passed in perf_tpcounter_event().
>
> See?
Ah, I see. So the size passed to perf_tpcounter_event must be
'(a multiple of sizeof(u64)) - sizeof(u32)', right?
(Hmm, why wouldn't perf_counter align the data by itself? :)
>
> That's why we have this in trace/ftrace.h:
>
> __data_size = "the real entry data size"
> __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), sizeof(u64));
> __entry_size -= sizeof(u32);
>
> do {
> char raw_data[__entry_size];
> ...
> perf_tpcounter_event(event_call->id, __addr, __count, entry,
> __entry_size);
> ...
> } while (0);
Ok, I'll do that.
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Fri, 2009-09-11 at 15:06 -0400, Christoph Hellwig wrote:
> On Fri, Sep 11, 2009 at 03:03:35PM -0400, Frank Ch. Eigler wrote:
> > Frederic Weisbecker <[email protected]> writes:
> >
> > > [...] I'm really looking forward seeing this C expression-like
> > > kprobe creation tool. It seems powerful enough to replace printk +
> > > kernel rebuild. No need anymore to write some printk to debug,
> > > worrying, [...]
> >
> > To a large extent, systemtap had delivered this already some years
> > ago, including the cushy ponies dancing in the sunlight. While such
> > low-level machinery is fine, some of our experience indicates that it
> > is dramatically easier to use if high-level, symbolic, debugging data
> > is used to compute probe locations and variable names/types/locations.
>
> No, systemtap has been for years failing to delivers this in a way that
> it could be usefully integrated into the kernel.
You are saying "No" to a claim Frank didn't even make.
> Masami's patches are
> exactly the kind of low-level functionality we absolutely need in the
> kernel tree so that we can built more useful higherlevel tools ontop
> of this.
And nobody is denying that either. I think everybody agrees that Masami
is doing some really wonderful work and improving the kprobes
foundations in a way that any higher-level tracing tool will benefit
from.
Cheers,
Mark
On Fri, Sep 11, 2009 at 09:50:13PM +0200, Mark Wielaard wrote:
> On Fri, 2009-09-11 at 15:06 -0400, Christoph Hellwig wrote:
> > On Fri, Sep 11, 2009 at 03:03:35PM -0400, Frank Ch. Eigler wrote:
> > > Frederic Weisbecker <[email protected]> writes:
> > >
> > > > [...] I'm really looking forward seeing this C expression-like
> > > > kprobe creation tool. It seems powerful enough to replace printk +
> > > > kernel rebuild. No need anymore to write some printk to debug,
> > > > worrying, [...]
> > >
> > > To a large extent, systemtap had delivered this already some years
> > > ago, including the cushy ponies dancing in the sunlight. While such
> > > low-level machinery is fine, some of our experience indicates that it
> > > is dramatically easier to use if high-level, symbolic, debugging data
> > > is used to compute probe locations and variable names/types/locations.
> >
> > No, systemtap has been for years failing to delivers this in a way that
> > it could be usefully integrated into the kernel.
>
> You are saying "No" to a claim Frank didn't even make.
He said systemtap had delivered it, which is not the case. It has
implemented it, but not actually delivered it in a useful way.
Frederic Weisbecker wrote:
>
> I've tested the above example and it works very well,
> so I've applied this set, and the previous pending patches
> in:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git \
> tracing/kprobes
I hit a bug on that tree when I did Step.1
> (Step.1) Define new events under new group
>
> $ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
> > /debug/tracing/kprobes_events
> $ echo r:mygroup/myretprobe do_sys_open rv >> /debug/tracing/kprobes_events
> $ cat /debug/tracing/kprobes_events
> p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
> r:myretprobe do_sys_open+0 rv=rv
It seems that you forgot to pull PATCH 7/7 of my previous series.
And also I've found that my probe registration order is buggy.
I'll fix that.
Thank you,
----
Could not create debugfs 'mygroup/myprobe' directory
Failed to register kprobe event: mygroup/myprobe
Faild to register probe event(-1)
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<c0537085>] list_del+0x9/0x60
*pdpt = 000000001f1ed001 *pde = 000000001f1da067 *pte = 0000000000000000
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/virtual/tty/tty9/uevent
Modules linked in: sunrpc uinput virtio_balloon virtio_net i2c_piix4 pcspkr i2c_
core virtio_blk virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan]
Pid: 1372, comm: tee Not tainted (2.6.31-rc9 #49)
EIP: 0060:[<c0537085>] EFLAGS: 00210246 CPU: 0
EIP is at list_del+0x9/0x60
EAX: 00000000 EBX: d8d2166c ECX: ccd2165c EDX: 00000000
ESI: d8d21600 EDI: dedecc40 EBP: d8dc9eac ESP: d8dc9ea8
DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process tee (pid: 1372, ti=d8dc8000 task=d8da57f0 task.ti=d8dc8000)
Stack:
d8d2166c d8dc9ebc c047f1db d8d2166c d8d21600 d8dc9ec8 c047f27e d8d21600
<0> d8dc9ed4 c04823bc d8d21600 d8dc9f4c c048302b 0000000c 00000000 00000002
<0> dedecba0 00000001 decbabe8 c044a1b3 d993e114 d993e000 00000001 d993e110
Call Trace:
[<c047f1db>] ? __trace_remove_event_call+0x29/0xb5
[<c047f27e>] ? trace_remove_event_call+0x17/0x24
[<c04823bc>] ? unregister_trace_probe+0xe/0x1f
[<c048302b>] ? command_trace_probe+0x320/0x466
[<c044a1b3>] ? remove_wait_queue+0x22/0x27
[<c042a9c0>] ? __wake_up+0x32/0x3b
[<c0483245>] ? probes_write+0xd4/0x10b
[<c0483171>] ? probes_write+0x0/0x10b
[<c04b26f9>] ? vfs_write+0x80/0xdf
[<c04b27ec>] ? sys_write+0x3b/0x5d
[<c0670c91>] ? syscall_call+0x7/0xb
Code: 5d c3 55 89 e5 56 53 89 c3 89 d0 e8 4a ff ff ff 89 c6 89 d8 e8 41 ff ff ff 5b 01 f0 5e 5d c3 90 90 90 55 89 e5 53 89 c3 8b 40 04 <8b> 00 39 d8 74 16 50 53 68 9f fa 75
c0 6a 30 68 d9 fa 75 c0 e8
EIP: [<c0537085>] list_del+0x9/0x60 SS:ESP 0068:d8dc9ea8
CR2: 0000000000000000
---[ end trace 2adc5d6dc10dde5a ]---
Kernel panic - not syncing: Fatal exception
Pid: 1372, comm: tee Tainted: G D 2.6.31-rc9 #49
Call Trace:
[<c066ee91>] ? printk+0xf/0x16
[<c066ede0>] panic+0x39/0xdb
[<c0671bcd>] oops_end+0x91/0xa0
[<c04222e1>] no_context+0x13c/0x146
[<c0422405>] __bad_area_nosemaphore+0x11a/0x122
[<c043d4a1>] ? irq_exit+0x34/0x57
[<c041a84a>] ? smp_apic_timer_interrupt+0x68/0x76
[<c06710aa>] ? apic_timer_interrupt+0x2a/0x30
[<c0420dd4>] ? kvm_mmu_write+0x5a/0x62
[<c0422450>] __bad_area+0x33/0x39
[<c0422463>] bad_area+0xd/0x10
[<c0672e8a>] do_page_fault+0x1a3/0x2a5
[<c0672ce7>] ? do_page_fault+0x0/0x2a5
[<c06712de>] error_code+0x66/0x6c
[<c0672ce7>] ? do_page_fault+0x0/0x2a5
[<c0537085>] ? list_del+0x9/0x60
[<c047f1db>] __trace_remove_event_call+0x29/0xb5
[<c047f27e>] trace_remove_event_call+0x17/0x24
[<c04823bc>] unregister_trace_probe+0xe/0x1f
[<c048302b>] command_trace_probe+0x320/0x466
[<c044a1b3>] ? remove_wait_queue+0x22/0x27
[<c042a9c0>] ? __wake_up+0x32/0x3b
[<c0483245>] probes_write+0xd4/0x10b
[<c0483171>] ? probes_write+0x0/0x10b
[<c04b26f9>] vfs_write+0x80/0xdf
[<c04b27ec>] sys_write+0x3b/0x5d
[<c0670c91>] syscall_call+0x7/0xb
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Christoph Hellwig wrote:
> On Fri, Sep 11, 2009 at 09:50:13PM +0200, Mark Wielaard wrote:
>> On Fri, 2009-09-11 at 15:06 -0400, Christoph Hellwig wrote:
>>> On Fri, Sep 11, 2009 at 03:03:35PM -0400, Frank Ch. Eigler wrote:
>>>> Frederic Weisbecker<[email protected]> writes:
>>>>
>>>>> [...] I'm really looking forward seeing this C expression-like
>>>>> kprobe creation tool. It seems powerful enough to replace printk +
>>>>> kernel rebuild. No need anymore to write some printk to debug,
>>>>> worrying, [...]
>>>>
>>>> To a large extent, systemtap had delivered this already some years
>>>> ago, including the cushy ponies dancing in the sunlight. While such
>>>> low-level machinery is fine, some of our experience indicates that it
>>>> is dramatically easier to use if high-level, symbolic, debugging data
>>>> is used to compute probe locations and variable names/types/locations.
>>>
>>> No, systemtap has been for years failing to delivers this in a way that
>>> it could be usefully integrated into the kernel.
>>
>> You are saying "No" to a claim Frank didn't even make.
>
> He said systemtap had delivered it, which is not the case. It has
> implemented it, but not actually delivered it in a useful way.
I assume he meant that systemtap had delivered this to end users,
and that is certainly true. Systemtap has been released and distributed
with many distributions, and I know there are many systemtap users nowadays.
However, indeed, it was not delivered in a way that was very useful for kernel developers.
Systemtap developers have also recognized this problem, and we are trying
to fix it to build a good relationship with the upstream kernel.
I'd like to make my work useful not only for the upstream kernel and ftrace,
but also for systemtap. Through this enhancement, the knowledge and
experience of systemtap will be transferred upstream. I think
systemtap can also use it by re-implementing itself on top of the in-kernel
tracing functions. I believe we can collaborate on this work, at least
on stabilizing the tracing functions.
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
...
> Is it possible to have two kprobes having the exact same
> properties? (pointing to the same address, having the same
> probe handlers, etc...)
Yes, this is possible with two *different* kprobes. However, we have a bug
with the current code where there is insufficient scaffolding to prevent
re-registration of the same kprobe. Here is a patch...
---
Prevent re-registration of the same kprobe. Current code allows this,
albeit with disastrous consequences. It's not a common case, but it should
be flagged nonetheless.
Signed-off-by: Ananth N Mavinakayanahalli <[email protected]>
---
kernel/kprobes.c | 3 +++
1 file changed, 3 insertions(+)
Index: ptrace-10sep/kernel/kprobes.c
===================================================================
--- ptrace-10sep.orig/kernel/kprobes.c
+++ ptrace-10sep/kernel/kprobes.c
@@ -589,6 +589,9 @@ static int __kprobes register_aggr_kprob
int ret = 0;
struct kprobe *ap = old_p;
+ if (old_p == p)
+ /* Attempt to re-register the same kprobe.. fail */
+ return -EINVAL;
if (old_p->pre_handler != aggr_pre_handler) {
/* If old_p is not an aggr_probe, create new aggr_kprobe. */
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
Ananth N Mavinakayanahalli wrote:
> On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
>> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
>
> ...
>
>> Is it possible to have two kprobes having the exact same
>> properties? (pointing to the same address, having the same
>> probe handlers, etc...)
>
> Yes, this is possible with two *different* kprobes. However, we have a bug
> with the current code where there is insufficient scaffolding to prevent
> re-registration of the same kprobe. Here is a patch...
Indeed, that is a bug, or an unstated part of the spec. I didn't expect that
a user would register the same kprobe twice.
> ---
> Prevent re-registration of the same kprobe. Current code allows this,
> albeit with disastrous consequences. Its not a common case, but should
> be flagged nonetheless.
>
> Signed-off-by: Ananth N Mavinakayanahalli <[email protected]>
> ---
> kernel/kprobes.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> Index: ptrace-10sep/kernel/kprobes.c
> ===================================================================
> --- ptrace-10sep.orig/kernel/kprobes.c
> +++ ptrace-10sep/kernel/kprobes.c
> @@ -589,6 +589,9 @@ static int __kprobes register_aggr_kprob
> int ret = 0;
> struct kprobe *ap = old_p;
>
> + if (old_p == p)
> + /* Attempt to re-register the same kprobe.. fail */
> + return -EINVAL;
> if (old_p->pre_handler != aggr_pre_handler) {
> /* If old_p is not an aggr_probe, create new aggr_kprobe. */
> ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
Hmm, if we catch the second registration here, it's too late. In
register_kprobe(), we initialize some fields of the kprobe before calling
register_aggr_kprobe(); in particular, kprobe.list is cleared.
And this can't catch the case where 'p' is already registered on
an aggr kprobe.
Thus, I think some initialization paths should be changed (the kp.flag
field is initialized too early, yeah, that's my mistake),
and also, you will need to use get_valid_kprobe() to check that the kprobe
has not already been registered.
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Fri, Sep 11, 2009 at 05:48:24PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>>
>> I've tested the above example and it works very well,
>> so I've applied this set, and the previous pending patches
>> in:
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing.git \
>> tracing/kprobes
>
> I had a bug on that tree when I did Step.1
>
> > (Step.1) Define new events under new group
> >
> > $ echo p:mygroup/myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 \
> > > /debug/tracing/kprobes_events
> > $ echo r:mygroup/myretprobe do_sys_open rv >> /debug/tracing/kprobes_events
> > $ cat /debug/tracing/kprobes_events
> > p:myprobe do_sys_open+0 dfd=a0 filename=a1 flags=a2 mode=a3
> > r:myretprobe do_sys_open+0 rv=rv
>
> It seems that you forget to pull PATCH 7/7 of my previous series.
> And also I've found my probe registration order is buggy.
> I'll fix that.
>
> Thank you,
Sorry :-s
Applied, thanks.
On Fri, Sep 11, 2009 at 12:03:30PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>> May be another step in the todo-list that would be nice: define the format
>> for a type. Like it's done from ftrace events.
>
> Thanks!
>
> BTW, I'm not sure what the type means. Each event already has its own
> event ID and event_call. Could you tell me which part of ftrace I should
> refer to ?
>
Actually I meant the format for a field.
Say you define filename=arg1, it would be nice to have
print "%s", filename
in the format file.
Hmm, now that I think about it, we can't dereference an array...for now :-)
>> I guess we should choose between the low level, very granular
>> but uninviting method "kprobe + record + trace" and also an all
>> in one quick approach.
>>
>> And that could be chosen from perf kprobe:
>>
>> Low level:
>>
>> perf kprobe --define-only [-p|-r] [probe_name] -a1 [arg1] -a2 [arg2] \
>> --format="%s %...."
>>
>> perf record -e kprobes:probe_name
>> perf trace
>>
>> Quick:
>>
>> perf kprobe -p probe_name -a1 ..... cmdline| -a
>>
>> And after the profiled task is finished, it could launch perf trace
>> by itself (or wait for a Ctrl + C if -a/wide profiling)
>
> Another thought: expand record subcommand.
>
> perf record -E "p|r:probe_name,place,arg1,arg2..."
> perf trace
>
> And kprobe accept multiple definitions
>
> perf kprobe -E "p|r:probe_name,place,arg1,arg2..." -E ...
Well, perf record could support multiple definitions
too...
On Fri, Sep 11, 2009 at 12:22:16PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>>> +static int probe_profile_enable(struct ftrace_event_call *call)
>>> +{
>>> + struct trace_probe *tp = (struct trace_probe *)call->data;
>>> +
>>> + if (atomic_inc_return(&call->profile_count))
>>> + return 0;
>>> +
>>> + if (probe_is_return(tp)) {
>>> + tp->rp.handler = kretprobe_profile_func;
>>> + return enable_kretprobe(&tp->rp);
>>> + } else {
>>> + tp->rp.kp.pre_handler = kprobe_profile_func;
>>> + return enable_kprobe(&tp->rp.kp);
>>> + }
>>> +}
>>
>>
>>
>> May be I misunderstood but it seems that concurrent uses of
>> ftrace and perf would really mess up the result, as one would
>> overwrite the handler of the other.
>>
>> Even though it's hard to imagine one using both at the same
>> time on the same probe, but still...
>
> Oops, it's my misunderstanding. I thought those are exclusively
> enabled each other.
It's automatically managed for trace events because ftrace
and perf have their individual tracepoint probes, since
tracepoints support multiple probes.
>> Is it possible to have two kprobes having the exact same
>> properties? (pointing to the same address, having the same
>> probe handlers, etc...)
>>
>> Another solution would be to allow kprobes to have multiple
>> handlers.
>
> It could be to have multiple kprobes on same point, but I think
> it's waste of the memory and time in this case.
Yeah.
>
> I'd like to have a dispatcher function and flags internally :)
You mean kprobes that could support multiple probes?
That would be a nice solution IMHO...
On Fri, Sep 11, 2009 at 03:30:24PM -0400, Masami Hiramatsu wrote:
>> Note that the end-result must be u64 aligned for perf ring buffer.
>> And this is a bit tricky.
>> What is inserted in the perf ring buffer is:
>>
>> raw_trace + (u32)raw_trace_size
>>
>> So we must ensure that sizeof(raw_trace) + sizeof(u32)
>> is well u64 aligned.
>>
>> We don't insert the trace_size ourself though, this is done
>> from kernel/perf_counter.c
>>
>> But we need to handle the size of the size (sorry) in the final
>> alignment.
>> To sum-up: sizeof(raw_trace) doesn't need (shouldn't) to be u64
>> aligned but sizeof(raw_trace) + sizeof(u32) must be.
>>
>> Given this aligned size, we then substract it by sizeof(u32)
>> to have the needed size of the raw entry.
>>
>> This result gives you the size of char raw_data[], which
>> is also the same size passed in perf_tpcounter_event().
>>
>> See?
>
> Ah, I see. So the size to write to perf_tpcounter_event must be
> '(a multiple number of sizeof(u64)) - sizeof(u32)', right?
Exactly.
To simplify, I guess the raw events just need to be u32 aligned :)
> (Hmm, why would not perf_counter align data by itself? :)
Because that would require it to copy the data into a separate
u64 aligned buffer.
>>
>> That's why we have this in trace/ftrace.h:
>>
>> __data_size = "the real entry data size"
>> __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), sizeof(u64));
>> __entry_size -= sizeof(u32);
>>
>> do {
>> char raw_data[__entry_size];
>> ...
>> perf_tpcounter_event(event_call->id, __addr, __count, entry,
>> __entry_size);
>> ...
>> } while (0);
>
> Ok, I'll do that.
Thanks!
On Sun, Sep 13, 2009 at 09:47:39PM -0400, Masami Hiramatsu wrote:
> Ananth N Mavinakayanahalli wrote:
> > On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
> >> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
...
> Hmm, if we catch the second registration here, it's too late. At
> register_kprobe(), we initialized some field of kprobe before calling
> register_aggr_kprobe(). Especially kprobe.list is cleared.
>
> And this can't check the case that 'p' is already registered on
> an aggr kprobe.
Agreed. I thought of this case after sending out the earlier patch...
> Thus, I think some initialization paths should be changed (kp.flag
> field is initialized too early, yearh, that's my mistake),
> and also, you will need to use get_valid_kprobe() to check the kprobe
> has not been registered.
__get_valid_kprobe() makes the task easy. We should just prevent
re-registration whether or not the earlier probe is disabled.
How does this patch look?
---
Prevent re-registration of the same kprobe. This situation, though
unlikely, needs to be flagged since it can lead to a system crash if it's
not handled.
The core change itself is small, but the helper routine needed to be
moved around a bit; hence the diffstat.
Signed-off-by: Ananth N Mavinakayanahalli <[email protected]>
---
kernel/kprobes.c | 58 ++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 20 deletions(-)
Index: linux-2.6.31/kernel/kprobes.c
===================================================================
--- linux-2.6.31.orig/kernel/kprobes.c
+++ linux-2.6.31/kernel/kprobes.c
@@ -681,6 +681,40 @@ static kprobe_opcode_t __kprobes *kprobe
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+{
+ struct kprobe *old_p, *list_p;
+
+ old_p = get_kprobe(p->addr);
+ if (unlikely(!old_p))
+ return NULL;
+
+ if (p != old_p) {
+ list_for_each_entry_rcu(list_p, &old_p->list, list)
+ if (list_p == p)
+ /* kprobe p is a valid probe */
+ goto valid;
+ return NULL;
+ }
+valid:
+ return old_p;
+}
+
+/* Return error if the kprobe is being re-registered */
+static inline int check_kprobe_rereg(struct kprobe *p)
+{
+ int ret = 0;
+ struct kprobe *old_p;
+
+ mutex_lock(&kprobe_mutex);
+ old_p = __get_valid_kprobe(p);
+ if (old_p == p)
+ ret = -EINVAL;
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
@@ -693,6 +727,10 @@ int __kprobes register_kprobe(struct kpr
return -EINVAL;
p->addr = addr;
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
@@ -762,26 +800,6 @@ out:
}
EXPORT_SYMBOL_GPL(register_kprobe);
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
-{
- struct kprobe *old_p, *list_p;
-
- old_p = get_kprobe(p->addr);
- if (unlikely(!old_p))
- return NULL;
-
- if (p != old_p) {
- list_for_each_entry_rcu(list_p, &old_p->list, list)
- if (list_p == p)
- /* kprobe p is a valid probe */
- goto valid;
- return NULL;
- }
-valid:
- return old_p;
-}
-
/*
* Unregister a kprobe without a scheduler synchronization.
*/
Ananth N Mavinakayanahalli wrote:
> On Sun, Sep 13, 2009 at 09:47:39PM -0400, Masami Hiramatsu wrote:
>> Ananth N Mavinakayanahalli wrote:
>>> On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
>>>> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
>
> ...
>
>> Hmm, if we catch the second registration here, it's too late. At
>> register_kprobe(), we initialized some field of kprobe before calling
>> register_aggr_kprobe(). Especially kprobe.list is cleared.
>>
>> And this can't check the case that 'p' is already registered on
>> an aggr kprobe.
>
> Agreed. I thought of this case after sending out the earlier patch...
>
>> Thus, I think some initialization paths should be changed (kp.flag
>> field is initialized too early, yearh, that's my mistake),
>> and also, you will need to use get_valid_kprobe() to check the kprobe
>> has not been registered.
>
> __get_valid_kprobe() makes the task easy. We should just prevent
> re-registration whether or not the earlier probe is disabled.
>
> How does this patch look?
>
> ---
> Prevent re-registration of the same kprobe. This situation, though
> unlikely, needs to be flagged since it can lead to a system crash if its
> not handled.
>
> The core change itself is small, but the helper routine needed to be
> moved around a bit; hence the diffstat.
>
> Signed-off-by: Ananth N Mavinakayanahalli<[email protected]>
> ---
> kernel/kprobes.c | 58 ++++++++++++++++++++++++++++++++++++-------------------
> 1 file changed, 38 insertions(+), 20 deletions(-)
>
> Index: linux-2.6.31/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.31.orig/kernel/kprobes.c
> +++ linux-2.6.31/kernel/kprobes.c
> @@ -681,6 +681,40 @@ static kprobe_opcode_t __kprobes *kprobe
> return (kprobe_opcode_t *)(((char *)addr) + p->offset);
> }
>
> +/* Check passed kprobe is valid and return kprobe in kprobe_table. */
> +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
> +{
> + struct kprobe *old_p, *list_p;
> +
> + old_p = get_kprobe(p->addr);
> + if (unlikely(!old_p))
> + return NULL;
> +
> + if (p != old_p) {
> + list_for_each_entry_rcu(list_p,&old_p->list, list)
> + if (list_p == p)
> + /* kprobe p is a valid probe */
> + goto valid;
> + return NULL;
> + }
> +valid:
> + return old_p;
> +}
> +
> +/* Return error if the kprobe is being re-registered */
> +static inline int check_kprobe_rereg(struct kprobe *p)
> +{
> + int ret = 0;
> + struct kprobe *old_p;
> +
> + mutex_lock(&kprobe_mutex);
> + old_p = __get_valid_kprobe(p);
> + if (old_p == p)
Here, since __get_valid_kprobe(p) will return the aggr_kprobe of 'p',
you just need to check old_p != NULL.
Other parts are OK for me.
Thank you!
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Frederic Weisbecker wrote:
> On Fri, Sep 11, 2009 at 12:22:16PM -0400, Masami Hiramatsu wrote:
>> Frederic Weisbecker wrote:
>>>> +static int probe_profile_enable(struct ftrace_event_call *call)
>>>> +{
>>>> + struct trace_probe *tp = (struct trace_probe *)call->data;
>>>> +
>>>> + if (atomic_inc_return(&call->profile_count))
>>>> + return 0;
>>>> +
>>>> + if (probe_is_return(tp)) {
>>>> + tp->rp.handler = kretprobe_profile_func;
>>>> + return enable_kretprobe(&tp->rp);
>>>> + } else {
>>>> + tp->rp.kp.pre_handler = kprobe_profile_func;
>>>> + return enable_kprobe(&tp->rp.kp);
>>>> + }
>>>> +}
>>>
>>>
>>>
>>> May be I misunderstood but it seems that concurrent uses of
>>> ftrace and perf would really mess up the result, as one would
>>> overwrite the handler of the other.
>>>
>>> Even though it's hard to imagine one using both at the same
>>> time on the same probe, but still...
>>
>> Oops, it's my misunderstanding. I thought those are exclusively
>> enabled each other.
>
>
> It's automatically managed with events because ftrace and
> and perf have their individual tracepoint probes, because
> tracepoints support multiple probes.
>
>
>>> Is it possible to have two kprobes having the exact same
>>> properties? (pointing to the same address, having the same
>>> probe handlers, etc...)
>>>
>>> Another solution would be to allow kprobes to have multiple
>>> handlers.
>>
>> It could be to have multiple kprobes on same point, but I think
>> it's waste of the memory and time in this case.
>
>
> Yeah.
>
>
>>
>> I'd like to have a dispatcher function and flags internally :)
>
>
> You mean kprobes that could support multiple probes?
> That would be a nice solution IMHO...
Yeah, actually kprobes can support multiple probes on the
same point. But the kprobe structure has many extensions which
kprobe-tracer doesn't need, e.g. post_handler/break_handler, the
opcode, and arch-specific instructions.
Kretprobes consume even more memory for storing return points :(.
Thus, if we know there are two functions to be called on the
same probe point, I think it is better to have a dispatcher.
(Especially since, in this case, we call fixed functions, so
it's the easier way.)
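As a rough sketch of that idea (purely illustrative; the flag names and the kprobe_trace_func/kprobe_profile_func pairing are assumptions, not code from this series), such a dispatcher might look like:

	/* Hypothetical dispatcher: one kprobe per trace_probe whose pre_handler
	 * fans out to the ftrace and/or perf handlers according to which users
	 * are currently enabled. */
	#define TP_FLAG_TRACE	1
	#define TP_FLAG_PROFILE	2

	static __kprobes int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
	{
		struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

		if (tp->flags & TP_FLAG_TRACE)
			kprobe_trace_func(kp, regs);	/* ftrace ring buffer */
		if (tp->flags & TP_FLAG_PROFILE)
			kprobe_profile_func(kp, regs);	/* perf */
		return 0;	/* never modifies the execution path */
	}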
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Frederic Weisbecker wrote:
> On Fri, Sep 11, 2009 at 12:03:30PM -0400, Masami Hiramatsu wrote:
>> Frederic Weisbecker wrote:
>>> May be another step in the todo-list that would be nice: define the format
>>> for a type. Like it's done from ftrace events.
>>
>> Thanks!
>>
>> BTW, I'm not sure what the type means. Each event already has its own
>> event ID and event_call. Could you tell me which part of ftrace I should
>> refer to ?
>>
>
>
> Actually I meant the format for a field.
> Say you define filename=arg1, it would be nice to have
>
> print "%s", filename
>
> in the format file.
Ah, indeed. It would be better to support 'type' casting for each argument.
I think type casting could be done with a syntax like the one below.
NAME=ARG:TYPE
e.g.
jiffies64=@jiffies64:u64
message=%ax:str
> Hmm, now that I think about it, we can't dereference an array...for now :-)
:-)
BTW, currently an entry of an array can be shown, e.g. +10(sa).
Hmm, for more complex dereferences (e.g. accessing a->b[a->c]), it might need
another dereferencing syntax (e.g. "sa[16][sa[8]]"), or we could
just allow nested parentheses (e.g. "+(+8(sa))(+16(sa))").
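Purely as an illustration (hypothetical, since the current parser does not accept types), a definition using the proposed NAME=ARG:TYPE syntax might look like:

$ echo p:mygroup/myopen do_sys_open fname=a1:str mode=a3:u16 \
> /debug/tracing/kprobes_events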
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Mon, Sep 14, 2009 at 12:54:24PM -0400, Masami Hiramatsu wrote:
>>> I'd like to have a dispatcher function and flags internally :)
>>
>>
>> You mean kprobes that could support multiple probes?
>> That would be a nice solution IMHO...
>
> Yeah, actually kprobes could support multiple probes on the
> same point. But kprobe structure has many extensions which
> kprobe-tracer doesn't need, e.g. post_handler/break_handler,
> opcode, arch sprcific instructions.
> Kretprobe consumes more memories for storing return points :(.
>
> Thus, if we know there are two functions to be called on the
> same probe point, I think it is better to have a dispatcher.
> (Especially, in this case, we can call fixed functions, so
> it's easier way.)
>
Yeah, you could union the post_handler with the profile_handler
or something like that.
It depends whether kprobes may one day need to support an indeterminate
number of probes.
Also, is the post_handler called at the same location as the normal
probe?
And is a post handler called even if there is no normal handler?
There might be some such factors that would force you to handle a
lot of corner cases, things that you wouldn't need to worry about
if you just had to maintain a simple rcu list of probes to call.
Frederic Weisbecker wrote:
> On Mon, Sep 14, 2009 at 12:54:24PM -0400, Masami Hiramatsu wrote:
>>>> I'd like to have a dispatcher function and flags internally :)
>>>
>>>
>>> You mean kprobes that could support multiple probes?
>>> That would be a nice solution IMHO...
>>
>> Yeah, actually kprobes could support multiple probes on the
>> same point. But kprobe structure has many extensions which
>> kprobe-tracer doesn't need, e.g. post_handler/break_handler,
>> opcode, arch sprcific instructions.
>> Kretprobe consumes more memories for storing return points :(.
>>
>> Thus, if we know there are two functions to be called on the
>> same probe point, I think it is better to have a dispatcher.
>> (Especially, in this case, we can call fixed functions, so
>> it's easier way.)
>
>
> Yeah, you could union the post_handler with profile_handler
> or something like that.
No, you can't do that, because kprobes calls post_handler if
it is not NULL.
>
> It depends if kprobes may need one day to support an undeterminate
> number of probes.
Kprobes itself already supports multiple kprobes on the same
address. I meant that we don't need to have multiple kprobes for
the same kprobe-tracer event. Even when introducing a dispatcher,
kprobe-tracer can support multiple trace events at the same location.
> Also, is the post_handler called at the same location than the normal
> probe?
No, post_handler is called after single-stepping.
> And is a post handler called even if there is no normal handler?
Yes, it is.
Hmm, I assume I have been talking about the kprobes infrastructure, and you
have been talking about kprobe-tracer? :)
> There might be some of such factors that would force you to handle a
> lot of corner cases, things that you wouldn't need to worry about
> if you just had to maintain a simple rcu list of probes to call.
Anyway, I have never seen anyone using post_handler :). I'm not sure why
it is needed...
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Mon, Sep 14, 2009 at 01:16:13PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>> On Fri, Sep 11, 2009 at 12:03:30PM -0400, Masami Hiramatsu wrote:
>>> Frederic Weisbecker wrote:
>>>> May be another step in the todo-list that would be nice: define the format
>>>> for a type. Like it's done from ftrace events.
>>>
>>> Thanks!
>>>
>>> BTW, I'm not sure what the type means. Each event already has its own
>>> event ID and event_call. Could you tell me which part of ftrace I should
>>> refer to ?
>>>
>>
>>
>> Actually I meant the format for a field.
>> Say you define filename=arg1, it would be nice to have
>>
>> print "%s", filename
>>
>> in the format file.
>
> Ah, indeed. It is better to support 'type' casting for each argument.
> I think type casting can be done as below syntax.
>
> NAME=ARG:TYPE
> e.g.
> jiffies64=@jiffies64:u64
> message=%ax:str
>
Yeah looks good!
>> Hmm, now that I think about it, we can't dereference an array...for now :-)
>
> :-)
> BTW, currently, an entry of the array can be shown, e.g. +10(sa).
> Hmm, for more complex dereference(e.g. accessing a->b[a->c]), it might need
> another dereferencing syntax(e.g. "sa[16][sa[8]]"), or
> just allow to use braces(e.g. "+(+8(sa))(+16(sa))").
Well, that may be too much complexity.
I guess if we want multi-level dereferences, say you want a->b->c,
it should be sufficient to probe the point where b->c gets its
value (if any).
But it would be nice to fetch a range: sa[begin:end]
Or at least just give the length of the range.
On Mon, Sep 14, 2009 at 03:36:33PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>> On Mon, Sep 14, 2009 at 12:54:24PM -0400, Masami Hiramatsu wrote:
>>>>> I'd like to have a dispatcher function and flags internally :)
>>>>
>>>>
>>>> You mean kprobes that could support multiple probes?
>>>> That would be a nice solution IMHO...
>>>
>>> Yeah, actually kprobes could support multiple probes on the
>>> same point. But kprobe structure has many extensions which
>>> kprobe-tracer doesn't need, e.g. post_handler/break_handler,
>>> opcode, arch sprcific instructions.
>>> Kretprobe consumes more memories for storing return points :(.
>>>
>>> Thus, if we know there are two functions to be called on the
>>> same probe point, I think it is better to have a dispatcher.
>>> (Especially, in this case, we can call fixed functions, so
>>> it's easier way.)
>>
>>
>> Yeah, you could union the post_handler with profile_handler
>> or something like that.
>
> No, you can't do that, because kprobes calls post_handler if
> it is not NULL.
Yeah, I meant having the normal probe call the ftrace handler
and the post handler call the perf handler.
But well, that doesn't look that sane (your dispatch idea looks fine).
>>
>> It depends if kprobes may need one day to support an undeterminate
>> number of probes.
>
> Kprobes itself is supporting those multiple kprobes on the same
> address. I meant that we don't need to have multiple kprobes on
> the same "kprobe-tracer's event". Even if introducing a dispatcher,
> kprobe-tracer can support multiple trace-event at the same location.
Yeah, I didn't mean several kprobes on the same address, but rather
a single kprobe instance with multiple probe callbacks.
But your dispatch patch (sorry) does that in essence, though
specialized to the ftrace/perf pair; that's fine since nothing
else may need multiplexed probes.
>> Also, is the post_handler called at the same location than the normal
>> probe?
>
> No, post_handler is called after single-stepping.
Ok.
>
>> And is a post handler called even if there is no normal handler?
>
> Yes, it is.
>
> Hmm, I assume I have told about kprobes infrastructure, and have you
> told about kprobe-tracer?:)
No I was talking about kprobes in general :)
>> There might be some of such factors that would force you to handle a
>> lot of corner cases, things that you wouldn't need to worry about
>> if you just had to maintain a simple rcu list of probes to call.
>
>
> Anyway, I never see who are using post_handler:). I'm not sure why
> it is needed...
Maybe it's a legacy of code that was used in the past, in which case
it could perhaps be cleaned up?
On Mon, Sep 14, 2009 at 05:10:33PM -0400, Masami Hiramatsu wrote:
> Frederic Weisbecker wrote:
>> On Mon, Sep 14, 2009 at 01:16:13PM -0400, Masami Hiramatsu wrote:
>>> Frederic Weisbecker wrote:
>>>> On Fri, Sep 11, 2009 at 12:03:30PM -0400, Masami Hiramatsu wrote:
>>>>> Frederic Weisbecker wrote:
>>>>>> May be another step in the todo-list that would be nice: define the format
>>>>>> for a type. Like it's done from ftrace events.
>>>>>
>>>>> Thanks!
>>>>>
>>>>> BTW, I'm not sure what the type means. Each event already has its own
>>>>> event ID and event_call. Could you tell me which part of ftrace I should
>>>>> refer to ?
>>>>>
>>>>
>>>>
>>>> Actually I meant the format for a field.
>>>> Say you define filename=arg1, it would be nice to have
>>>>
>>>> print "%s", filename
>>>>
>>>> in the format file.
>>>
>>> Ah, indeed. It is better to support 'type' casting for each argument.
>>> I think type casting can be done as below syntax.
>>>
>>> NAME=ARG:TYPE
>>> e.g.
>>> jiffies64=@jiffies64:u64
>>> message=%ax:str
>>>
>>
>>
>> Yeah looks good!
>>
>>
>>
>>>> Hmm, now that I think about it, we can't dereference an array...for now :-)
>>>
>>> :-)
>>> BTW, currently, an entry of the array can be shown, e.g. +10(sa).
>>> Hmm, for more complex dereference(e.g. accessing a->b[a->c]), it might need
>>> another dereferencing syntax(e.g. "sa[16][sa[8]]"), or
>>> just allow to use braces(e.g. "+(+8(sa))(+16(sa))").
>>
>>
>> Well, that may be too much complexity.
>> I guess if we want multi level dereference, say you want a->b->c
>> it should be sufficient to probe the point where b->c gets it's
>> value (if any).
>>
>> But it would be nice to fetch a range: sa[begin:end]
>> Or at least just giving the length of the range.
>
> Hmm, I think it is better to treat the range as a type, because
> we don't know how to express each entry without the type.
>
> NAME=ARG:TYPE[LEN]
> e.g.
> regs=sa:u64[16]
>
Yeah indeed, and that fits well with the NAME=ARG:TYPE logic
that you have suggested.
Frederic Weisbecker wrote:
> On Mon, Sep 14, 2009 at 01:16:13PM -0400, Masami Hiramatsu wrote:
>> Frederic Weisbecker wrote:
>>> On Fri, Sep 11, 2009 at 12:03:30PM -0400, Masami Hiramatsu wrote:
>>>> Frederic Weisbecker wrote:
>>>>> May be another step in the todo-list that would be nice: define the format
>>>>> for a type. Like it's done from ftrace events.
>>>>
>>>> Thanks!
>>>>
>>>> BTW, I'm not sure what the type means. Each event already has its own
>>>> event ID and event_call. Could you tell me which part of ftrace I should
>>>> refer to ?
>>>>
>>>
>>>
>>> Actually I meant the format for a field.
>>> Say you define filename=arg1, it would be nice to have
>>>
>>> print "%s", filename
>>>
>>> in the format file.
>>
>> Ah, indeed. It is better to support 'type' casting for each argument.
>> I think type casting can be done as below syntax.
>>
>> NAME=ARG:TYPE
>> e.g.
>> jiffies64=@jiffies64:u64
>> message=%ax:str
>>
>
>
> Yeah looks good!
>
>
>
>>> Hmm, now that I think about it, we can't dereference an array...for now :-)
>>
>> :-)
>> BTW, currently, an entry of the array can be shown, e.g. +10(sa).
>> Hmm, for more complex dereference(e.g. accessing a->b[a->c]), it might need
>> another dereferencing syntax(e.g. "sa[16][sa[8]]"), or
>> just allow to use braces(e.g. "+(+8(sa))(+16(sa))").
>
>
> Well, that may be too much complexity.
> I guess if we want multi level dereference, say you want a->b->c
> it should be sufficient to probe the point where b->c gets it's
> value (if any).
>
> But it would be nice to fetch a range: sa[begin:end]
> Or at least just giving the length of the range.
Hmm, I think it is better to treat the range as a type, because
we don't know how to express each entry without the type.
NAME=ARG:TYPE[LEN]
e.g.
regs=sa:u64[16]
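Again purely hypothetical, assuming such a TYPE[LEN] extension existed, dumping a block of the stack could then be written as:

$ echo p:mygroup/stackdump do_sys_open stack=sa:u64[16] \
> /debug/tracing/kprobes_events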
Thank you,
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
On Mon, Sep 14, 2009 at 11:07:13PM +0200, Frederic Weisbecker wrote:
> On Mon, Sep 14, 2009 at 03:36:33PM -0400, Masami Hiramatsu wrote:
> > Frederic Weisbecker wrote:
> >> On Mon, Sep 14, 2009 at 12:54:24PM -0400, Masami Hiramatsu wrote:
>
>
> >> There might be some of such factors that would force you to handle a
> >> lot of corner cases, things that you wouldn't need to worry about
> >> if you just had to maintain a simple rcu list of probes to call.
> >
> >
> > Anyway, I never see who are using post_handler:). I'm not sure why
> > it is needed...
>
>
> May be it's a legacy of code that was used by the past, in which case
> that could perhaps be cleaned up?
No, kprobe post_handlers are used in various places, especially for
debug purposes. It is part of the ABI and should be retained.
Ananth
On Mon, Sep 14, 2009 at 12:25:41PM -0400, Masami Hiramatsu wrote:
> Ananth N Mavinakayanahalli wrote:
>> On Sun, Sep 13, 2009 at 09:47:39PM -0400, Masami Hiramatsu wrote:
>>> Ananth N Mavinakayanahalli wrote:
>>>> On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
>>>>> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
...
>> +static inline int check_kprobe_rereg(struct kprobe *p)
>> +{
>> + int ret = 0;
>> + struct kprobe *old_p;
>> +
>> + mutex_lock(&kprobe_mutex);
>> + old_p = __get_valid_kprobe(p);
>> + if (old_p == p)
>
> Here, since __get_valid_kprobe(p) will return aggr_kprobe of 'p',
> you just need to check old_p != NULL.
Right!
---
Prevent re-registration of the same kprobe. This situation, though
unlikely, needs to be flagged since it can lead to a system crash if
it's not handled.
The core change itself is small, but the helper routine needed to be
moved around a bit; hence the diffstat.
Signed-off-by: Ananth N Mavinakayanahalli<[email protected]>
---
kernel/kprobes.c | 58 ++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 20 deletions(-)
Index: linux-2.6.31/kernel/kprobes.c
===================================================================
--- linux-2.6.31.orig/kernel/kprobes.c
+++ linux-2.6.31/kernel/kprobes.c
@@ -681,6 +681,40 @@ static kprobe_opcode_t __kprobes *kprobe
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+{
+ struct kprobe *old_p, *list_p;
+
+ old_p = get_kprobe(p->addr);
+ if (unlikely(!old_p))
+ return NULL;
+
+ if (p != old_p) {
+ list_for_each_entry_rcu(list_p, &old_p->list, list)
+ if (list_p == p)
+ /* kprobe p is a valid probe */
+ goto valid;
+ return NULL;
+ }
+valid:
+ return old_p;
+}
+
+/* Return error if the kprobe is being re-registered */
+static inline int check_kprobe_rereg(struct kprobe *p)
+{
+ int ret = 0;
+ struct kprobe *old_p;
+
+ mutex_lock(&kprobe_mutex);
+ old_p = __get_valid_kprobe(p);
+ if (old_p)
+ ret = -EINVAL;
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
@@ -693,6 +727,10 @@ int __kprobes register_kprobe(struct kpr
return -EINVAL;
p->addr = addr;
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
@@ -762,26 +800,6 @@ out:
}
EXPORT_SYMBOL_GPL(register_kprobe);
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
-{
- struct kprobe *old_p, *list_p;
-
- old_p = get_kprobe(p->addr);
- if (unlikely(!old_p))
- return NULL;
-
- if (p != old_p) {
- list_for_each_entry_rcu(list_p, &old_p->list, list)
- if (list_p == p)
- /* kprobe p is a valid probe */
- goto valid;
- return NULL;
- }
-valid:
- return old_p;
-}
-
/*
* Unregister a kprobe without a scheduler synchronization.
*/
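(Illustration only, not part of the patch: a minimal module sketch of the
case this guards against; demo_kp and the rereg_demo_* names are made up.
With the change, the second register_kprobe() on the same kprobe object
fails cleanly with -EINVAL instead of leading to the crash described
above.)

#include <linux/module.h>
#include <linux/kprobes.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
        return 0;       /* do nothing, just let the probed code continue */
}

static struct kprobe demo_kp = {
        .symbol_name    = "do_sys_open",
        .pre_handler    = demo_pre,
};

static int __init rereg_demo_init(void)
{
        int ret;

        ret = register_kprobe(&demo_kp);        /* first registration: ok */
        if (ret)
                return ret;

        ret = register_kprobe(&demo_kp);        /* same object again: now -EINVAL */
        pr_info("re-registering the same kprobe returned %d\n", ret);
        return 0;
}

static void __exit rereg_demo_exit(void)
{
        unregister_kprobe(&demo_kp);
}

module_init(rereg_demo_init);
module_exit(rereg_demo_exit);
MODULE_LICENSE("GPL");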
Ananth N Mavinakayanahalli wrote:
> On Mon, Sep 14, 2009 at 12:25:41PM -0400, Masami Hiramatsu wrote:
>> Ananth N Mavinakayanahalli wrote:
>>> On Sun, Sep 13, 2009 at 09:47:39PM -0400, Masami Hiramatsu wrote:
>>>> Ananth N Mavinakayanahalli wrote:
>>>>> On Fri, Sep 11, 2009 at 05:12:54AM +0200, Frederic Weisbecker wrote:
>>>>>> On Thu, Sep 10, 2009 at 07:53:30PM -0400, Masami Hiramatsu wrote:
>
> ...
>
>>> +static inline int check_kprobe_rereg(struct kprobe *p)
>>> +{
>>> + int ret = 0;
>>> + struct kprobe *old_p;
>>> +
>>> + mutex_lock(&kprobe_mutex);
>>> + old_p = __get_valid_kprobe(p);
>>> + if (old_p == p)
>>
>> Here, since __get_valid_kprobe(p) will return aggr_kprobe of 'p',
>> you just need to check old_p != NULL.
>
> Right!
>
> ---
> Prevent re-registration of the same kprobe. This situation, though
> unlikely, needs to be flagged since it can lead to a system crash if
> it's not handled.
>
> The core change itself is small, but the helper routine needed to be
> moved around a bit; hence the diffstat.
>
> Signed-off-by: Ananth N Mavinakayanahalli<[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
> ---
> kernel/kprobes.c | 58 ++++++++++++++++++++++++++++++++++++-------------------
> 1 file changed, 38 insertions(+), 20 deletions(-)
>
> Index: linux-2.6.31/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.31.orig/kernel/kprobes.c
> +++ linux-2.6.31/kernel/kprobes.c
> @@ -681,6 +681,40 @@ static kprobe_opcode_t __kprobes *kprobe
> return (kprobe_opcode_t *)(((char *)addr) + p->offset);
> }
>
> +/* Check passed kprobe is valid and return kprobe in kprobe_table. */
> +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
> +{
> + struct kprobe *old_p, *list_p;
> +
> + old_p = get_kprobe(p->addr);
> + if (unlikely(!old_p))
> + return NULL;
> +
> + if (p != old_p) {
> + list_for_each_entry_rcu(list_p, &old_p->list, list)
> + if (list_p == p)
> + /* kprobe p is a valid probe */
> + goto valid;
> + return NULL;
> + }
> +valid:
> + return old_p;
> +}
> +
> +/* Return error if the kprobe is being re-registered */
> +static inline int check_kprobe_rereg(struct kprobe *p)
> +{
> + int ret = 0;
> + struct kprobe *old_p;
> +
> + mutex_lock(&kprobe_mutex);
> + old_p = __get_valid_kprobe(p);
> + if (old_p)
> + ret = -EINVAL;
> + mutex_unlock(&kprobe_mutex);
> + return ret;
> +}
> +
> int __kprobes register_kprobe(struct kprobe *p)
> {
> int ret = 0;
> @@ -693,6 +727,10 @@ int __kprobes register_kprobe(struct kpr
> return -EINVAL;
> p->addr = addr;
>
> + ret = check_kprobe_rereg(p);
> + if (ret)
> + return ret;
> +
> preempt_disable();
> if (!kernel_text_address((unsigned long) p->addr) ||
> in_kprobes_functions((unsigned long) p->addr)) {
> @@ -762,26 +800,6 @@ out:
> }
> EXPORT_SYMBOL_GPL(register_kprobe);
>
> -/* Check passed kprobe is valid and return kprobe in kprobe_table. */
> -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
> -{
> - struct kprobe *old_p, *list_p;
> -
> - old_p = get_kprobe(p->addr);
> - if (unlikely(!old_p))
> - return NULL;
> -
> - if (p != old_p) {
> - list_for_each_entry_rcu(list_p, &old_p->list, list)
> - if (list_p == p)
> - /* kprobe p is a valid probe */
> - goto valid;
> - return NULL;
> - }
> -valid:
> - return old_p;
> -}
> -
> /*
> * Unregister a kprobe without a scheduler synchronization.
> */
--
Masami Hiramatsu
Software Engineer
Hitachi Computer Products (America), Inc.
Software Solutions Division
e-mail: [email protected]
Commit-ID: ad5cafcdb09c57008c990edd309c0a563b09f238
Gitweb: http://git.kernel.org/tip/ad5cafcdb09c57008c990edd309c0a563b09f238
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:06 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 04:14:24 +0200
x86/ptrace: Fix regs_get_argument_nth() to add correct offset
Fix regs_get_argument_nth() to add the correct number of offset bytes.
Because offsetof() returns an offset in bytes, the offset should be added
to a char * instead of an unsigned long *.
Signed-off-by: Masami Hiramatsu <[email protected]>
Acked-by: Steven Rostedt <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
arch/x86/kernel/ptrace.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a33a17d..caffb68 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -150,7 +150,7 @@ static const int arg_offs_table[] = {
unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
{
if (n < ARRAY_SIZE(arg_offs_table))
- return *((unsigned long *)regs + arg_offs_table[n]);
+ return *(unsigned long *)((char *)regs + arg_offs_table[n]);
else {
/*
* The typical case: arg n is on the stack.
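(For illustration only: arg_offs_table[] holds byte offsets produced by
offsetof(), so they must be applied to a char * before dereferencing;
adding them to an unsigned long * scales the offset by
sizeof(unsigned long). A self-contained userspace sketch, with a made-up
struct fake_regs standing in for pt_regs:)

#include <stdio.h>
#include <stddef.h>

struct fake_regs {
        unsigned long ax, dx, cx;       /* stand-in for a few pt_regs members */
};

int main(void)
{
        struct fake_regs regs = { .ax = 1, .dx = 2, .cx = 3 };
        size_t off = offsetof(struct fake_regs, dx);    /* byte offset: 8 on x86-64 */

        /*
         * The old code did:  *((unsigned long *)&regs + off)
         * which advances by off * sizeof(unsigned long) bytes (64 here),
         * far past the intended member.  The fixed code applies the byte
         * offset to a char * first and only then casts back:
         */
        unsigned long val = *(unsigned long *)((char *)&regs + off);

        printf("dx fetched via byte offset = %lu\n", val);      /* prints 2 */
        return 0;
}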
Commit-ID: 2fba0c8867af47f6455490e7b59e512dd180c027
Gitweb: http://git.kernel.org/tip/2fba0c8867af47f6455490e7b59e512dd180c027
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:14 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 04:18:58 +0200
tracing/kprobes: Fix probe offset to be unsigned
Prohibit users from specifying a negative offset from a symbol.
Since kprobe.offset is an unsigned int, the offset must always be a
positive value.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
Documentation/trace/kprobetrace.txt | 14 +++++++-------
kernel/trace/trace_kprobe.c | 19 +++++++------------
2 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 3de7517..db55318 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -25,15 +25,15 @@ probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter.
Synopsis of kprobe_events
-------------------------
- p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
- EVENT : Event name. If omitted, the event name is generated
- based on SYMBOL+offs or MEMADDR.
- SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted.
- MEMADDR : Address where the probe is inserted.
+ EVENT : Event name. If omitted, the event name is generated
+ based on SYMBOL+offs or MEMADDR.
+ SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+ MEMADDR : Address where the probe is inserted.
- FETCHARGS : Arguments. Each probe can have up to 128 args.
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
%REG : Fetch register REG
sN : Fetch Nth entry of stack (N >= 0)
sa : Fetch stack address.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 19a6de6..c24b7e9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -210,7 +210,7 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
return tp->symbol ? tp->symbol : "unknown";
}
-static __kprobes long probe_offset(struct trace_probe *tp)
+static __kprobes unsigned int probe_offset(struct trace_probe *tp)
{
return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
}
@@ -380,7 +380,7 @@ end:
}
/* Split symbol and offset. */
-static int split_symbol_offset(char *symbol, long *offset)
+static int split_symbol_offset(char *symbol, unsigned long *offset)
{
char *tmp;
int ret;
@@ -389,16 +389,11 @@ static int split_symbol_offset(char *symbol, long *offset)
return -EINVAL;
tmp = strchr(symbol, '+');
- if (!tmp)
- tmp = strchr(symbol, '-');
-
if (tmp) {
/* skip sign because strict_strtol doesn't accept '+' */
- ret = strict_strtol(tmp + 1, 0, offset);
+ ret = strict_strtoul(tmp + 1, 0, offset);
if (ret)
return ret;
- if (*tmp == '-')
- *offset = -(*offset);
*tmp = '\0';
} else
*offset = 0;
@@ -520,7 +515,7 @@ static int create_trace_probe(int argc, char **argv)
{
/*
* Argument syntax:
- * - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS]
+ * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS]
* - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
* Fetch args:
* aN : fetch Nth of function argument. (N:0-)
@@ -539,7 +534,7 @@ static int create_trace_probe(int argc, char **argv)
int i, ret = 0;
int is_return = 0;
char *symbol = NULL, *event = NULL;
- long offset = 0;
+ unsigned long offset = 0;
void *addr = NULL;
if (argc < 2)
@@ -605,7 +600,7 @@ static int create_trace_probe(int argc, char **argv)
if (tp->symbol) {
kp->symbol_name = tp->symbol;
- kp->offset = offset;
+ kp->offset = (unsigned int)offset;
} else
kp->addr = addr;
@@ -675,7 +670,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, ":%s", tp->call.name);
if (tp->symbol)
- seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp));
+ seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp));
else
seq_printf(m, " 0x%p", probe_address(tp));
Commit-ID: 4a846b443b4e8633057946a2234e23559a67ce42
Gitweb: http://git.kernel.org/tip/4a846b443b4e8633057946a2234e23559a67ce42
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Fri, 11 Sep 2009 05:31:21 +0200
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 05:31:21 +0200
tracing/kprobes: Cleanup kprobe tracer code.
Simplify trace_probe by removing a union, and remove some redundant
wrappers.
Also, clean up the create_trace_probe() function.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/trace/trace_kprobe.c | 81 ++++++++++++++++++-------------------------
1 files changed, 34 insertions(+), 47 deletions(-)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c24b7e9..4ce728c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -180,10 +180,7 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
struct trace_probe {
struct list_head list;
- union {
- struct kprobe kp;
- struct kretprobe rp;
- };
+ struct kretprobe rp; /* Use rp.kp for kprobe use */
unsigned long nhit;
const char *symbol; /* symbol name */
struct ftrace_event_call call;
@@ -202,7 +199,7 @@ static int kretprobe_trace_func(struct kretprobe_instance *ri,
static __kprobes int probe_is_return(struct trace_probe *tp)
{
- return (tp->rp.handler == kretprobe_trace_func);
+ return tp->rp.handler != NULL;
}
static __kprobes const char *probe_symbol(struct trace_probe *tp)
@@ -210,16 +207,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
return tp->symbol ? tp->symbol : "unknown";
}
-static __kprobes unsigned int probe_offset(struct trace_probe *tp)
-{
- return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset;
-}
-
-static __kprobes void *probe_address(struct trace_probe *tp)
-{
- return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr;
-}
-
static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
@@ -269,8 +256,14 @@ static void unregister_probe_event(struct trace_probe *tp);
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);
-static struct trace_probe *alloc_trace_probe(const char *symbol,
- const char *event, int nargs)
+/*
+ * Allocate new trace_probe and initialize it (including kprobes).
+ */
+static struct trace_probe *alloc_trace_probe(const char *event,
+ void *addr,
+ const char *symbol,
+ unsigned long offs,
+ int nargs, int is_return)
{
struct trace_probe *tp;
@@ -282,7 +275,16 @@ static struct trace_probe *alloc_trace_probe(const char *symbol,
tp->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tp->symbol)
goto error;
- }
+ tp->rp.kp.symbol_name = tp->symbol;
+ tp->rp.kp.offset = offs;
+ } else
+ tp->rp.kp.addr = addr;
+
+ if (is_return)
+ tp->rp.handler = kretprobe_trace_func;
+ else
+ tp->rp.kp.pre_handler = kprobe_trace_func;
+
if (!event)
goto error;
tp->call.name = kstrdup(event, GFP_KERNEL);
@@ -327,7 +329,7 @@ static void __unregister_trace_probe(struct trace_probe *tp)
if (probe_is_return(tp))
unregister_kretprobe(&tp->rp);
else
- unregister_kprobe(&tp->kp);
+ unregister_kprobe(&tp->rp.kp);
}
/* Unregister a trace_probe and probe_event: call with locking probe_lock */
@@ -349,14 +351,14 @@ static int register_trace_probe(struct trace_probe *tp)
if (probe_is_return(tp))
ret = register_kretprobe(&tp->rp);
else
- ret = register_kprobe(&tp->kp);
+ ret = register_kprobe(&tp->rp.kp);
if (ret) {
pr_warning("Could not insert probe(%d)\n", ret);
if (ret == -EILSEQ) {
pr_warning("Probing address(0x%p) is not an "
"instruction boundary.\n",
- probe_address(tp));
+ tp->rp.kp.addr);
ret = -EINVAL;
}
goto end;
@@ -530,12 +532,12 @@ static int create_trace_probe(int argc, char **argv)
* +|-offs(ARG) : fetch memory at ARG +|- offs address.
*/
struct trace_probe *tp;
- struct kprobe *kp;
int i, ret = 0;
int is_return = 0;
char *symbol = NULL, *event = NULL;
unsigned long offset = 0;
void *addr = NULL;
+ char buf[MAX_EVENT_NAME_LEN];
if (argc < 2)
return -EINVAL;
@@ -577,33 +579,18 @@ static int create_trace_probe(int argc, char **argv)
/* setup a probe */
if (!event) {
/* Make a new event name */
- char buf[MAX_EVENT_NAME_LEN];
if (symbol)
snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld",
is_return ? 'r' : 'p', symbol, offset);
else
snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p",
is_return ? 'r' : 'p', addr);
- tp = alloc_trace_probe(symbol, buf, argc);
- } else
- tp = alloc_trace_probe(symbol, event, argc);
+ event = buf;
+ }
+ tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return);
if (IS_ERR(tp))
return PTR_ERR(tp);
- if (is_return) {
- kp = &tp->rp.kp;
- tp->rp.handler = kretprobe_trace_func;
- } else {
- kp = &tp->kp;
- tp->kp.pre_handler = kprobe_trace_func;
- }
-
- if (tp->symbol) {
- kp->symbol_name = tp->symbol;
- kp->offset = (unsigned int)offset;
- } else
- kp->addr = addr;
-
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
@@ -670,9 +657,9 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, ":%s", tp->call.name);
if (tp->symbol)
- seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp));
+ seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
else
- seq_printf(m, " 0x%p", probe_address(tp));
+ seq_printf(m, " 0x%p", tp->rp.kp.addr);
for (i = 0; i < tp->nr_args; i++) {
ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
@@ -783,7 +770,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
struct trace_probe *tp = v;
seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
- probe_is_return(tp) ? tp->rp.kp.nmissed : tp->kp.nmissed);
+ tp->rp.kp.nmissed);
return 0;
}
@@ -811,7 +798,7 @@ static const struct file_operations kprobe_profile_ops = {
/* Kprobe handler */
static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
{
- struct trace_probe *tp = container_of(kp, struct trace_probe, kp);
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct kprobe_trace_entry *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
@@ -866,7 +853,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
entry = ring_buffer_event_data(event);
entry->nargs = tp->nr_args;
- entry->func = (unsigned long)probe_address(tp);
+ entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i], regs);
@@ -945,7 +932,7 @@ static int probe_event_enable(struct ftrace_event_call *call)
if (probe_is_return(tp))
return enable_kretprobe(&tp->rp);
else
- return enable_kprobe(&tp->kp);
+ return enable_kprobe(&tp->rp.kp);
}
static void probe_event_disable(struct ftrace_event_call *call)
@@ -955,7 +942,7 @@ static void probe_event_disable(struct ftrace_event_call *call)
if (probe_is_return(tp))
disable_kretprobe(&tp->rp);
else
- disable_kprobe(&tp->kp);
+ disable_kprobe(&tp->rp.kp);
}
static int probe_event_raw_init(struct ftrace_event_call *event_call)
Commit-ID: e08d1c657f70bcaca11401cd6ac5c8fe59bd2bb7
Gitweb: http://git.kernel.org/tip/e08d1c657f70bcaca11401cd6ac5c8fe59bd2bb7
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:30 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 05:33:03 +0200
tracing/kprobes: Add event profiling support
Add *probe_profile_enable/disable to support raw kprobe event
sampling from perf counters, like other ftrace events, when
CONFIG_EVENT_PROFILE=y.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
Documentation/trace/kprobetrace.txt | 4 +-
kernel/trace/trace_kprobe.c | 110 ++++++++++++++++++++++++++++++++++-
2 files changed, 111 insertions(+), 3 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index db55318..8f882eb 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -62,13 +62,15 @@ enabled:
You can enable/disable the probe by writing 1 or 0 on it.
format:
- It shows the format of this probe event. It also shows aliases of arguments
+ This shows the format of this probe event. It also shows aliases of arguments
which you specified to kprobe_events.
filter:
You can write filtering rules of this event. And you can use both of aliase
names and field names for describing filters.
+id:
+ This shows the id of this probe event.
Event Profiling
---------------
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 4ce728c..730e992 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -28,6 +28,7 @@
#include <linux/string.h>
#include <linux/ctype.h>
#include <linux/ptrace.h>
+#include <linux/perf_counter.h>
#include "trace.h"
#include "trace_output.h"
@@ -280,6 +281,7 @@ static struct trace_probe *alloc_trace_probe(const char *event,
} else
tp->rp.kp.addr = addr;
+ /* Set handler here for checking whether this probe is return or not. */
if (is_return)
tp->rp.handler = kretprobe_trace_func;
else
@@ -929,10 +931,13 @@ static int probe_event_enable(struct ftrace_event_call *call)
{
struct trace_probe *tp = (struct trace_probe *)call->data;
- if (probe_is_return(tp))
+ if (probe_is_return(tp)) {
+ tp->rp.handler = kretprobe_trace_func;
return enable_kretprobe(&tp->rp);
- else
+ } else {
+ tp->rp.kp.pre_handler = kprobe_trace_func;
return enable_kprobe(&tp->rp.kp);
+ }
}
static void probe_event_disable(struct ftrace_event_call *call)
@@ -1105,6 +1110,101 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
"func, ret_ip");
}
+#ifdef CONFIG_EVENT_PROFILE
+
+/* Kprobe profile handler */
+static __kprobes int kprobe_profile_func(struct kprobe *kp,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kprobe_trace_entry *entry;
+ int size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->ip = (unsigned long)kp->addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i], regs);
+ perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+/* Kretprobe profile handler */
+static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
+ struct ftrace_event_call *call = &tp->call;
+ struct kretprobe_trace_entry *entry;
+ int size, i, pc;
+ unsigned long irq_flags;
+
+ local_save_flags(irq_flags);
+ pc = preempt_count();
+
+ size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
+
+ do {
+ char raw_data[size];
+ struct trace_entry *ent;
+
+ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
+ entry = (struct kretprobe_trace_entry *)raw_data;
+ ent = &entry->ent;
+
+ tracing_generic_entry_update(ent, irq_flags, pc);
+ ent->type = call->id;
+ entry->nargs = tp->nr_args;
+ entry->func = (unsigned long)tp->rp.kp.addr;
+ entry->ret_ip = (unsigned long)ri->ret_addr;
+ for (i = 0; i < tp->nr_args; i++)
+ entry->args[i] = call_fetch(&tp->args[i], regs);
+ perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
+ } while (0);
+ return 0;
+}
+
+static int probe_profile_enable(struct ftrace_event_call *call)
+{
+ struct trace_probe *tp = (struct trace_probe *)call->data;
+
+ if (atomic_inc_return(&call->profile_count))
+ return 0;
+
+ if (probe_is_return(tp)) {
+ tp->rp.handler = kretprobe_profile_func;
+ return enable_kretprobe(&tp->rp);
+ } else {
+ tp->rp.kp.pre_handler = kprobe_profile_func;
+ return enable_kprobe(&tp->rp.kp);
+ }
+}
+
+static void probe_profile_disable(struct ftrace_event_call *call)
+{
+ if (atomic_add_negative(-1, &call->profile_count))
+ probe_event_disable(call);
+}
+
+#endif /* CONFIG_EVENT_PROFILE */
+
static int register_probe_event(struct trace_probe *tp)
{
struct ftrace_event_call *call = &tp->call;
@@ -1130,6 +1230,12 @@ static int register_probe_event(struct trace_probe *tp)
call->enabled = 1;
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
+
+#ifdef CONFIG_EVENT_PROFILE
+ atomic_set(&call->profile_count, -1);
+ call->profile_enable = probe_profile_enable;
+ call->profile_disable = probe_profile_disable;
+#endif
call->data = tp;
ret = trace_add_event_call(call);
if (ret) {
Commit-ID: eca0d916f6429785bbc88db3ff66631cde62b432
Gitweb: http://git.kernel.org/tip/eca0d916f6429785bbc88db3ff66631cde62b432
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:38 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 06:08:00 +0200
tracing/kprobes: Add argument name support
Add argument name assignment support and remove the "alias" lines from the
format file.
This allows users to assign a unique name to each argument. For example,
$ echo p do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > kprobe_events
This assigns dfd, filename, flags, and mode to the 1st-4th arguments,
respectively. The trace buffer shows those names too.
<...>-1439 [000] 1200885.933147: do_sys_open+0x0/0xdf: dfd=ffffff9c filename=bfa898ac flags=8000 mode=0
This helps users know what each value means.
Users can also filter each event by these names. Note that you can no
longer filter by argN.
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
Documentation/trace/kprobetrace.txt | 46 ++++++-------
kernel/trace/trace_kprobe.c | 128 +++++++++++++++++-----------------
2 files changed, 84 insertions(+), 90 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 8f882eb..aaa6c10 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,7 +42,8 @@ Synopsis of kprobe_events
aN : Fetch function argument. (N >= 0)(*)
rv : Fetch return value.(**)
ra : Fetch return address.(**)
- +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address.(***)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
(*) aN may not correct on asmlinkaged functions and at the middle of
function body.
@@ -62,12 +63,10 @@ enabled:
You can enable/disable the probe by writing 1 or 0 on it.
format:
- This shows the format of this probe event. It also shows aliases of arguments
- which you specified to kprobe_events.
+ This shows the format of this probe event.
filter:
- You can write filtering rules of this event. And you can use both of aliase
- names and field names for describing filters.
+ You can write filtering rules of this event.
id:
This shows the id of this probe event.
@@ -85,10 +84,11 @@ Usage examples
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open a0 a1 a2 a3 > /sys/kernel/debug/tracing/kprobe_events
+ echo p:myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event.
+1st to 4th arguments as "myprobe" event. As this example shows, users can
+choose more familiar names for each arguments.
echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events
@@ -99,7 +99,7 @@ recording return value and return address as "myretprobe" event.
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
name: myprobe
-ID: 23
+ID: 75
format:
field:unsigned short common_type; offset:0; size:2;
field:unsigned char common_flags; offset:2; size:1;
@@ -109,21 +109,15 @@ format:
field: unsigned long ip; offset:16;tsize:8;
field: int nargs; offset:24;tsize:4;
- field: unsigned long arg0; offset:32;tsize:8;
- field: unsigned long arg1; offset:40;tsize:8;
- field: unsigned long arg2; offset:48;tsize:8;
- field: unsigned long arg3; offset:56;tsize:8;
+ field: unsigned long dfd; offset:32;tsize:8;
+ field: unsigned long filename; offset:40;tsize:8;
+ field: unsigned long flags; offset:48;tsize:8;
+ field: unsigned long mode; offset:56;tsize:8;
- alias: a0; original: arg0;
- alias: a1; original: arg1;
- alias: a2; original: arg2;
- alias: a3; original: arg3;
+print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode
-print fmt: "%lx: 0x%lx 0x%lx 0x%lx 0x%lx", ip, arg0, arg1, arg2, arg3
-
- You can see that the event has 4 arguments and alias expressions
-corresponding to it.
+ You can see that the event has 4 arguments as in the expressions you specified.
echo > /sys/kernel/debug/tracing/kprobe_events
@@ -135,12 +129,12 @@ corresponding to it.
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
- <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: 0x3 0x7fffd1ec4440 0x8000 0x0
- <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: 0xfffffffffffffffe 0xffffffff81367a3a
- <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: 0xffffff9c 0x40413c 0x8000 0x1b6
- <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
- <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: 0xffffff9c 0x4041c6 0x98800 0x10
- <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a
+ <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
Each line shows when the kernel hits a probe, and <- SYMBOL means kernel
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 730e992..44dad1a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -176,9 +176,14 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data)
}
/**
- * kprobe_trace_core
+ * Kprobe tracer core functions
*/
+struct probe_arg {
+ struct fetch_func fetch;
+ const char *name;
+};
+
struct trace_probe {
struct list_head list;
struct kretprobe rp; /* Use rp.kp for kprobe use */
@@ -187,12 +192,12 @@ struct trace_probe {
struct ftrace_event_call call;
struct trace_event event;
unsigned int nr_args;
- struct fetch_func args[];
+ struct probe_arg args[];
};
#define SIZEOF_TRACE_PROBE(n) \
(offsetof(struct trace_probe, args) + \
- (sizeof(struct fetch_func) * (n)))
+ (sizeof(struct probe_arg) * (n)))
static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_trace_func(struct kretprobe_instance *ri,
@@ -301,15 +306,21 @@ error:
return ERR_PTR(-ENOMEM);
}
+static void free_probe_arg(struct probe_arg *arg)
+{
+ if (arg->fetch.func == fetch_symbol)
+ free_symbol_cache(arg->fetch.data);
+ else if (arg->fetch.func == fetch_indirect)
+ free_indirect_fetch_data(arg->fetch.data);
+ kfree(arg->name);
+}
+
static void free_trace_probe(struct trace_probe *tp)
{
int i;
for (i = 0; i < tp->nr_args; i++)
- if (tp->args[i].func == fetch_symbol)
- free_symbol_cache(tp->args[i].data);
- else if (tp->args[i].func == fetch_indirect)
- free_indirect_fetch_data(tp->args[i].data);
+ free_probe_arg(&tp->args[i]);
kfree(tp->call.name);
kfree(tp->symbol);
@@ -532,11 +543,13 @@ static int create_trace_probe(int argc, char **argv)
* %REG : fetch register REG
* Indirect memory fetch:
* +|-offs(ARG) : fetch memory at ARG +|- offs address.
+ * Alias name of args:
+ * NAME=FETCHARG : set NAME as alias of FETCHARG.
*/
struct trace_probe *tp;
int i, ret = 0;
int is_return = 0;
- char *symbol = NULL, *event = NULL;
+ char *symbol = NULL, *event = NULL, *arg = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -596,12 +609,21 @@ static int create_trace_probe(int argc, char **argv)
/* parse arguments */
ret = 0;
for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
- if (strlen(argv[i]) > MAX_ARGSTR_LEN) {
- pr_info("Argument%d(%s) is too long.\n", i, argv[i]);
+ /* Parse argument name */
+ arg = strchr(argv[i], '=');
+ if (arg)
+ *arg++ = '\0';
+ else
+ arg = argv[i];
+ tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
+
+ /* Parse fetch argument */
+ if (strlen(arg) > MAX_ARGSTR_LEN) {
+ pr_info("Argument%d(%s) is too long.\n", i, arg);
ret = -ENOSPC;
goto error;
}
- ret = parse_probe_arg(argv[i], &tp->args[i], is_return);
+ ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return);
if (ret)
goto error;
}
@@ -664,12 +686,12 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_printf(m, " 0x%p", tp->rp.kp.addr);
for (i = 0; i < tp->nr_args; i++) {
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
+ ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch);
if (ret < 0) {
pr_warning("Argument%d decoding error(%d).\n", i, ret);
return ret;
}
- seq_printf(m, " %s", buf);
+ seq_printf(m, " %s=%s", tp->args[i].name, buf);
}
seq_printf(m, "\n");
return 0;
@@ -824,7 +846,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -858,7 +880,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
if (!filter_current_check_discard(buffer, call, entry, event))
trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -872,9 +894,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
{
struct kprobe_trace_entry *field;
struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
int i;
field = (struct kprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -883,7 +909,8 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
goto partial;
for (i = 0; i < field->nargs; i++)
- if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -899,9 +926,13 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
{
struct kretprobe_trace_entry *field;
struct trace_seq *s = &iter->seq;
+ struct trace_event *event;
+ struct trace_probe *tp;
int i;
field = (struct kretprobe_trace_entry *)iter->ent;
+ event = ftrace_find_event(field->ent.type);
+ tp = container_of(event, struct trace_probe, event);
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -916,7 +947,8 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
goto partial;
for (i = 0; i < field->nargs; i++)
- if (!trace_seq_printf(s, " 0x%lx", field->args[i]))
+ if (!trace_seq_printf(s, " %s=%lx",
+ tp->args[i].name, field->args[i]))
goto partial;
if (!trace_seq_puts(s, "\n"))
@@ -972,7 +1004,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kprobe_trace_entry field;
- char buf[MAX_ARGSTR_LEN + 1];
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
@@ -981,16 +1012,9 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, ip, "ip", 0);
DEFINE_FIELD(int, nargs, "nargs", 1);
- for (i = 0; i < tp->nr_args; i++) {
- /* Set argN as a field */
- sprintf(buf, "arg%d", i);
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- /* Set argument string as an alias field */
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- }
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
return 0;
}
@@ -998,7 +1022,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
{
int ret, i;
struct kretprobe_trace_entry field;
- char buf[MAX_ARGSTR_LEN + 1];
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
@@ -1008,16 +1031,9 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
DEFINE_FIELD(unsigned long, func, "func", 0);
DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0);
DEFINE_FIELD(int, nargs, "nargs", 1);
- for (i = 0; i < tp->nr_args; i++) {
- /* Set argN as a field */
- sprintf(buf, "arg%d", i);
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- /* Set argument string as an alias field */
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- DEFINE_FIELD(unsigned long, args[i], buf, 0);
- }
+ /* Set argument names as fields */
+ for (i = 0; i < tp->nr_args; i++)
+ DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0);
return 0;
}
@@ -1025,31 +1041,21 @@ static int __probe_event_show_format(struct trace_seq *s,
struct trace_probe *tp, const char *fmt,
const char *arg)
{
- int i, ret;
- char buf[MAX_ARGSTR_LEN + 1];
+ int i;
- /* Show aliases */
- for (i = 0; i < tp->nr_args; i++) {
- ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]);
- if (ret < 0)
- return ret;
- if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n",
- buf, i))
- return 0;
- }
/* Show format */
if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
return 0;
for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_puts(s, " 0x%lx"))
+ if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
return 0;
if (!trace_seq_printf(s, "\", %s", arg))
return 0;
for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, ", arg%d", i))
+ if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
return 0;
return trace_seq_puts(s, "\n");
@@ -1071,17 +1077,14 @@ static int kprobe_event_show_format(struct ftrace_event_call *call,
{
struct kprobe_trace_entry field __attribute__((unused));
int ret, i;
- char buf[8];
struct trace_probe *tp = (struct trace_probe *)call->data;
SHOW_FIELD(unsigned long, ip, "ip");
SHOW_FIELD(int, nargs, "nargs");
/* Show fields */
- for (i = 0; i < tp->nr_args; i++) {
- sprintf(buf, "arg%d", i);
- SHOW_FIELD(unsigned long, args[i], buf);
- }
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
return __probe_event_show_format(s, tp, "%lx:", "ip");
@@ -1092,7 +1095,6 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
{
struct kretprobe_trace_entry field __attribute__((unused));
int ret, i;
- char buf[8];
struct trace_probe *tp = (struct trace_probe *)call->data;
SHOW_FIELD(unsigned long, func, "func");
@@ -1100,10 +1102,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(int, nargs, "nargs");
/* Show fields */
- for (i = 0; i < tp->nr_args; i++) {
- sprintf(buf, "arg%d", i);
- SHOW_FIELD(unsigned long, args[i], buf);
- }
+ for (i = 0; i < tp->nr_args; i++)
+ SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
return __probe_event_show_format(s, tp, "%lx <- %lx:",
@@ -1140,7 +1140,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
entry->nargs = tp->nr_args;
entry->ip = (unsigned long)kp->addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tpcounter_event(call->id, entry->ip, 1, entry, size);
} while (0);
return 0;
@@ -1175,7 +1175,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
for (i = 0; i < tp->nr_args; i++)
- entry->args[i] = call_fetch(&tp->args[i], regs);
+ entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size);
} while (0);
return 0;
Commit-ID: 6e9f23d1619f7badaf9090dac09e86a22d6061d8
Gitweb: http://git.kernel.org/tip/6e9f23d1619f7badaf9090dac09e86a22d6061d8
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:45 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Fri, 11 Sep 2009 06:09:47 +0200
tracing/kprobes: Show event name in trace output
Show the event name in tracing/trace output. This also fixes the kprobes
event format to comply with other tracepoint event formats.
Before patching:
<...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: ...
<...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: ...
After patching:
<...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) ...
<...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) ...
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
Documentation/trace/kprobetrace.txt | 16 ++++++++--------
kernel/trace/trace_kprobe.c | 16 +++++++++++-----
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index aaa6c10..a849889 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -114,7 +114,7 @@ format:
field: unsigned long flags; offset:48;tsize:8;
field: unsigned long mode; offset:56;tsize:8;
-print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
You can see that the event has 4 arguments as in the expressions you specified.
@@ -129,15 +129,15 @@ print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->fi
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
- <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
- <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a
- <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6
- <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
- <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10
- <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) rv=fffffffffffffffe ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a
+ <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a
- Each line shows when the kernel hits a probe, and <- SYMBOL means kernel
+ Each line shows when the kernel hits an event, and <- SYMBOL means kernel
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 44dad1a..1746afe 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -902,10 +902,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
event = ftrace_find_event(field->ent.type);
tp = container_of(event, struct trace_probe, event);
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
- if (!trace_seq_puts(s, ":"))
+ if (!trace_seq_puts(s, ")"))
goto partial;
for (i = 0; i < field->nargs; i++)
@@ -934,6 +937,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
event = ftrace_find_event(field->ent.type);
tp = container_of(event, struct trace_probe, event);
+ if (!trace_seq_printf(s, "%s: (", tp->call.name))
+ goto partial;
+
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto partial;
@@ -943,7 +949,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
goto partial;
- if (!trace_seq_puts(s, ":"))
+ if (!trace_seq_puts(s, ")"))
goto partial;
for (i = 0; i < field->nargs; i++)
@@ -1087,7 +1093,7 @@ static int kprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
- return __probe_event_show_format(s, tp, "%lx:", "ip");
+ return __probe_event_show_format(s, tp, "(%lx)", "REC->ip");
}
static int kretprobe_event_show_format(struct ftrace_event_call *call,
@@ -1106,8 +1112,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call,
SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
trace_seq_puts(s, "\n");
- return __probe_event_show_format(s, tp, "%lx <- %lx:",
- "func, ret_ip");
+ return __probe_event_show_format(s, tp, "(%lx <- %lx)",
+ "REC->func, REC->ret_ip");
}
#ifdef CONFIG_EVENT_PROFILE
Commit-ID: f52487e9c0041842eeb77c6c48774414b1cede08
Gitweb: http://git.kernel.org/tip/f52487e9c0041842eeb77c6c48774414b1cede08
Author: Masami Hiramatsu <[email protected]>
AuthorDate: Thu, 10 Sep 2009 19:53:53 -0400
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Thu, 17 Sep 2009 04:03:24 +0200
tracing/kprobes: Support custom subsystem for each kprobe event
Support specifying a custom subsystem (group) for each kprobe event.
This allows users to create a new group to control several probes
at once, or to add events to existing groups as additional tracepoints.
New synopsis:
p[:[subsys/]event-name] KADDR|KSYM[+offs] [ARGS]
Signed-off-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Ananth N Mavinakayanahalli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
Documentation/trace/kprobetrace.txt | 5 +++--
kernel/trace/trace_kprobe.c | 33 +++++++++++++++++++++++++++------
2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index a849889..6521681 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -25,9 +25,10 @@ probe events via /sys/kernel/debug/tracing/events/kprobes/<EVENT>/filter.
Synopsis of kprobe_events
-------------------------
- p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ GRP : Group name. If omitted, use "kprobes" for it.
EVENT : Event name. If omitted, the event name is generated
based on SYMBOL+offs or MEMADDR.
SYMBOL[+offs] : Symbol+offset where the probe is inserted.
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1746afe..cbc0870 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -36,6 +36,7 @@
#define MAX_TRACE_ARGS 128
#define MAX_ARGSTR_LEN 63
#define MAX_EVENT_NAME_LEN 64
+#define KPROBE_EVENT_SYSTEM "kprobes"
/* currently, trace_kprobe only supports X86. */
@@ -265,7 +266,8 @@ static LIST_HEAD(probe_list);
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
-static struct trace_probe *alloc_trace_probe(const char *event,
+static struct trace_probe *alloc_trace_probe(const char *group,
+ const char *event,
void *addr,
const char *symbol,
unsigned long offs,
@@ -298,9 +300,16 @@ static struct trace_probe *alloc_trace_probe(const char *event,
if (!tp->call.name)
goto error;
+ if (!group)
+ goto error;
+ tp->call.system = kstrdup(group, GFP_KERNEL);
+ if (!tp->call.system)
+ goto error;
+
INIT_LIST_HEAD(&tp->list);
return tp;
error:
+ kfree(tp->call.name);
kfree(tp->symbol);
kfree(tp);
return ERR_PTR(-ENOMEM);
@@ -322,6 +331,7 @@ static void free_trace_probe(struct trace_probe *tp)
for (i = 0; i < tp->nr_args; i++)
free_probe_arg(&tp->args[i]);
+ kfree(tp->call.system);
kfree(tp->call.name);
kfree(tp->symbol);
kfree(tp);
@@ -530,8 +540,8 @@ static int create_trace_probe(int argc, char **argv)
{
/*
* Argument syntax:
- * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS]
- * - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS]
+ * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
+ * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
* aN : fetch Nth of function argument. (N:0-)
* rv : fetch return value
@@ -549,7 +559,7 @@ static int create_trace_probe(int argc, char **argv)
struct trace_probe *tp;
int i, ret = 0;
int is_return = 0;
- char *symbol = NULL, *event = NULL, *arg = NULL;
+ char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
@@ -566,6 +576,15 @@ static int create_trace_probe(int argc, char **argv)
if (argv[0][1] == ':') {
event = &argv[0][2];
+ if (strchr(event, '/')) {
+ group = event;
+ event = strchr(group, '/') + 1;
+ event[-1] = '\0';
+ if (strlen(group) == 0) {
+ pr_info("Group name is not specifiled\n");
+ return -EINVAL;
+ }
+ }
if (strlen(event) == 0) {
pr_info("Event name is not specifiled\n");
return -EINVAL;
@@ -592,6 +611,8 @@ static int create_trace_probe(int argc, char **argv)
argc -= 2; argv += 2;
/* setup a probe */
+ if (!group)
+ group = KPROBE_EVENT_SYSTEM;
if (!event) {
/* Make a new event name */
if (symbol)
@@ -602,7 +623,8 @@ static int create_trace_probe(int argc, char **argv)
is_return ? 'r' : 'p', addr);
event = buf;
}
- tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return);
+ tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
+ is_return);
if (IS_ERR(tp))
return PTR_ERR(tp);
@@ -1217,7 +1239,6 @@ static int register_probe_event(struct trace_probe *tp)
int ret;
/* Initialize ftrace_event_call */
- call->system = "kprobes";
if (probe_is_return(tp)) {
tp->event.trace = print_kretprobe_event;
call->raw_init = probe_event_raw_init;
Commit-ID: 1f0ab40976460bc4673fa204ce917a725185d8f2
Gitweb: http://git.kernel.org/tip/1f0ab40976460bc4673fa204ce917a725185d8f2
Author: Ananth N Mavinakayanahalli <[email protected]>
AuthorDate: Tue, 15 Sep 2009 10:43:07 +0530
Committer: Frederic Weisbecker <[email protected]>
CommitDate: Thu, 17 Sep 2009 04:24:57 +0200
kprobes: Prevent re-registration of the same kprobe
Prevent re-registration of the same kprobe. This situation, though
unlikely, needs to be flagged since it can lead to a system crash if
it's not handled.
The core change itself is small, but the helper routine needed to be
moved around a bit; hence the diffstat.
Signed-off-by: Ananth N Mavinakayanahalli<[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
Cc: Jim Keniston <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: Frank Ch. Eigler <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Baron <[email protected]>
Cc: K.Prasad <[email protected]>
Cc: Lai Jiangshan <[email protected]>
Cc: Li Zefan <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Tom Zanussi <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Frederic Weisbecker <[email protected]>
---
kernel/kprobes.c | 58 +++++++++++++++++++++++++++++++++++------------------
1 files changed, 38 insertions(+), 20 deletions(-)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 00d01b0..b946761 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -676,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
+{
+ struct kprobe *old_p, *list_p;
+
+ old_p = get_kprobe(p->addr);
+ if (unlikely(!old_p))
+ return NULL;
+
+ if (p != old_p) {
+ list_for_each_entry_rcu(list_p, &old_p->list, list)
+ if (list_p == p)
+ /* kprobe p is a valid probe */
+ goto valid;
+ return NULL;
+ }
+valid:
+ return old_p;
+}
+
+/* Return error if the kprobe is being re-registered */
+static inline int check_kprobe_rereg(struct kprobe *p)
+{
+ int ret = 0;
+ struct kprobe *old_p;
+
+ mutex_lock(&kprobe_mutex);
+ old_p = __get_valid_kprobe(p);
+ if (old_p)
+ ret = -EINVAL;
+ mutex_unlock(&kprobe_mutex);
+ return ret;
+}
+
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
@@ -688,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p)
return -EINVAL;
p->addr = addr;
+ ret = check_kprobe_rereg(p);
+ if (ret)
+ return ret;
+
preempt_disable();
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
@@ -757,26 +795,6 @@ out:
}
EXPORT_SYMBOL_GPL(register_kprobe);
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
-static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
-{
- struct kprobe *old_p, *list_p;
-
- old_p = get_kprobe(p->addr);
- if (unlikely(!old_p))
- return NULL;
-
- if (p != old_p) {
- list_for_each_entry_rcu(list_p, &old_p->list, list)
- if (list_p == p)
- /* kprobe p is a valid probe */
- goto valid;
- return NULL;
- }
-valid:
- return old_p;
-}
-
/*
* Unregister a kprobe without a scheduler synchronization.
*/