2013-06-16 17:25:50

by Oleg Nesterov

Subject: [PATCH 0/3] tracing/kprobes: trace_probe->files cleanups

Hello.

I simply can't understand why trace_kprobe.c uses
"struct ftrace_event_file **"; a plain list_head looks much
more natural and simpler.

And we do not want to copy-and-paste this code to trace_uprobe.c.
If there is a reason for the array of pointers, we should create the
helpers in the common trace_probe.c so that uprobes can use them
too.
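
To make the contrast concrete, here is a minimal userspace sketch
(illustration only, not the kernel code; all names are invented).
The array flavour has to reallocate and copy on every attach, keeping
a NULL stopper; the list flavour just links a small node:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct file { int id; };	/* stand-in for struct ftrace_event_file */

	/* Array flavour: NULL-terminated, every attach reallocates and copies. */
	static struct file **attach_array(struct file **old, struct file *f)
	{
		struct file **new;
		int n = 0;

		if (old)
			while (old[n])
				n++;
		/* 1 for the new entry and 1 for the NULL stopper */
		new = calloc(n + 2, sizeof(*new));
		if (!new)
			return old;
		if (old)
			memcpy(new, old, n * sizeof(*new));
		new[n] = f;
		free(old);
		return new;
	}

	/* List flavour: attach is a constant-time link, nothing is copied. */
	struct link {
		struct file *file;
		struct link *next;
	};

	static struct link *attach_list(struct link *head, struct file *f)
	{
		struct link *l = malloc(sizeof(*l));

		if (!l)
			return head;
		l->file = f;
		l->next = head;	/* prepends; v2 of patch 3/3 below uses
				 * list_add_tail_rcu() to keep attach order */
		return l;
	}

	int main(void)
	{
		struct file f1 = { 1 }, f2 = { 2 };
		struct file **arr = attach_array(attach_array(NULL, &f1), &f2);
		struct link *list = attach_list(attach_list(NULL, &f1), &f2);

		for (int i = 0; arr && arr[i]; i++)
			printf("array: %d\n", arr[i]->id);
		for (struct link *l = list; l; l = l->next)
			printf("list:  %d\n", l->file->id);
		return 0;
	}

Detach is where the array hurts most: yet another allocation plus a
copy-and-check loop, versus a single unlink for the list.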

Oleg.

kernel/trace/trace_kprobe.c | 171 ++++++++++++------------------------------
1 files changed, 49 insertions(+), 122 deletions(-)


2013-06-16 17:26:08

by Oleg Nesterov

Subject: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

enable_trace_probe() and disable_trace_probe() should not worry about
serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
holds event_mutex.

They are also called by kprobe_trace_self_tests_init(), but this __init
function can't race with itself or with trace_events.c.

And note that this code depended on event_mutex even before 41a7dd420c
which introduced probe_enable_lock. In fact it assumes that the caller
kprobe_register() can never race with itself. Otherwise, say, tp->flags
manipulations are racy.

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/trace/trace_kprobe.c | 33 ++++++++++-----------------------
1 files changed, 10 insertions(+), 23 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c0af476..5a73de0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event,
return NULL;
}

+/*
+ * This and enable_trace_probe/disable_trace_probe rely on event_mutex
+ * held by the caller, __ftrace_set_clr_event().
+ */
static int trace_probe_nr_files(struct trace_probe *tp)
{
- struct ftrace_event_file **file;
+ struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
int ret = 0;

- /*
- * Since all tp->files updater is protected by probe_enable_lock,
- * we don't need to lock an rcu_read_lock.
- */
- file = rcu_dereference_raw(tp->files);
if (file)
while (*(file++))
ret++;
@@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp)
return ret;
}

-static DEFINE_MUTEX(probe_enable_lock);
-
/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
@@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;

- mutex_lock(&probe_enable_lock);
-
if (file) {
struct ftrace_event_file **new, **old;
int n = trace_probe_nr_files(tp);
@@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out;
}
memcpy(new, old, n * sizeof(struct ftrace_event_file *));
new[n] = file;
@@ -247,10 +242,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
else
ret = enable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}

@@ -283,8 +275,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;

- mutex_lock(&probe_enable_lock);
-
if (file) {
struct ftrace_event_file **new, **old;
int n = trace_probe_nr_files(tp);
@@ -293,7 +283,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
old = rcu_dereference_raw(tp->files);
if (n == 0 || trace_probe_file_index(tp, file) < 0) {
ret = -EINVAL;
- goto out_unlock;
+ goto out;
}

if (n == 1) { /* Remove the last file */
@@ -304,7 +294,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out;
}

/* This copy & check loop copies the NULL stopper too */
@@ -327,10 +317,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
else
disable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}

--
1.5.5.1

2013-06-16 17:26:14

by Oleg Nesterov

Subject: [PATCH 3/3] tracing/kprobes: Turn trace_probe->files into list_head

I think that "ftrace_event_file *trace_probe[]" complicates the
code for no reason; turn it into a list_head to simplify the code.
enable_trace_probe() no longer needs synchronize_sched().

This needs the extra sizeof(list_head) memory for every attached
ftrace_event_file, hopefully not a problem in this case.

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/trace/trace_kprobe.c | 138 ++++++++++++-------------------------------
1 files changed, 37 insertions(+), 101 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5a73de0..b95f683 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -35,12 +35,17 @@ struct trace_probe {
const char *symbol; /* symbol name */
struct ftrace_event_class class;
struct ftrace_event_call call;
- struct ftrace_event_file * __rcu *files;
+ struct list_head files;
ssize_t size; /* trace entry size */
unsigned int nr_args;
struct probe_arg args[];
};

+struct event_file_link {
+ struct ftrace_event_file *file;
+ struct list_head list;
+};
+
#define SIZEOF_TRACE_PROBE(n) \
(offsetof(struct trace_probe, args) + \
(sizeof(struct probe_arg) * (n)))
@@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
goto error;

INIT_LIST_HEAD(&tp->list);
+ INIT_LIST_HEAD(&tp->files);
return tp;
error:
kfree(tp->call.name);
@@ -184,22 +190,6 @@ static struct trace_probe *find_trace_probe(const char *event,
}

/*
- * This and enable_trace_probe/disable_trace_probe rely on event_mutex
- * held by the caller, __ftrace_set_clr_event().
- */
-static int trace_probe_nr_files(struct trace_probe *tp)
-{
- struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
- int ret = 0;
-
- if (file)
- while (*(file++))
- ret++;
-
- return ret;
-}
-
-/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
*/
@@ -209,29 +199,18 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
int ret = 0;

if (file) {
- struct ftrace_event_file **new, **old;
- int n = trace_probe_nr_files(tp);
-
- old = rcu_dereference_raw(tp->files);
- /* 1 is for new one and 1 is for stopper */
- new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
- GFP_KERNEL);
- if (!new) {
+ struct event_file_link *link;
+
+ link = kmalloc(sizeof(*link), GFP_KERNEL);
+ if (!link) {
ret = -ENOMEM;
goto out;
}
- memcpy(new, old, n * sizeof(struct ftrace_event_file *));
- new[n] = file;
- /* The last one keeps a NULL */

- rcu_assign_pointer(tp->files, new);
- tp->flags |= TP_FLAG_TRACE;
+ link->file = file;
+ list_add_rcu(&link->list, &tp->files);

- if (old) {
- /* Make sure the probe is done with old files */
- synchronize_sched();
- kfree(old);
- }
+ tp->flags |= TP_FLAG_TRACE;
} else
tp->flags |= TP_FLAG_PROFILE;

@@ -246,24 +225,16 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
return ret;
}

-static int
-trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
+static struct event_file_link *
+find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
{
- struct ftrace_event_file **files;
- int i;
+ struct event_file_link *link;

- /*
- * Since all tp->files updater is protected by probe_enable_lock,
- * we don't need to lock an rcu_read_lock.
- */
- files = rcu_dereference_raw(tp->files);
- if (files) {
- for (i = 0; files[i]; i++)
- if (files[i] == file)
- return i;
- }
+ list_for_each_entry(link, &tp->files, list)
+ if (link->file == file)
+ return link;

- return -1;
+ return NULL;
}

/*
@@ -276,38 +247,23 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
int ret = 0;

if (file) {
- struct ftrace_event_file **new, **old;
- int n = trace_probe_nr_files(tp);
- int i, j;
+ struct event_file_link *link;

- old = rcu_dereference_raw(tp->files);
- if (n == 0 || trace_probe_file_index(tp, file) < 0) {
+ link = find_event_file_link(tp, file);
+ if (!link) {
ret = -EINVAL;
goto out;
}

- if (n == 1) { /* Remove the last file */
- tp->flags &= ~TP_FLAG_TRACE;
- new = NULL;
- } else {
- new = kzalloc(n * sizeof(struct ftrace_event_file *),
- GFP_KERNEL);
- if (!new) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* This copy & check loop copies the NULL stopper too */
- for (i = 0, j = 0; j < n && i < n + 1; i++)
- if (old[i] != file)
- new[j++] = old[i];
- }
+ list_del_rcu(&link->list);
+ /* synchronize with kprobe_trace_func/kretprobe_trace_func */
+ synchronize_sched();
+ kfree(link);

- rcu_assign_pointer(tp->files, new);
+ if (!list_empty(&tp->files))
+ goto out;

- /* Make sure the probe is done with old files */
- synchronize_sched();
- kfree(old);
+ tp->flags &= ~TP_FLAG_TRACE;
} else
tp->flags &= ~TP_FLAG_PROFILE;

@@ -872,20 +828,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
static __kprobes void
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
{
- /*
- * Note: preempt is already disabled around the kprobe handler.
- * However, we still need an smp_read_barrier_depends() corresponding
- * to smp_wmb() in rcu_assign_pointer() to access the pointer.
- */
- struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
-
- if (unlikely(!file))
- return;
+ struct event_file_link *link;

- while (*file) {
- __kprobe_trace_func(tp, regs, *file);
- file++;
- }
+ list_for_each_entry_rcu(link, &tp->files, list)
+ __kprobe_trace_func(tp, regs, link->file);
}

/* Kretprobe handler */
@@ -932,20 +878,10 @@ static __kprobes void
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- /*
- * Note: preempt is already disabled around the kprobe handler.
- * However, we still need an smp_read_barrier_depends() corresponding
- * to smp_wmb() in rcu_assign_pointer() to access the pointer.
- */
- struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
-
- if (unlikely(!file))
- return;
+ struct event_file_link *link;

- while (*file) {
- __kretprobe_trace_func(tp, ri, regs, *file);
- file++;
- }
+ list_for_each_entry_rcu(link, &tp->files, list)
+ __kretprobe_trace_func(tp, ri, regs, link->file);
}

/* Event entry printers */
--
1.5.5.1

2013-06-16 17:26:07

by Oleg Nesterov

Subject: [PATCH 1/3] tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty

perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense
if this task/CPU has no active counters. Change kprobe_perf_func()
and kretprobe_perf_func() to check call->perf_events beforehand
and return if this list is empty.

For example, with "perf record -e some_probe -p1", only /sbin/init will
report; all other threads which hit the same probe will do
perf_trace_buf_prepare/perf_trace_buf_submit just to realize that
nobody wants perf_swevent_event().

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/trace/trace_kprobe.c | 12 ++++++++----
1 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9f46e98..c0af476 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1157,6 +1157,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
int size, __size, dsize;
int rctx;

+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return;
+
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
@@ -1172,8 +1176,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
entry->ip = (unsigned long)tp->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
-
- head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx,
entry->ip, 1, regs, head, NULL);
}
@@ -1189,6 +1191,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
int size, __size, dsize;
int rctx;

+ head = this_cpu_ptr(call->perf_events);
+ if (hlist_empty(head))
+ return;
+
dsize = __get_data_size(tp, regs);
__size = sizeof(*entry) + tp->size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
@@ -1204,8 +1210,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
-
- head = this_cpu_ptr(call->perf_events);
perf_trace_buf_submit(entry, size, rctx,
entry->ret_ip, 1, regs, head, NULL);
}
--
1.5.5.1

2013-06-17 03:49:22

by zhangwei(Jovi)

Subject: Re: [PATCH 1/3] tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty

On 2013/6/17 1:21, Oleg Nesterov wrote:
> perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense
> if this task/CPU has no active counters. Change kprobe_perf_func()
> and kretprobe_perf_func() to check call->perf_events beforehand
> and return if this list is empty.
>
> For example, with "perf record -e some_probe -p1", only /sbin/init will
> report; all other threads which hit the same probe will do
> perf_trace_buf_prepare/perf_trace_buf_submit just to realize that
> nobody wants perf_swevent_event().
>
> Signed-off-by: Oleg Nesterov <[email protected]>

Good point. I think we also need to change some other places; see the
patch below.

After applying the patch, perf_tp_event() calls were reduced a lot
when using task-based perf tracing.


-----------------------------------


tracing: Avoid perf_trace_buf_*() if ->perf_events is empty


Signed-off-by: zhangwei(Jovi) <[email protected]>
---
include/trace/ftrace.h | 5 ++++-
kernel/trace/trace_syscalls.c | 10 ++++++++--
2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 19edd7f..5d340f5 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -659,6 +659,10 @@ perf_trace_##call(void *__data, proto) \
int __data_size; \
int rctx; \
\
+ head = this_cpu_ptr(event_call->perf_events); \
+ if (hlist_empty(head)) \
+ return; \
+ \
perf_fetch_caller_regs(&__regs); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
@@ -679,7 +683,6 @@ perf_trace_##call(void *__data, proto) \
\
{ assign; } \
\
- head = this_cpu_ptr(event_call->perf_events); \
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
__count, &__regs, head, __task); \
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8f2ac73..28debf4 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -553,6 +553,10 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
if (!sys_data)
return;

+ head = this_cpu_ptr(sys_data->enter_event->perf_events);
+ if (hlist_empty(head))
+ return;
+
/* get the size after alignment with the u32 buffer size field */
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
size = ALIGN(size + sizeof(u32), sizeof(u64));
@@ -571,7 +575,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
(unsigned long *)&rec->args);

- head = this_cpu_ptr(sys_data->enter_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}

@@ -629,6 +632,10 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
if (!sys_data)
return;

+ head = this_cpu_ptr(sys_data->exit_event->perf_events);
+ if (hlist_empty(head))
+ return;
+
/* We can probably do that at build time */
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
@@ -649,7 +656,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
rec->nr = syscall_nr;
rec->ret = syscall_get_return_value(current, regs);

- head = this_cpu_ptr(sys_data->exit_event->perf_events);
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}

--
1.7.9.7


Subject: Re: [PATCH 1/3] tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty

(2013/06/17 2:21), Oleg Nesterov wrote:
> perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense
> if this task/CPU has no active counters. Change kprobe_perf_func()
> and kretprobe_perf_func() to check call->perf_events beforehand
> and return if this list is empty.
>
> For example, with "perf record -e some_probe -p1", only /sbin/init will
> report; all other threads which hit the same probe will do
> perf_trace_buf_prepare/perf_trace_buf_submit just to realize that
> nobody wants perf_swevent_event().
>
> Signed-off-by: Oleg Nesterov <[email protected]>

Looks good :) Thank you!

Acked-by: Masami Hiramatsu <[email protected]>

> ---
> kernel/trace/trace_kprobe.c | 12 ++++++++----
> 1 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 9f46e98..c0af476 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -1157,6 +1157,10 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
> int size, __size, dsize;
> int rctx;
>
> + head = this_cpu_ptr(call->perf_events);
> + if (hlist_empty(head))
> + return;
> +
> dsize = __get_data_size(tp, regs);
> __size = sizeof(*entry) + tp->size + dsize;
> size = ALIGN(__size + sizeof(u32), sizeof(u64));
> @@ -1172,8 +1176,6 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
> entry->ip = (unsigned long)tp->rp.kp.addr;
> memset(&entry[1], 0, dsize);
> store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
> -
> - head = this_cpu_ptr(call->perf_events);
> perf_trace_buf_submit(entry, size, rctx,
> entry->ip, 1, regs, head, NULL);
> }
> @@ -1189,6 +1191,10 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
> int size, __size, dsize;
> int rctx;
>
> + head = this_cpu_ptr(call->perf_events);
> + if (hlist_empty(head))
> + return;
> +
> dsize = __get_data_size(tp, regs);
> __size = sizeof(*entry) + tp->size + dsize;
> size = ALIGN(__size + sizeof(u32), sizeof(u64));
> @@ -1204,8 +1210,6 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
> entry->func = (unsigned long)tp->rp.kp.addr;
> entry->ret_ip = (unsigned long)ri->ret_addr;
> store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
> -
> - head = this_cpu_ptr(call->perf_events);
> perf_trace_buf_submit(entry, size, rctx,
> entry->ret_ip, 1, regs, head, NULL);
> }
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

(2013/06/17 2:21), Oleg Nesterov wrote:
> enable_trace_probe() and disable_trace_probe() should not worry about
> serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
> holds event_mutex.
>
> They are also called by kprobe_trace_self_tests_init(), but this __init
> function can't race with itself or with trace_events.c.

Right.
For safety, we should comment this at the caller side, because
those calls are the reason why I have introduced this lock.

Thank you,

> And note that this code depended on event_mutex even before 41a7dd420c
> which introduced probe_enable_lock. In fact it assumes that the caller
> kprobe_register() can never race with itself. Otherwise, say, tp->flags
> manipulations are racy.
>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/trace/trace_kprobe.c | 33 ++++++++++-----------------------
> 1 files changed, 10 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index c0af476..5a73de0 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event,
> return NULL;
> }
>
> +/*
> + * This and enable_trace_probe/disable_trace_probe rely on event_mutex
> + * held by the caller, __ftrace_set_clr_event().
> + */
> static int trace_probe_nr_files(struct trace_probe *tp)
> {
> - struct ftrace_event_file **file;
> + struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
> int ret = 0;
>
> - /*
> - * Since all tp->files updater is protected by probe_enable_lock,
> - * we don't need to lock an rcu_read_lock.
> - */
> - file = rcu_dereference_raw(tp->files);
> if (file)
> while (*(file++))
> ret++;
> @@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp)
> return ret;
> }
>
> -static DEFINE_MUTEX(probe_enable_lock);
> -
> /*
> * Enable trace_probe
> * if the file is NULL, enable "perf" handler, or enable "trace" handler.
> @@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> {
> int ret = 0;
>
> - mutex_lock(&probe_enable_lock);
> -
> if (file) {
> struct ftrace_event_file **new, **old;
> int n = trace_probe_nr_files(tp);
> @@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> GFP_KERNEL);
> if (!new) {
> ret = -ENOMEM;
> - goto out_unlock;
> + goto out;
> }
> memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> new[n] = file;
> @@ -247,10 +242,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> else
> ret = enable_kprobe(&tp->rp.kp);
> }
> -
> - out_unlock:
> - mutex_unlock(&probe_enable_lock);
> -
> + out:
> return ret;
> }
>
> @@ -283,8 +275,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> {
> int ret = 0;
>
> - mutex_lock(&probe_enable_lock);
> -
> if (file) {
> struct ftrace_event_file **new, **old;
> int n = trace_probe_nr_files(tp);
> @@ -293,7 +283,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> old = rcu_dereference_raw(tp->files);
> if (n == 0 || trace_probe_file_index(tp, file) < 0) {
> ret = -EINVAL;
> - goto out_unlock;
> + goto out;
> }
>
> if (n == 1) { /* Remove the last file */
> @@ -304,7 +294,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> GFP_KERNEL);
> if (!new) {
> ret = -ENOMEM;
> - goto out_unlock;
> + goto out;
> }
>
> /* This copy & check loop copies the NULL stopper too */
> @@ -327,10 +317,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> else
> disable_kprobe(&tp->rp.kp);
> }
> -
> - out_unlock:
> - mutex_unlock(&probe_enable_lock);
> -
> + out:
> return ret;
> }
>
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH 3/3] tracing/kprobes: Turn trace_probe->files into list_head

(2013/06/17 2:21), Oleg Nesterov wrote:
> I think that "ftrace_event_file *trace_probe[]" complicates the
> code for no reason; turn it into a list_head to simplify the code.
> enable_trace_probe() no longer needs synchronize_sched().

Looks cleaner :)

>
> This needs the extra sizeof(list_head) memory for every attached
> ftrace_event_file, hopefully not a problem in this case.

I think it's no problem, because the number depends on the instances,
and it should not be very large. :)
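
(For scale, assuming a 64-bit kernel and ignoring padding: each
struct event_file_link is one pointer plus a list_head, i.e.
8 + 16 = 24 bytes per attached ftrace_event_file, versus the 8-byte
slot it replaces in the old array.)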

Thanks!

Acked-by: Masami Hiramatsu <[email protected]>

>
> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/trace/trace_kprobe.c | 138 ++++++++++++-------------------------------
> 1 files changed, 37 insertions(+), 101 deletions(-)
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 5a73de0..b95f683 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -35,12 +35,17 @@ struct trace_probe {
> const char *symbol; /* symbol name */
> struct ftrace_event_class class;
> struct ftrace_event_call call;
> - struct ftrace_event_file * __rcu *files;
> + struct list_head files;
> ssize_t size; /* trace entry size */
> unsigned int nr_args;
> struct probe_arg args[];
> };
>
> +struct event_file_link {
> + struct ftrace_event_file *file;
> + struct list_head list;
> +};
> +
> #define SIZEOF_TRACE_PROBE(n) \
> (offsetof(struct trace_probe, args) + \
> (sizeof(struct probe_arg) * (n)))
> @@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
> goto error;
>
> INIT_LIST_HEAD(&tp->list);
> + INIT_LIST_HEAD(&tp->files);
> return tp;
> error:
> kfree(tp->call.name);
> @@ -184,22 +190,6 @@ static struct trace_probe *find_trace_probe(const char *event,
> }
>
> /*
> - * This and enable_trace_probe/disable_trace_probe rely on event_mutex
> - * held by the caller, __ftrace_set_clr_event().
> - */
> -static int trace_probe_nr_files(struct trace_probe *tp)
> -{
> - struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
> - int ret = 0;
> -
> - if (file)
> - while (*(file++))
> - ret++;
> -
> - return ret;
> -}
> -
> -/*
> * Enable trace_probe
> * if the file is NULL, enable "perf" handler, or enable "trace" handler.
> */
> @@ -209,29 +199,18 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> int ret = 0;
>
> if (file) {
> - struct ftrace_event_file **new, **old;
> - int n = trace_probe_nr_files(tp);
> -
> - old = rcu_dereference_raw(tp->files);
> - /* 1 is for new one and 1 is for stopper */
> - new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
> - GFP_KERNEL);
> - if (!new) {
> + struct event_file_link *link;
> +
> + link = kmalloc(sizeof(*link), GFP_KERNEL);
> + if (!link) {
> ret = -ENOMEM;
> goto out;
> }
> - memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> - new[n] = file;
> - /* The last one keeps a NULL */
>
> - rcu_assign_pointer(tp->files, new);
> - tp->flags |= TP_FLAG_TRACE;
> + link->file = file;
> + list_add_rcu(&link->list, &tp->files);
>
> - if (old) {
> - /* Make sure the probe is done with old files */
> - synchronize_sched();
> - kfree(old);
> - }
> + tp->flags |= TP_FLAG_TRACE;
> } else
> tp->flags |= TP_FLAG_PROFILE;
>
> @@ -246,24 +225,16 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> return ret;
> }
>
> -static int
> -trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
> +static struct event_file_link *
> +find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
> {
> - struct ftrace_event_file **files;
> - int i;
> + struct event_file_link *link;
>
> - /*
> - * Since all tp->files updater is protected by probe_enable_lock,
> - * we don't need to lock an rcu_read_lock.
> - */
> - files = rcu_dereference_raw(tp->files);
> - if (files) {
> - for (i = 0; files[i]; i++)
> - if (files[i] == file)
> - return i;
> - }
> + list_for_each_entry(link, &tp->files, list)
> + if (link->file == file)
> + return link;
>
> - return -1;
> + return NULL;
> }
>
> /*
> @@ -276,38 +247,23 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> int ret = 0;
>
> if (file) {
> - struct ftrace_event_file **new, **old;
> - int n = trace_probe_nr_files(tp);
> - int i, j;
> + struct event_file_link *link;
>
> - old = rcu_dereference_raw(tp->files);
> - if (n == 0 || trace_probe_file_index(tp, file) < 0) {
> + link = find_event_file_link(tp, file);
> + if (!link) {
> ret = -EINVAL;
> goto out;
> }
>
> - if (n == 1) { /* Remove the last file */
> - tp->flags &= ~TP_FLAG_TRACE;
> - new = NULL;
> - } else {
> - new = kzalloc(n * sizeof(struct ftrace_event_file *),
> - GFP_KERNEL);
> - if (!new) {
> - ret = -ENOMEM;
> - goto out;
> - }
> -
> - /* This copy & check loop copies the NULL stopper too */
> - for (i = 0, j = 0; j < n && i < n + 1; i++)
> - if (old[i] != file)
> - new[j++] = old[i];
> - }
> + list_del_rcu(&link->list);
> + /* synchronize with kprobe_trace_func/kretprobe_trace_func */
> + synchronize_sched();
> + kfree(link);
>
> - rcu_assign_pointer(tp->files, new);
> + if (!list_empty(&tp->files))
> + goto out;
>
> - /* Make sure the probe is done with old files */
> - synchronize_sched();
> - kfree(old);
> + tp->flags &= ~TP_FLAG_TRACE;
> } else
> tp->flags &= ~TP_FLAG_PROFILE;
>
> @@ -872,20 +828,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
> static __kprobes void
> kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
> {
> - /*
> - * Note: preempt is already disabled around the kprobe handler.
> - * However, we still need an smp_read_barrier_depends() corresponding
> - * to smp_wmb() in rcu_assign_pointer() to access the pointer.
> - */
> - struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
> -
> - if (unlikely(!file))
> - return;
> + struct event_file_link *link;
>
> - while (*file) {
> - __kprobe_trace_func(tp, regs, *file);
> - file++;
> - }
> + list_for_each_entry_rcu(link, &tp->files, list)
> + __kprobe_trace_func(tp, regs, link->file);
> }
>
> /* Kretprobe handler */
> @@ -932,20 +878,10 @@ static __kprobes void
> kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
> struct pt_regs *regs)
> {
> - /*
> - * Note: preempt is already disabled around the kprobe handler.
> - * However, we still need an smp_read_barrier_depends() corresponding
> - * to smp_wmb() in rcu_assign_pointer() to access the pointer.
> - */
> - struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
> -
> - if (unlikely(!file))
> - return;
> + struct event_file_link *link;
>
> - while (*file) {
> - __kretprobe_trace_func(tp, ri, regs, *file);
> - file++;
> - }
> + list_for_each_entry_rcu(link, &tp->files, list)
> + __kretprobe_trace_func(tp, ri, regs, link->file);
> }
>
> /* Event entry printers */
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2013-06-17 13:52:35

by Oleg Nesterov

Subject: Re: [PATCH 1/3] tracing/kprobes: Avoid perf_trace_buf_*() if ->perf_events is empty

On 06/17, zhangwei(Jovi) wrote:
>
> On 2013/6/17 1:21, Oleg Nesterov wrote:
> > perf_trace_buf_prepare() + perf_trace_buf_submit() make no sense
> > if this task/CPU has no active counters. Change kprobe_perf_func()
> > and kretprobe_perf_func() to check call->perf_events beforehand
> > and return if this list is empty.
> >
> > For example, with "perf record -e some_probe -p1", only /sbin/init will
> > report; all other threads which hit the same probe will do
> > perf_trace_buf_prepare/perf_trace_buf_submit just to realize that
> > nobody wants perf_swevent_event().
> >
> > Signed-off-by: Oleg Nesterov <[email protected]>
>
> Good point. I think we also need to change some other places; see the
> patch below.
>
> After applying the patch, perf_tp_event() calls were reduced a lot
> when using task-based perf tracing.

Yes, I was going to do this, but this is not that simple.

> --- a/include/trace/ftrace.h
> +++ b/include/trace/ftrace.h
> @@ -659,6 +659,10 @@ perf_trace_##call(void *__data, proto) \
> int __data_size; \
> int rctx; \
> \
> + head = this_cpu_ptr(event_call->perf_events); \
> + if (hlist_empty(head)) \
> + return; \
> + \

This is not right. Please note __perf_task() and
"if (task && task != current)" in perf_tp_event().

Oleg.

2013-06-17 15:23:14

by Oleg Nesterov

Subject: Re: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

On 06/17, Masami Hiramatsu wrote:
>
> (2013/06/17 2:21), Oleg Nesterov wrote:
> > enable_trace_probe() and disable_trace_probe() should not worry about
> > serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
> > holds event_mutex.
> >
> > They are also called by kprobe_trace_self_tests_init(), but this __init
> > function can't race with itself or with trace_events.c.
>
> Right.
> For safety, we should comment this at the caller side,

Which caller do you mean?

The patch adds

/*
* This and enable_trace_probe/disable_trace_probe rely on event_mutex
* held by the caller, __ftrace_set_clr_event().
*/

above trace_probe_nr_files() but the next patch removes this function
with the comment...

Will you agree with this patch if I add something like

/*
* called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex
*/

above kprobe_register()? Perhaps it makes sense to add
lockdep_assert_held(&event_mutex) into the body?
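
Something like this, I mean (just a sketch against this series, the
lockdep line being the new, hypothetical part):

	static __kprobes
	int kprobe_register(struct ftrace_event_call *event,
			    enum trace_reg type, void *data)
	{
		struct trace_probe *tp = (struct trace_probe *)event->data;
		struct ftrace_event_file *file = data;

		/*
		 * Both callers, perf_trace_init() and __ftrace_set_clr_event(),
		 * hold event_mutex; kprobe_trace_self_tests_init() calls the
		 * enable/disable helpers directly and never comes here.
		 */
		lockdep_assert_held(&event_mutex);

		switch (type) {
		case TRACE_REG_REGISTER:
			return enable_trace_probe(tp, file);
		case TRACE_REG_UNREGISTER:
			return disable_trace_probe(tp, file);
	#ifdef CONFIG_PERF_EVENTS
		case TRACE_REG_PERF_REGISTER:
			return enable_trace_probe(tp, NULL);
		case TRACE_REG_PERF_UNREGISTER:
			return disable_trace_probe(tp, NULL);
	#endif
		default:
			break;
		}
		return 0;
	}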

And:

> because
> those calls are the reason why I have introduced this lock.

Please do not hesitate to nack this patch if you think that we should
keep probe_enable_lock for safety even if it is not currently needed.
In this case I'd suggest moving lock/unlock into kprobe_register(),
but this is minor.

Oleg.

2013-06-17 15:35:09

by Oleg Nesterov

Subject: Re: [PATCH 3/3] tracing/kprobes: Turn trace_probe->files into list_head

On 06/17, Masami Hiramatsu wrote:
>
> (2013/06/17 2:21), Oleg Nesterov wrote:
> >
> > This needs the extra sizeof(list_head) memory for every attached
> > ftrace_event_file, hopefully not a problem in this case.
>
> I think it's no problem, because the number depends on the instances,
> and it should not be very large. :)

Yes, agreed.

> Acked-by: Masami Hiramatsu <[email protected]>

Thanks!

Given that 2/3 should update the comments (or should be dropped
if you do not like it), this one should be rediffed. And I just
noticed a typo in this patch,

> > @@ -209,29 +199,18 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> > ...
> > + link->file = file;
> > + list_add_rcu(&link->list, &tp->files);

I meant list_add_tail_rcu().

I guess this doesn't matter at all; still, I'd like to avoid any
visible changes in behaviour.

Otherwise we could use an hlist or even a singly linked list, although
this doesn't really matter either.
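
For completeness, the difference (illustrative snippet only):

	list_add_rcu(&link->list, &tp->files);	     /* prepends: handlers see
							the newest file first */
	list_add_tail_rcu(&link->list, &tp->files);  /* appends: same order as
							the old array */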

I'll preserve your ack.

Oleg.

Subject: Re: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

(2013/06/18 0:18), Oleg Nesterov wrote:
> On 06/17, Masami Hiramatsu wrote:
>>
>> (2013/06/17 2:21), Oleg Nesterov wrote:
>>> enable_trace_probe() and disable_trace_probe() should not worry about
>>> serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
>>> holds event_mutex.
>>>
>>> They are also called by kprobe_trace_self_tests_init(), but this __init
>>> function can't race with itself or with trace_events.c.
>>
>> Right.
>> For safety, we should comment this at the caller side,
>
> Which caller do you mean?

I meant the caller kprobe_trace_self_tests_init().
Since that function calls enable/disable_trace_probe()
without holding event_mutex, we need to note at the call
sites that this is safe because there is no race :)

Thank you,

>
> The patch adds
>
> /*
> * This and enable_trace_probe/disable_trace_probe rely on event_mutex
> * held by the caller, __ftrace_set_clr_event().
> */
>
> above trace_probe_nr_files() but the next patch removes this function
> with the comment...
>
> Will you agree with this patch if I add something like
>
> /*
> * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex
> */
>
> above kprobe_register() ? Perhaps it makes sense to add
> lockdep_assert_held(&event_mutex) into the body?
>
> And:
>
>> because
>> those calls are the reason why I have introduced this lock.
>
> Please do not hesitate to nack this patch if you think that we should
> keep probe_enable_lock for safety even if it is not currently needed.
> In this case I'd suggest moving lock/unlock into kprobe_register(),
> but this is minor.
>
> Oleg.
>
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: Re: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

(2013/06/18 0:18), Oleg Nesterov wrote:
>> because
>> those calls are the reason why I have introduced this lock.
>
> Please do not hesitate to nack this patch if you think that we should
> keep probe_enable_lock for safety even if it is not currently needed.
> In this case I'd suggest moving lock/unlock into kprobe_register(),
> but this is minor.

Oh, I agree with removing probe_enable_lock itself :)
I was just concerned about the exceptional case of the __init test
function, which can mislead someone into using enable/disable_trace_probe
at some other racy point.

Thank you,

--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

2013-06-18 19:29:04

by Oleg Nesterov

Subject: Re: Re: [PATCH 2/3] tracing/kprobes: Kill probe_enable_lock

On 06/18, Masami Hiramatsu wrote:
>
> Oh, I agree with removing probe_enable_lock itself :)
> I was just concerned about the exceptional case of the __init test
> function, which can mislead someone into using enable/disable_trace_probe
> at some other racy point.

Ah, understand.

OK, I'll send v2 with the updated comments plus the additional patch
tomorrow.

Thanks!

Oleg.

2013-06-19 20:34:34

by Oleg Nesterov

Subject: [PATCH v2 2/3] tracing/kprobes: Kill probe_enable_lock

On 06/18, Oleg Nesterov wrote:
>
> On 06/18, Masami Hiramatsu wrote:
> >
> > Oh, I agree with removing probe_enable_lock itself :)
> > I was just concerned about the exceptional case of the __init test
> > function, which can mislead someone into using enable/disable_trace_probe
> > at some other racy point.
>
> Ah, understand.
>
> OK, I'll send v2 with the updated comments plus the additional patch
> tomorrow.

So. I'll resend this series; will you ack the v2 below?

I only added a couple of comments; the interdiff is:

@@ -1202,6 +1202,12 @@ kretprobe_perf_func(struct trace_probe *
}
#endif /* CONFIG_PERF_EVENTS */

+/*
+ * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
+ *
+ * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
+ * lockless, but we can't race with this __init function.
+ */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
@@ -1367,6 +1373,10 @@ find_trace_probe_file(struct trace_probe
return NULL;
}

+/*
+ * Nobody but us can call enable_trace_probe/disable_trace_probe at this
+ * stage, we can do this lockless.
+ */
static __init int kprobe_trace_self_tests_init(void)
{
int ret, warn = 0;

3/3 was updated too, but the only change is s/list_add_rcu/list_add_tail_rcu/;
I won't spam the list but will preserve your ack unless you object.

Oleg.

-------------------------------------------------------------------------------
Subject: [PATCH v2] tracing/kprobes: Kill probe_enable_lock

enable_trace_probe() and disable_trace_probe() should not worry about
serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
holds event_mutex.

They are also called by kprobe_trace_self_tests_init(), but this __init
function can't race with itself or with trace_events.c.

And note that this code depended on event_mutex even before 41a7dd420c
which introduced probe_enable_lock. In fact it assumes that the caller
kprobe_register() can never race with itself. Otherwise, say, tp->flags
manipulations are racy.

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/trace/trace_kprobe.c | 43 ++++++++++++++++++++-----------------------
1 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c0af476..3432652 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event,
return NULL;
}

+/*
+ * This and enable_trace_probe/disable_trace_probe rely on event_mutex
+ * held by the caller, __ftrace_set_clr_event().
+ */
static int trace_probe_nr_files(struct trace_probe *tp)
{
- struct ftrace_event_file **file;
+ struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
int ret = 0;

- /*
- * Since all tp->files updater is protected by probe_enable_lock,
- * we don't need to lock an rcu_read_lock.
- */
- file = rcu_dereference_raw(tp->files);
if (file)
while (*(file++))
ret++;
@@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp)
return ret;
}

-static DEFINE_MUTEX(probe_enable_lock);
-
/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
@@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;

- mutex_lock(&probe_enable_lock);
-
if (file) {
struct ftrace_event_file **new, **old;
int n = trace_probe_nr_files(tp);
@@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out;
}
memcpy(new, old, n * sizeof(struct ftrace_event_file *));
new[n] = file;
@@ -247,10 +242,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
else
ret = enable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}

@@ -283,8 +275,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
{
int ret = 0;

- mutex_lock(&probe_enable_lock);
-
if (file) {
struct ftrace_event_file **new, **old;
int n = trace_probe_nr_files(tp);
@@ -293,7 +283,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
old = rcu_dereference_raw(tp->files);
if (n == 0 || trace_probe_file_index(tp, file) < 0) {
ret = -EINVAL;
- goto out_unlock;
+ goto out;
}

if (n == 1) { /* Remove the last file */
@@ -304,7 +294,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
GFP_KERNEL);
if (!new) {
ret = -ENOMEM;
- goto out_unlock;
+ goto out;
}

/* This copy & check loop copies the NULL stopper too */
@@ -327,10 +317,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
else
disable_kprobe(&tp->rp.kp);
}
-
- out_unlock:
- mutex_unlock(&probe_enable_lock);
-
+ out:
return ret;
}

@@ -1215,6 +1202,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
}
#endif /* CONFIG_PERF_EVENTS */

+/*
+ * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
+ *
+ * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
+ * lockless, but we can't race with this __init function.
+ */
static __kprobes
int kprobe_register(struct ftrace_event_call *event,
enum trace_reg type, void *data)
@@ -1380,6 +1373,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
return NULL;
}

+/*
+ * Nobody but us can call enable_trace_probe/disable_trace_probe at this
+ * stage, we can do this lockless.
+ */
static __init int kprobe_trace_self_tests_init(void)
{
int ret, warn = 0;
--
1.5.5.1

2013-06-19 20:38:55

by Oleg Nesterov

Subject: [PATCH] tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit()

On 06/19, Oleg Nesterov wrote:
>
> So. I'll resend this series; will you ack the v2 below?

Also. Could you please review the new patch I am going to include
in this series?

This is a copy-and-paste of 32520b2c6, which you reviewed and acked.

And please note that kprobes is the last user of
perf_trace_buf_submit(addr != 0); we can make some cleanups on top.
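
For context, the addr argument ends up in perf_sample_data->addr,
roughly like this in perf_tp_event() (paraphrased from memory of
~v3.10):

	/* addr is the value kprobe_perf_func() passed as entry->ip */
	perf_sample_data_init(&data, addr, 0);	/* data.addr = addr */

so it is only ever reported if the event asked for PERF_SAMPLE_ADDR.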

-------------------------------------------------------------------------------
Subject: [PATCH] tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit()

kprobe_perf_func() and kretprobe_perf_func() pass addr=ip to
perf_trace_buf_submit() for no reason.

This sets perf_sample_data->addr for PERF_SAMPLE_ADDR; we already
have perf_sample_data->ip initialized if PERF_SAMPLE_IP.

Signed-off-by: Oleg Nesterov <[email protected]>
---
kernel/trace/trace_kprobe.c | 6 ++----
1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3432652..141c682 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1163,8 +1163,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
entry->ip = (unsigned long)tp->rp.kp.addr;
memset(&entry[1], 0, dsize);
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
- perf_trace_buf_submit(entry, size, rctx,
- entry->ip, 1, regs, head, NULL);
+ perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}

/* Kretprobe profile handler */
@@ -1197,8 +1196,7 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
entry->func = (unsigned long)tp->rp.kp.addr;
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
- perf_trace_buf_submit(entry, size, rctx,
- entry->ret_ip, 1, regs, head, NULL);
+ perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
}
#endif /* CONFIG_PERF_EVENTS */

--
1.5.5.1

Subject: Re: [PATCH v2 2/3] tracing/kprobes: Kill probe_enable_lock

(2013/06/20 5:30), Oleg Nesterov wrote:
> On 06/18, Oleg Nesterov wrote:
>>
>> On 06/18, Masami Hiramatsu wrote:
>>>
>>> Oh, I agree with removing probe_enable_lock itself :)
>>> I was just concerned about the exceptional case of the __init test
>>> function, which can mislead someone into using enable/disable_trace_probe
>>> at some other racy point.
>>
>> Ah, understand.
>>
>> OK, I'll send v2 with the updated comments plus the additional patch
>> tomorrow.
>
> So. I'll resend this series; will you ack the v2 below?
>
> I only added a couple of comments; the interdiff is:
>
> @@ -1202,6 +1202,12 @@ kretprobe_perf_func(struct trace_probe *
> }
> #endif /* CONFIG_PERF_EVENTS */
>
> +/*
> + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
> + *
> + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
> + * lockless, but we can't race with this __init function.
> + */
> static __kprobes
> int kprobe_register(struct ftrace_event_call *event,
> enum trace_reg type, void *data)
> @@ -1367,6 +1373,10 @@ find_trace_probe_file(struct trace_probe
> return NULL;
> }
>
> +/*
> + * Nobody but us can call enable_trace_probe/disable_trace_probe at this
> + * stage, we can do this lockless.
> + */
> static __init int kprobe_trace_self_tests_init(void)
> {
> int ret, warn = 0;

Looks good to me :)

>
> 3/3 was updated too, but the only change is s/list_add_rcu/list_add_tail_rcu/;
> I won't spam the list but will preserve your ack unless you object.

OK, I think it's a minor change.

> -------------------------------------------------------------------------------
> Subject: [PATCH v2] tracing/kprobes: Kill probe_enable_lock
>
> enable_trace_probe() and disable_trace_probe() should not worry about
> serialization; the caller (perf_trace_init or __ftrace_set_clr_event)
> holds event_mutex.
>
> They are also called by kprobe_trace_self_tests_init(), but this __init
> function can't race with itself or with trace_events.c.
>
> And note that this code depended on event_mutex even before 41a7dd420c
> which introduced probe_enable_lock. In fact it assumes that the caller
> kprobe_register() can never race with itself. Otherwise, say, tp->flags
> manipulations are racy.
>

Acked-by: Masami Hiramatsu <[email protected]>

Thank you!

> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/trace/trace_kprobe.c | 43 ++++++++++++++++++++-----------------------
> 1 files changed, 20 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index c0af476..3432652 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -183,16 +183,15 @@ static struct trace_probe *find_trace_probe(const char *event,
> return NULL;
> }
>
> +/*
> + * This and enable_trace_probe/disable_trace_probe rely on event_mutex
> + * held by the caller, __ftrace_set_clr_event().
> + */
> static int trace_probe_nr_files(struct trace_probe *tp)
> {
> - struct ftrace_event_file **file;
> + struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
> int ret = 0;
>
> - /*
> - * Since all tp->files updater is protected by probe_enable_lock,
> - * we don't need to lock an rcu_read_lock.
> - */
> - file = rcu_dereference_raw(tp->files);
> if (file)
> while (*(file++))
> ret++;
> @@ -200,8 +199,6 @@ static int trace_probe_nr_files(struct trace_probe *tp)
> return ret;
> }
>
> -static DEFINE_MUTEX(probe_enable_lock);
> -
> /*
> * Enable trace_probe
> * if the file is NULL, enable "perf" handler, or enable "trace" handler.
> @@ -211,8 +208,6 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> {
> int ret = 0;
>
> - mutex_lock(&probe_enable_lock);
> -
> if (file) {
> struct ftrace_event_file **new, **old;
> int n = trace_probe_nr_files(tp);
> @@ -223,7 +218,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> GFP_KERNEL);
> if (!new) {
> ret = -ENOMEM;
> - goto out_unlock;
> + goto out;
> }
> memcpy(new, old, n * sizeof(struct ftrace_event_file *));
> new[n] = file;
> @@ -247,10 +242,7 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> else
> ret = enable_kprobe(&tp->rp.kp);
> }
> -
> - out_unlock:
> - mutex_unlock(&probe_enable_lock);
> -
> + out:
> return ret;
> }
>
> @@ -283,8 +275,6 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> {
> int ret = 0;
>
> - mutex_lock(&probe_enable_lock);
> -
> if (file) {
> struct ftrace_event_file **new, **old;
> int n = trace_probe_nr_files(tp);
> @@ -293,7 +283,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> old = rcu_dereference_raw(tp->files);
> if (n == 0 || trace_probe_file_index(tp, file) < 0) {
> ret = -EINVAL;
> - goto out_unlock;
> + goto out;
> }
>
> if (n == 1) { /* Remove the last file */
> @@ -304,7 +294,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> GFP_KERNEL);
> if (!new) {
> ret = -ENOMEM;
> - goto out_unlock;
> + goto out;
> }
>
> /* This copy & check loop copies the NULL stopper too */
> @@ -327,10 +317,7 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
> else
> disable_kprobe(&tp->rp.kp);
> }
> -
> - out_unlock:
> - mutex_unlock(&probe_enable_lock);
> -
> + out:
> return ret;
> }
>
> @@ -1215,6 +1202,12 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
> }
> #endif /* CONFIG_PERF_EVENTS */
>
> +/*
> + * called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
> + *
> + * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
> + * lockless, but we can't race with this __init function.
> + */
> static __kprobes
> int kprobe_register(struct ftrace_event_call *event,
> enum trace_reg type, void *data)
> @@ -1380,6 +1373,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
> return NULL;
> }
>
> +/*
> + * Nobody but us can call enable_trace_probe/disable_trace_probe at this
> + * stage, we can do this lockless.
> + */
> static __init int kprobe_trace_self_tests_init(void)
> {
> int ret, warn = 0;
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]

Subject: Re: [PATCH] tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit()

(2013/06/20 5:34), Oleg Nesterov wrote:
> On 06/19, Oleg Nesterov wrote:
>>
>> So. I'll resend this series; will you ack the v2 below?
>
> Also. Could you please review the new patch I am going to include
> in this series?

It looks OK to me. BTW, IIRC, I had reviewed the same one previously...

>
> This is a copy-and-paste of 32520b2c6, which you reviewed and acked.
>
> And please note that kprobes is the last user of
> perf_trace_buf_submit(addr != 0); we can make some cleanups on top.

Agreed,

>
> -------------------------------------------------------------------------------
> Subject: [PATCH] tracing/kprobes: Don't pass addr=ip to perf_trace_buf_submit()
>
> kprobe_perf_func() and kretprobe_perf_func() pass addr=ip to
> perf_trace_buf_submit() for no reason.
>
> This sets perf_sample_data->addr for PERF_SAMPLE_ADDR; we already
> have perf_sample_data->ip initialized if PERF_SAMPLE_IP.
>

Acked-by: Masami Hiramatsu <[email protected]>

Thank you!

> Signed-off-by: Oleg Nesterov <[email protected]>
> ---
> kernel/trace/trace_kprobe.c | 6 ++----
> 1 files changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 3432652..141c682 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -1163,8 +1163,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
> entry->ip = (unsigned long)tp->rp.kp.addr;
> memset(&entry[1], 0, dsize);
> store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
> - perf_trace_buf_submit(entry, size, rctx,
> - entry->ip, 1, regs, head, NULL);
> + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
> }
>
> /* Kretprobe profile handler */
> @@ -1197,8 +1196,7 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
> entry->func = (unsigned long)tp->rp.kp.addr;
> entry->ret_ip = (unsigned long)ri->ret_addr;
> store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
> - perf_trace_buf_submit(entry, size, rctx,
> - entry->ret_ip, 1, regs, head, NULL);
> + perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
> }
> #endif /* CONFIG_PERF_EVENTS */
>
>


--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]