2008-07-09 15:02:17

by Mathieu Desnoyers

[permalink] [raw]
Subject: [patch 05/15] LTTng instrumentation - scheduler

Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a
task, signal delivery) and process/thread creation/destruction (fork, exit,
kthread stop). Note that kthread creation is not instrumented in this patch
because it is architecture dependent. This instrumentation allows connecting
tracers such as ftrace, which detects scheduling latencies and good/bad
scheduler decisions. Tools like LTTng can export this scheduler information
along with instrumentation of the rest of the kernel activity to perform
post-mortem analysis of the scheduler activity.

Regarding the performance impact of tracepoints (which is comparable to that of
markers), even without the immediate-values optimizations, tests done by Hideo
Aoki on ia64 show no regression. His test case used hackbench on a kernel where
scheduler instrumentation (about 5 events in core scheduler code) was added.
See the "Tracepoints" patch header for detailed performance results.

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: 'Peter Zijlstra' <[email protected]>
CC: 'Steven Rostedt' <[email protected]>
CC: Thomas Gleixner <[email protected]>
CC: Masami Hiramatsu <[email protected]>
CC: "Frank Ch. Eigler" <[email protected]>
CC: 'Ingo Molnar' <[email protected]>
CC: 'Hideo AOKI' <[email protected]>
CC: Takashi Nishiie <[email protected]>
CC: Eduard - Gabriel Munteanu <[email protected]>
---
kernel/exit.c | 6 ++++++
kernel/fork.c | 3 +++
kernel/kthread.c | 5 +++++
kernel/sched-trace.h | 43 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched.c | 4 ++++
kernel/signal.c | 3 +++
6 files changed, 64 insertions(+)

Index: linux-2.6-lttng/kernel/kthread.c
===================================================================
--- linux-2.6-lttng.orig/kernel/kthread.c 2008-07-09 10:55:46.000000000 -0400
+++ linux-2.6-lttng/kernel/kthread.c 2008-07-09 10:57:43.000000000 -0400
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include "sched-trace.h"

#define KTHREAD_NICE_LEVEL (-5)

@@ -187,6 +188,8 @@ int kthread_stop(struct task_struct *k)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);

+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
@@ -202,6 +205,8 @@ int kthread_stop(struct task_struct *k)
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);

+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
Index: linux-2.6-lttng/kernel/sched.c
===================================================================
--- linux-2.6-lttng.orig/kernel/sched.c 2008-07-09 10:55:46.000000000 -0400
+++ linux-2.6-lttng/kernel/sched.c 2008-07-09 10:57:43.000000000 -0400
@@ -1987,6 +1987,7 @@ void wait_task_inactive(struct task_stru
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
task_rq_unlock(rq, &flags);
@@ -2275,6 +2276,7 @@ static int try_to_wake_up(struct task_st

smp_wmb();
rq = task_rq_lock(p, &flags);
+ trace_sched_try_wakeup(p);
old_state = p->state;
if (!(old_state & state))
goto out;
@@ -2457,6 +2459,7 @@ void wake_up_new_task(struct task_struct
struct rq *rq;

rq = task_rq_lock(p, &flags);
+ trace_sched_wakeup_new_task(p);
BUG_ON(p->state != TASK_RUNNING);
update_rq_clock(rq);

@@ -2884,6 +2887,7 @@ static void sched_migrate_task(struct ta
|| unlikely(cpu_is_offline(dest_cpu)))
goto out;

+ trace_sched_migrate_task(p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
Index: linux-2.6-lttng/kernel/exit.c
===================================================================
--- linux-2.6-lttng.orig/kernel/exit.c 2008-07-09 10:55:46.000000000 -0400
+++ linux-2.6-lttng/kernel/exit.c 2008-07-09 10:57:43.000000000 -0400
@@ -46,6 +46,7 @@
#include <linux/resource.h>
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
+#include "sched-trace.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -149,6 +150,7 @@ static void __exit_signal(struct task_st

static void delayed_put_task_struct(struct rcu_head *rhp)
{
+ trace_sched_process_free(container_of(rhp, struct task_struct, rcu));
put_task_struct(container_of(rhp, struct task_struct, rcu));
}

@@ -1040,6 +1042,8 @@ NORET_TYPE void do_exit(long code)

if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
@@ -1524,6 +1528,8 @@ static long do_wait(enum pid_type type,
struct task_struct *tsk;
int flag, retval;

+ trace_sched_process_wait(pid);
+
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/* If there is nothing that can match our critier just get out */
Index: linux-2.6-lttng/kernel/fork.c
===================================================================
--- linux-2.6-lttng.orig/kernel/fork.c 2008-07-09 10:55:46.000000000 -0400
+++ linux-2.6-lttng/kernel/fork.c 2008-07-09 10:58:05.000000000 -0400
@@ -56,6 +56,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
+#include "sched-trace.h"

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1362,6 +1363,8 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;

+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);

if (clone_flags & CLONE_PARENT_SETTID)
Index: linux-2.6-lttng/kernel/signal.c
===================================================================
--- linux-2.6-lttng.orig/kernel/signal.c 2008-07-09 10:46:33.000000000 -0400
+++ linux-2.6-lttng/kernel/signal.c 2008-07-09 10:57:43.000000000 -0400
@@ -26,6 +26,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include "sched-trace.h"

#include <asm/param.h>
#include <asm/uaccess.h>
@@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
struct sigpending *pending;
struct sigqueue *q;

+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
Index: linux-2.6-lttng/kernel/sched-trace.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/sched-trace.h 2008-07-09 10:57:43.000000000 -0400
@@ -0,0 +1,43 @@
+#ifndef _SCHED_TRACE_H
+#define _SCHED_TRACE_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+ TPPROTO(struct task_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+ TPPROTO(int ret),
+ TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_try_wakeup,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_wakeup_new_task,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_switch,
+ TPPROTO(struct task_struct *prev, struct task_struct *next),
+ TPARGS(prev, next));
+DEFINE_TRACE(sched_migrate_task,
+ TPPROTO(struct task_struct *p, int dest_cpu),
+ TPARGS(p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+ TPPROTO(struct pid *pid),
+ TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+ TPPROTO(struct task_struct *parent, struct task_struct *child),
+ TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+ TPPROTO(int sig, struct task_struct *p),
+ TPARGS(sig, p));
+
+#endif

--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68


2008-07-09 15:34:50

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)

There were 2 rejects when I ported the patch to linux-next. Sorry. Here
is a repost.


Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a
task, signal delivery) and process/thread creation/destruction (fork, exit,
kthread stop). Note that kthread creation is not instrumented in this patch
because it is architecture dependent. This instrumentation allows connecting
tracers such as ftrace, which detects scheduling latencies and good/bad
scheduler decisions. Tools like LTTng can export this scheduler information
along with instrumentation of the rest of the kernel activity to perform
post-mortem analysis of the scheduler activity.

Regarding the performance impact of tracepoints (which is comparable to that of
markers), even without the immediate-values optimizations, tests done by Hideo
Aoki on ia64 show no regression. His test case used hackbench on a kernel where
scheduler instrumentation (about 5 events in core scheduler code) was added.
See the "Tracepoints" patch header for detailed performance results.

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: 'Peter Zijlstra' <[email protected]>
CC: 'Steven Rostedt' <[email protected]>
CC: Thomas Gleixner <[email protected]>
CC: Masami Hiramatsu <[email protected]>
CC: "Frank Ch. Eigler" <[email protected]>
CC: 'Ingo Molnar' <[email protected]>
CC: 'Hideo AOKI' <[email protected]>
CC: Takashi Nishiie <[email protected]>
CC: Eduard - Gabriel Munteanu <[email protected]>
---
kernel/exit.c | 6 ++++++
kernel/fork.c | 3 +++
kernel/kthread.c | 5 +++++
kernel/sched-trace.h | 43 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched.c | 11 ++++++-----
kernel/signal.c | 3 +++
6 files changed, 66 insertions(+), 5 deletions(-)

Index: linux-2.6-lttng/kernel/kthread.c
===================================================================
--- linux-2.6-lttng.orig/kernel/kthread.c 2008-07-09 11:27:01.000000000 -0400
+++ linux-2.6-lttng/kernel/kthread.c 2008-07-09 11:27:08.000000000 -0400
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include "sched-trace.h"

#define KTHREAD_NICE_LEVEL (-5)

@@ -187,6 +188,8 @@ int kthread_stop(struct task_struct *k)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);

+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
@@ -202,6 +205,8 @@ int kthread_stop(struct task_struct *k)
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);

+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
Index: linux-2.6-lttng/kernel/sched.c
===================================================================
--- linux-2.6-lttng.orig/kernel/sched.c 2008-07-09 11:27:01.000000000 -0400
+++ linux-2.6-lttng/kernel/sched.c 2008-07-09 11:27:56.000000000 -0400
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include "sched-trace.h"

#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -1987,6 +1988,7 @@ void wait_task_inactive(struct task_stru
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
task_rq_unlock(rq, &flags);
@@ -2275,6 +2277,7 @@ static int try_to_wake_up(struct task_st

smp_wmb();
rq = task_rq_lock(p, &flags);
+ trace_sched_try_wakeup(p);
old_state = p->state;
if (!(old_state & state))
goto out;
@@ -2457,6 +2460,7 @@ void wake_up_new_task(struct task_struct
struct rq *rq;

rq = task_rq_lock(p, &flags);
+ trace_sched_wakeup_new_task(p);
BUG_ON(p->state != TASK_RUNNING);
update_rq_clock(rq);

@@ -2647,11 +2651,7 @@ context_switch(struct rq *rq, struct tas
struct mm_struct *mm, *oldmm;

prepare_task_switch(rq, prev, next);
- trace_mark(kernel_sched_schedule,
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- prev->pid, next->pid, prev->state,
- rq, prev, next);
+ trace_sched_switch(prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -2884,6 +2884,7 @@ static void sched_migrate_task(struct ta
|| unlikely(cpu_is_offline(dest_cpu)))
goto out;

+ trace_sched_migrate_task(p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
Index: linux-2.6-lttng/kernel/exit.c
===================================================================
--- linux-2.6-lttng.orig/kernel/exit.c 2008-07-09 11:27:01.000000000 -0400
+++ linux-2.6-lttng/kernel/exit.c 2008-07-09 11:27:08.000000000 -0400
@@ -46,6 +46,7 @@
#include <linux/resource.h>
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
+#include "sched-trace.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -149,6 +150,7 @@ static void __exit_signal(struct task_st

static void delayed_put_task_struct(struct rcu_head *rhp)
{
+ trace_sched_process_free(container_of(rhp, struct task_struct, rcu));
put_task_struct(container_of(rhp, struct task_struct, rcu));
}

@@ -1040,6 +1042,8 @@ NORET_TYPE void do_exit(long code)

if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
@@ -1524,6 +1528,8 @@ static long do_wait(enum pid_type type,
struct task_struct *tsk;
int flag, retval;

+ trace_sched_process_wait(pid);
+
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/* If there is nothing that can match our critier just get out */
Index: linux-2.6-lttng/kernel/fork.c
===================================================================
--- linux-2.6-lttng.orig/kernel/fork.c 2008-07-09 11:27:01.000000000 -0400
+++ linux-2.6-lttng/kernel/fork.c 2008-07-09 11:27:08.000000000 -0400
@@ -56,6 +56,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
+#include "sched-trace.h"

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1362,6 +1363,8 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;

+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);

if (clone_flags & CLONE_PARENT_SETTID)
Index: linux-2.6-lttng/kernel/signal.c
===================================================================
--- linux-2.6-lttng.orig/kernel/signal.c 2008-07-09 11:25:24.000000000 -0400
+++ linux-2.6-lttng/kernel/signal.c 2008-07-09 11:27:08.000000000 -0400
@@ -26,6 +26,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include "sched-trace.h"

#include <asm/param.h>
#include <asm/uaccess.h>
@@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
struct sigpending *pending;
struct sigqueue *q;

+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
Index: linux-2.6-lttng/kernel/sched-trace.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/sched-trace.h 2008-07-09 11:27:08.000000000 -0400
@@ -0,0 +1,43 @@
+#ifndef _SCHED_TRACE_H
+#define _SCHED_TRACE_H
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+ TPPROTO(struct task_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+ TPPROTO(int ret),
+ TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_try_wakeup,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_wakeup_new_task,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_switch,
+ TPPROTO(struct task_struct *prev, struct task_struct *next),
+ TPARGS(prev, next));
+DEFINE_TRACE(sched_migrate_task,
+ TPPROTO(struct task_struct *p, int dest_cpu),
+ TPARGS(p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+ TPPROTO(struct pid *pid),
+ TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+ TPPROTO(struct task_struct *parent, struct task_struct *child),
+ TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+ TPPROTO(int sig, struct task_struct *p),
+ TPARGS(sig, p));
+
+#endif
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-09 15:39:49

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)


* Mathieu Desnoyers <[email protected]> wrote:

> There were 2 rejects when I ported the patch to linux-next. Sorry.
> Here is a repost.

there's still a standing objection (NAK) from Peter Zijlstra, so none of
this can go into linux-next yet, I'm afraid.

Ingo

2008-07-09 16:00:31

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (repost)

* Ingo Molnar ([email protected]) wrote:
>
> * Mathieu Desnoyers <[email protected]> wrote:
>
> > There were 2 rejects when I ported the patch to linux-next. Sorry.
> > Here is a repost.
>
> there's still a standing objection (NAK) from Peter Zijlstra, so none of
> this can go into linux-next yet, I'm afraid.
>
> Ingo

Hi Ingo,

This "tracepoint" infrastructure was created to address Peter's
concerns about trace_mark ugliness. The last comment I had from him when
we discussed the tracepoint idea was "Looking forward to your
proposal...", so I guess it's up to him to decide whether tracepoints are
clean enough to live.

I'll submit a revised scheduler instrumentation patch which turns all of the
scheduler trace_mark() calls already in linux-next into tracepoints, with the
same arguments. ftrace will have to be updated accordingly.

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-09 16:22:22

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [patch 05/15] LTTng instrumentation - scheduler (merge ftrace markers)

Peter : this is the instrumentation from ftrace (exact same location of
trace_mark statements) turned into tracepoints. How do you like it ?
If we merge this, ftrace will have to be adapted accordingly.


LTTng instrumentation - scheduler

Instrument the scheduler activity (sched_switch, migration, wakeups, wait for a
task, signal delivery) and process/thread creation/destruction (fork, exit,
kthread stop). Note that kthread creation is not instrumented in this patch
because it is architecture dependent. This instrumentation allows connecting
tracers such as ftrace, which detects scheduling latencies and good/bad
scheduler decisions. Tools like LTTng can export this scheduler information
along with instrumentation of the rest of the kernel activity to perform
post-mortem analysis of the scheduler activity.

Regarding the performance impact of tracepoints (which is comparable to that of
markers), even without the immediate-values optimizations, tests done by Hideo
Aoki on ia64 show no regression. His test case used hackbench on a kernel where
scheduler instrumentation (about 5 events in core scheduler code) was added.
See the "Tracepoints" patch header for detailed performance results.

Changelog :
- Change instrumentation locations and parameters to match the ftrace
instrumentation, previously done with kernel markers.

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: 'Peter Zijlstra' <[email protected]>
CC: 'Steven Rostedt' <[email protected]>
CC: Thomas Gleixner <[email protected]>
CC: Masami Hiramatsu <[email protected]>
CC: "Frank Ch. Eigler" <[email protected]>
CC: 'Ingo Molnar' <[email protected]>
CC: 'Hideo AOKI' <[email protected]>
CC: Takashi Nishiie <[email protected]>
CC: Eduard - Gabriel Munteanu <[email protected]>
---
kernel/exit.c | 6 ++++++
kernel/fork.c | 3 +++
kernel/kthread.c | 5 +++++
kernel/sched-trace.h | 45 +++++++++++++++++++++++++++++++++++++++++++++
kernel/sched.c | 17 ++++++-----------
kernel/signal.c | 3 +++
6 files changed, 68 insertions(+), 11 deletions(-)

Index: linux-2.6-lttng/kernel/kthread.c
===================================================================
--- linux-2.6-lttng.orig/kernel/kthread.c 2008-07-09 12:11:59.000000000 -0400
+++ linux-2.6-lttng/kernel/kthread.c 2008-07-09 12:12:07.000000000 -0400
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include "sched-trace.h"

#define KTHREAD_NICE_LEVEL (-5)

@@ -187,6 +188,8 @@ int kthread_stop(struct task_struct *k)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);

+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
@@ -202,6 +205,8 @@ int kthread_stop(struct task_struct *k)
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);

+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
Index: linux-2.6-lttng/kernel/sched.c
===================================================================
--- linux-2.6-lttng.orig/kernel/sched.c 2008-07-09 12:11:59.000000000 -0400
+++ linux-2.6-lttng/kernel/sched.c 2008-07-09 12:14:28.000000000 -0400
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include "sched-trace.h"

#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -1987,6 +1988,7 @@ void wait_task_inactive(struct task_stru
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(rq, p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
task_rq_unlock(rq, &flags);
@@ -2337,9 +2339,7 @@ out_activate:
success = 1;

out_running:
- trace_mark(kernel_sched_wakeup,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p);

p->state = TASK_RUNNING;
@@ -2472,9 +2472,7 @@ void wake_up_new_task(struct task_struct
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_mark(kernel_sched_wakeup_new,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup_new(rq, p);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2647,11 +2645,7 @@ context_switch(struct rq *rq, struct tas
struct mm_struct *mm, *oldmm;

prepare_task_switch(rq, prev, next);
- trace_mark(kernel_sched_schedule,
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- prev->pid, next->pid, prev->state,
- rq, prev, next);
+ trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -2884,6 +2878,7 @@ static void sched_migrate_task(struct ta
|| unlikely(cpu_is_offline(dest_cpu)))
goto out;

+ trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
Index: linux-2.6-lttng/kernel/exit.c
===================================================================
--- linux-2.6-lttng.orig/kernel/exit.c 2008-07-09 12:11:59.000000000 -0400
+++ linux-2.6-lttng/kernel/exit.c 2008-07-09 12:12:07.000000000 -0400
@@ -46,6 +46,7 @@
#include <linux/resource.h>
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
+#include "sched-trace.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -149,6 +150,7 @@ static void __exit_signal(struct task_st

static void delayed_put_task_struct(struct rcu_head *rhp)
{
+ trace_sched_process_free(container_of(rhp, struct task_struct, rcu));
put_task_struct(container_of(rhp, struct task_struct, rcu));
}

@@ -1040,6 +1042,8 @@ NORET_TYPE void do_exit(long code)

if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
@@ -1524,6 +1528,8 @@ static long do_wait(enum pid_type type,
struct task_struct *tsk;
int flag, retval;

+ trace_sched_process_wait(pid);
+
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/* If there is nothing that can match our critier just get out */
Index: linux-2.6-lttng/kernel/fork.c
===================================================================
--- linux-2.6-lttng.orig/kernel/fork.c 2008-07-09 12:11:59.000000000 -0400
+++ linux-2.6-lttng/kernel/fork.c 2008-07-09 12:12:27.000000000 -0400
@@ -56,6 +56,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/magic.h>
+#include "sched-trace.h"

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1362,6 +1363,8 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;

+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);

if (clone_flags & CLONE_PARENT_SETTID)
Index: linux-2.6-lttng/kernel/signal.c
===================================================================
--- linux-2.6-lttng.orig/kernel/signal.c 2008-07-09 12:11:45.000000000 -0400
+++ linux-2.6-lttng/kernel/signal.c 2008-07-09 12:12:07.000000000 -0400
@@ -26,6 +26,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include "sched-trace.h"

#include <asm/param.h>
#include <asm/uaccess.h>
@@ -807,6 +808,8 @@ static int send_signal(int sig, struct s
struct sigpending *pending;
struct sigqueue *q;

+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
Index: linux-2.6-lttng/kernel/sched-trace.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/sched-trace.h 2008-07-09 12:14:59.000000000 -0400
@@ -0,0 +1,45 @@
+#ifndef _SCHED_TRACE_H
+#define _SCHED_TRACE_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(sched_kthread_stop,
+ TPPROTO(struct task_struct *t),
+ TPARGS(t));
+DEFINE_TRACE(sched_kthread_stop_ret,
+ TPPROTO(int ret),
+ TPARGS(ret));
+DEFINE_TRACE(sched_wait_task,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_wakeup_new,
+ TPPROTO(struct rq *rq, struct task_struct *p),
+ TPARGS(rq, p));
+DEFINE_TRACE(sched_switch,
+ TPPROTO(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next),
+ TPARGS(rq, prev, next));
+DEFINE_TRACE(sched_migrate_task,
+ TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
+ TPARGS(rq, p, dest_cpu));
+DEFINE_TRACE(sched_process_free,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_exit,
+ TPPROTO(struct task_struct *p),
+ TPARGS(p));
+DEFINE_TRACE(sched_process_wait,
+ TPPROTO(struct pid *pid),
+ TPARGS(pid));
+DEFINE_TRACE(sched_process_fork,
+ TPPROTO(struct task_struct *parent, struct task_struct *child),
+ TPARGS(parent, child));
+DEFINE_TRACE(sched_signal_send,
+ TPPROTO(int sig, struct task_struct *p),
+ TPARGS(sig, p));
+
+#endif
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-09 19:10:17

by Mathieu Desnoyers

[permalink] [raw]
Subject: [PATCH] ftrace port to tracepoints (linux-next)

Porting the trace_mark() used by ftrace to tracepoints. (cleanup)

This patch applies after the "Tracepoints v3" patchset for linux-next.

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: Masami Hiramatsu <[email protected]>
CC: 'Peter Zijlstra' <[email protected]>
CC: "Frank Ch. Eigler" <[email protected]>
CC: 'Ingo Molnar' <[email protected]>
CC: 'Hideo AOKI' <[email protected]>
CC: Takashi Nishiie <[email protected]>
CC: 'Steven Rostedt' <[email protected]>
CC: Eduard - Gabriel Munteanu <[email protected]>
---
kernel/trace/trace_sched_switch.c | 114 ++++++---------------------------
kernel/trace/trace_sched_wakeup.c | 129 +++++++++-----------------------------
2 files changed, 52 insertions(+), 191 deletions(-)

Index: linux-2.6-lttng/kernel/trace/trace_sched_switch.c
===================================================================
--- linux-2.6-lttng.orig/kernel/trace/trace_sched_switch.c 2008-07-09 14:33:28.000000000 -0400
+++ linux-2.6-lttng/kernel/trace/trace_sched_switch.c 2008-07-09 15:03:07.000000000 -0400
@@ -9,9 +9,9 @@
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <linux/marker.h>
#include <linux/ftrace.h>

+#include "../sched-trace.h"
#include "trace.h"

static struct trace_array *ctx_trace;
@@ -19,16 +19,17 @@ static int __read_mostly tracer_enabled;
static atomic_t sched_ref;

static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;

+ if (!atomic_read(&sched_ref))
+ return;
+
tracing_record_cmdline(prev);
tracing_record_cmdline(next);

@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *_

local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);

if (likely(disabled == 1))
- tracing_sched_switch_trace(tr, data, prev, next, flags);
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);

atomic_dec(&data->disabled);
local_irq_restore(flags);
}

-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- if (!atomic_read(&sched_ref))
- return;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- sched_switch_func(probe_data, __rq, prev, next);
-}
-
static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
- task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;

- if (!tracer_enabled)
+ if (!likely(tracer_enabled))
return;

- tracing_record_cmdline(curr);
+ tracing_record_cmdline(current);

local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);

if (likely(disabled == 1))
- tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags);

atomic_dec(&data->disabled);
local_irq_restore(flags);
}

-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_func(probe_data, __rq, task, curr);
-}
-
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
@@ -140,31 +88,21 @@ static int tracing_sched_register(void)
{
int ret;

- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return ret;
}

- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}

- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &ctx_trace);
+ ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
@@ -173,27 +111,17 @@ static int tracing_sched_register(void)

return ret;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
return ret;
}

static void tracing_sched_unregister(void)
{
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
}

static void tracing_start_sched_switch(void)
Index: linux-2.6-lttng/kernel/trace/trace_sched_wakeup.c
===================================================================
--- linux-2.6-lttng.orig/kernel/trace/trace_sched_wakeup.c 2008-07-09 14:33:34.000000000 -0400
+++ linux-2.6-lttng/kernel/trace/trace_sched_wakeup.c 2008-07-09 15:05:23.000000000 -0400
@@ -15,8 +15,8 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
-#include <linux/marker.h>

+#include "../sched-trace.h"
#include "trace.h"

static struct trace_array *wakeup_trace;
@@ -109,18 +109,18 @@ static int report_latency(cycle_t delta)
}

static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;

+ tracing_record_cmdline(prev);
+
if (unlikely(!tracer_enabled))
return;

@@ -137,11 +137,11 @@ wakeup_sched_switch(void *private, void
return;

/* The task we are waiting for is waking up */
- data = tr->data[wakeup_cpu];
+ data = wakeup_trace->data[wakeup_cpu];

/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;

@@ -151,7 +151,7 @@ wakeup_sched_switch(void *private, void
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;

- trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);

/*
* usecs conversion is slow so we try to delay the conversion
@@ -170,38 +170,13 @@ wakeup_sched_switch(void *private, void
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);

- update_max_tr(tr, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);

out_unlock:
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- tracing_record_cmdline(prev);
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- wakeup_sched_switch(probe_data, __rq, prev, next);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

static void __wakeup_reset(struct trace_array *tr)
@@ -235,19 +210,24 @@ static void wakeup_reset(struct trace_ar
}

static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
- struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;

+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
+
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
- p->prio >= curr->prio)
+ p->prio >= current->prio)
return;

- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;

@@ -259,7 +239,7 @@ wakeup_check_start(struct trace_array *t
goto out_locked;

/* reset the trace */
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);

wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
@@ -269,72 +249,35 @@ wakeup_check_start(struct trace_array *t

local_save_flags(flags);

- tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(tr, tr->data[wakeup_cpu],
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
spin_unlock(&wakeup_lock);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct trace_array **ptr = probe_data;
- struct trace_array *tr = *ptr;
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_check_start(tr, task, curr);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;

- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup(probe_wakeup);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup\n");
return;
}

- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
if (ret) {
pr_info("wakeup trace: Couldn't add marker"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}

- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &wakeup_trace);
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
pr_info("sched trace: Couldn't add marker"
" probe to kernel_sched_schedule\n");
@@ -357,28 +300,18 @@ static void start_wakeup_tracer(struct t

return;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup(probe_wakeup);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
}

static void wakeup_tracer_init(struct trace_array *tr)
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-10 03:14:17

by Takashi NISHIIE

[permalink] [raw]
Subject: RE: [PATCH] ftrace port to tracepoints (linux-next)

Hi, Mathieu

I think it is wonderful that the source code becomes simpler by
changing kernel markers to tracepoints.

However, it seems that you forgot to correct the error messages.

For example.
Mathieu Wrote:
>- ret = marker_probe_register("kernel_sched_wakeup",
>- "pid %d state %ld ## rq %p task %p rq->curr %p",
>- wake_up_callback,
>- &ctx_trace);
>+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
> if (ret) {
> pr_info("wakeup trace: Couldn't add marker"
> " probe to kernel_sched_wakeup\n");
> return ret;
> }

if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}

Thank you,
--
Takashi Nishiie


2008-07-10 03:57:38

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH] ftrace port to tracepoints (linux-next) (nitpick update)

* Takashi Nishiie ([email protected]) wrote:
> Hi,Mathieu
>
> I think it is wonderful that the source code becomes simpler by
> changing kernel markers to tracepoints.
>
> However, it seems that you forgot to correct the error messages.
>

:)

Good catch. Here is the revised version. Thanks!

Mathieu


ftrace port to tracepoints

Porting the trace_mark() used by ftrace to tracepoints. (cleanup)

Changelog :
- Change error messages : marker -> tracepoint

Signed-off-by: Mathieu Desnoyers <[email protected]>
CC: Masami Hiramatsu <[email protected]>
CC: 'Peter Zijlstra' <[email protected]>
CC: "Frank Ch. Eigler" <[email protected]>
CC: 'Ingo Molnar' <[email protected]>
CC: 'Hideo AOKI' <[email protected]>
CC: Takashi Nishiie <[email protected]>
CC: 'Steven Rostedt' <[email protected]>
CC: Eduard - Gabriel Munteanu <[email protected]>
---
kernel/trace/trace_sched_switch.c | 120 ++++++---------------------------
kernel/trace/trace_sched_wakeup.c | 135 +++++++++-----------------------------
2 files changed, 58 insertions(+), 197 deletions(-)

Index: linux-2.6-lttng/kernel/trace/trace_sched_switch.c
===================================================================
--- linux-2.6-lttng.orig/kernel/trace/trace_sched_switch.c 2008-07-09 23:50:26.000000000 -0400
+++ linux-2.6-lttng/kernel/trace/trace_sched_switch.c 2008-07-09 23:53:06.000000000 -0400
@@ -9,9 +9,9 @@
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <linux/marker.h>
#include <linux/ftrace.h>

+#include "../sched-trace.h"
#include "trace.h"

static struct trace_array *ctx_trace;
@@ -19,16 +19,17 @@ static int __read_mostly tracer_enabled;
static atomic_t sched_ref;

static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;

+ if (!atomic_read(&sched_ref))
+ return;
+
tracing_record_cmdline(prev);
tracing_record_cmdline(next);

@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *_

local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);

if (likely(disabled == 1))
- tracing_sched_switch_trace(tr, data, prev, next, flags);
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);

atomic_dec(&data->disabled);
local_irq_restore(flags);
}

-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- if (!atomic_read(&sched_ref))
- return;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- sched_switch_func(probe_data, __rq, prev, next);
-}
-
static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
- task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;

- if (!tracer_enabled)
+ if (!likely(tracer_enabled))
return;

- tracing_record_cmdline(curr);
+ tracing_record_cmdline(current);

local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);

if (likely(disabled == 1))
- tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags);

atomic_dec(&data->disabled);
local_irq_restore(flags);
}

-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_func(probe_data, __rq, task, curr);
-}
-
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
@@ -140,60 +88,40 @@ static int tracing_sched_register(void)
{
int ret;

- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}

- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}

- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &ctx_trace);
+ ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}

return ret;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
return ret;
}

static void tracing_sched_unregister(void)
{
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
}

static void tracing_start_sched_switch(void)
Index: linux-2.6-lttng/kernel/trace/trace_sched_wakeup.c
===================================================================
--- linux-2.6-lttng.orig/kernel/trace/trace_sched_wakeup.c 2008-07-09 23:50:26.000000000 -0400
+++ linux-2.6-lttng/kernel/trace/trace_sched_wakeup.c 2008-07-09 23:53:33.000000000 -0400
@@ -15,8 +15,8 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
-#include <linux/marker.h>

+#include "../sched-trace.h"
#include "trace.h"

static struct trace_array *wakeup_trace;
@@ -109,18 +109,18 @@ static int report_latency(cycle_t delta)
}

static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;

+ tracing_record_cmdline(prev);
+
if (unlikely(!tracer_enabled))
return;

@@ -137,11 +137,11 @@ wakeup_sched_switch(void *private, void
return;

/* The task we are waiting for is waking up */
- data = tr->data[wakeup_cpu];
+ data = wakeup_trace->data[wakeup_cpu];

/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;

@@ -151,7 +151,7 @@ wakeup_sched_switch(void *private, void
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;

- trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);

/*
* usecs conversion is slow so we try to delay the conversion
@@ -170,38 +170,13 @@ wakeup_sched_switch(void *private, void
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);

- update_max_tr(tr, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);

out_unlock:
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
spin_unlock_irqrestore(&wakeup_lock, flags);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- tracing_record_cmdline(prev);
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- wakeup_sched_switch(probe_data, __rq, prev, next);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

static void __wakeup_reset(struct trace_array *tr)
@@ -235,19 +210,24 @@ static void wakeup_reset(struct trace_ar
}

static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
- struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;

+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
+
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
- p->prio >= curr->prio)
+ p->prio >= current->prio)
return;

- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;

@@ -259,7 +239,7 @@ wakeup_check_start(struct trace_array *t
goto out_locked;

/* reset the trace */
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);

wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
@@ -269,74 +249,37 @@ wakeup_check_start(struct trace_array *t

local_save_flags(flags);

- tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(tr, tr->data[wakeup_cpu],
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);

out_locked:
spin_unlock(&wakeup_lock);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct trace_array **ptr = probe_data;
- struct trace_array *tr = *ptr;
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_check_start(tr, task, curr);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}

static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;

- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return;
}

- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}

- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &wakeup_trace);
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
@@ -357,28 +300,18 @@ static void start_wakeup_tracer(struct t

return;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup(probe_wakeup);
}

static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
}

static void wakeup_tracer_init(struct trace_array *tr)


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68