Hi,
We can monitor timer's lifecycle and behaviors by these tracepoint.
Following tracepoints are introduced as ingo's suggestion in
http://marc.info/?l=linux-kernel&m=123791899529128&w=2
for timer:
trace_timer_init()
trace_timer_start()
trace_timer_expire()
trace_timer_cancel()
for hrtimer:
trace_hrtimer_init()
trace_hrtimer_start()
trace_hrtimer_expire()
trace_hrtimer_cancel()
for itimer:
itimer_state
itimer_expire()
Changlog from latest patchset:
Add itimer's patch to monitor itimer's lifecycle and behaviors
This patch is modify from Mathieu's patch base on Ingo's suggestion, the
original patch can be found here:
http://marc.info/?l=linux-kernel&m=123791201816247&w=2
Great thanks to Thomas for giving me so many valuable advices.
Those tracepoints are wanted and useful:
1: We can detect a timer's delay
2: We can monitor the lifecycle and behaviors of a timer
Thus they help in analysing and debuging.
Example ftrace output:
insmod-3743 [001] 217105.738770: timer_init: timer=e0b3e4c8
insmod-3743 [001] 217105.738868: timer_start: timer=e0b3e4c8 func=timer_fun expires=442863 timeout=50
<idle>-0 [001] 217105.935436: timer_expire: timer=e0b3e4c8 func=timer_fun jiffies=442863
<idle>-0 [001] 217105.935440: timer_cancel: timer=e0b3e4c8 func=timer_fun
We expect the timer expires at 442863, after 50 jiffies, actually the timer
expires at 442863, so it is delayed by 0 jiffies.
Changelog v1->v2:
1: Add jiffies info into timer
2: Combine debugobjects and trace as Thomas's suggestion
Signed-off-by: Mathieu Desnoyers <[email protected]>
Signed-off-by: Xiao Guangrong <[email protected]>
---
include/trace/events/timer.h | 82 ++++++++++++++++++++++++++++++++++++++++++
kernel/timer.c | 33 +++++++++++++++--
2 files changed, 111 insertions(+), 4 deletions(-)
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 6e7beb5..e9c51ee 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -110,6 +110,88 @@ TRACE_EVENT(hrtimer_exit,
"HRTIMER_RESTART" : "HRTIMER_NORESTART")
);
+TRACE_EVENT(timer_init,
+
+ TP_PROTO(struct timer_list *timer),
+
+ TP_ARGS(timer),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ ),
+
+ TP_printk("timer=%p", __entry->timer)
+);
+
+TRACE_EVENT(timer_start,
+
+ TP_PROTO(struct timer_list *timer, unsigned long expires),
+
+ TP_ARGS(timer, expires),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ __field( unsigned long, expires )
+ __field( int, timeout )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ __entry->expires = expires;
+ __entry->timeout = timer->expires - jiffies;
+ ),
+
+ TP_printk("timer=%p func=%pf expires=%lu timeout=%d", __entry->timer,
+ __entry->function, __entry->expires, __entry->timeout)
+);
+
+TRACE_EVENT(timer_expire,
+
+ TP_PROTO(struct timer_list *timer),
+
+ TP_ARGS(timer),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ __field( unsigned long, jiffies )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ __entry->jiffies = jiffies;
+ ),
+
+ TP_printk("timer=%p func=%pf jiffies=%lu", __entry->timer,
+ __entry->function, __entry->jiffies)
+);
+
+TRACE_EVENT(timer_cancel,
+
+ TP_PROTO(struct timer_list *timer),
+
+ TP_ARGS(timer),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ ),
+
+ TP_printk("timer=%p func=%pf", __entry->timer, __entry->function)
+);
+
#endif /* _TRACE_TIMER_H */
/* This part must be outside protection */
diff --git a/kernel/timer.c b/kernel/timer.c
index 9385ad5..13f943b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -523,6 +523,30 @@ static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
#endif
+static inline void debug_and_trace_timer_init(struct timer_list *timer)
+{
+ debug_timer_init(timer);
+ trace_timer_init(timer);
+}
+
+static inline void
+debug_and_trace_timer_activate(struct timer_list *timer, unsigned long expires)
+{
+ debug_timer_activate(timer);
+ trace_timer_start(timer, expires);
+}
+
+static inline void debug_and_trace_timer_expire(struct timer_list *timer)
+{
+ trace_timer_expire(timer);
+}
+
+static inline void debug_and_trace_timer_deactivate(struct timer_list *timer)
+{
+ debug_timer_deactivate(timer);
+ trace_timer_cancel(timer);
+}
+
static void __init_timer(struct timer_list *timer,
const char *name,
struct lock_class_key *key)
@@ -551,7 +575,7 @@ void init_timer_key(struct timer_list *timer,
const char *name,
struct lock_class_key *key)
{
- debug_timer_init(timer);
+ debug_and_trace_timer_init(timer);
__init_timer(timer, name, key);
}
EXPORT_SYMBOL(init_timer_key);
@@ -570,7 +594,7 @@ static inline void detach_timer(struct timer_list *timer,
{
struct list_head *entry = &timer->entry;
- debug_timer_deactivate(timer);
+ debug_and_trace_timer_deactivate(timer);
__list_del(entry->prev, entry->next);
if (clear_pending)
@@ -631,7 +655,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
goto out_unlock;
}
- debug_timer_activate(timer);
+ debug_and_trace_timer_activate(timer, expires);
new_base = __get_cpu_var(tvec_bases);
@@ -783,7 +807,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
timer_set_base(timer, base);
- debug_timer_activate(timer);
+ debug_and_trace_timer_activate(timer, timer->expires);
internal_add_timer(base, timer);
/*
* Check whether the other CPU is idle and needs to be
@@ -954,6 +978,7 @@ static inline void __run_timers(struct tvec_base *base)
unsigned long data;
timer = list_first_entry(head, struct timer_list,entry);
+ debug_and_trace_timer_expire(timer);
fn = timer->function;
data = timer->data;
--
1.6.1.2
Those tracepoints are wanted and useful:
1: We can detect a hrtimer's delay
2: We can monitor the lifecycle and behaviors of a hrtimer
Thus they help in analysing and debuging.
Great thanks to Thomas for giving me so many valuable advices.
Example ftrace output:
insmod-3821 [001] 3192.239314: hrtimer_init: timer=d08a1480 clockid=CLOCK_REALTIME mode=HRTIMER_MODE_ABS
insmod-3821 [001] 3192.239335: hrtimer_start: timer=d08a1480 func=hrtimer_fun expires=1245162841000000000 ns softexpires=1245162841000000000 ns
<idle>-0 [001] 3201.506127: hrtimer_expire: timer=d08a1480 func=hrtimer_fun now=1245162841010241858
<idle>-0 [001] 3201.506129: hrtimer_cancel: timer=d08a1480 func=hrtimer_fun
We expect the hrtimer expires at 1245162841000000000 ns, actually the
hrtimer expires at 1245162841010241858 ns, so it is delayed by
1245162841010241858- 1245162841000000000 = 10241858 ns.
Changelog v1->v2:
1: Remove ktime_to_ns() in TP_fast_assign()
2: Combine debugobjects and trace as Thomas's suggestion
Signed-off-by: Xiao Guangrong <[email protected]>
---
include/trace/events/timer.h | 90 ++++++++++++++++++++++++++++++++++++++++++
kernel/hrtimer.c | 38 +++++++++++++++--
2 files changed, 123 insertions(+), 5 deletions(-)
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index e9c51ee..8ffb2e2 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -192,6 +192,96 @@ TRACE_EVENT(timer_cancel,
TP_printk("timer=%p func=%pf", __entry->timer, __entry->function)
);
+TRACE_EVENT(hrtimer_init,
+
+ TP_PROTO(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode),
+
+ TP_ARGS(timer, clockid, mode),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( clockid_t, clockid )
+ __field( enum hrtimer_mode, mode )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->clockid = clockid;
+ __entry->mode = mode;
+ ),
+
+ TP_printk("timer=%p clockid=%s mode=%s", __entry->timer,
+ __entry->clockid == CLOCK_REALTIME ?
+ "CLOCK_REALTIME" : "CLOCK_MONOTONIC",
+ __entry->mode == HRTIMER_MODE_ABS ?
+ "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL")
+);
+
+TRACE_EVENT(hrtimer_start,
+
+ TP_PROTO(struct hrtimer *timer),
+
+ TP_ARGS(timer),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ __field( s64, expires )
+ __field( s64, softexpires )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ __entry->expires = hrtimer_get_expires(timer).tv64;
+ __entry->softexpires = hrtimer_get_softexpires(timer).tv64;
+ ),
+
+ TP_printk("timer=%p func=%pf expires=%llu ns softexpires=%llu ns",
+ __entry->timer, __entry->function,
+ (unsigned long long)__entry->expires,
+ (unsigned long long)__entry->softexpires)
+);
+
+TRACE_EVENT(hrtimer_expire,
+
+ TP_PROTO(struct hrtimer *timer, s64 now),
+
+ TP_ARGS(timer, now),
+
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ __field( s64, now )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ __entry->now = now;
+ ),
+
+ TP_printk("timer=%p func=%pf now=%llu", __entry->timer, __entry->function,
+ (unsigned long long)__entry->now)
+);
+
+TRACE_EVENT(hrtimer_cancel,
+
+ TP_PROTO(struct hrtimer *timer),
+ TP_ARGS(timer),
+ TP_STRUCT__entry(
+ __field( void *, timer )
+ __field( void *, function )
+ ),
+
+ TP_fast_assign(
+ __entry->timer = timer;
+ __entry->function = timer->function;
+ ),
+
+ TP_printk("timer=%p func=%pf", __entry->timer, __entry->function)
+);
+
#endif /* _TRACE_TIMER_H */
/* This part must be outside protection */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 085b007..b8947e8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -493,6 +493,32 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif
+static inline void
+debug_and_trace_hrtimer_init(struct hrtimer *timer, clockid_t clockid,
+ enum hrtimer_mode mode)
+{
+ debug_hrtimer_init(timer);
+ trace_hrtimer_init(timer, clockid, mode);
+}
+
+static inline void debug_and_trace_hrtimer_activate(struct hrtimer *timer)
+{
+ debug_hrtimer_activate(timer);
+ trace_hrtimer_start(timer);
+}
+
+static inline void debug_and_trace_hrtimer_expire(struct hrtimer *timer,
+ s64 now)
+{
+ trace_hrtimer_expire(timer, now);
+}
+
+static inline void debug_and_trace_hrtimer_deactivate(struct hrtimer *timer)
+{
+ debug_hrtimer_deactivate(timer);
+ trace_hrtimer_cancel(timer);
+}
+
/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -849,7 +875,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
struct hrtimer *entry;
int leftmost = 1;
- debug_hrtimer_activate(timer);
+ debug_and_trace_hrtimer_activate(timer);
/*
* Find the right place in the rbtree:
@@ -935,7 +961,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
* reprogramming happens in the interrupt handler. This is a
* rare case and less expensive than a smp call.
*/
- debug_hrtimer_deactivate(timer);
+ debug_and_trace_hrtimer_deactivate(timer);
timer_stats_hrtimer_clear_start_info(timer);
reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases);
__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE,
@@ -1169,7 +1195,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
enum hrtimer_mode mode)
{
- debug_hrtimer_init(timer);
+ debug_and_trace_hrtimer_init(timer, clock_id, mode);
__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init);
@@ -1202,7 +1228,7 @@ static void __run_hrtimer(struct hrtimer *timer)
WARN_ON(!irqs_disabled());
- debug_hrtimer_deactivate(timer);
+ debug_and_trace_hrtimer_deactivate(timer);
__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
timer_stats_account_hrtimer(timer);
fn = timer->function;
@@ -1318,6 +1344,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
break;
}
+ debug_and_trace_hrtimer_expire(timer, basenow.tv64);
__run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
@@ -1436,6 +1463,7 @@ void hrtimer_run_queues(void)
hrtimer_get_expires_tv64(timer))
break;
+ debug_and_trace_hrtimer_expire(timer, base->softirq_time.tv64);
__run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
@@ -1612,7 +1640,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
while ((node = rb_first(&old_base->active))) {
timer = rb_entry(node, struct hrtimer, node);
BUG_ON(hrtimer_callback_running(timer));
- debug_hrtimer_deactivate(timer);
+ debug_and_trace_hrtimer_deactivate(timer);
/*
* Mark it as STATE_MIGRATE not INACTIVE otherwise the
--
1.6.1.2
Those tracepoints are wanted and useful:
1: We can detect a itimer's delay
2: We can monitor the lifecycle and behaviors of a itimer
Thus they help in analysing and debuging.
Great thanks to Thomas for giving me so many valuable advices.
There have two case:
case 1, if we use ITIMER_REAL itimer:
Example ftrace output:
main-1649 [001] 1674.034138: hrtimer_start: timer=ce97d3a4 func=it_real_fn expires=1679993510723 ns softexpires=1679993510723 ns
main-1649 [001] 1674.034140: itimer_state: task=main which=0 state:start timer=ce97d3a4 expires=0 it_value=2.0 it_interval=0.500000
main-1649 [001] 1676.029184: hrtimer_expire: timer=ce97d3a4 func=it_real_fn now=1679993511197
main-1649 [001] 1676.029185: hrtimer_cancel: timer=ce97d3a4 func=it_real_fn
main-1649 [001] 1676.029190: itimer_expire: task=main which=0 now=0
main-1649 [001] 1676.034744: itimer_state: task=main which=0 state:cancel timer=ce97d3a4 expires=0 it_value=0.0 it_interval=0.0
ITIMER_REAL itime use hrtimer to calculate time, so we can cooperate with
hrtimer's tracepoint to get it's delay.
from this output information, the process set up a ITIMER_REAL
iteimr(which=0), use a hrtime which address is ce97d3a4, we can get the
delay is 1679993511197-1679993510723 = 474 ns
case 2, if we use ITIMER_VIRTUAL/ITIMER_PROF itimer:
Example ftrace output:
main-1600 [000] 524.081367: itimer_state: task=main which=2 state:start timer=(null) expires=2008 it_value=2.0 it_interval=0.500000
main-1600 [000] 526.084820: itimer_expire: task=main which=2 now=2008
main-1600 [000] 526.088912: itimer_state: task=main which=2 state:cancel timer=(null) expires=0 it_value=0.0 it_interval=0.0
from this, we can know the itimer delay is 2008-2008=0 jiffies.
Changelog v1->v2:
1: Remove ktime_to_ns() in TP_fast_assign()
2: Combine debugobjects and trace as Thomas's suggestion
Signed-off-by: Xiao Guangrong <[email protected]>
---
include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++
kernel/itimer.c | 3 ++
kernel/posix-cpu-timers.c | 3 ++
3 files changed, 72 insertions(+), 0 deletions(-)
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 8ffb2e2..66d33e1 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -4,6 +4,7 @@
#include <linux/tracepoint.h>
#include <linux/timer.h>
#include <linux/hrtimer.h>
+#include <linux/time.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM timer
@@ -282,6 +283,71 @@ TRACE_EVENT(hrtimer_cancel,
TP_printk("timer=%p func=%pf", __entry->timer, __entry->function)
);
+TRACE_EVENT(itimer_state,
+
+ TP_PROTO(int which, struct itimerval *value, cputime_t expires),
+
+ TP_ARGS(which, value, expires),
+
+ TP_STRUCT__entry(
+ __field( int, which )
+ __field( void *, timer )
+ __field( int, state )
+ __field( cputime_t, expires )
+ __field( long, value_sec )
+ __field( long, value_usec )
+ __field( long, interval_sec )
+ __field( long, interval_usec )
+ __string( comm, current->comm )
+ ),
+
+ TP_fast_assign(
+ __entry->which = which;
+ __entry->timer = which == ITIMER_REAL ?
+ ¤t->signal->real_timer : NULL;
+ __entry->state = (value->it_value.tv_sec != 0 ||
+ value->it_value.tv_usec != 0) ? 1 : 0;
+ __entry->expires = which == ITIMER_REAL ? 0 : expires;
+ __entry->value_sec = value->it_value.tv_sec;
+ __entry->value_usec = value->it_value.tv_usec;
+ __entry->interval_sec = value->it_interval.tv_sec;
+ __entry->interval_usec = value->it_interval.tv_usec;
+ __assign_str(comm, current->comm);
+ ),
+
+ TP_printk("task=%s which=%d state:%s timer=%p expires=%lu "
+ "it_value=%lu.%lu it_interval=%lu.%lu",
+ __get_str(comm), __entry->which,
+ __entry->state ? "start" : "cancel", __entry->timer,
+ __entry->expires, __entry->value_sec, __entry->value_usec,
+ __entry->interval_sec, __entry->interval_usec)
+);
+
+TRACE_EVENT(itimer_expire,
+
+ TP_PROTO(int which, struct task_struct *task, struct pid *pid, cputime_t now),
+
+ TP_ARGS(which, task, pid, now),
+
+ TP_STRUCT__entry(
+ __field( int , which )
+ __field( cputime_t, now )
+ __array( char, comm, TASK_COMM_LEN )
+ ),
+
+ TP_fast_assign(
+ __entry->which = which;
+ __entry->now = now;
+ if (pid)
+ strcpy(__entry->comm, pid_task(pid, PIDTYPE_PID)->comm);
+ else
+ strcpy(__entry->comm, task->comm);
+ ),
+
+ TP_printk("task=%s which=%d now=%lu", __entry->comm, __entry->which,
+ __entry->now)
+);
+
#endif /* _TRACE_TIMER_H */
/* This part must be outside protection */
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 58762f7..ee3a810 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -12,6 +12,7 @@
#include <linux/time.h>
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
+#include <trace/events/timer.h>
#include <asm/uaccess.h>
@@ -123,6 +124,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
struct signal_struct *sig =
container_of(timer, struct signal_struct, real_timer);
+ trace_itimer_expire(ITIMER_REAL, NULL, sig->leader_pid, 0);
kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
return HRTIMER_NORESTART;
@@ -220,6 +222,7 @@ again:
default:
return -EINVAL;
}
+ trace_itimer_state(which, value, nval);
return 0;
}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0..66d0e28 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -8,6 +8,7 @@
#include <linux/math64.h>
#include <asm/uaccess.h>
#include <linux/kernel_stat.h>
+#include <trace/events/timer.h>
/*
* Called after updating RLIMIT_CPU to set timer expiration if necessary.
@@ -1160,6 +1161,7 @@ static void check_process_timers(struct task_struct *tsk,
sig->it_prof_expires = cputime_add(
sig->it_prof_expires, ptime);
}
+ trace_itimer_expire(ITIMER_PROF, tsk, NULL, ptime);
__group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
}
if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
@@ -1176,6 +1178,7 @@ static void check_process_timers(struct task_struct *tsk,
sig->it_virt_expires = cputime_add(
sig->it_virt_expires, utime);
}
+ trace_itimer_expire(ITIMER_VIRTUAL, tsk, NULL, utime);
__group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
}
if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
--
1.6.1.2
* Xiao Guangrong ([email protected]) wrote:
> Those tracepoints are wanted and useful:
> 1: We can detect a itimer's delay
> 2: We can monitor the lifecycle and behaviors of a itimer
>
> Thus they help in analysing and debuging.
>
> Great thanks to Thomas for giving me so many valuable advices.
>
> There have two case:
> case 1, if we use ITIMER_REAL itimer:
> Example ftrace output:
> main-1649 [001] 1674.034138: hrtimer_start: timer=ce97d3a4 func=it_real_fn expires=1679993510723 ns softexpires=1679993510723 ns
> main-1649 [001] 1674.034140: itimer_state: task=main which=0 state:start timer=ce97d3a4 expires=0 it_value=2.0 it_interval=0.500000
> main-1649 [001] 1676.029184: hrtimer_expire: timer=ce97d3a4 func=it_real_fn now=1679993511197
> main-1649 [001] 1676.029185: hrtimer_cancel: timer=ce97d3a4 func=it_real_fn
> main-1649 [001] 1676.029190: itimer_expire: task=main which=0 now=0
> main-1649 [001] 1676.034744: itimer_state: task=main which=0 state:cancel timer=ce97d3a4 expires=0 it_value=0.0 it_interval=0.0
>
> ITIMER_REAL itime use hrtimer to calculate time, so we can cooperate with
> hrtimer's tracepoint to get it's delay.
>
> from this output information, the process set up a ITIMER_REAL
> iteimr(which=0), use a hrtime which address is ce97d3a4, we can get the
> delay is 1679993511197-1679993510723 = 474 ns
>
> case 2, if we use ITIMER_VIRTUAL/ITIMER_PROF itimer:
> Example ftrace output:
> main-1600 [000] 524.081367: itimer_state: task=main which=2 state:start timer=(null) expires=2008 it_value=2.0 it_interval=0.500000
> main-1600 [000] 526.084820: itimer_expire: task=main which=2 now=2008
> main-1600 [000] 526.088912: itimer_state: task=main which=2 state:cancel timer=(null) expires=0 it_value=0.0 it_interval=0.0
>
> from this, we can know the itimer delay is 2008-2008=0 jiffies.
>
> Changelog v1->v2:
> 1: Remove ktime_to_ns() in TP_fast_assign()
> 2: Combine debugobjects and trace as Thomas's suggestion
>
> Signed-off-by: Xiao Guangrong <[email protected]>
> ---
> include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++
> kernel/itimer.c | 3 ++
> kernel/posix-cpu-timers.c | 3 ++
> 3 files changed, 72 insertions(+), 0 deletions(-)
>
> diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
> index 8ffb2e2..66d33e1 100644
> --- a/include/trace/events/timer.h
> +++ b/include/trace/events/timer.h
> @@ -4,6 +4,7 @@
> #include <linux/tracepoint.h>
> #include <linux/timer.h>
> #include <linux/hrtimer.h>
> +#include <linux/time.h>
>
> #undef TRACE_SYSTEM
> #define TRACE_SYSTEM timer
> @@ -282,6 +283,71 @@ TRACE_EVENT(hrtimer_cancel,
> TP_printk("timer=%p func=%pf", __entry->timer, __entry->function)
> );
>
> +TRACE_EVENT(itimer_state,
> +
Hrm, I'd reserve the use of the *_state namespace for something
different if possible.
In LTTng, we have the ability to list a set of data structures at trace
start (we call this state dump). In this case, we could add a list of
all active timers at trace start. This is useful if the trace started
after timer inits.
So having "_state" without much meaning here might eventually be
confusing.
"_action" or "_call" (like IPCs) might be more appropriate ?
Mathieu
> + TP_PROTO(int which, struct itimerval *value, cputime_t expires),
> +
> + TP_ARGS(which, value, expires),
> +
> + TP_STRUCT__entry(
> + __field( int, which )
> + __field( void *, timer )
> + __field( int, state )
> + __field( cputime_t, expires )
> + __field( long, value_sec )
> + __field( long, value_usec )
> + __field( long, interval_sec )
> + __field( long, interval_usec )
> + __string( comm, current->comm )
> + ),
> +
> + TP_fast_assign(
> + __entry->which = which;
> + __entry->timer = which == ITIMER_REAL ?
> + ¤t->signal->real_timer : NULL;
> + __entry->state = (value->it_value.tv_sec != 0 ||
> + value->it_value.tv_usec != 0) ? 1 : 0;
> + __entry->expires = which == ITIMER_REAL ? 0 : expires;
> + __entry->value_sec = value->it_value.tv_sec;
> + __entry->value_usec = value->it_value.tv_usec;
> + __entry->interval_sec = value->it_interval.tv_sec;
> + __entry->interval_usec = value->it_interval.tv_usec;
> + __assign_str(comm, current->comm);
> + ),
> +
> + TP_printk("task=%s which=%d state:%s timer=%p expires=%lu "
> + "it_value=%lu.%lu it_interval=%lu.%lu",
> + __get_str(comm), __entry->which,
> + __entry->state ? "start" : "cancel", __entry->timer,
> + __entry->expires, __entry->value_sec, __entry->value_usec,
> + __entry->interval_sec, __entry->interval_usec)
> +);
> +
> +TRACE_EVENT(itimer_expire,
> +
> + TP_PROTO(int which, struct task_struct *task, struct pid *pid, cputime_t now),
> +
> + TP_ARGS(which, task, pid, now),
> +
> + TP_STRUCT__entry(
> + __field( int , which )
> + __field( cputime_t, now )
> + __array( char, comm, TASK_COMM_LEN )
> + ),
> +
> + TP_fast_assign(
> + __entry->which = which;
> + __entry->now = now;
> + if (pid)
> + strcpy(__entry->comm, pid_task(pid, PIDTYPE_PID)->comm);
> + else
> + strcpy(__entry->comm, task->comm);
> + ),
> +
> + TP_printk("task=%s which=%d now=%lu", __entry->comm, __entry->which,
> + __entry->now)
> +);
> +
> #endif /* _TRACE_TIMER_H */
>
> /* This part must be outside protection */
> diff --git a/kernel/itimer.c b/kernel/itimer.c
> index 58762f7..ee3a810 100644
> --- a/kernel/itimer.c
> +++ b/kernel/itimer.c
> @@ -12,6 +12,7 @@
> #include <linux/time.h>
> #include <linux/posix-timers.h>
> #include <linux/hrtimer.h>
> +#include <trace/events/timer.h>
>
> #include <asm/uaccess.h>
>
> @@ -123,6 +124,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
> struct signal_struct *sig =
> container_of(timer, struct signal_struct, real_timer);
>
> + trace_itimer_expire(ITIMER_REAL, NULL, sig->leader_pid, 0);
> kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
>
> return HRTIMER_NORESTART;
> @@ -220,6 +222,7 @@ again:
> default:
> return -EINVAL;
> }
> + trace_itimer_state(which, value, nval);
> return 0;
> }
>
> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
> index bece7c0..66d0e28 100644
> --- a/kernel/posix-cpu-timers.c
> +++ b/kernel/posix-cpu-timers.c
> @@ -8,6 +8,7 @@
> #include <linux/math64.h>
> #include <asm/uaccess.h>
> #include <linux/kernel_stat.h>
> +#include <trace/events/timer.h>
>
> /*
> * Called after updating RLIMIT_CPU to set timer expiration if necessary.
> @@ -1160,6 +1161,7 @@ static void check_process_timers(struct task_struct *tsk,
> sig->it_prof_expires = cputime_add(
> sig->it_prof_expires, ptime);
> }
> + trace_itimer_expire(ITIMER_PROF, tsk, NULL, ptime);
> __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk);
> }
> if (!cputime_eq(sig->it_prof_expires, cputime_zero) &&
> @@ -1176,6 +1178,7 @@ static void check_process_timers(struct task_struct *tsk,
> sig->it_virt_expires = cputime_add(
> sig->it_virt_expires, utime);
> }
> + trace_itimer_expire(ITIMER_VIRTUAL, tsk, NULL, utime);
> __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk);
> }
> if (!cputime_eq(sig->it_virt_expires, cputime_zero) &&
> --
> 1.6.1.2
>
>
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
On Mon, 6 Jul 2009, Xiao Guangrong wrote:
> +TRACE_EVENT(itimer_state,
> +
> + TP_PROTO(int which, struct itimerval *value, cputime_t expires),
> +
> + TP_ARGS(which, value, expires),
> +
> + TP_STRUCT__entry(
> + __field( int, which )
> + __field( void *, timer )
> + __field( int, state )
> + __field( cputime_t, expires )
> + __field( long, value_sec )
> + __field( long, value_usec )
> + __field( long, interval_sec )
> + __field( long, interval_usec )
> + __string( comm, current->comm )
> + ),
> +
> + TP_fast_assign(
> + __entry->which = which;
> + __entry->timer = which == ITIMER_REAL ?
> + ¤t->signal->real_timer : NULL;
Why do we need this information at all ? There is no value in the
address of real_timer AFAICT.
> + __entry->state = (value->it_value.tv_sec != 0 ||
> + value->it_value.tv_usec != 0) ? 1 : 0;
Redundant information. You store the values anyway.
> + __entry->expires = which == ITIMER_REAL ? 0 : expires;
Why not initializing "nval" in do_setitimer() to 0 and store it
unconditionally in the trace.
> + __entry->value_sec = value->it_value.tv_sec;
> + __entry->value_usec = value->it_value.tv_usec;
> + __entry->interval_sec = value->it_interval.tv_sec;
> + __entry->interval_usec = value->it_interval.tv_usec;
> + __assign_str(comm, current->comm);
The tracer is recording current->comm already, so where is the value
of another instance ?
> +TRACE_EVENT(itimer_expire,
> +
> + TP_PROTO(int which, struct task_struct *task, struct pid *pid, cputime_t now),
> +
> + TP_ARGS(which, task, pid, now),
> +
> + TP_STRUCT__entry(
> + __field( int , which )
> + __field( cputime_t, now )
> + __array( char, comm, TASK_COMM_LEN )
> + ),
> +
> + TP_fast_assign(
> + __entry->which = which;
> + __entry->now = now;
> + if (pid)
> + strcpy(__entry->comm, pid_task(pid, PIDTYPE_PID)->comm);
> + else
> + strcpy(__entry->comm, task->comm);
It's sufficient to store task->pid.
Thanks,
tglx
On Mon, 6 Jul 2009, Mathieu Desnoyers wrote:
> * Xiao Guangrong ([email protected]) wrote:
> > +
> > +TRACE_EVENT(itimer_state,
> > +
>
> Hrm, I'd reserve the use of the *_state namespace for something
> different if possible.
See below.
> In LTTng, we have the ability to list a set of data structures at trace
> start (we call this state dump). In this case, we could add a list of
> all active timers at trace start. This is useful if the trace started
> after timer inits.
Can you please spare us the gory details of LTTng. We already know
that it can do everything plus brewing coffee, so no need to advertise
it in every tracing related mail.
> So having "_state" without much meaning here might eventually be
> confusing.
>
> "_action" or "_call" (like IPCs) might be more appropriate ?
Err, where is this an action or a call ? Care to look at the call site
of this tracepoint. It's at the end of do_setitimer() and records the
_state_ of the itimer _after_ the syscall was handled.
> Mathieu
Can you please remove the hundred lines you are not commenting on next
time ?
> > + TP_PROTO(int which, struct itimerval *value, cputime_t expires),
Thanks,
tglx
On Mon, 6 Jul 2009, Xiao Guangrong wrote:
> Changelog v1->v2:
> 1: Add jiffies info into timer
> 2: Combine debugobjects and trace as Thomas's suggestion
I like it that way. Some minor nitpicks below.
> +TRACE_EVENT(timer_expire,
> +
> + TP_PROTO(struct timer_list *timer),
> +
> + TP_ARGS(timer),
> +
> + TP_STRUCT__entry(
> + __field( void *, timer )
> + __field( void *, function )
Do we really need the function here? It does not change between
start and expiry.
> +TRACE_EVENT(timer_cancel,
> +
> + TP_PROTO(struct timer_list *timer),
> +
> + TP_ARGS(timer),
> +
> + TP_STRUCT__entry(
> + __field( void *, timer )
> + __field( void *, function )
Ditto.
> +
> +static inline void debug_and_trace_timer_expire(struct timer_list *timer)
Technically not really necessary. The debug code does not care about
it.
> timer = list_first_entry(head, struct timer_list,entry);
> + debug_and_trace_timer_expire(timer);
> fn = timer->function;
> data = timer->data;
>
Can you please add the tracepoint which was suggested by Anton (IIRC)
which allows us to measure the execution time of the callback function
? There is no need to dereference timer in the expiry tracepoint, we
just store the pointer. So we can safely move the tracepoints just
around the function call.
trace_timer_expire(timer);
fn(data);
trace_timer_callback_done(timer);
Or whatever fancy name you come up with.
Thanks,
tglx
On Mon, 6 Jul 2009, Xiao Guangrong wrote:
> +TRACE_EVENT(hrtimer_expire,
> +
> + TP_PROTO(struct hrtimer *timer, s64 now),
> +
> + TP_ARGS(timer, now),
> +
> + TP_STRUCT__entry(
> + __field( void *, timer )
> + __field( void *, function )
Again, function is not necessary here.
> + __field( s64, now )
> + ),
> +
> + TP_fast_assign(
> + __entry->timer = timer;
> + __entry->function = timer->function;
> + __entry->now = now;
> + ),
> +
> + TP_printk("timer=%p func=%pf now=%llu", __entry->timer, __entry->function,
> + (unsigned long long)__entry->now)
> +);
> +
> +TRACE_EVENT(hrtimer_cancel,
> +
> + TP_PROTO(struct hrtimer *timer),
> + TP_ARGS(timer),
> + TP_STRUCT__entry(
> + __field( void *, timer )
> + __field( void *, function )
Ditto.
> +static inline void debug_and_trace_hrtimer_expire(struct hrtimer *timer,
> + s64 now)
Technically not necessary.
>
> + debug_and_trace_hrtimer_expire(timer, basenow.tv64);
> __run_hrtimer(timer);
Here I'd like to see a callback_done entry as well, so we can check
for long lasting callbacks.
> }
> spin_unlock(&cpu_base->lock);
> @@ -1436,6 +1463,7 @@ void hrtimer_run_queues(void)
> hrtimer_get_expires_tv64(timer))
> break;
>
> + debug_and_trace_hrtimer_expire(timer, base->softirq_time.tv64);
> __run_hrtimer(timer);
Ditto.
Thanks,
tglx
Thomas Gleixner wrote:
> On Mon, 6 Jul 2009, Xiao Guangrong wrote:
>> Changelog v1->v2:
>> 1: Add jiffies info into timer
>> 2: Combine debugobjects and trace as Thomas's suggestion
>
> I like it that way. Some minor nitpicks below.
>
>> +TRACE_EVENT(timer_expire,
>> +
>> + TP_PROTO(struct timer_list *timer),
>> +
>> + TP_ARGS(timer),
>> +
>> + TP_STRUCT__entry(
>> + __field( void *, timer )
>> + __field( void *, function )
>
> Do we really need the function here? It does not change between
> start and expiry.
>
Yeah, I'll fix.
>> +TRACE_EVENT(timer_cancel,
>> +
>> + TP_PROTO(struct timer_list *timer),
>> +
>> + TP_ARGS(timer),
>> +
>> + TP_STRUCT__entry(
>> + __field( void *, timer )
>> + __field( void *, function )
>
> Ditto.
>
Will fix.
>> +
>> +static inline void debug_and_trace_timer_expire(struct timer_list *timer)
>
> Technically not really necessary. The debug code does not care about
> it.
>
Will fix.
>> timer = list_first_entry(head, struct timer_list,entry);
>> + debug_and_trace_timer_expire(timer);
>> fn = timer->function;
>> data = timer->data;
>>
>
> Can you please add the tracepoint which was suggested by Anton (IIRC)
> which allows us to measure the execution time of the callback function
> ? There is no need to dereference timer in the expiry tracepoint, we
> just store the pointer. So we can safely move the tracepoints just
> around the function call.
>
> trace_timer_expire(timer);
> fn(data);
> trace_timer_callback_done(timer);
>
> Or whatever fancy name you come up with.
>
OK, I'll fix it base on your suggestion.
trace_timer_expire/trace_timer_callback_done are already in Frederic' tree
but with different name, see below URL:
http://marc.info/?l=linux-kernel&m=124331396919301&w=2
I'll modify those tracepoint's name to match ours.
Thank you very much!
Xiao
>
> tglx
>
>
Hi Thomas,
Thanks for your review.
Thomas Gleixner wrote:
> On Mon, 6 Jul 2009, Xiao Guangrong wrote:
>> +TRACE_EVENT(itimer_state,
>> +
>> + TP_PROTO(int which, struct itimerval *value, cputime_t expires),
>> +
>> + TP_ARGS(which, value, expires),
>> +
>> + TP_STRUCT__entry(
>> + __field( int, which )
>> + __field( void *, timer )
>> + __field( int, state )
>> + __field( cputime_t, expires )
>> + __field( long, value_sec )
>> + __field( long, value_usec )
>> + __field( long, interval_sec )
>> + __field( long, interval_usec )
>> + __string( comm, current->comm )
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->which = which;
>> + __entry->timer = which == ITIMER_REAL ?
>> + ¤t->signal->real_timer : NULL;
>
> Why do we need this information at all ? There is no value in the
> address of real_timer AFAICT.
>
Thanks for your review. I'll send a new patch with your comments addressed.
* Thomas Gleixner ([email protected]) wrote:
> On Mon, 6 Jul 2009, Mathieu Desnoyers wrote:
> > * Xiao Guangrong ([email protected]) wrote:
> > > +
> > > +TRACE_EVENT(itimer_state,
> > > +
> >
> > Hrm, I'd reserve the use of the *_state namespace for something
> > different if possible.
>
> See below.
>
> > In LTTng, we have the ability to list a set of data structures at trace
> > start (we call this state dump). In this case, we could add a list of
> > all active timers at trace start. This is useful if the trace started
> > after timer inits.
>
> Can you please spare us the gory details of LTTng. We already know
> that it can do everything plus brewing coffee, so no need to advertise
> it in every tracing related mail.
>
> > So having "_state" without much meaning here might eventually be
> > confusing.
> >
> > "_action" or "_call" (like IPCs) might be more appropriate ?
>
> Err, where is this an action or a call ? Care to look at the call site
> of this tracepoint. It's at the end of do_setitimer() and records the
> _state_ of the itimer _after_ the syscall was handled.
>
Hrm, right. The patch context misled me. I thought it was at the end of
it_real_fn(), but it's actually in do_setitimer().
OK, looking at it from it's actual context, it makes perfect sense to
name it that way.
And it could even be used as-is to iterate on all timers at trace start
as we do in this tracer I won't name. ;-)
BTW, I was merely pointing out the risk of a name clash here. And I
had to refer to the tree where this piece of code exists to support the
point. My intent was not to advertise anything.
> > Mathieu
>
> Can you please remove the hundred lines you are not commenting on next
> time ?
>
OK,
Thanks,
Mathieu
> > > + TP_PROTO(int which, struct itimerval *value, cputime_t expires),
>
> Thanks,
>
> tglx
--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
Mathieu,
On Tue, 7 Jul 2009, Mathieu Desnoyers wrote:
> > Err, where is this an action or a call ? Care to look at the call site
> > of this tracepoint. It's at the end of do_setitimer() and records the
> > _state_ of the itimer _after_ the syscall was handled.
> >
>
> Hrm, right. The patch context misled me. I thought it was at the end of
> it_real_fn(), but it's actually in do_setitimer().
>
> OK, looking at it from it's actual context, it makes perfect sense to
> name it that way.
>
> And it could even be used as-is to iterate on all timers at trace start
> as we do in this tracer I won't name. ;-)
:)
> BTW, I was merely pointing out the risk of a name clash here. And I
> had to refer to the tree where this piece of code exists to support the
> point. My intent was not to advertise anything.
While I really do not care much about potential name clashes with out
of tree code, looking at your wrong analysis of the patch and your
mentioning of that unnamed tracer triggered my rant button :)
tglx
On Tue, Jul 07, 2009 at 05:28:18PM +0800, Xiao Guangrong wrote:
>
>
> Thomas Gleixner wrote:
> > On Mon, 6 Jul 2009, Xiao Guangrong wrote:
> >> Changelog v1->v2:
> >> 1: Add jiffies info into timer
> >> 2: Combine debugobjects and trace as Thomas's suggestion
> >
> > I like it that way. Some minor nitpicks below.
> >
> >> +TRACE_EVENT(timer_expire,
> >> +
> >> + TP_PROTO(struct timer_list *timer),
> >> +
> >> + TP_ARGS(timer),
> >> +
> >> + TP_STRUCT__entry(
> >> + __field( void *, timer )
> >> + __field( void *, function )
> >
> > Do we really need the function here? It does not change between
> > start and expiry.
> >
>
> Yeah, I'll fix.
>
> >> +TRACE_EVENT(timer_cancel,
> >> +
> >> + TP_PROTO(struct timer_list *timer),
> >> +
> >> + TP_ARGS(timer),
> >> +
> >> + TP_STRUCT__entry(
> >> + __field( void *, timer )
> >> + __field( void *, function )
> >
> > Ditto.
> >
>
> Will fix.
>
> >> +
> >> +static inline void debug_and_trace_timer_expire(struct timer_list *timer)
> >
> > Technically not really necessary. The debug code does not care about
> > it.
> >
>
> Will fix.
>
> >> timer = list_first_entry(head, struct timer_list,entry);
> >> + debug_and_trace_timer_expire(timer);
> >> fn = timer->function;
> >> data = timer->data;
> >>
> >
> > Can you please add the tracepoint which was suggested by Anton (IIRC)
> > which allows us to measure the execution time of the callback function
> > ? There is no need to dereference timer in the expiry tracepoint, we
> > just store the pointer. So we can safely move the tracepoints just
> > around the function call.
> >
> > trace_timer_expire(timer);
> > fn(data);
> > trace_timer_callback_done(timer);
> >
> > Or whatever fancy name you come up with.
> >
>
> OK, I'll fix it base on your suggestion.
>
> trace_timer_expire/trace_timer_callback_done are already in Frederic' tree
> but with different name, see below URL:
> http://marc.info/?l=linux-kernel&m=124331396919301&w=2
>
> I'll modify those tracepoint's name to match ours.
>
> Thank you very much!
>
> Xiao
If you wish so, I can gather your patches with the one of Anton Blanchard
and send a pull request once it's acked.
Thanks,
Frederic.
Hi Thomas,
Thomas Gleixner wrote:
>
> Here I'd like to see a callback_done entry as well, so we can check
> for long lasting callbacks.
>
I am trying to address your comment that move trace_timer_expires()
form hrtimer_interrupt()/hrtimer_run_queues() to __run_hrtimer() to
trace the execution time of hrtimer's callback, like below:
__run_hrtimer()
{
.....
+ trace_hrtimer_expires();
restart = fn(timer);
+ trace_hrtimer_callback_done();
......
}
But I meet a problem here that I can't get the timestamps when hrtimer
expires in __run_hrtimer(), which is used to calculate the latency of
hrtimer. I think it's Ok to not move trace_hrtimer_expires(), that is:
hrtimer_interrupt()
{
......
+ trace_hrtimer_expires();
__run_hrtimer(timer);
......
}
__run_hrtimer()
{
......
restart = fn(timer);
+ trace_hrtimer_callback_done();
......
}
But if more accurate execution time is a require, we'd better use the
tracepoints added by Anton Blanchard <[email protected]>. That is:
hrtimer_interrupt()
{
......
+ trace_hrtimer_expires();
__run_hrtimer(timer);
......
}
__run_hrtimer()
{
......
+ trace_hrtimer_callback_entry()
restart = fn(timer);
+ trace_hrtimer_callback_exit();
......
}
Thanks,
Xiao