We encounted one BUG_ON() issue at function __run_hrtimer(),
but the panic info is not enough to find out which hrtimer
users use the hrtimer wrongly.
(in this BUG_ON case, it is callback running at the same time
hrtimer_start() is executed on different context.)
We can print basic timer info before BUG(), it is easy to find
out which hrtimer user has fault, the output info is like below:
<3>[ 41.118552] timer info: state/3, func/test_timer_cb
<3>[ 41.118618] timer stats: pid/786(sh), site/hrtimer_start
<0>[ 41.118670] ------------[ cut here ]------------
<2>[ 41.118691] kernel BUG at /root/r4_code/hardware/intel/linux-2.6/kernel/hrtimer.c:1260!
<0>[ 41.118715] invalid opcode: 0000 [#1] PREEMPT SMP
The infos of callback/state are very helpful for debugging this BUG_ON().
Signed-off-by: Li Fei <[email protected]>
Signed-off-by: liu chuansheng <[email protected]>
---
kernel/hrtimer.c | 34 +++++++++++++++++++++++++++++++++-
1 files changed, 33 insertions(+), 1 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6db7a5e..388e8b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1204,6 +1204,35 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
}
EXPORT_SYMBOL_GPL(hrtimer_get_res);
+/*
+ * dump_hrtimer_callinfo - print hrtimer information including:
+ * state, callback function, pid and start_site.
+*/
+static void dump_hrtimer_callinfo(struct hrtimer *timer)
+{
+
+ char symname[KSYM_NAME_LEN];
+
+ if (lookup_symbol_name((unsigned long)(timer->function), symname) < 0) {
+ pr_err("timer info: state/%lx, func/%pK\n",
+ timer->state, timer->function);
+ } else {
+ pr_err("timer info: state/%lx, func/%s",
+ timer->state, symname);
+ }
+
+#ifdef CONFIG_TIMER_STATS
+ if (lookup_symbol_name((unsigned long)(timer->start_site),
+ symname) < 0) {
+ pr_err("timer stats: pid/%d(%s), site/%pK\n",
+ timer->start_pid, timer->start_comm, timer->start_site);
+ } else {
+ pr_err("timer stats: pid/%d(%s), site/%s\n",
+ timer->start_pid, timer->start_comm, symname);
+ }
+#endif
+}
+
static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
{
struct hrtimer_clock_base *base = timer->base;
@@ -1235,7 +1264,10 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
* hrtimer_start_range_ns() or in hrtimer_interrupt()
*/
if (restart != HRTIMER_NORESTART) {
- BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
+ if (timer->state != HRTIMER_STATE_CALLBACK) {
+ dump_hrtimer_callinfo(timer);
+ BUG();
+ }
enqueue_hrtimer(timer, base);
}
--
1.7.0.4
On Mon, 2012-10-29 at 19:02 +0800, Chuansheng Liu wrote:
> +/*
> + * dump_hrtimer_callinfo - print hrtimer information including:
> + * state, callback function, pid and start_site.
> +*/
> +static void dump_hrtimer_callinfo(struct hrtimer *timer)
> +{
> +
> + char symname[KSYM_NAME_LEN];
> +
> + if (lookup_symbol_name((unsigned long)(timer->function), symname) < 0) {
> + pr_err("timer info: state/%lx, func/%pK\n",
> + timer->state, timer->function);
> + } else {
> + pr_err("timer info: state/%lx, func/%s",
> + timer->state, symname);
> + }
> +
> +#ifdef CONFIG_TIMER_STATS
> + if (lookup_symbol_name((unsigned long)(timer->start_site),
> + symname) < 0) {
> + pr_err("timer stats: pid/%d(%s), site/%pK\n",
> + timer->start_pid, timer->start_comm, timer->start_site);
> + } else {
> + pr_err("timer stats: pid/%d(%s), site/%s\n",
> + timer->start_pid, timer->start_comm, symname);
> + }
> +#endif
> +}
What's wrong with %pf ?
> -----Original Message-----
> From: Peter Zijlstra [mailto:[email protected]]
> Sent: Monday, October 29, 2012 5:06 PM
> To: Liu, Chuansheng
> Cc: [email protected]; [email protected]; [email protected]; Li, Fei;
> [email protected]
> Subject: Re: [PATCH] hrtimer: Printing timer info when hitting BUG_ON()
>
> On Mon, 2012-10-29 at 19:02 +0800, Chuansheng Liu wrote:
> > +/*
> > + * dump_hrtimer_callinfo - print hrtimer information including:
> > + * state, callback function, pid and start_site.
> > +*/
> > +static void dump_hrtimer_callinfo(struct hrtimer *timer)
> > +{
> > +
> > + char symname[KSYM_NAME_LEN];
> > +
> > + if (lookup_symbol_name((unsigned long)(timer->function),
> symname) < 0) {
> > + pr_err("timer info: state/%lx, func/%pK\n",
> > + timer->state, timer->function);
> > + } else {
> > + pr_err("timer info: state/%lx, func/%s",
> > + timer->state, symname);
> > + }
> > +
> > +#ifdef CONFIG_TIMER_STATS
> > + if (lookup_symbol_name((unsigned long)(timer->start_site),
> > + symname) < 0) {
> > + pr_err("timer stats: pid/%d(%s), site/%pK\n",
> > + timer->start_pid, timer->start_comm,
> timer->start_site);
> > + } else {
> > + pr_err("timer stats: pid/%d(%s), site/%s\n",
> > + timer->start_pid, timer->start_comm,
> symname);
> > + }
> > +#endif
> > +}
>
> What's wrong with %pf ?
Thanks Peter's help and pointing out, patch V2 has been sent.
This patch helps us to find out the root cause quickly.