Create /proc/timer_wheel_list.
This does for the timer wheel what /proc/timer_list
does for hrtimers -- provide a way of displaying what
timers are running on what cpus, and their attributes.
Signed-off-by: Joe Korty <[email protected]>
Index: 2.6.28-rc6/kernel/timer.c
===================================================================
--- 2.6.28-rc6.orig/kernel/timer.c 2008-11-21 17:02:04.000000000 -0500
+++ 2.6.28-rc6/kernel/timer.c 2008-11-21 17:04:25.000000000 -0500
@@ -36,6 +36,8 @@
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <asm/uaccess.h>
@@ -1568,6 +1570,113 @@
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
+#ifdef CONFIG_PROC_FS
+
+static void seq_printf_symbol(struct seq_file *m, void *symaddr, int width)
+{
+ char symname[KSYM_NAME_LEN];
+ int stat, len = m->count;
+
+ if (lookup_symbol_name((unsigned long)symaddr, symname) < 0)
+ stat = seq_printf(m, "<%p>", symaddr);
+ else
+ stat = seq_printf(m, "%s", symname);
+ if (width && stat == 0) {
+ len += (width - m->count);
+ if (len > 0)
+ seq_printf(m, "%*s", len, " ");
+ }
+}
+
+static void print_single_timer(struct seq_file *m, struct timer_list *timer)
+{
+ unsigned long base_jiffies = tbase_get_base(timer->base)->timer_jiffies;
+
+ seq_printf(m, " %p - ", (void *)(timer->expires - base_jiffies));
+ seq_printf_symbol(m, timer->function, 24);
+ seq_printf(m, " (data ");
+ seq_printf_symbol(m, (void *)(timer->data), 24);
+ seq_printf(m, ")");
+#ifdef CONFIG_TIMER_STATS
+ seq_printf(m, " from ");
+ seq_printf_symbol(m, timer->start_site, 28);
+ seq_printf(m, " %*s/%d",
+ TASK_COMM_LEN, timer->start_comm,
+ timer->start_pid);
+#endif
+ seq_printf(m, "\n");
+}
+
+static void print_timer_list(struct seq_file *m, struct list_head *head)
+{
+ struct timer_list *timer;
+ struct list_head *item;
+
+ for (item = head->next; item != head; item = item->next) {
+ timer = list_entry(item, struct timer_list, entry);
+ print_single_timer(m, timer);
+ }
+}
+
+static void print_cpu_timers(struct seq_file *m, int cpu)
+{
+ int i;
+ struct tvec_base *base = per_cpu(tvec_bases, cpu);
+
+ spin_lock_irq(&base->lock);
+ seq_printf(m, "\ncpu: %d, base jiffies: %p\n\n",
+ cpu, (void *)(base->timer_jiffies));
+
+ for (i = 0; i < TVR_SIZE; i++)
+ print_timer_list(m, base->tv1.vec + i);
+ for (i = 0; i < TVN_SIZE; i++) {
+ print_timer_list(m, base->tv2.vec + i);
+ print_timer_list(m, base->tv3.vec + i);
+ print_timer_list(m, base->tv4.vec + i);
+ print_timer_list(m, base->tv5.vec + i);
+ }
+ spin_unlock_irq(&base->lock);
+}
+
+static int timer_list_show(struct seq_file *m, void *v)
+{
+ int cpu;
+
+ seq_printf(m, "Timer Wheel List Version: 1\n");
+ seq_printf(m, "Jiffies: %px\n", (void *)jiffies);
+
+ for_each_online_cpu(cpu) {
+ print_cpu_timers(m, cpu);
+ }
+
+ return 0;
+}
+
+static int timer_list_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, timer_list_show, NULL);
+}
+
+static struct file_operations timer_list_fops = {
+ .open = timer_list_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init init_timer_list_procfs(void)
+{
+ struct proc_dir_entry *pe;
+
+ pe = proc_create("timer_wheel_list", 0444, NULL, &timer_list_fops);
+ if (!pe)
+ return -ENOMEM;
+ return 0;
+}
+late_initcall(init_timer_list_procfs);
+
+#endif /* CONFIG_PROC_FS */
+
/**
* msleep - sleep safely even with waitqueue interruptions
* @msecs: Time in milliseconds to sleep for
On Fri, Nov 21, 2008 at 05:11:13PM -0500, Joe Korty wrote:
> Create /proc/timer_wheel_list.
>
> This does for the timer wheel what /proc/timer_list
> does for hrtimers -- provide a way of displaying what
> timers are running on what cpus, and their attributes.
The fact that it's called a timer wheel is just an implementation detail
unsuitable for permanent file.
And you invented totally new (broken) way to print jiffies.
For printing nice function pointers we have %pF now.
late_initcall usage is taken out of air.
Can we put all this shit in debugfs, please?
> --- 2.6.28-rc6.orig/kernel/timer.c
> +++ 2.6.28-rc6/kernel/timer.c
> @@ -1568,6 +1570,113 @@
> open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
> }
>
> +#ifdef CONFIG_PROC_FS
> +
> +static void seq_printf_symbol(struct seq_file *m, void *symaddr, int width)
> +{
> + char symname[KSYM_NAME_LEN];
> + int stat, len = m->count;
> +
> + if (lookup_symbol_name((unsigned long)symaddr, symname) < 0)
> + stat = seq_printf(m, "<%p>", symaddr);
> + else
> + stat = seq_printf(m, "%s", symname);
> + if (width && stat == 0) {
> + len += (width - m->count);
> + if (len > 0)
> + seq_printf(m, "%*s", len, " ");
> + }
> +}
> +
> +static void print_single_timer(struct seq_file *m, struct timer_list *timer)
> +{
> + unsigned long base_jiffies = tbase_get_base(timer->base)->timer_jiffies;
> +
> + seq_printf(m, " %p - ", (void *)(timer->expires - base_jiffies));
> + seq_printf_symbol(m, timer->function, 24);
> + seq_printf(m, " (data ");
> + seq_printf_symbol(m, (void *)(timer->data), 24);
> + seq_printf(m, ")");
> +#ifdef CONFIG_TIMER_STATS
> + seq_printf(m, " from ");
> + seq_printf_symbol(m, timer->start_site, 28);
> + seq_printf(m, " %*s/%d",
> + TASK_COMM_LEN, timer->start_comm,
> + timer->start_pid);
> +#endif
> + seq_printf(m, "\n");
> +}
> +
> +static void print_timer_list(struct seq_file *m, struct list_head *head)
> +{
> + struct timer_list *timer;
> + struct list_head *item;
> +
> + for (item = head->next; item != head; item = item->next) {
> + timer = list_entry(item, struct timer_list, entry);
> + print_single_timer(m, timer);
> + }
> +}
> +
> +static void print_cpu_timers(struct seq_file *m, int cpu)
> +{
> + int i;
> + struct tvec_base *base = per_cpu(tvec_bases, cpu);
> +
> + spin_lock_irq(&base->lock);
> + seq_printf(m, "\ncpu: %d, base jiffies: %p\n\n",
> + cpu, (void *)(base->timer_jiffies));
> +
> + for (i = 0; i < TVR_SIZE; i++)
> + print_timer_list(m, base->tv1.vec + i);
> + for (i = 0; i < TVN_SIZE; i++) {
> + print_timer_list(m, base->tv2.vec + i);
> + print_timer_list(m, base->tv3.vec + i);
> + print_timer_list(m, base->tv4.vec + i);
> + print_timer_list(m, base->tv5.vec + i);
> + }
> + spin_unlock_irq(&base->lock);
> +}
> +
> +static int timer_list_show(struct seq_file *m, void *v)
> +{
> + int cpu;
> +
> + seq_printf(m, "Timer Wheel List Version: 1\n");
> + seq_printf(m, "Jiffies: %px\n", (void *)jiffies);
> +
> + for_each_online_cpu(cpu) {
> + print_cpu_timers(m, cpu);
> + }
> +
> + return 0;
> +}
> +
> +static int timer_list_open(struct inode *inode, struct file *filp)
> +{
> + return single_open(filp, timer_list_show, NULL);
> +}
> +
> +static struct file_operations timer_list_fops = {
> + .open = timer_list_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static int __init init_timer_list_procfs(void)
> +{
> + struct proc_dir_entry *pe;
> +
> + pe = proc_create("timer_wheel_list", 0444, NULL, &timer_list_fops);
> + if (!pe)
> + return -ENOMEM;
> + return 0;
> +}
> +late_initcall(init_timer_list_procfs);
> +
> +#endif /* CONFIG_PROC_FS */
On Sat, Nov 22, 2008 at 12:34:23PM -0500, Alexey Dobriyan wrote:
> On Fri, Nov 21, 2008 at 05:11:13PM -0500, Joe Korty wrote:
> > Create /proc/timer_wheel_list.
> >
> > This does for the timer wheel what /proc/timer_list
> > does for hrtimers -- provide a way of displaying what
> > timers are running on what cpus, and their attributes.
>
> The fact that it's called a timer wheel is just an implementation detail
> unsuitable for permanent file.
Agreed. I'll come up with something else. lrtimer_list is
a good candidate.
> And you invented totally new (broken) way to print jiffies.
You'll have to be more explicit about what's broken.
> For printing nice function pointers we have %pF now.
I'll go play with that again. IIRC, the first
time I played with it, its output was of the form
'symbol+0xoffset/0xsize' rather than just 'symbol'.
> late_initcall usage is taken out of air.
I'm open to suggestions. My thought was that it was as
good as any other initcall, since none of this stuff can
be referenced until after applications start running off
of /sbin/init. So what one picks is a bit of a crapshoot.
> Can we put all this shit in debugfs, please?
I kinda like the debugfs idea. Though there is
some attraction to making it parallel to the existing
/proc/timer_list, rather than putting it somewhere else
entirely.
I can see a migration path here .. put lrtimer_list in
debugfs, later move /proc/timer_list to debugfs (perhaps
as hrtimer_list) and make /proc/timer_list itself a
symbolic link to the debugfs version.
Regards,
Joe
* Joe Korty <[email protected]> wrote:
> On Sat, Nov 22, 2008 at 12:34:23PM -0500, Alexey Dobriyan wrote:
> > On Fri, Nov 21, 2008 at 05:11:13PM -0500, Joe Korty wrote:
> > > Create /proc/timer_wheel_list.
> > >
> > > This does for the timer wheel what /proc/timer_list
> > > does for hrtimers -- provide a way of displaying what
> > > timers are running on what cpus, and their attributes.
> >
> > The fact that it's called a timer wheel is just an implementation detail
> > unsuitable for permanent file.
>
> Agreed. I'll come up with something else. lrtimer_list is a good
> candidate.
no - please include it in /proc/timer_list instead. It already lists
clockevents and hrtimers - listing timer-list timers would be a
natural extension of that facility.
Ingo
On Sun, Nov 23, 2008 at 05:04:24AM -0500, Ingo Molnar wrote:
>
> * Joe Korty <[email protected]> wrote:
>
> > On Sat, Nov 22, 2008 at 12:34:23PM -0500, Alexey Dobriyan wrote:
> > > On Fri, Nov 21, 2008 at 05:11:13PM -0500, Joe Korty wrote:
> > > > Create /proc/timer_wheel_list.
> > > >
> > > > This does for the timer wheel what /proc/timer_list
> > > > does for hrtimers -- provide a way of displaying what
> > > > timers are running on what cpus, and their attributes.
> > >
> > > The fact that it's called a timer wheel is just an implementation detail
> > > unsuitable for permanent file.
> >
> > Agreed. I'll come up with something else. lrtimer_list is a good
> > candidate.
>
> no - please include it in /proc/timer_list instead. It already lists
> clockevents and hrtimers - listing timer-list timers would be a
> natural extension of that facility.
struct tvec_base and TVR_MASK & family are all local
to kernel/timer.c rather than in some header file,
so parts of the display code would need to remain in
kernel/timer.c. Seems ugly, but is of course do-able.
Also, I am not sure why we are lumping all sorts of
disparate things in one display file. Did proc files
become expensive somewhere along the line?
(Just asking before I go down a path that everyone will
hate, once they see the result).
Regards,
Joe
Joe,
On Fri, Nov 21, 2008 at 5:11 PM, Joe Korty <[email protected]> wrote:
> Create /proc/timer_wheel_list.
>
> This does for the timer wheel what /proc/timer_list
> does for hrtimers -- provide a way of displaying what
> timers are running on what cpus, and their attributes.
Please CC linux-api on kernel-userland API changes.
See Documentation/SubmitChecklist and
http://thread.gmane.org/gmane.linux.ltp/5658.
Cheers,
Michael
> Index: 2.6.28-rc6/kernel/timer.c
> ===================================================================
> --- 2.6.28-rc6.orig/kernel/timer.c 2008-11-21 17:02:04.000000000 -0500
> +++ 2.6.28-rc6/kernel/timer.c 2008-11-21 17:04:25.000000000 -0500
> @@ -36,6 +36,8 @@
> #include <linux/syscalls.h>
> #include <linux/delay.h>
> #include <linux/tick.h>
> +#include <linux/proc_fs.h>
> +#include <linux/seq_file.h>
> #include <linux/kallsyms.h>
>
> #include <asm/uaccess.h>
> @@ -1568,6 +1570,113 @@
> open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
> }
>
> +#ifdef CONFIG_PROC_FS
> +
> +static void seq_printf_symbol(struct seq_file *m, void *symaddr, int width)
> +{
> + char symname[KSYM_NAME_LEN];
> + int stat, len = m->count;
> +
> + if (lookup_symbol_name((unsigned long)symaddr, symname) < 0)
> + stat = seq_printf(m, "<%p>", symaddr);
> + else
> + stat = seq_printf(m, "%s", symname);
> + if (width && stat == 0) {
> + len += (width - m->count);
> + if (len > 0)
> + seq_printf(m, "%*s", len, " ");
> + }
> +}
> +
> +static void print_single_timer(struct seq_file *m, struct timer_list *timer)
> +{
> + unsigned long base_jiffies = tbase_get_base(timer->base)->timer_jiffies;
> +
> + seq_printf(m, " %p - ", (void *)(timer->expires - base_jiffies));
> + seq_printf_symbol(m, timer->function, 24);
> + seq_printf(m, " (data ");
> + seq_printf_symbol(m, (void *)(timer->data), 24);
> + seq_printf(m, ")");
> +#ifdef CONFIG_TIMER_STATS
> + seq_printf(m, " from ");
> + seq_printf_symbol(m, timer->start_site, 28);
> + seq_printf(m, " %*s/%d",
> + TASK_COMM_LEN, timer->start_comm,
> + timer->start_pid);
> +#endif
> + seq_printf(m, "\n");
> +}
> +
> +static void print_timer_list(struct seq_file *m, struct list_head *head)
> +{
> + struct timer_list *timer;
> + struct list_head *item;
> +
> + for (item = head->next; item != head; item = item->next) {
> + timer = list_entry(item, struct timer_list, entry);
> + print_single_timer(m, timer);
> + }
> +}
> +
> +static void print_cpu_timers(struct seq_file *m, int cpu)
> +{
> + int i;
> + struct tvec_base *base = per_cpu(tvec_bases, cpu);
> +
> + spin_lock_irq(&base->lock);
> + seq_printf(m, "\ncpu: %d, base jiffies: %p\n\n",
> + cpu, (void *)(base->timer_jiffies));
> +
> + for (i = 0; i < TVR_SIZE; i++)
> + print_timer_list(m, base->tv1.vec + i);
> + for (i = 0; i < TVN_SIZE; i++) {
> + print_timer_list(m, base->tv2.vec + i);
> + print_timer_list(m, base->tv3.vec + i);
> + print_timer_list(m, base->tv4.vec + i);
> + print_timer_list(m, base->tv5.vec + i);
> + }
> + spin_unlock_irq(&base->lock);
> +}
> +
> +static int timer_list_show(struct seq_file *m, void *v)
> +{
> + int cpu;
> +
> + seq_printf(m, "Timer Wheel List Version: 1\n");
> + seq_printf(m, "Jiffies: %px\n", (void *)jiffies);
> +
> + for_each_online_cpu(cpu) {
> + print_cpu_timers(m, cpu);
> + }
> +
> + return 0;
> +}
> +
> +static int timer_list_open(struct inode *inode, struct file *filp)
> +{
> + return single_open(filp, timer_list_show, NULL);
> +}
> +
> +static struct file_operations timer_list_fops = {
> + .open = timer_list_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +
> +static int __init init_timer_list_procfs(void)
> +{
> + struct proc_dir_entry *pe;
> +
> + pe = proc_create("timer_wheel_list", 0444, NULL, &timer_list_fops);
> + if (!pe)
> + return -ENOMEM;
> + return 0;
> +}
> +late_initcall(init_timer_list_procfs);
> +
> +#endif /* CONFIG_PROC_FS */
> +
> /**
> * msleep - sleep safely even with waitqueue interruptions
> * @msecs: Time in milliseconds to sleep for
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/ Found a documentation bug?
http://www.kernel.org/doc/man-pages/reporting_bugs.html
Add to /proc/timer_list a display of the active jiffie timers.
Tested on i386 and x86_64, with 'less /proc/timer_list' and
through SysRq-Q.
Signed-off-by: Joe Korty <[email protected]>
Index: 2.6.28-rc6/kernel/timer.c
===================================================================
--- 2.6.28-rc6.orig/kernel/timer.c 2008-11-25 11:59:07.000000000 -0500
+++ 2.6.28-rc6/kernel/timer.c 2008-11-25 13:49:05.000000000 -0500
@@ -36,6 +36,8 @@
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <asm/uaccess.h>
@@ -1568,6 +1570,87 @@
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_MAGIC_SYSRQ)
+
+/*
+ * This allows printing both to /proc/timer_list and
+ * to the console (on SysRq-Q):
+ */
+#define SEQ_printf(m, x...) \
+ do { \
+ if (m) \
+ seq_printf(m, x); \
+ else \
+ printk(x); \
+ } while (0)
+
+static void print_nearest_symbol(struct seq_file *m, void *symaddr)
+{
+ char symname[KSYM_NAME_LEN];
+
+ if (lookup_symbol_name((unsigned long)symaddr, symname) < 0)
+ SEQ_printf(m, "<%p>", symaddr);
+ else
+ SEQ_printf(m, "%s", symname);
+}
+
+static void print_single_jtimer(struct seq_file *m, struct timer_list *timer)
+{
+ unsigned long base_jiffies = tbase_get_base(timer->base)->timer_jiffies;
+#ifdef CONFIG_TIMER_STATS
+ char tmp[TASK_COMM_LEN + 1];
+#endif
+
+ SEQ_printf(m, " %p: ", (void *)(timer->expires - base_jiffies));
+ print_nearest_symbol(m, timer->function);
+ SEQ_printf(m, " (");
+ print_nearest_symbol(m, (void *)(timer->data));
+ SEQ_printf(m, ")");
+#ifdef CONFIG_TIMER_STATS
+ SEQ_printf(m, " from ");
+ print_nearest_symbol(m, timer->start_site);
+ memcpy(tmp, timer->start_comm, TASK_COMM_LEN);
+ tmp[TASK_COMM_LEN] = 0;
+ SEQ_printf(m, ", %s/%d", tmp, timer->start_pid);
+#endif
+ SEQ_printf(m, "\n");
+}
+
+static void print_list_jtimers(struct seq_file *m, struct list_head *head)
+{
+ struct timer_list *timer;
+ struct list_head *item;
+
+ for (item = head->next; item != head; item = item->next) {
+ timer = list_entry(item, struct timer_list, entry);
+ print_single_jtimer(m, timer);
+ }
+}
+
+void print_cpu_jtimers(struct seq_file *m, int cpu)
+{
+ int i;
+ struct tvec_base *base = per_cpu(tvec_bases, cpu);
+
+ SEQ_printf(m, "active jiffie timers:\n");
+ spin_lock_irq(&base->lock);
+ SEQ_printf(m, " base: %p\n", base);
+ SEQ_printf(m, " timer_jiffies: %p\n", (void *)(base->timer_jiffies));
+ SEQ_printf(m, " running_timer: %p\n", base->running_timer);
+
+ for (i = 0; i < TVR_SIZE; i++)
+ print_list_jtimers(m, base->tv1.vec + i);
+ for (i = 0; i < TVN_SIZE; i++) {
+ print_list_jtimers(m, base->tv2.vec + i);
+ print_list_jtimers(m, base->tv3.vec + i);
+ print_list_jtimers(m, base->tv4.vec + i);
+ print_list_jtimers(m, base->tv5.vec + i);
+ }
+ spin_unlock_irq(&base->lock);
+}
+
+#endif /* CONFIG_PROC_FS || CONFIG_MAGIC_SYSRQ */
+
/**
* msleep - sleep safely even with waitqueue interruptions
* @msecs: Time in milliseconds to sleep for
Index: 2.6.28-rc6/include/linux/jiffies.h
===================================================================
--- 2.6.28-rc6.orig/include/linux/jiffies.h 2008-11-25 11:59:07.000000000 -0500
+++ 2.6.28-rc6/include/linux/jiffies.h 2008-11-25 13:22:41.000000000 -0500
@@ -298,6 +298,13 @@
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+struct seq_file;
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_MAGIC_SYSRQ)
+extern void print_cpu_jtimers(struct seq_file *, int);
+#else
+static inline void print_cpu_jtimers(struct seq_file *, int) {}
+#endif
+
#define TIMESTAMP_SIZE 30
#endif
Index: 2.6.28-rc6/kernel/time/timer_list.c
===================================================================
--- 2.6.28-rc6.orig/kernel/time/timer_list.c 2008-11-25 11:59:07.000000000 -0500
+++ 2.6.28-rc6/kernel/time/timer_list.c 2008-11-25 13:47:30.000000000 -0500
@@ -17,6 +17,7 @@
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/tick.h>
+#include <linux/jiffies.h>
#include <asm/uaccess.h>
@@ -139,6 +140,7 @@
SEQ_printf(m, " clock %d:\n", i);
print_base(m, cpu_base->clock_base + i, now);
}
+
#define P(x) \
SEQ_printf(m, " .%-15s: %Lu\n", #x, \
(unsigned long long)(cpu_base->x))
@@ -176,9 +178,11 @@
P(last_jiffies);
P(next_jiffies);
P_ns(idle_expires);
- SEQ_printf(m, "jiffies: %Lu\n",
+ SEQ_printf(m, "jiffies: %llu (0x%llx)\n",
+ (unsigned long long)jiffies,
(unsigned long long)jiffies);
}
+ print_cpu_jtimers(m, cpu);
#endif
#undef P
@@ -252,7 +256,7 @@
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.4\n");
+ SEQ_printf(m, "Timer List Version: v0.5\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
@@ -286,7 +290,7 @@
{
struct proc_dir_entry *pe;
- pe = proc_create("timer_list", 0644, NULL, &timer_list_fops);
+ pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
if (!pe)
return -ENOMEM;
return 0;
Joe,
On Tue, 25 Nov 2008, Joe Korty wrote:
> Add to /proc/timer_list a display of the active jiffie timers.
>
> Tested on i386 and x86_64, with 'less /proc/timer_list' and
> through SysRq-Q.
>
> Signed-off-by: Joe Korty <[email protected]>
>
> Index: 2.6.28-rc6/kernel/timer.c
> ===================================================================
> --- 2.6.28-rc6.orig/kernel/timer.c 2008-11-25 11:59:07.000000000 -0500
> +++ 2.6.28-rc6/kernel/timer.c 2008-11-25 13:49:05.000000000 -0500
> @@ -36,6 +36,8 @@
> #include <linux/syscalls.h>
> #include <linux/delay.h>
> #include <linux/tick.h>
> +#include <linux/proc_fs.h>
> +#include <linux/seq_file.h>
> #include <linux/kallsyms.h>
>
> #include <asm/uaccess.h>
> @@ -1568,6 +1570,87 @@
> open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
> }
>
> +#if defined(CONFIG_PROC_FS) || defined(CONFIG_MAGIC_SYSRQ)
This belongs into kernel/time/timer_list.c and there is no need to
copy that code around.
> +void print_cpu_jtimers(struct seq_file *m, int cpu)
> +{
> + int i;
> + struct tvec_base *base = per_cpu(tvec_bases, cpu);
> +
> + SEQ_printf(m, "active jiffie timers:\n");
> + spin_lock_irq(&base->lock);
Yuck. We really do _NOT_ stop everything just to print timers. Check
the hrtimer print code in timer_list.c
> Index: 2.6.28-rc6/kernel/time/timer_list.c
> ===================================================================
> --- 2.6.28-rc6.orig/kernel/time/timer_list.c 2008-11-25 11:59:07.000000000 -0500
> +++ 2.6.28-rc6/kernel/time/timer_list.c 2008-11-25 13:47:30.000000000 -0500
> @@ -17,6 +17,7 @@
> #include <linux/seq_file.h>
> #include <linux/kallsyms.h>
> #include <linux/tick.h>
> +#include <linux/jiffies.h>
>
> #include <asm/uaccess.h>
>
> @@ -139,6 +140,7 @@
> SEQ_printf(m, " clock %d:\n", i);
> print_base(m, cpu_base->clock_base + i, now);
> }
> +
random whitespace change
> #define P(x) \
> SEQ_printf(m, " .%-15s: %Lu\n", #x, \
> (unsigned long long)(cpu_base->x))
> @@ -176,9 +178,11 @@
> P(last_jiffies);
> P(next_jiffies);
> P_ns(idle_expires);
> - SEQ_printf(m, "jiffies: %Lu\n",
> + SEQ_printf(m, "jiffies: %llu (0x%llx)\n",
> + (unsigned long long)jiffies,
> (unsigned long long)jiffies);
The exact purpose of this change ?
> @@ -286,7 +290,7 @@
> {
> struct proc_dir_entry *pe;
>
> - pe = proc_create("timer_list", 0644, NULL, &timer_list_fops);
> + pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
> if (!pe)
> return -ENOMEM;
> return 0;
Correct, but unrelated $subject. Separate patch please.
Thanks,
tglx
On Tue, Nov 25, 2008 at 04:36:48PM -0500, Thomas Gleixner wrote:
> > --- 2.6.28-rc6.orig/kernel/timer.c 2008-11-25 11:59:07.000000000 -0500
> > +++ 2.6.28-rc6/kernel/timer.c 2008-11-25 13:49:05.000000000 -0500
> > +#if defined(CONFIG_PROC_FS) || defined(CONFIG_MAGIC_SYSRQ)
>
> This belongs into kernel/time/timer_list.c and there is no need to
> copy that code around.
Everything to do with jiffy timer implementation is static
local to kernel/timer.c, and not available to code in
kernel/time/timer_list.c or anywhere else. I consider that
localization to be a rather nice feature of kernel/timer.c,
and I wasn't willing to globalize it just for a debug
data dump.
Also, other features implement their 'show' functions
elsewhere, for example, show_interrupts. So doing the
same thing here is certainly not out of line.
> > +void print_cpu_jtimers(struct seq_file *m, int cpu)
> > +{
> > + int i;
> > + struct tvec_base *base = per_cpu(tvec_bases, cpu);
> > +
> > + SEQ_printf(m, "active jiffie timers:\n");
> > + spin_lock_irq(&base->lock);
>
> Yuck. We really do _NOT_ stop everything just to print timers. Check
> the hrtimer print code in timer_list.c
I'm not sure there is a safe way to reference the timers without
holding the lock. But I will look into this and see what can
be done.
> >
> > @@ -139,6 +140,7 @@
> > SEQ_printf(m, " clock %d:\n", i);
> > print_base(m, cpu_base->clock_base + i, now);
> > }
> > +
>
> random whitespace change
Will fix.
> > P_ns(idle_expires);
> > - SEQ_printf(m, "jiffies: %Lu\n",
> > + SEQ_printf(m, "jiffies: %llu (0x%llx)\n",
> > + (unsigned long long)jiffies,
> > (unsigned long long)jiffies);
>
> The exact purpose of this change ?
In the 'active jiffie timer' section I print timer_jiffies
etc in hex format. It's probably just me, but I found that
to be easier to read than the rather longer decimal format.
So this change was to get a hex version of the global jiffies
out, for easy comparison to values appearing in that section.
Certainly everything can be changed to decimal, if that is
the consensus.
> > - pe = proc_create("timer_list", 0644, NULL, &timer_list_fops);
> > + pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
> > if (!pe)
> > return -ENOMEM;
> > return 0;
>
> Correct, but unrelated $subject. Separate patch please.
Will do.
Thanks for the review,
Joe
On Tue, 25 Nov 2008, Joe Korty wrote:
> On Tue, Nov 25, 2008 at 04:36:48PM -0500, Thomas Gleixner wrote:
> > > --- 2.6.28-rc6.orig/kernel/timer.c 2008-11-25 11:59:07.000000000 -0500
> > > +++ 2.6.28-rc6/kernel/timer.c 2008-11-25 13:49:05.000000000 -0500
> > > +#if defined(CONFIG_PROC_FS) || defined(CONFIG_MAGIC_SYSRQ)
> >
> > This belongs into kernel/time/timer_list.c and there is no need to
> > copy that code around.
>
> Everything to do with jiffy timer implementation is static
> local to kernel/timer.c, and not available to code in
> kernel/time/timer_list.c or anywhere else. I consider that
> localization to be a rather nice feature of kernel/timer.c,
> and I wasn't willing to globalize it just for a debug
> data dump.
>
> Also, other features implement their 'show' functions
> elsewhere, for example, show_interrupts. So doing the
> same thing here is certainly not out of line.
I prefer a global variable over duplicate code and stuff which I have
to find out from random places.
show_interrupts is arch specific. timers are not.
> > > +void print_cpu_jtimers(struct seq_file *m, int cpu)
> > > +{
> > > + int i;
> > > + struct tvec_base *base = per_cpu(tvec_bases, cpu);
> > > +
> > > + SEQ_printf(m, "active jiffie timers:\n");
> > > + spin_lock_irq(&base->lock);
> >
> > Yuck. We really do _NOT_ stop everything just to print timers. Check
> > the hrtimer print code in timer_list.c
>
> I'm not sure there is a safe way to reference the timers without
> holding the lock. But I will look into this and see what can
> be done.
You can't do a real snapshot of the timer wheel. All you can do is
walk it carefully and make a copy of the timer_list structure and then
print it. The timer wheel can be pretty full and we definitely do not
want to stop a CPU fully just to print debug information.
> > > P_ns(idle_expires);
> > > - SEQ_printf(m, "jiffies: %Lu\n",
> > > + SEQ_printf(m, "jiffies: %llu (0x%llx)\n",
> > > + (unsigned long long)jiffies,
> > > (unsigned long long)jiffies);
> >
> > The exact purpose of this change ?
>
> In the 'active jiffie timer' section I print timer_jiffies
> etc in hex format. It's probably just me, but I found that
> to be easier to read than the rather longer decimal format.
> So this change was to get a hex version of the global jiffies
> out, for easy comparison to values appearing in that section.
>
> Certainly everything can be changed to decimal, if that is
> the consensus.
We want a consistent format for all the printouts.
Thanks,
tglx
Add an 'active jiffie timers' subdisplay to /proc/timer_list, v2.
Version 1-to-2 features:
1) The display code created in version 1 was moved from
kernel/timer.c to kernel/time/timer_list.c. To support
this, kernel/timer.c internals were moved to a new header
file, timer-internals.h, which is now included by both
of the above .c files.
2) No longer locks a cpu's timer wheel for the entire
duration of the printing of that wheel's timers; we now
instead grab and release the lock on a per timer wheel
slot basis. Refinements: we don't grab the lock at all
for those slots which are obviously empty (which will be
most of them most of the time), and we have pushed out
the actual printing of the timers past the point where
the lock is dropped.
3) Version 1 hex displays changed to decimal, to match
the style of the other /proc/timer_list output.
Signed-off-by: Joe Korty <[email protected]>
Index: 2.6.28-rc6/kernel/timer.c
===================================================================
--- 2.6.28-rc6.orig/kernel/timer.c 2008-11-26 10:26:22.000000000 -0500
+++ 2.6.28-rc6/kernel/timer.c 2008-11-26 10:26:23.000000000 -0500
@@ -44,73 +44,16 @@
#include <asm/timex.h>
#include <asm/io.h>
+#include <linux/timer-internals.h>
+
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
-/*
- * per-CPU timer vector definitions:
- */
-#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
-#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
-#define TVN_SIZE (1 << TVN_BITS)
-#define TVR_SIZE (1 << TVR_BITS)
-#define TVN_MASK (TVN_SIZE - 1)
-#define TVR_MASK (TVR_SIZE - 1)
-
-struct tvec {
- struct list_head vec[TVN_SIZE];
-};
-
-struct tvec_root {
- struct list_head vec[TVR_SIZE];
-};
-
-struct tvec_base {
- spinlock_t lock;
- struct timer_list *running_timer;
- unsigned long timer_jiffies;
- struct tvec_root tv1;
- struct tvec tv2;
- struct tvec tv3;
- struct tvec tv4;
- struct tvec tv5;
-} ____cacheline_aligned;
-
struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
-
-/*
- * Note that all tvec_bases are 2 byte aligned and lower bit of
- * base in timer_list is guaranteed to be zero. Use the LSB for
- * the new flag to indicate whether the timer is deferrable
- */
-#define TBASE_DEFERRABLE_FLAG (0x1)
+DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
-/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
-{
- return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
-}
-
-static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
-{
- return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
-}
-
-static inline void timer_set_deferrable(struct timer_list *timer)
-{
- timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
- TBASE_DEFERRABLE_FLAG));
-}
-
-static inline void
-timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
-{
- timer->base = (struct tvec_base *)((unsigned long)(new_base) |
- tbase_get_deferrable(timer->base));
-}
static unsigned long round_jiffies_common(unsigned long j, int cpu,
bool force_up)
Index: 2.6.28-rc6/kernel/time/timer_list.c
===================================================================
--- 2.6.28-rc6.orig/kernel/time/timer_list.c 2008-11-26 10:26:22.000000000 -0500
+++ 2.6.28-rc6/kernel/time/timer_list.c 2008-11-26 11:14:19.000000000 -0500
@@ -17,6 +17,9 @@
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/tick.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/timer-internals.h>
#include <asm/uaccess.h>
@@ -46,6 +49,109 @@
SEQ_printf(m, "%s", symname);
}
+/*
+ * Low resolution (jiffie) timer display support routines.
+ */
+/*
+ * Emit one display line for a single low-resolution timer: the distance
+ * between its expiry and its wheel's timer_jiffies, the callback, the
+ * callback's data argument and, with CONFIG_TIMER_STATS, the start site
+ * plus the comm/pid recorded in the timer.
+ */
+static void print_one_jtimer(struct seq_file *m, struct timer_list *timer)
+{
+	/* timer->base carries a flag in its low bit; strip it before use. */
+	struct tvec_base *wheel = tbase_get_base(timer->base);
+	unsigned long remaining = timer->expires - wheel->timer_jiffies;
+#ifdef CONFIG_TIMER_STATS
+	char comm[TASK_COMM_LEN + 1];
+#endif
+
+	SEQ_printf(m, " %7lu: ", remaining);
+	print_name_offset(m, timer->function);
+
+	SEQ_printf(m, " (");
+	print_name_offset(m, (void *)(timer->data));
+	SEQ_printf(m, ")");
+
+#ifdef CONFIG_TIMER_STATS
+	SEQ_printf(m, " from ");
+	print_name_offset(m, timer->start_site);
+
+	/* Copy into a buffer one byte larger so %s always sees a terminator. */
+	memcpy(comm, timer->start_comm, TASK_COMM_LEN);
+	comm[TASK_COMM_LEN] = 0;
+	SEQ_printf(m, ", %s/%d", comm, timer->start_pid);
+#endif
+	SEQ_printf(m, "\n");
+}
+
+#define MAX_TIMERS_PER_SLOT 32
+
+/*
+ * Print every timer queued on one slot of a timer wheel.
+ *
+ * @m:     output handle handed to SEQ_printf()
+ * @base:  wheel owning @head; base->lock is held while the slot is walked
+ * @head:  list head of the slot to display
+ * @tlist: caller-provided scratch array holding at least
+ *         MAX_TIMERS_PER_SLOT entries
+ *
+ * No more than MAX_TIMERS_PER_SLOT timers are shown per slot; when the
+ * slot holds more, a single overflow notice follows the ones displayed.
+ */
+static void print_one_wheel_slot(struct seq_file *m,
+		struct tvec_base *base, struct list_head *head,
+		struct timer_list *tlist)
+{
+	int i, ntimers = 0, overflow = 0;
+	struct timer_list *t;
+	unsigned long flags;
+
+	/*
+	 * Don't grab the lock if this timer wheel slot is known
+	 * to have no timers in it.  This dramatically reduces
+	 * lock/unlock churn, as slots are typically empty.
+	 */
+	if (list_empty(head))
+		return;
+
+	/*
+	 * Keep lock ownership to a minimum by _not_ printing out timer
+	 * contents while the lock is held.  This requires snapshotting
+	 * timer contents for post-unlock printing.
+	 */
+	spin_lock_irqsave(&base->lock, flags);
+	list_for_each_entry(t, head, entry) {
+		if (ntimers >= MAX_TIMERS_PER_SLOT) {
+			/*
+			 * Scratch table is full.  Only the fact of the
+			 * overflow is reported, so stop here instead of
+			 * walking the rest of the list with the lock held
+			 * and interrupts off.
+			 */
+			overflow = 1;
+			break;
+		}
+		tlist[ntimers++] = *t;
+	}
+	spin_unlock_irqrestore(&base->lock, flags);
+
+	for (i = 0; i < ntimers; i++)
+		print_one_jtimer(m, &tlist[i]);
+
+	if (overflow)
+		SEQ_printf(m, " *** "
+			"Display table overflow, some timers missed\n");
+}
+
+/*
+ * Dump the low-resolution timer wheel of one CPU: a short header with the
+ * wheel's address, running_timer and timer_jiffies, followed by the timers
+ * found in each wheel slot.
+ */
+static void print_cpu_jtimers(struct seq_file *m, int cpu)
+{
+	struct tvec_base *wheel = per_cpu(tvec_bases, cpu);
+	struct timer_list *snap;
+	int slot;
+
+	SEQ_printf(m, "active jiffie timers:\n");
+	SEQ_printf(m, " base: %p\n", wheel);
+	SEQ_printf(m, " running_timer: %p\n", wheel->running_timer);
+	SEQ_printf(m, " timer_jiffies: %lu\n", wheel->timer_jiffies);
+
+	/*
+	 * One scratch table, reused for every slot.
+	 * NOTE(review): a NULL @m presumably means we are not on the
+	 * seq_file read path and may not sleep, hence GFP_ATOMIC -- confirm.
+	 */
+	snap = kmalloc(MAX_TIMERS_PER_SLOT * sizeof(*snap),
+			m ? GFP_KERNEL : GFP_ATOMIC);
+	if (!snap) {
+		SEQ_printf(m, "(Error, could not allocate needed memory.)\n");
+		return;
+	}
+
+	/* tv1: all TVR_SIZE slots, in index order. */
+	for (slot = 0; slot < TVR_SIZE; slot++)
+		print_one_wheel_slot(m, wheel, &wheel->tv1.vec[slot], snap);
+
+	/* tv2..tv5: at each index, the tv2, tv3, tv4 and tv5 slot in turn. */
+	for (slot = 0; slot < TVN_SIZE; slot++) {
+		print_one_wheel_slot(m, wheel, &wheel->tv2.vec[slot], snap);
+		print_one_wheel_slot(m, wheel, &wheel->tv3.vec[slot], snap);
+		print_one_wheel_slot(m, wheel, &wheel->tv4.vec[slot], snap);
+		print_one_wheel_slot(m, wheel, &wheel->tv5.vec[slot], snap);
+	}
+
+	kfree(snap);
+}
+
+/*
+ * High res timer display support routines.
+ */
static void
print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer,
int idx, u64 now)
@@ -179,6 +285,7 @@
SEQ_printf(m, "jiffies: %Lu\n",
(unsigned long long)jiffies);
}
+ print_cpu_jtimers(m, cpu);
#endif
#undef P
@@ -252,7 +359,7 @@
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.4\n");
+ SEQ_printf(m, "Timer List Version: v0.5\n");
SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
Index: 2.6.28-rc6/include/linux/timer-internals.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2.6.28-rc6/include/linux/timer-internals.h 2008-11-26 11:13:18.000000000 -0500
@@ -0,0 +1,67 @@
+#ifndef _LINUX_TIMER_INTERNALS_H
+#define _LINUX_TIMER_INTERNALS_H 1
+
+/*
+ * per-CPU timer vector definitions:
+ */
+#define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
+#define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_MASK (TVN_SIZE - 1)
+#define TVR_MASK (TVR_SIZE - 1)
+
+/*
+ * One wheel level of TVN_SIZE slots (the type of tv2..tv5 in
+ * struct tvec_base); each vec[] entry is the head of a timer list.
+ */
+struct tvec {
+ struct list_head vec[TVN_SIZE];
+};
+
+/*
+ * The wheel level with TVR_SIZE slots (the type of tv1 in
+ * struct tvec_base); each vec[] entry is the head of a timer list.
+ */
+struct tvec_root {
+ struct list_head vec[TVR_SIZE];
+};
+
+/*
+ * Timer wheel state. One instance is reachable per CPU through the
+ * tvec_bases per-CPU pointer.
+ */
+struct tvec_base {
+ spinlock_t lock; /* held while a slot's timer list is walked */
+ struct timer_list *running_timer; /* expired timer being processed, NULL if none */
+ unsigned long timer_jiffies; /* what this wheel considers the current time, in jiffies */
+ struct tvec_root tv1; /* TVR_SIZE slots */
+ struct tvec tv2; /* TVN_SIZE slots */
+ struct tvec tv3; /* TVN_SIZE slots */
+ struct tvec tv4; /* TVN_SIZE slots */
+ struct tvec tv5; /* TVN_SIZE slots */
+} ____cacheline_aligned;
+
+DECLARE_PER_CPU(struct tvec_base *, tvec_bases);
+
+/*
+ * Note that all tvec_bases are 2 byte aligned and lower bit of
+ * base in timer_list is guaranteed to be zero. Use the LSB for
+ * the new flag to indicate whether the timer is deferrable
+ */
+#define TBASE_DEFERRABLE_FLAG (0x1)
+
+/* Functions below help us manage 'deferrable' flag */
+/*
+ * Extract the deferrable flag kept in the least significant bit of a
+ * tagged base pointer. Returns TBASE_DEFERRABLE_FLAG if set, else 0.
+ */
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
+{
+	unsigned long tagged = (unsigned long)base;
+
+	return (unsigned int)tagged & TBASE_DEFERRABLE_FLAG;
+}
+
+/*
+ * Strip the deferrable flag from a tagged base pointer, yielding the
+ * real struct tvec_base address.
+ */
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
+{
+	unsigned long tagged = (unsigned long)base;
+
+	return (struct tvec_base *)(tagged & ~TBASE_DEFERRABLE_FLAG);
+}
+
+/* Mark @timer deferrable by setting the flag bit in its base pointer. */
+static inline void timer_set_deferrable(struct timer_list *timer)
+{
+	unsigned long tagged = (unsigned long)(timer->base);
+
+	timer->base = (struct tvec_base *)(tagged | TBASE_DEFERRABLE_FLAG);
+}
+
+/*
+ * Point @timer at @new_base while carrying over the deferrable flag
+ * currently stored in the timer's base pointer.
+ */
+static inline void
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
+{
+	unsigned long flag = tbase_get_deferrable(timer->base);
+
+	timer->base = (struct tvec_base *)((unsigned long)(new_base) | flag);
+}
+
+#endif /* _LINUX_TIMER_INTERNALS_H */
On Wed, Nov 26, 2008 at 11:48:45AM -0500, Joe Korty wrote:
> Add an 'active jiffie timers' subdisplay to /proc/timer_list, v2.
Can you add the documentation for this new field to Documentation/ABI/
as you are changing/adding a public API.
thanks,
greg k-h
On Wed, Nov 26, 2008 at 12:07:15PM -0500, Greg KH wrote:
> On Wed, Nov 26, 2008 at 11:48:45AM -0500, Joe Korty wrote:
> > Add an 'active jiffie timers' subdisplay to /proc/timer_list, v2.
>
> Can you add the documentation for this new field to Documentation/ABI/
> as you are changing/adding a public API.
OK.
Right now there is no ABI documentation for anything in
/proc/timer_list. Do you prefer one file for everything
in /proc/timer_list, or a separate file for each of the
subsections?
I was also thinking the doc(s) should go into the /testing
subsection, as the displayed revision number is < 1.0.
Joe
On Wed, Nov 26, 2008 at 12:34:10PM -0500, Joe Korty wrote:
> On Wed, Nov 26, 2008 at 12:07:15PM -0500, Greg KH wrote:
> > On Wed, Nov 26, 2008 at 11:48:45AM -0500, Joe Korty wrote:
> > > Add an 'active jiffie timers' subdisplay to /proc/timer_list, v2.
> >
> > Can you add the documentation for this new field to Documentation/ABI/
> > as you are changing/adding a public API.
>
>
> OK.
>
> Right now there is no ABI documentation for anything in
> /proc/timer_list.
Yeah, you touched it last, so you get to document it all, sorry, it's
not fair, but it's how we are slowly fixing these issues :)
> Do you prefer one file for everything in /proc/timer_list, or a
> separate file for each of the subsections?
Whatever you feel would be best is fine.
> I was also thinking the doc(s) should go into the /testing
> subsection, as the displayed revision number is < 1.0.
Due to how long this has been in the kernel, I think it's fair to say
it's part of the stable section now.
thanks,
greg k-h
Document /proc/timer_list ABI.
This documents all of /proc/timer_list, including the extension
adding jiffie timers, as proposed in the patch:
[PATCH] Display active jiffie timers in /proc/timer_list, v2
Signed-off-by: Joe Korty <[email protected]>
Index: 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list 2008-11-26 15:55:04.000000000 -0500
@@ -0,0 +1,96 @@
+What: /proc/timer_list
+Date: November 2008
+Contact: Ingo Molnar <[email protected]>
+ Thomas Gleixner <[email protected]>
+ Joe Korty <[email protected]>
+Revision-Rate: Moderate
+At-Revision: 0.5
+Description:
+ /proc/timer_list displays most everything about every kind
+ of timer, and some things about time too.
+
+ The contents of this file should be expected to change,
+ as the data displayed corresponds directly to various
+ kernel-internal data structures. For this reason, the first
+ line contains the file revision. It is the responsibility
+ of this file's maintainers to bump the revision each time a
+ kernel is released having incompatible changes in this file.
+
+ Section Overview
+ ----------------
+ The file contains several somewhat independent sections.
+
+ The first section contains a few lines of global info.
+ Examples: file version id, #clock types in the kernel,
+ #nsecs since boot.
+
+ The second section is organized per-cpu. Each cpu subsection
+ in turn contains several sub-subsections which are, in order
+ of appearance:
+
+ The contents of the data structures associated with each
+ clock (CLOCK_REALTIME, CLOCK_MONOTONIC, etc) on this cpu.
+ Examples: base, index, resolution, get_time, offset.
+ Under each of these clocks is, in turn, a display of all
+ the active high resolution timers queued to that clock.
+ Example: all lines beginning with '#'.
+
+ The contents of per-cpu timer data fields not associated
+ with a particular clock type (ie, shared by both clocks or
+ not associated with any clock). Examples: expires_next,
+ hres_active, nr_event, nohz_mode, all things idle_*,
+ tick_stopped, last_jiffies, next_jiffies.
+
+ A display of low resolution (ie, jiffie) timer wheel
+ data. Examples: base, running_timer, timer_jiffies.
+ Also under this section is a display, one per line, of
+ each active jiffie timer queued to this cpu. Examples:
+ All lines under an 'active jiffie timers' section that
+ begin with a number.
+
+ The third and final section describes each 'tick device'
+ known to the kernel. A tick device is a piece of hardware
+ capable of generating periodic and/or one shot interrupts
+ under software control, and thus is capable of generating
+ the interrupts needed to expire the various active timers at
+ their given expiration times. Examples: hpet, pit, lapic.
+
+ Hires Timer Layout
+ ------------------
+ High resolution timers are displayed on lines that begin
+ with a '#' and always appear under one of the many sections
+ labeled 'active timers'. There is an 'active timers'
+ section for every cpu and every clock.
+
+ The fields of a hrtimer, spread out over two lines, are:
+
+ line 1 fields:
+ 1 - unique hrtimer index (#0, #1, #2, etc)
+ 2 - kernel address of the hrtimer data structure
+ in question
+ 3 - function to be called when timer expires
+ 4 - timer state (eg, S:01), avail states, OR-able:
+ 0 - inactive
+ 1 - enqueued
+ 2 - callback
+ 4 - pending
+ 8 - migrate
+ 5 - function which created the timer
+ 6 - process name & pid which created the timer
+
+ line 2 fields:
+ 1 - absolute expiration time, range format (start - end)
+ 2 - relative expiration time, range format (start - end)
+
+ Lowres Timer Layout
+ -------------------
+ Low resolution timers are displayed one-per-line under
+ sections labeled 'active jiffie timers'. There is one such
+ section per cpu. A lowres timer has the following fields:
+
+ 1 - #jiffies remaining until timer expires
+ 2 - function to be called on expiration
+ 3 - data value to be given to the above function on
+ expiration
+ 4 - function which created this timer
+ 5 - name & pid of the process that created this timer
Joe,
Some comments and suggestions below.
On Wed, Nov 26, 2008 at 4:06 PM, Joe Korty <[email protected]> wrote:
> Document /proc/timer_list ABI.
>
> This documents all of /timer_list, including the extension
> adding jiffie timers, as proposed in the patch:
>
> [PATCH] Display active jiffie timers in /proc/timer_list, v2
>
> Signed-off-by: Joe Korty <[email protected]>
>
> Index: 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list 2008-11-26 15:55:04.000000000 -0500
> @@ -0,0 +1,96 @@
> +What: /proc/timer_list
> +Date: November 2008
> +Contact: Ingo Molnar <[email protected]>
> + Thomas Gleixner <[email protected]>
> + Joe Korty <[email protected]>
> +Revision-Rate: Moderate
> +At-Revision: 0.5
> +Description:
> + /proc/timer_list displays most everything about every kind
> + of timer, and some things about time too.
> +
> + The contents of this file should be expected to change,
> + as the data displayed corresponds directly to various
> + kernel-internal data structures. For this reason, the first
> + line contains the file revision. It is the responsibility
> + of this file's maintainers to bump the revision each time a
> + kernel is released having incompatible changes in this file.
So, on my 2.6.25 system, I see v0.3. And I see that by 2.6.28-rc, we
have v0.4. It would be nice to have some explanation here of what the
x.y version number means in this context.
And where are the differences between versions (e.g., 0.3 and 0.4) documented?
> + Section Overview
> + ----------------
> + The file contains several somewhat independent sections.
> +
> + The first section contains a few lines of global info.
> + Examples: file version id, #clock types in the kernel,
IMO, it really helps readers when documentation is written in
something like natural language. "#clock types" may save you a few
moments of time, but "number of clock types" will probably be clearer
to your (hopefully many) readers.
But: what does "clock types" refer to? The HRTIMER_MAX_CLOCK_BASES
line? If so, that's not obvious from this description.
> + #nsecs since boot.
See previous comment.
> +
> + The second section is organized per-cpu. Each cpu subsection
1,$s/cpu/CPU/g
> + in turn contains several sub-subsections which are, in order
> + of appearance:
> +
> + The contents of the data structures associated with each
> + clock (CLOCK_REALTIME, CLOCK_MONOTONIC, etc) on this cpu.
(How) can I work out whether clock 0 is CLOCK_REALTIME, etc?
> + Examples: base, index, resolution, get_timer, offset.
Ahh -- base looks like one of the changes between v0.3 and v0.4...
Would be nice to let the reader know this, perhaps?
Also, how about adding a line on what each of these fields represents.
> + Under each of these clocks is, in turn, a display of all
> + the active high resolution timers queued to that clock.
> + Example: all lines beginning with '#'.
> +
> + The contents of per-cpu timer data fields not associated
> + with a particular clock type (ie, shared by both clocks or
> + not associated with any clock). Examples: expires_next,
> + hres_active, nr_event, nohz_mode, all things idle_*,
> + tick_stopped, last_jiffies, next_jiffies.
How about adding a line on what each of these fields represents.
> +
> + A display of low resolution (ie, jiffie) timer wheel
> + data. Examples: base, running_timer, timer_jiffies.
At the risk of sounding repetitive... How about adding a line on what
each of these fields represents.
> + Also under this section is a display, one per line, of
> + each active jiffie timer queued to this cpu. Examples:
> + All lines under an 'active jiffie timers' section that
> + begin with a number.
> +
> + The third and final section describes each 'tick device'
> + known to the kernel. A tick device is a piece of hardware
> + capable of generating periodic and/or one shot interrupts
s/one shot/one-shot/
> + under software control, and thus is capable of generating
> + the interrupts needed to expire the various active timers at
> + their given expiration times. Examples: hpet, pit, lapic.
> +
> + Hires Timer Layout
> + ------------------
> + High resolution timers are displayed on lines that begin
s/High resolution/High-resolution/
> + with a '#' and always appear under one of the many sections
> + labeled 'active timers'. There is an 'active timers'
> + section for every cpu and every clock.
> +
> + The fields of a hrtimer, spread out over two lines, are:
> +
> + line 1 fields:
> + 1 - unique hrtimer index (#0, #1, #2, etc)
> + 2 - kernel address of the hrtimer data structure
> + in question
> + 3 - function to be called when timer expires
> + 4 - timer state (eg, S:01), avail states, OR-able:
> + 0 - inactive
> + 1 - enqueued
> + 2 - callback
> + 4 - pending
> + 8 - migrate
> + 5 - function which created the timer
> + 6 - process name & pid which created the timer
> +
> + line 2 fields:
> + 1 - absolute expiration time, range format (start - end)
> + 2 - relative expiration time, range format (start - end)
> +
> + Lowres Timer Layout
> + -------------------
> + Low resolution timers are displayed one-per-line under
s/Low resolution/Low-resolution/
> + sections labeled 'active jiffie timers'. There is one such
> + section per cpu. A lowres timer has the following fields:
> +
> + 1 - #jiffies remaining until timer expires
> + 2 - function to be called on expiration
> + 3 - data value to be given to the above function on
> + expiration
> + 4 - function which created this timer
> + 5 - name & pid of the process that created this timer
Cheers,
Michael
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
man-pages online: http://www.kernel.org/doc/man-pages/online_pages.html
Found a bug? http://www.kernel.org/doc/man-pages/reporting_bugs.html
Document /proc/timer_list ABI, version 2.
This partially documents /proc/timer_list, including the
proposed 'Version 0.5' extensions that add a jiffie timer
display.
v2 exists to address some of the concerns Michael Kerrisk
brought up. What was left out: I did not document old
versions of /proc/timer_list, I did not document the meaning
of the x.y version numbering system (which only Ingo
can answer anyway), and I did not document fields of
secondary importance that already had adequate 'DocBook'
documentation in the kernel sources.
Signed-off-by: Joe Korty <[email protected]>
Index: 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list 2008-12-01 13:07:15.000000000 -0500
@@ -0,0 +1,129 @@
+What: /proc/timer_list
+Date: November 2008
+Contact: Ingo Molnar <[email protected]>
+ Thomas Gleixner <[email protected]>
+ Joe Korty <[email protected]>
+Revision-Rate: Moderate
+At-Revision: 0.5
+Description:
+ /proc/timer_list displays most everything about every kind
+ of timer, and some things about time too.
+
+ The contents of this file should be expected to change,
+ as the data displayed corresponds directly to various
+ kernel-internal data structures. For this reason, the first
+ line contains the file revision. It is the responsibility
+ of this file's maintainers to bump the revision each time a
+ kernel is released having incompatible changes in this file.
+
+ This document covers only the version of /proc/timer_list
+ located in the kernel sources to which it is attached.
+ Documentation for previous (and later) versions of
+ /proc/timer_list is to be found (if they exist) in the
+ kernel sources of those earlier (or later) kernels.
+
+ Section Overview
+ ----------------
+ The file contains several somewhat independent sections.
+
+ The first section contains a few lines of global info:
+ 1 - Timer List Version: File revision.
+ 2 - HRTIMER_MAX_CLOCK_BASES: number of clock types that
+ support high resolution timers.
+ 3 - now at x nsecs: number of nsecs since boot.
+
+ The second section is organized per-CPU. Each CPU subsection
+ in turn contains several sub-subsections which are, in order
+ of appearance:
+
+ The contents of the data structures associated with each
+ clock on this CPU:
+ 1 - clock ID: 0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC
+ 2 - base: kernel address of this clock's
+ hrtimer_clock_base structure.
+ 3 - resolution: resolution of this clock.
+ 4 - get_time: name of kernel function used to fetch
+ time from this clock.
+ 5 - offset: difference, in nsecs, between this clock
+ and the reference clock for this clock.
+ Under each of these clocks is, in turn, a display of all
+ the active high resolution timers queued to that clock.
+ These are the lines beginning with '#' and are described
+ in detail later in this document.
+
+ The contents of per-CPU hrtimer data fields not
+ associated with a particular CPU clock (ie, shared by
+ both clocks or not associated with any clock). These
+ are: expires_next, hres_active, nr_events, nohz_mode, all
+ things idle_*, tick_stopped, last_jiffies, next_jiffies.
+ The above are field names from 'struct tick_sched' and
+ 'struct hrtimer_cpu_base'; documentation for these may
+ be found in the kernel DocBook.
+
+ A display of low resolution (ie, jiffie) timer wheel
+ data. These are prefixed by the lines:
+ 1 - base: kernel virtual address of the timer wheel
+ data structure (struct tvec_base) for this CPU.
+ 2 - running_timer: kernel virtual address of the
+ expired timer being processed, NULL if none.
+ 3 - timer_jiffies: what this wheel considers to
+ be the current time, will be == jiffies or
+ will lag it by a tick or two if it has not
+ caught up with the current time.
+ Also under this section is a display, one per line, of
+ each active jiffie timer queued to this CPU. These are
+ the lines under an 'active jiffie timers' section that
+ begin with a number.
+
+ The third and final section describes each 'tick device'
+ known to the kernel. A tick device is a piece of hardware
+ capable of generating periodic and/or one-shot interrupts
+ under software control, and thus is capable of generating
+ the interrupts needed to expire the various active timers
+ at their given expiration times. Examples of tick devices:
+ hpet, pit, lapic. All but the first two lines display
+ fields corresponding to structure elements from 'struct
+ clock_event_device', documentation for which can be found
+ in the kernel DocBook. The first two lines are:
+ 1 - mode: 0 == periodic timer, 1 == one-shot timer
+ 2 - is 'Per CPU device' or is 'Broadcast device'
+
+ Hires Timer Layout
+ ------------------
+ High-resolution timers are displayed on lines that begin
+ with a '#' and always appear under one of the many sections
+ labeled 'active timers'. There is an 'active timers'
+ section for every CPU and every clock.
+
+ The fields of an hrtimer, spread out over two lines, are:
+
+ line 1 fields:
+ 1 - unique hrtimer index (#0, #1, #2, etc)
+ 2 - kernel address of the hrtimer data structure
+ in question
+ 3 - function to be called when timer expires
+ 4 - timer state (eg, S:01), avail states, OR-able:
+ 0 - inactive
+ 1 - enqueued
+ 2 - callback
+ 4 - pending
+ 8 - migrate
+ 5 - function which created the timer
+ 6 - process name & pid which created the timer
+
+ line 2 fields:
+ 1 - absolute expiration time, range format (start - end)
+ 2 - relative expiration time, range format (start - end)
+
+ Lowres Timer Layout
+ -------------------
+ Low-resolution timers are displayed one-per-line under
+ sections labeled 'active jiffie timers'. There is one such
+ section per CPU. A lowres timer has the following fields:
+
+ 1 - number of jiffies remaining until timer expires
+ 2 - function to be called on expiration
+ 3 - data value to be given to the above function on
+ expiration
+ 4 - function which created this timer
+ 5 - name & pid of the process that created this timer
Joe Korty wrote:
> Document /proc/timer_list ABI, version 2.
>
> This partially documents /timer_list, including the
> proposed 'Version 0.5' extensions that add a jiffie timer
> display.
>
> v2 exists to address some of the concerns Michael Kerrisk
> brought up. What was left out: I did not document old
> versions of /timer_list, I did not document the meaning
> of the x.y version numbering system (which only Ingo
> can answer anyway), and I did not document fields of
> secondary importance that already had adequate 'DocBook'
> documentation in the kernel sources.
>
> Signed-off-by: Joe Korty <[email protected]>
>
> Index: 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ 2.6.28-rc6/Documentation/ABI/stable/procfs-timer_list 2008-12-01 13:07:15.000000000 -0500
> @@ -0,0 +1,129 @@
> +What: /proc/timer_list
> +Date: November 2008
> +Contact: Ingo Molnar <[email protected]>
> + Thomas Gleixner <[email protected]>
> + Joe Korty <[email protected]>
> +Revision-Rate: Moderate
> +At-Revision: 0.5
> +Description:
> + /proc/timer_list displays most everything about every kind
> + of timer, and some things about time too.
> +
> + The contents of this file should be expected to change,
> + as the data displayed corresponds directly to various
> + kernel-internal data structures. For this reason, the first
> + line contains the file revision. It is the responsibility
> + of this file's maintainers to bump the revision each time a
> + kernel is released having incompatible changes in this file.
> +
> + This document covers only the version of /proc/timer_list
> + located in the kernel sources to which it is attached.
> + Documentation for previous (and later) versions of
> + /proc/timer_list is to be found (if they exist) in the
> + kernel sources of those earlier (or later) kernels.
> +
> + Section Overview
> + ----------------
> + The file contains several somewhat independent sections.
> +
> + The first section contains a few lines of global info:
> + 1 - Timer List Version: File revision.
> + 2 - HRTIMER_MAX_CLOCK_BASES: number of clock types that
> + support high resolution timers.
> + 3 - now at x nsecs: number of nsecs since boot.
> +
> + The second section is organized per-CPU. Each CPU subsection
> + in turn contains several sub-subsections which are, in order
> + of appearance:
> +
> + The contents of the data structures associated with each
> + clock on this CPU:
> + 1 - clock ID: 0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC
> + 2 - base: kernel address of this clock's
> + hrtimer_clock_base structure.
> + 3 - resolution: resolution of this clock.
> + 4 - get_time: name of kernel function used to fetch
> + time from this clock.
> + 5 - offset: difference, in nsecs, between this clock
> + and the reference clock for this clock.
> + Under each of these clocks is, in turn, a display of all
> + the active high resolution timers queued to that clock.
> + These are the lines beginning with '#' and are described
> + in detail later in this document.
Are we supposed to be able to see lines beginning with '#' in this text file,
or only in /proc/timer_list ?
> +
> + The contents of per-CPU hrtimer data fields not
> + associated with a particular cpu clock (ie, shared by
Please use "CPU" consistently (instead of "cpu").
> + both clocks or not associated with any clock). These
> + are: expires_next, hres_active, nr_event, nohz_mode, all
> + things idle_*, tick_stopped, last_jiffies, next_jiffies.
> + The above are field names from 'struct tick_sched' and
> + 'struct hrtimer_cpu_base', documentation for these may
'struct hrtimer_cpu_base'; documentation for these may
> + be found in the kernel DocBook.
> +
> + A display of low resolution (ie, jiffie) timer wheel
> + data. These are prefixed by the lines:
> + 1 - base: kernel virtual address of the timer wheel
> + data structure (struct tvec_base) for this cpu.
> + 2 - running timer: kernel virtual address of the
> + expired timer being processed, NULL if none.
> + 3 - timer_jiffies: what this wheel considers to
> + be the current time, will be == jiffies or
> + will lag it by a tick or two if it has not
> + caught up with the current time.
> + Also under this section is a display, one per line, of
> + each active jiffie timer queued to this CPU. These are
> + the lines under an 'active jiffie timers' section that
> + begin with a number.
> +
> + The third and final section describes each 'tick device'
> + known to the kernel. A tick device is a piece of hardware
> + capable of generating periodic and/or one-shot interrupts
> + under software control, and thus is capable of generating
> + the interrupts needed to expire the various active timers
> + at their given expiration times. Examples of tick devices:
> + hpet, pit, lapic. All but the first two lines display
> + fields corresponding to structure elements from 'struct
> + clock_event_device', documentation for which can be found
> + in the kernel Docbook. The first two lines are:
> + 1 - mode: 0 == periodic timer, 1 == one-shot timer
> + 2 - is 'Per CPU device' or is 'Broadcast device'
> +
> + Hires Timer Layout
> + ------------------
> + High-resolution timers are displayed on lines that begin
> + with a '#' and always appear under one of the many sections
> + labeled 'active timers'. There is an 'active timers'
> + section for every CPU and every clock.
> +
> + The fields of a hrtimer, spread out over two lines, are:
an hrtimer,
> +
> + line 1 fields:
> + 1 - unique hrtimer index (#0, #1, #2, etc)
> + 2 - kernel address of the hrtimer data structure
> + in question
> + 3 - function to be called when timer expires
> + 4 - timer state (eg, S:01), avail states, OR-able:
> + 0 - inactive
> + 1 - enqueued
> + 2 - callback
> + 4 - pending
> + 8 - migrate
> + 5 - function which created the timer
> + 6 - process name & pid which created the timer
> +
> + line 2 fields:
> + 1 - absolute expiration time, range format (start - end)
> + 2 - relative expiration time, range format (start - end)
> +
> + Lowres Timer Layout
> + -------------------
> + Low-resolution timers are displayed one-per-line under
> + sections labeled 'active jiffie timers'. There is one such
> + section per CPU. A lowres timer has the following fields:
> +
> + 1 - number of jiffies remaining until timer expires
> + 2 - function to be called on expiration
> + 3 - data value to be given to the above function on
> + expiration
> + 4 - function which created this timer
> + 5 - name & pid of the process that created this timer
Thanks,
~Randy
Hi Randy,
Thanks for taking the time to review this. I get the
impression, though, that the corresponding patch that
creates the 0.5 version of /proc/timer_list is not going
to be accepted. Perhaps I should strip down this ABI
patch to document just the existing 0.4 /proc/timer_list.
Regards,
Joe