2005-01-19 00:07:08

by Tony Lindgren

[permalink] [raw]
Subject: [PATCH] dynamic tick patch

diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/Kconfig 2005-01-18 15:50:17 -08:00
@@ -452,6 +452,14 @@
bool "Provide RTC interrupt"
depends on HPET_TIMER && RTC=y

+config NO_IDLE_HZ
+ bool "Dynamic Tick Timer - Skip timer ticks during idle"
+ help
+ This option enables support for skipping timer ticks when the
+ processor is idle. During system load, timer is continuous.
+ This option saves power, as it allows the system to stay in
+ idle mode longer.
+
config SMP
bool "Symmetric multi-processing support"
---help---
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/irq.c 2005-01-18 15:50:17 -08:00
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/dyn-tick-timer.h>

#ifndef CONFIG_X86_LOCAL_APIC
/*
@@ -100,6 +101,11 @@
} else
#endif
__do_IRQ(irq, regs);
+
+#ifdef CONFIG_NO_IDLE_HZ
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING) && irq != 0)
+ dyn_tick->interrupt(irq, NULL, regs);
+#endif

irq_exit();

diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/time.c 2005-01-18 15:50:17 -08:00
@@ -46,6 +46,7 @@
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
+#include <linux/dyn-tick-timer.h>

#include <asm/io.h>
#include <asm/smp.h>
@@ -301,6 +302,49 @@
return IRQ_HANDLED;
}

+#ifdef CONFIG_NO_IDLE_HZ
+static unsigned long long last_tick;
+void reprogram_pit_tick(int jiffies_to_skip);
+
+#ifdef DEBUG
+#define dbg_dyn_tick_irq() {if (skipped < dyn_tick->skip) \
+ printk("%i/%i ", skipped, dyn_tick->skip);}
+#else
+#define dbg_dyn_tick_irq() {}
+#endif
+
+/*
+ * This interrupt handler updates the time based on number of jiffies skipped
+ * It would be somewhat more optimized to have a customa handler in each timer
+ * using hardware ticks instead of nanoseconds. Note that CONFIG_NO_IDLE_HZ
+ * currently disables timer fallback on skipped jiffies.
+ */
+irqreturn_t dyn_tick_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long flags;
+ volatile unsigned long long now;
+ unsigned int skipped = 0;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ now = cur_timer->get_hw_time();
+ while (now - last_tick >= NS_TICK_LEN) {
+ last_tick += NS_TICK_LEN;
+ cur_timer->mark_offset();
+ do_timer_interrupt(irq, NULL, regs);
+ skipped++;
+ }
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+ dbg_dyn_tick_irq();
+ dyn_tick->skip = 1;
+ reprogram_pit_tick(dyn_tick->skip);
+ dyn_tick->state = DYN_TICK_ENABLED;
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return IRQ_HANDLED;
+}
+#endif
+
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
@@ -396,6 +440,53 @@
}
#endif

+#ifdef CONFIG_NO_IDLE_HZ
+static struct dyn_tick_timer arch_ltt;
+
+/*
+ * Reprograms the next timer interrupt
+ * PIT timer reprogramming code taken from APM code.
+ * Note that PIT timer is a 16-bit timer, which allows max
+ * skip of only few seconds.
+ */
+void reprogram_pit_tick(int jiffies_to_skip)
+{
+ int skip;
+ extern spinlock_t i8253_lock;
+ unsigned long flags;
+
+ skip = jiffies_to_skip * LATCH;
+ if (skip > 0xffff)
+ skip = 0xffff;
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(skip & 0xff, PIT_CH0); /* LSB */
+ outb(skip >> 8, PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+extern void replace_timer_interrupt(void * new_handler);
+
+static int dyn_tick_late_init(void)
+{
+ unsigned long flags;
+
+ if (!cur_timer->get_hw_time)
+ return -ENODEV;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ last_tick = cur_timer->get_hw_time();
+ dyn_tick->skip = 1;
+ dyn_tick->state = DYN_TICK_ENABLED;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ if (cur_timer->late_init)
+ cur_timer->late_init();
+ dyn_tick->interrupt = dyn_tick_timer_interrupt;
+ replace_timer_interrupt(dyn_tick->interrupt);
+
+ return 0;
+}
+#endif
+
void __init time_init(void)
{
#ifdef CONFIG_HPET_TIMER
@@ -416,5 +507,9 @@
cur_timer = select_timer();
printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);

+#ifdef CONFIG_NO_IDLE_HZ
+ arch_ltt.init = dyn_tick_late_init;
+ dyn_tick_register(&arch_ltt);
+#endif
time_init_hook();
}
diff -Nru a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
--- a/arch/i386/kernel/timers/timer_pm.c 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/timers/timer_pm.c 2005-01-18 15:50:17 -08:00
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/init.h>
+#include <linux/dyn-tick-timer.h>
#include <asm/types.h>
#include <asm/timer.h>
#include <asm/smp.h>
@@ -168,6 +169,7 @@
monotonic_base += delta * NSEC_PER_USEC;
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* convert to ticks */
delta += offset_delay;
lost = delta / (USEC_PER_SEC / HZ);
@@ -184,6 +186,7 @@
first_run = 0;
offset_delay = 0;
}
+#endif
}


@@ -238,6 +241,25 @@
return (unsigned long) offset_delay + cyc2us(delta);
}

+static unsigned long long ns_time;
+
+static unsigned long long get_hw_time_pmtmr(void)
+{
+ u32 now, delta;
+ static unsigned int last_cycles;
+ now = read_pmtmr();
+ delta = (now - last_cycles) & ACPI_PM_MASK;
+ last_cycles = now;
+ ns_time += cyc2us(delta) * NSEC_PER_USEC;
+ return ns_time;
+}
+
+static void late_init_pmtmr(void)
+{
+ ns_time = monotonic_clock_pmtmr();
+}
+
+extern irqreturn_t pmtmr_interrupt(int irq, void *dev_id, struct pt_regs *regs);

/* acpi timer_opts struct */
static struct timer_opts timer_pmtmr = {
@@ -245,7 +267,9 @@
.mark_offset = mark_offset_pmtmr,
.get_offset = get_offset_pmtmr,
.monotonic_clock = monotonic_clock_pmtmr,
+ .get_hw_time = get_hw_time_pmtmr,
.delay = delay_pmtmr,
+ .late_init = late_init_pmtmr,
};

struct init_timer_opts __initdata timer_pmtmr_init = {
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/kernel/timers/timer_tsc.c 2005-01-18 15:50:17 -08:00
@@ -112,6 +112,15 @@
return delay_at_last_interrupt + edx;
}

+static unsigned long get_hw_time_tsc(void)
+{
+ register unsigned long eax, edx;
+
+ unsigned long long hw_time;
+ rdtscll(hw_time);
+ return cycles_2_ns(hw_time);
+}
+
static unsigned long long monotonic_clock_tsc(void)
{
unsigned long long last_offset, this_offset, base;
@@ -348,6 +357,7 @@

rdtsc(last_tsc_low, last_tsc_high);

+#ifndef CONFIG_NO_IDLE_HZ
spin_lock(&i8253_lock);
outb_p(0x00, PIT_MODE); /* latch the count ASAP */

@@ -415,14 +425,18 @@
cpufreq_delayed_get();
} else
lost_count = 0;
+#endif
+
/* update the monotonic base value */
this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
monotonic_base += cycles_2_ns(this_offset - last_offset);
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* calculate delay_at_last_interrupt */
count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+#endif

/* catch corner case where tick rollover occured
* between tsc and pit reads (as noted when
@@ -551,6 +565,7 @@
.mark_offset = mark_offset_tsc,
.get_offset = get_offset_tsc,
.monotonic_clock = monotonic_clock_tsc,
+ .get_hw_time = get_hw_time_tsc,
.delay = delay_tsc,
};

diff -Nru a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
--- a/arch/i386/mach-default/setup.c 2005-01-18 15:50:17 -08:00
+++ b/arch/i386/mach-default/setup.c 2005-01-18 15:50:17 -08:00
@@ -85,6 +85,22 @@
setup_irq(0, &irq0);
}

+/**
+ * replace_timer_interrupt - allow replacing timer interrupt handler
+ *
+ * Description:
+ * Can be used to replace timer interrupt handler with a more optimized
+ * handler. Used for enabling and disabling of CONFIG_NO_IDLE_HZ.
+ */
+void replace_timer_interrupt(void * new_handler)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ irq0.handler = new_handler;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
#ifdef CONFIG_MCA
/**
* mca_nmi_hook - hook into MCA specific NMI chain
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h 2005-01-18 15:50:17 -08:00
+++ b/include/asm-i386/timer.h 2005-01-18 15:50:17 -08:00
@@ -1,6 +1,7 @@
#ifndef _ASMi386_TIMER_H
#define _ASMi386_TIMER_H
#include <linux/init.h>
+#include <linux/interrupt.h>

/**
* struct timer_ops - used to define a timer source
@@ -21,7 +22,9 @@
void (*mark_offset)(void);
unsigned long (*get_offset)(void);
unsigned long long (*monotonic_clock)(void);
+ unsigned long long (*get_hw_time)(void);
void (*delay)(unsigned long);
+ void (*late_init)(void);
};

struct init_timer_opts {
diff -Nru a/include/linux/dyn-tick-timer.h b/include/linux/dyn-tick-timer.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/include/linux/dyn-tick-timer.h 2005-01-18 15:50:17 -08:00
@@ -0,0 +1,55 @@
+/*
+ * linux/include/linux/dyn-tick-timer.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/interrupt.h>
+
+#define DYN_TICK_SKIPPING (1 << 2)
+#define DYN_TICK_RUNNING (1 << 1)
+#define DYN_TICK_ENABLED (1 << 0)
+
+struct dyn_tick_state {
+ unsigned int state; /* Current state */
+ unsigned long idle_mask; /* Idle processor mask */
+ unsigned int skip; /* Ticks to skip */
+ unsigned long irq_skip_mask; /* Do not update time from these irqs */
+ irqreturn_t (*interrupt)(int, void *, struct pt_regs *);
+};
+
+/* REVISIT: Add functions to enable/disable dyn-tick on the fly */
+struct dyn_tick_timer {
+ int (*init) (void);
+};
+
+extern struct dyn_tick_state * dyn_tick;
+extern struct dyn_tick_timer * ltt;
+extern void dyn_tick_register(struct dyn_tick_timer * new_timer);
+
+#define NS_TICK_LEN ((1 * 1000000000)/HZ)
+
+/* On x86, MAX_SKIP_JIFFIES is limited by the PIT timer length */
+#define MAX_SKIP_JIFFIES (0xffff/LATCH)
diff -Nru a/kernel/Makefile b/kernel/Makefile
--- a/kernel/Makefile 2005-01-18 15:50:17 -08:00
+++ b/kernel/Makefile 2005-01-18 15:50:17 -08:00
@@ -26,6 +26,7 @@
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
+obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick-timer.o

ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
diff -Nru a/kernel/dyn-tick-timer.c b/kernel/dyn-tick-timer.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/kernel/dyn-tick-timer.c 2005-01-18 15:50:17 -08:00
@@ -0,0 +1,121 @@
+/*
+ * linux/kernel/dyn-tick-timer.c
+ *
+ * Beginnings of generic dynamic tick timer support
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * TODO:
+ * - Add functions for enabling/disabling dyn-tick on the fly
+ * - Generalize to work with ARM sys_timer
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/pm.h>
+#include <linux/dyn-tick-timer.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+#define VERSION 050109-4
+
+struct dyn_tick_state dyn_tick_state;
+struct dyn_tick_state * dyn_tick = &dyn_tick_state;
+struct dyn_tick_timer dyn_tick_timer;
+struct dyn_tick_timer * ltt = &dyn_tick_timer;
+static void (*orig_idle) (void) = 0;
+extern void reprogram_pit_tick(int jiffies_to_skip);
+static cpumask_t dyn_cpu_map;
+
+/*
+ * We want to have all processors idle before reprogramming the next
+ * timer interrupt. Note that we must maintain the state for dynamic tick,
+ * otherwise the idle loop could be reprogramming the timer continuously
+ * further into the future, and the timer interrupt would never happen.
+ */
+static void dyn_tick_idle(void)
+{
+ int cpu;
+ unsigned long flags;
+
+ if (!(dyn_tick->state & DYN_TICK_ENABLED))
+ goto out;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ cpu = smp_processor_id();
+ cpu_set(cpu, dyn_cpu_map);
+ if (!(dyn_tick->state & DYN_TICK_SKIPPING) && cpus_full(dyn_cpu_map)) {
+ dyn_tick->skip = next_timer_interrupt();
+ if (dyn_tick->skip > MAX_SKIP_JIFFIES)
+ dyn_tick->skip = MAX_SKIP_JIFFIES;
+ reprogram_pit_tick(dyn_tick->skip);
+ dyn_tick->state |= DYN_TICK_SKIPPING;
+ cpus_clear(dyn_cpu_map);
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+out:
+ if (orig_idle)
+ orig_idle();
+ else
+ safe_halt();
+}
+
+void __init dyn_tick_register(struct dyn_tick_timer * new_timer)
+{
+ ltt->init = new_timer->init;
+ printk(KERN_INFO "dyn-tick: Registering dynamic tick timer\n");
+}
+
+/*
+ * We need to initialize dynamic tick after calibrate delay
+ */
+static int __init dyn_tick_init(void)
+{
+ int ret = 0;
+
+ printk(KERN_INFO "dyn-tick: Enabling dynamic tick timer\n");
+ if (ltt->init) {
+ ret = ltt->init();
+ if (ret != 0) {
+ printk(KERN_WARNING "dyn-tick: Cannot use this timer\n");
+ goto out;
+ }
+ }
+ orig_idle = pm_idle;
+ pm_idle = dyn_tick_idle;
+ cpu_idle_wait();
+ printk(KERN_INFO "dyn-tick: Timer using dynamic tick\n");
+
+ out:
+ return ret;
+}
+late_initcall(dyn_tick_init);


Attachments:
(No filename) (1.59 kB)
patch-dynamic-tick-2.6.11-rc1-050118-1 (15.58 kB)
Download all attachments

2005-01-19 00:23:11

by Lee Revell

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> Currently supported timers are TSC and ACPI PM timer. Other
> timers should be easy to add. Both TSC and ACPI PM timer
> rely on the PIT timer for interrupts, so the maximum skip
> inbetween ticks is only few seconds at most.
>

An interesting hack if your sound cards interval timer is supported and
can interrupt at high enough resolution (currently ymfpci, emu10k1 and
some ISA cards) would be to use it as the system timer. Who knows, it
might even be useful for games, music and AV stuff that clocks off the
sound card anyway. It would probably be easy, ALSA has a very clean
timer API.

Lee

2005-01-19 01:05:13

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Lee Revell <[email protected]> [050118 16:22]:
> On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> > Currently supported timers are TSC and ACPI PM timer. Other
> > timers should be easy to add. Both TSC and ACPI PM timer
> > rely on the PIT timer for interrupts, so the maximum skip
> > inbetween ticks is only few seconds at most.
> >
>
> An interesting hack if your sound cards interval timer is supported and
> can interrupt at high enough resolution (currently ymfpci, emu10k1 and
> some ISA cards) would be to use it as the system timer. Who knows, it
> might even be useful for games, music and AV stuff that clocks off the
> sound card anyway. It would probably be easy, ALSA has a very clean
> timer API.

Hmmm, that never occured to me, but sounds interesting. I wonder if
the patch already removes some latencies, as the sound card interrupt
triggers the timer interrupt as well?

Tony

2005-01-19 04:25:15

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> Hi all,
>
> Attached is the dynamic tick patch for x86 to play with
> as I promised in few threads earlier on this list.[1][2]
>
> The dynamic tick patch does following:
>
> .../...

Nice, that's exactly what I want on ppc to allow the laptops to have the
CPU "nap" longer when idle ! I'll look into adding ppc support to your
patch soon.

BTW. Is it possible, when entering the "idle" loop, to quickly know an
estimate of when the next tick shoud actually kick in ?

Also, looking at the patch, I think it mixes a bit too much of x86
things with generic stuffs... like pm_idle an x86 thing.

Other implementation details comments: Do you need all those globals to
be exported ? And give them better names than "ltt", that makes using of
system.map quite annoying ;)

I don't understand your comment about "we must have all processors idle"
as well...

So while the whole thing is interesting, I dislike the actual
kernel/dyn-tick-timer.c implementation, which should be moved to arch
stuff at this point imho.

Ben.


2005-01-19 05:07:41

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Benjamin Herrenschmidt <[email protected]> [050118 20:22]:
> On Tue, 2005-01-18 at 16:05 -0800, Tony Lindgren wrote:
> > Hi all,
> >
> > Attached is the dynamic tick patch for x86 to play with
> > as I promised in few threads earlier on this list.[1][2]
> >
> > The dynamic tick patch does following:
> >
> > .../...
>
> Nice, that's exactly what I want on ppc to allow the laptops to have the
> CPU "nap" longer when idle ! I'll look into adding ppc support to your
> patch soon.

Great!

> BTW. Is it possible, when entering the "idle" loop, to quickly know an
> estimate of when the next tick shoud actually kick in ?

Yes, see next_timer_interrupt() for that. The interrupt loop should
be pretty much the same on all archs. Then calling the timer
interrupt from other interrupts removes any latency issues with the
timer. But that's pretty much all the patch does.

> Also, looking at the patch, I think it mixes a bit too much of x86
> things with generic stuffs... like pm_idle an x86 thing.

Yes, the idle module should probably be in drivers/acpi or something
to allow loading other custom PM modules.

> Other implementation details comments: Do you need all those globals to
> be exported ? And give them better names than "ltt", that makes using of
> system.map quite annoying ;)

Oops, ltt, is probably left-over from low-tick-timer that I used
first as a name... I'll fix that :)

> I don't understand your comment about "we must have all processors idle"
> as well...

Hmmm, maybe it's not needed any longer? Have to try it out. I had
some issues with SMP when I started doing the patch.

> So while the whole thing is interesting, I dislike the actual
> kernel/dyn-tick-timer.c implementation, which should be moved to arch
> stuff at this point imho.

Yeah, there's not much shared code yet, when I started I expected to
share more code between ARM and x86. But the timer framework is
quite arch specific. So far only registering and /sys control to
enable seems common. Maybe some inline functions too, but a common
header might be enough.

Regards,

Tony

2005-01-19 05:23:04

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Tony Lindgren <[email protected]> [050118 21:08]:
> * Benjamin Herrenschmidt <[email protected]> [050118 20:22]:
> >
> > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > estimate of when the next tick shoud actually kick in ?
>
> Yes, see next_timer_interrupt() for that.

Hmmm, or maybe you mean _quick_estimate_ instead of
next_timer_interrupt()?

I don't think there's any faster way to estimate the skippable ticks
without going through the list like next_timer_interrupt already does.
Does anybody have any ideas for that?

Tony

2005-01-19 05:30:00

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hrm... reading more of the patch & Martin's previous work, I'm not sure
I like the idea too much in the end... The main problem is that you are
just "replaying" the ticks afterward, which I see as a problem for
things like sched_clock() which returns the real current time, no ?

I'll toy a bit with my own implementation directly using Martin's work
and see what kind of improvement I really get on ppc laptops.

Ben.


2005-01-19 05:45:46

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Tue, 2005-01-18 at 21:21 -0800, Tony Lindgren wrote:
> * Tony Lindgren <[email protected]> [050118 21:08]:
> > * Benjamin Herrenschmidt <[email protected]> [050118 20:22]:
> > >
> > > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > > estimate of when the next tick shoud actually kick in ?
> >
> > Yes, see next_timer_interrupt() for that.
>
> Hmmm, or maybe you mean _quick_estimate_ instead of
> next_timer_interrupt()?
>
> I don't think there's any faster way to estimate the skippable ticks
> without going through the list like next_timer_interrupt already does.
> Does anybody have any ideas for that?

No, that's fine, we already have to call it before entering the PM
state, so I'll just pass it along and, at the low level, decide how
deep to sleep based on that.

I think I should also add some stats on the amount of interrupts, since
it would be fairly inefficient to keep entering deep PM state on a
machine with typically little timer interrupts but high HW interrupt
(Rusty mentions case of packet forwarding routers or that kind of thing)

Ben.


2005-01-19 06:26:38

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Benjamin Herrenschmidt <[email protected]> [050118 21:45]:
> On Tue, 2005-01-18 at 21:21 -0800, Tony Lindgren wrote:
> > * Tony Lindgren <[email protected]> [050118 21:08]:
> > > * Benjamin Herrenschmidt <[email protected]> [050118 20:22]:
> > > >
> > > > BTW. Is it possible, when entering the "idle" loop, to quickly know an
> > > > estimate of when the next tick shoud actually kick in ?
> > >
> > > Yes, see next_timer_interrupt() for that.
> >
> > Hmmm, or maybe you mean _quick_estimate_ instead of
> > next_timer_interrupt()?
> >
> > I don't think there's any faster way to estimate the skippable ticks
> > without going through the list like next_timer_interrupt already does.
> > Does anybody have any ideas for that?
>
> No, that's fine, we already have to call it before entering the PM
> state, so I'll just pass it along and, at the low level, decide how
> deep to sleep based on that.
>
> I think I should also add some stats on the amount of interrupts, since
> it would be fairly inefficient to keep entering deep PM state on a
> machine with typically little timer interrupts but high HW interrupt
> (Rusty mentions case of packet forwarding routers or that kind of thing)

Maybe some HW timer interrupt mask could be used? Also it would be
nice to check for file IO.

Tony

2005-01-19 06:38:39

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Benjamin Herrenschmidt <[email protected]> [050118 21:29]:
> Hrm... reading more of the patch & Martin's previous work, I'm not sure
> I like the idea too much in the end... The main problem is that you are
> just "replaying" the ticks afterward, which I see as a problem for
> things like sched_clock() which returns the real current time, no ?

Well so far I haven't found problems with time. Since sched_clock()
returns the hw time, how does it cause a problem? Do you have some
example in mind? Maybe there's something I haven't even considered
yet.

> I'll toy a bit with my own implementation directly using Martin's work
> and see what kind of improvement I really get on ppc laptops.

I'd be interested in what you come up with :)

Tony

2005-01-19 07:09:40

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Tue, 2005-01-18 at 22:37 -0800, Tony Lindgren wrote:
> * Benjamin Herrenschmidt <[email protected]> [050118 21:29]:
> > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > I like the idea too much in the end... The main problem is that you are
> > just "replaying" the ticks afterward, which I see as a problem for
> > things like sched_clock() which returns the real current time, no ?
>
> Well so far I haven't found problems with time. Since sched_clock()
> returns the hw time, how does it cause a problem? Do you have some
> example in mind? Maybe there's something I haven't even considered
> yet.
>
> > I'll toy a bit with my own implementation directly using Martin's work
> > and see what kind of improvement I really get on ppc laptops.
>
> I'd be interested in what you come up with :)

Well, I did a very simple implementation entirely local to
arch/ppc/kernel, that basically calls timer_interrupt on every do_IRQ, I
don't change timer_interrupt (our implementation already knows how to
"catch up" already if missed ticks and knows how to deal beeing called
to early as well). Then, when going to idle loop, I "override" the
decrementer interrupt setting to be further in the future if
next_timer_interrupt() returns more than 1.

Strangely, I got not measurable improvement on power consumption despite
putting the CPU longer into NAP mode. Note that this may be very
different with earlier (G3 notably) CPUs, since G3 users repeately
reported me havign a significant loss in battery life with HZ=1000

Later, I'll do some stats to check how long I really slept, and see if
it's worth, when I predict a long sleep, flushing the cache and going
into a deeper PM mode where cache coherency is disabled too.

Ben.


2005-01-19 07:31:45

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Benjamin Herrenschmidt <[email protected]> [050118 23:09]:
> On Tue, 2005-01-18 at 22:37 -0800, Tony Lindgren wrote:
> > * Benjamin Herrenschmidt <[email protected]> [050118 21:29]:
> > > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > > I like the idea too much in the end... The main problem is that you are
> > > just "replaying" the ticks afterward, which I see as a problem for
> > > things like sched_clock() which returns the real current time, no ?
> >
> > Well so far I haven't found problems with time. Since sched_clock()
> > returns the hw time, how does it cause a problem? Do you have some
> > example in mind? Maybe there's something I haven't even considered
> > yet.
> >
> > > I'll toy a bit with my own implementation directly using Martin's work
> > > and see what kind of improvement I really get on ppc laptops.
> >
> > I'd be interested in what you come up with :)
>
> Well, I did a very simple implementation entirely local to
> arch/ppc/kernel, that basically calls timer_interrupt on every do_IRQ, I
> don't change timer_interrupt (our implementation already knows how to
> "catch up" already if missed ticks and knows how to deal beeing called
> to early as well). Then, when going to idle loop, I "override" the
> decrementer interrupt setting to be further in the future if
> next_timer_interrupt() returns more than 1.

That sounds interesting, I'll check it out. Do you already have it
available somewhere?

BTW, It would be nice to be able to just skip ticks, maybe Martin's
cputime patch allows that.

> Strangely, I got not measurable improvement on power consumption despite
> putting the CPU longer into NAP mode. Note that this may be very
> different with earlier (G3 notably) CPUs, since G3 users repeately
> reported me havign a significant loss in battery life with HZ=1000

Yeah, it could be that NAP mode wakes up too early. I haven't looked
much what happens on my machine with ACPI, but I have feeling C2 idle
mode wakes up before the next timer interrupt.

It could also be that the difference between idling the cpu more
is minimal. But if there's a difference with HZ=1000, it sounds like
idling the cpu longer should make a difference. Unless of course
calling next_timer_interrupt() continuously eats away the gain :)

> Later, I'll do some stats to check how long I really slept, and see if
> it's worth, when I predict a long sleep, flushing the cache and going
> into a deeper PM mode where cache coherency is disabled too.

I think that's where there should be some real power savings showing up.

Regards,

Tony

2005-01-19 09:44:19

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> Attached is the dynamic tick patch for x86 to play with
> as I promised in few threads earlier on this list.[1][2]
>
> The dynamic tick patch does following:
>
> - Separates timer interrupts from updating system time
>
> - Allows updating time from other interrupts in addition
> to timer interrupt
>
> - Makes timer tick dynamic
>
> - Allows power management modules to take advantage of the
> idle time inbetween skipped ticks
>
> - Might help with the whistling caps?
>
> The patch should be non-intrusive where possible. The system
> boots with the regular timers, and then later on switches on
> the dynamic tick if the selected driver implements get_hw_time()
> function.
>
> Currently supported timers are TSC and ACPI PM timer. Other
> timers should be easy to add. Both TSC and ACPI PM timer
> rely on the PIT timer for interrupts, so the maximum skip
> inbetween ticks is only few seconds at most.
>
> Please note that this patch alone does not help much with
> power savings. More work is needed in that area to make the
> system take advantage of the idle time inbetween the skipped
> ticks.

Well, having HZ=100 instead of HZ=1000 has measurable benefits on
power consumption. This should be at least as good, no?
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 09:46:07

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > No, that's fine, we already have to call it before entering the PM
> > state, so I'll just pass it along and, at the low level, decide how
> > deep to sleep based on that.
> >
> > I think I should also add some stats on the amount of interrupts, since
> > it would be fairly inefficient to keep entering deep PM state on a
> > machine with typically little timer interrupts but high HW interrupt
> > (Rusty mentions case of packet forwarding routers or that kind of thing)
>
> Maybe some HW timer interrupt mask could be used? Also it would be
> nice to check for file IO.

Well, you could mask those interrupts, but it would ruin your
packet-forwarding performance, so you probably do not want to do
that...
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 11:32:44

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> As this patch is related to the VST/High-Res timers, there
> are probably various things that can be merged. I have not
> yet looked at what all could be merged.
>
> I'd appreciate some comments and testing!

Good news is that it does seem to reduce number of interrupts. Bad
news is that time now runs faster (like "sleep 10" finishes in ~5
seconds) and that I could not measure any difference in power
consumption.

Pavel

root@amd:~# date; cat /proc/interrupts ; sleep 10; date; cat
/proc/interrupts
Wed Jan 19 12:30:46 CET 2005
CPU0
0: 18136 IO-APIC-edge timer
1: 557 IO-APIC-edge i8042
10: 148 IO-APIC-level acpi
12: 69 IO-APIC-edge i8042
14: 1587 IO-APIC-edge ide0
15: 14 IO-APIC-edge ide1
17: 1 IO-APIC-level yenta
19: 2 IO-APIC-level ohci1394
21: 0 IO-APIC-level ehci_hcd, uhci_hcd, uhci_hcd, uhci_hcd
22: 0 IO-APIC-level VIA8233
23: 0 IO-APIC-level eth0
NMI: 0
LOC: 20924
ERR: 0
MIS: 0
Wed Jan 19 12:30:56 CET 2005
CPU0
0: 18253 IO-APIC-edge timer
1: 558 IO-APIC-edge i8042
10: 148 IO-APIC-level acpi
12: 69 IO-APIC-edge i8042
14: 1587 IO-APIC-edge ide0
15: 14 IO-APIC-edge ide1
17: 1 IO-APIC-level yenta
19: 2 IO-APIC-level ohci1394
21: 0 IO-APIC-level ehci_hcd, uhci_hcd, uhci_hcd, uhci_hcd
22: 0 IO-APIC-level VIA8233
23: 0 IO-APIC-level eth0
NMI: 0
LOC: 21062
ERR: 0
MIS: 0
root@amd:~#

(But it really took somewhere around 5 seconds).
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 14:12:34

by Stephen Frost

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Benjamin Herrenschmidt ([email protected]) wrote:
> Hrm... reading more of the patch & Martin's previous work, I'm not sure
> I like the idea too much in the end... The main problem is that you are
> just "replaying" the ticks afterward, which I see as a problem for
> things like sched_clock() which returns the real current time, no ?
>
> I'll toy a bit with my own implementation directly using Martin's work
> and see what kind of improvement I really get on ppc laptops.

I don't know if this is the same thing, or the same issue, but I've
noticed on my Windows machines that the longer my laptop sleeps the
longer it takes for it to wake back up- my guess is that it's doing
exactly this (replaying ticks). It *really* sucks though because it can
take quite a while for it to come back if it's been asleep for a while.

Stephen


Attachments:
(No filename) (846.00 B)
signature.asc (189.00 B)
Digital signature
Download all attachments

2005-01-19 17:13:35

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Pavel Machek <[email protected]> [050119 03:32]:
> Hi!
>
> > As this patch is related to the VST/High-Res timers, there
> > are probably various things that can be merged. I have not
> > yet looked at what all could be merged.
> >
> > I'd appreciate some comments and testing!
>
> Good news is that it does seem to reduce number of interrupts. Bad
> news is that time now runs faster (like "sleep 10" finishes in ~5
> seconds) and that I could not measure any difference in power
> consumption.

Thanks for trying it out. I have quite accurate time here on my
systems, and sleep works as it should. I wonder what's happening on
your system? If you have a chance, could you please post the results
from following simple tests?

Regards,

Tony

# dmesg | grep -i time
Using tsc for high-res timesource
dyn-tick: Registering dynamic tick timer
per-CPU timeslice cutoff: 731.77 usecs.
task migration cache decay timeout: 1 msecs.
..TIMER: vector=0x31 pin1=2 pin2=-1
Machine check exception polling timer started.
Real Time Clock Driver v1.12
dyn-tick: Enabling dynamic tick timer
dyn-tick: Timer using dynamic tick

# for i in 1 2 3 4 5; do ntpdate -b rinkeli && sleep 10; done
19 Jan 17:03:16 ntpdate[937]: step time server 192.168.100.254 offset -0.002639 sec
19 Jan 17:03:26 ntpdate[941]: step time server 192.168.100.254 offset -0.000374 sec
19 Jan 17:03:36 ntpdate[945]: step time server 192.168.100.254 offset -0.000100 sec
19 Jan 17:03:47 ntpdate[949]: step time server 192.168.100.254 offset -0.000530 sec
19 Jan 17:03:57 ntpdate[953]: step time server 192.168.100.254 offset -0.000841 sec

# date && sleep 10 && date
Wed Jan 19 17:05:35 UTC 2005
Wed Jan 19 17:05:45 UTC 2005

# while [ 1 ]; do date; done | uniq
Wed Jan 19 17:06:14 UTC 2005
Wed Jan 19 17:06:15 UTC 2005
Wed Jan 19 17:06:16 UTC 2005
Wed Jan 19 17:06:17 UTC 2005
...

2005-01-19 17:15:13

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Pavel Machek <[email protected]> [050119 01:44]:
>
> >
> > Please note that this patch alone does not help much with
> > power savings. More work is needed in that area to make the
> > system take advantage of the idle time inbetween the skipped
> > ticks.
>
> Well, having HZ=100 instead of HZ=1000 has measurable benefits on
> power consumption. This should be at least as good, no?

HZ=100 does not allow improving the idle loop much further
from what we have. We should be able to take advantage of the
longer idle/sleep periods inbetween the skipped ticks.

Tony

2005-01-19 17:23:39

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Stephen Frost <[email protected]> [050119 06:11]:
> * Benjamin Herrenschmidt ([email protected]) wrote:
> > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > I like the idea too much in the end... The main problem is that you are
> > just "replaying" the ticks afterward, which I see as a problem for
> > things like sched_clock() which returns the real current time, no ?
> >
> > I'll toy a bit with my own implementation directly using Martin's work
> > and see what kind of improvement I really get on ppc laptops.
>
> I don't know if this is the same thing, or the same issue, but I've
> noticed on my Windows machines that the longer my laptop sleeps the
> longer it takes for it to wake back up- my guess is that it's doing
> exactly this (replaying ticks). It *really* sucks though because it can
> take quite a while for it to come back if it's been asleep for a while.

That sounds like suspend related thing, while this is an idle loop
issue. On my machine with PIT timer doing the interrupts, the skip is
only 54 ticks, so catching up is very fast :) But if the machine was
able to idle for seconds at time inbetween ticks, it would be noticable.

Regards,

Tony

2005-01-19 17:34:23

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, 2005-01-19 at 09:11 -0800, Tony Lindgren wrote:
> * Pavel Machek <[email protected]> [050119 03:32]:
> > Hi!
> >
> > > As this patch is related to the VST/High-Res timers, there
> > > are probably various things that can be merged. I have not
> > > yet looked at what all could be merged.
> > >
> > > I'd appreciate some comments and testing!
> >
> > Good news is that it does seem to reduce number of interrupts. Bad
> > news is that time now runs faster (like "sleep 10" finishes in ~5
> > seconds) and that I could not measure any difference in power
> > consumption.
>
> Thanks for trying it out. I have quite accurate time here on my
> systems, and sleep works as it should. I wonder what's happening on
> your system? If you have a chance, could you please post the results
> from following simple tests?

tsc is dangerous for this btw; several cpus go either slower or stop tsc
entirely during hlt... eg when idle.
I would suggest to not include a tsc driver for this (otherwise really
cool) feature.


2005-01-19 17:40:27

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Martin Schwidefsky <[email protected]> [050119 07:19]:
>
>
>
>
> Stephen Frost <[email protected]> wrote on 19/01/2005 03:11:15 PM:
>
> > > Hrm... reading more of the patch & Martin's previous work, I'm not sure
> > > I like the idea too much in the end... The main problem is that you are
> > > just "replaying" the ticks afterward, which I see as a problem for
> > > things like sched_clock() which returns the real current time, no ?
> > >
> > > I'll toy a bit with my own implementation directly using Martin's work
> > > and see what kind of improvement I really get on ppc laptops.
> >
> > I don't know if this is the same thing, or the same issue, but I've
> > noticed on my Windows machines that the longer my laptop sleeps the
> > longer it takes for it to wake back up- my guess is that it's doing
> > exactly this (replaying ticks). It *really* sucks though because it can
> > take quite a while for it to come back if it's been asleep for a while.
>
> That is the while loop that calls do_timer for each missed timer tick.
> In my very first try in regard to a tick less system it tried to avoid
> the loop with a new interface that allowed to account several ticks
> (the posting on lkml and the patch can still be found in the archives,
> e.g. http://marc.theaimsgroup.com/?l=linux-kernel&m=98683292412129&w=2)
> We could try to revive the idea and add a #ticks parameter to do_timer(),
> update_process_times and friends. The main obstacle has been ntp with
> its time adjustments. There is another patch from John Stultz that
> introduces new time-of-day code, this would take care of the ntp problem
> (see http://marc.theaimsgroup.com/?l=linux-kernel&m=110247121329835&w=2).

Being able to skip multiple ticks would save some overhead with catching
up with the time after idle in this case.

Also John's patch for using nsecs looks very interesting. On ARM,
new sys_timer code should make the John's patch a bit easier to use.

Regards,

Tony

2005-01-19 17:44:20

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Arjan van de Ven <[email protected]> [050119 09:31]:
> On Wed, 2005-01-19 at 09:11 -0800, Tony Lindgren wrote:
> > * Pavel Machek <[email protected]> [050119 03:32]:
> > > Hi!
> > >
> > > > As this patch is related to the VST/High-Res timers, there
> > > > are probably various things that can be merged. I have not
> > > > yet looked at what all could be merged.
> > > >
> > > > I'd appreciate some comments and testing!
> > >
> > > Good news is that it does seem to reduce number of interrupts. Bad
> > > news is that time now runs faster (like "sleep 10" finishes in ~5
> > > seconds) and that I could not measure any difference in power
> > > consumption.
> >
> > Thanks for trying it out. I have quite accurate time here on my
> > systems, and sleep works as it should. I wonder what's happening on
> > your system? If you have a chance, could you please post the results
> > from following simple tests?
>
> tsc is dangerous for this btw; several cpus go either slower or stop tsc
> entirely during hlt... eg when idle.
> I would suggest to not include a tsc driver for this (otherwise really
> cool) feature.

Yeah, I just started with what was running on my old box :)
I'll make it a separate Kconfig option with notes on that. The TSC
timer is currently as accurate as without dyn-tick, but the ACPI PM
timer's accuracy suffers a bit for some reason.

Tony

2005-01-19 17:56:40

by Andrea Arcangeli

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, Jan 19, 2005 at 09:13:23AM -0800, Tony Lindgren wrote:
> HZ=100 does not allow improving the idle loop much further
> from what we have. We should be able to take advantage of the
> longer idle/sleep periods inbetween the skipped ticks.

OTOH servers aren't just doing idle power saving, if you're computing
wasting 1% of your cpu isn't always desiderable.

You'd need to trap into add_timer to make it a truly dynamic timer, only
then it would obsolete the HZ=100. So if there's no timer you run at
HZ=100, while if there's some timer pending in the next 10 timer queues
you run at HZ=1000.

The main problem I can see with your patch is the accuracy issue with
the system time. I couldn't fix the algorithm you're depending on to get
accurate system time, and I can guarantee such algorithm never worked
accurately here and worst of all it doesn't printk anything (which is
why it doesn't printk for your patch), so you only see system time go in
the future at an excessive rate (minutes per hour IIRC).

Pavel posted the cli/sti script, that should allow to reproduce the
inaccuracy of the algorithm. I had to set HZ=100 by hand to workaround
the usb modem irq latency that is about 3/4 msec.

Once in a while such algorithm overstates the number of ticks that have
been missed, and so the system time goes 1msec in the future when that
happens. I still suspect there might be a bug in such code though.
There's an unfixable window between the latch read and the tsc
read where an error can happen, but as long as the window is below
0.5msec, it should always be possible to regenerate the accurate timing
with the current algo, but in practice it fails to be accurate...

2005-01-19 18:22:31

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Andrea Arcangeli <[email protected]> [050119 09:49]:
> On Wed, Jan 19, 2005 at 09:13:23AM -0800, Tony Lindgren wrote:
> > HZ=100 does not allow improving the idle loop much further
> > from what we have. We should be able to take advantage of the
> > longer idle/sleep periods inbetween the skipped ticks.
>
> OTOH servers aren't just doing idle power saving, if you're computing
> wasting 1% of your cpu isn't always desiderable.

That's true, this is more for battery operated systems. In the
current patch there's not that much extra calculation going on when
the system is busy, but there is some overhead.

> You'd need to trap into add_timer to make it a truly dynamic timer, only
> then it would obsolete the HZ=100. So if there's no timer you run at
> HZ=100, while if there's some timer pending in the next 10 timer queues
> you run at HZ=1000.

OK, that makes sense.

> The main problem I can see with your patch is the accuracy issue with
> the system time. I couldn't fix the algorithm you're depending on to get
> accurate system time, and I can guarantee such algorithm never worked
> accurately here and worst of all it doesn't printk anything (which is
> why it doesn't printk for your patch), so you only see system time go in
> the future at an excessive rate (minutes per hour IIRC).

Sounds like for some reason it does not work on your computer then.
I've tried it only on four different machines here, with one SMP
box, and it works fine here. And the debug printk works too, and
time is accurate.

If you have a chance, can you please provide me with some more info
on your system, see my recent reply to Pavel in this thread for the
tests I've been using. Sounds like your system won't boot well enough
to carry out the tests though...

> Pavel posted the cli/sti script, that should allow to reproduce the
> inaccuracy of the algorithm. I had to set HZ=100 by hand to workaround
> the usb modem irq latency that is about 3/4 msec.

Great, I'd like to try it out here.

> Once in a while such algorithm overstates the number of ticks that have
> been missed, and so the system time goes 1msec in the future when that
> happens. I still suspect there might be a bug in such code though.
> There's an unfixable window between the latch read and the tsc
> read where an error can happen, but as long as the window is below
> 0.5msec, it should always be possible to regenerate the accurate timing
> with the current algo, but in practice it fails to be accurate...

Sure there's a lot of room for improvment, but it works quite
accurately here.

Tony

2005-01-19 19:12:22

by Andrea Arcangeli

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, Jan 19, 2005 at 10:19:47AM -0800, Tony Lindgren wrote:
> If you have a chance, can you please provide me with some more info
> on your system, see my recent reply to Pavel in this thread for the

It's a normal UP athlon 1ghz, it should be quite widespread hardware.
I know at least another system that had the problem of system time going
in the future with 2.6 at the same rate of mine. Still it could be an
hardware issue after all if nobody else can reproduce it. At HZ=100 the
system time is again perfectly accurate like in 2.4, so probably at least
the PIT is ok.

2005-01-19 19:18:49

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Andrea Arcangeli <[email protected]> [050119 11:12]:
> On Wed, Jan 19, 2005 at 10:19:47AM -0800, Tony Lindgren wrote:
> > If you have a chance, can you please provide me with some more info
> > on your system, see my recent reply to Pavel in this thread for the
>
> It's a normal UP athlon 1ghz, it should be quite widespread hardware.
> I know at least another system that had the problem of system time going
> in the future with 2.6 at the same rate of mine. Still it could be an
> hardware issue after all if nobody else can reproduce it. At HZ=100 the
> system time is again perfectly accurate like in 2.4, so probably at least
> the PIT is ok.

I've tested it with a celeron a300 box, tyan s2460 dual athlon,
tyan tiger 100 dual p3, and fujitsu p1110 crusoe laptop. Fujitsu I
may not have tested with TSC, but others work with both ACPI PM
timer and TSC.

Maybe try disabling or enabling ACPI PM timer? Or maybe it does not
use TSC or ACPI PM timer, and that there's some bug in my patch that
kills the plain PIT timer?

Tony

2005-01-19 19:35:47

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Tony Lindgren <[email protected]> [050119 11:20]:
> * Andrea Arcangeli <[email protected]> [050119 11:12]:
> > On Wed, Jan 19, 2005 at 10:19:47AM -0800, Tony Lindgren wrote:
> > > If you have a chance, can you please provide me with some more info
> > > on your system, see my recent reply to Pavel in this thread for the
> >
> > It's a normal UP athlon 1ghz, it should be quite widespread hardware.
> > I know at least another system that had the problem of system time going
> > in the future with 2.6 at the same rate of mine. Still it could be an
> > hardware issue after all if nobody else can reproduce it. At HZ=100 the
> > system time is again perfectly accurate like in 2.4, so probably at least
> > the PIT is ok.
>
> I've tested it with a celeron a300 box, tyan s2460 dual athlon,
> tyan tiger 100 dual p3, and fujitsu p1110 crusoe laptop. Fujitsu I
> may not have tested with TSC, but others work with both ACPI PM
> timer and TSC.
>
> Maybe try disabling or enabling ACPI PM timer? Or maybe it does not
> use TSC or ACPI PM timer, and that there's some bug in my patch that
> kills the plain PIT timer?

It could be HPET that kills it. I don't have any boxes with HPET
timer, can you try without HPET?

Tony

2005-01-19 22:10:03

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > As this patch is related to the VST/High-Res timers, there
> > > are probably various things that can be merged. I have not
> > > yet looked at what all could be merged.
> > >
> > > I'd appreciate some comments and testing!
> >
> > Good news is that it does seem to reduce number of interrupts. Bad
> > news is that time now runs faster (like "sleep 10" finishes in ~5
> > seconds) and that I could not measure any difference in power
> > consumption.
>
> Thanks for trying it out. I have quite accurate time here on my
> systems, and sleep works as it should. I wonder what's happening on
> your system? If you have a chance, could you please post the results
> from following simple tests?

Unpatched 2.6.11-rc1:

root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
PCI: Setting latency timer of device 0000:00:11.5 to 64
19 Jan 22:53:20 ntpdate[7943]: step time server 195.113.144.238 offset 0.013070 sec
19 Jan 22:53:36 ntpdate[8169]: step time server 195.113.144.238 offset -0.005736 sec
19 Jan 22:53:51 ntpdate[8427]: step time server 195.113.144.238 offset -0.010292 sec
19 Jan 22:54:06 ntpdate[8647]: step time server 195.113.144.238 offset -0.045055 sec
19 Jan 22:54:21 ntpdate[8889]: step time server 195.113.144.238 offset 0.054865 sec
Wed Jan 19 22:54:31 CET 2005
Wed Jan 19 22:54:41 CET 2005
Wed Jan 19 22:54:41 CET 2005
Wed Jan 19 22:54:42 CET 2005
Wed Jan 19 22:54:43 CET 2005
Wed Jan 19 22:54:44 CET 2005
Wed Jan 19 22:54:45 CET 2005
Wed Jan 19 22:54:46 CET 2005
Wed Jan 19 22:54:47 CET 2005
Wed Jan 19 22:54:48 CET 2005
Wed Jan 19 22:54:49 CET 2005
Wed Jan 19 22:54:50 CET 2005
Wed Jan 19 22:54:51 CET 2005
Wed Jan 19 22:54:52 CET 2005
Wed Jan 19 22:54:53 CET 2005
Wed Jan 19 22:54:54 CET 2005
Wed Jan 19 22:54:55 CET 2005
Wed Jan 19 22:54:56 CET 2005

On patched 2.6.11-rc1:

[Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.

root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
PCI: Setting latency timer of device 0000:00:11.5 to 64
dyn-tick: Enabling dynamic tick timer
dyn-tick: Timer using dynamic tick
19 Jan 22:59:16 ntpdate[1363]: no server suitable for synchronization found
19 Jan 22:59:25 ntpdate[1364]: no server suitable for synchronization found
19 Jan 22:59:34 ntpdate[1365]: no server suitable for synchronization found
19 Jan 22:59:42 ntpdate[1366]: no server suitable for synchronization found
19 Jan 22:59:51 ntpdate[1367]: no server suitable for synchronization found
Wed Jan 19 22:59:51 CET 2005
Wed Jan 19 23:00:01 CET 2005
Wed Jan 19 23:00:01 CET 2005
Wed Jan 19 23:00:02 CET 2005
Wed Jan 19 23:00:03 CET 2005
Wed Jan 19 23:00:04 CET 2005
Wed Jan 19 23:00:05 CET 2005
Wed Jan 19 23:00:06 CET 2005
Wed Jan 19 23:00:07 CET 2005
Wed Jan 19 23:00:08 CET 2005
Wed Jan 19 23:00:09 CET 2005
Wed Jan 19 23:00:10 CET 2005

I used -t 10 to force it to work. Notice that clock is two times too fast.

root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -t 10 tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done |
uniq
PCI: Setting latency timer of device 0000:00:11.5 to 64
dyn-tick: Enabling dynamic tick timer
dyn-tick: Timer using dynamic tick
19 Jan 23:03:27 ntpdate[20782]: step time server 195.113.144.238 offset -45.355081 sec
19 Jan 23:03:38 ntpdate[20784]: step time server 195.113.144.238 offset -9.592768 sec
19 Jan 23:03:47 ntpdate[20786]: step time server 195.113.144.238 offset -12.048951 sec
19 Jan 23:04:00 ntpdate[20788]: step time server 195.113.144.238 offset -8.273278 sec
19 Jan 23:04:08 ntpdate[20790]: step time server 195.113.144.238 offset -12.240673 sec
Wed Jan 19 23:04:18 CET 2005
Wed Jan 19 23:04:28 CET 2005
Wed Jan 19 23:04:28 CET 2005
Wed Jan 19 23:04:29 CET 2005
Wed Jan 19 23:04:30 CET 2005
Wed Jan 19 23:04:31 CET 2005
Wed Jan 19 23:04:32 CET 2005
Wed Jan 19 23:04:33 CET 2005
Wed Jan 19 23:04:34 CET 2005
Wed Jan 19 23:04:35 CET 2005
Wed Jan 19 23:04:36 CET 2005
Wed Jan 19 23:04:37 CET 2005
Wed Jan 19 23:04:38 CET 2005
Wed Jan 19 23:04:39 CET 2005
Wed Jan 19 23:04:40 CET 2005

Anything else I should try?

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 22:26:36

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > As this patch is related to the VST/High-Res timers, there
> > > are probably various things that can be merged. I have not
> > > yet looked at what all could be merged.
> > >
> > > I'd appreciate some comments and testing!
> >
> > Good news is that it does seem to reduce number of interrupts. Bad
> > news is that time now runs faster (like "sleep 10" finishes in ~5
> > seconds) and that I could not measure any difference in power
> > consumption.
>
> Thanks for trying it out. I have quite accurate time here on my
> systems, and sleep works as it should. I wonder what's happening on
> your system? If you have a chance, could you please post the results
> from following simple tests?

Correction: with patch was not 2.6.11-rc1, but 2.6.11-rc1-bk.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 22:42:23

by Andrea Arcangeli

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, Jan 19, 2005 at 11:34:20AM -0800, Tony Lindgren wrote:
> It could be HPET that kills it. I don't have any boxes with HPET
> timer, can you try without HPET?

There's no hpet hardware in that system. Also the problem I'm having is
not with your patch but on some code that should be exercised by your
patch (it's the firewall so it's not very handy to test patches on it).
I'll test on a different system next time to see if I can reproduce only
on the firewall or not.

2005-01-19 22:59:31

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Andrea Arcangeli wrote:
> On Wed, Jan 19, 2005 at 09:13:23AM -0800, Tony Lindgren wrote:
>
>>HZ=100 does not allow improving the idle loop much further
>>from what we have. We should be able to take advantage of the
>>longer idle/sleep periods inbetween the skipped ticks.
>
>
> OTOH servers aren't just doing idle power saving, if you're computing
> wasting 1% of your cpu isn't always desiderable.
>
> You'd need to trap into add_timer to make it a truly dynamic timer, only
> then it would obsolete the HZ=100. So if there's no timer you run at
> HZ=100, while if there's some timer pending in the next 10 timer queues
> you run at HZ=1000.
>
> The main problem I can see with your patch is the accuracy issue with
> the system time. I couldn't fix the algorithm you're depending on to get
> accurate system time, and I can guarantee such algorithm never worked
> accurately here and worst of all it doesn't printk anything (which is
> why it doesn't printk for your patch), so you only see system time go in
> the future at an excessive rate (minutes per hour IIRC).
>
> Pavel posted the cli/sti script, that should allow to reproduce the
> inaccuracy of the algorithm. I had to set HZ=100 by hand to workaround
> the usb modem irq latency that is about 3/4 msec.
>
> Once in a while such algorithm overstates the number of ticks that have
> been missed, and so the system time goes 1msec in the future when that
> happens. I still suspect there might be a bug in such code though.
> There's an unfixable window between the latch read and the tsc
> read where an error can happen, but as long as the window is below
> 0.5msec, it should always be possible to regenerate the accurate timing
> with the current algo, but in practice it fails to be accurate...
> -
I don't think you will ever get good time if you EVER reprogramm the PIT. That
is why the VST patch on sourceforge does NOT touch the PIT, it only turns off
the interrupt by interrupting the interrupt path (not changing the PIT). This
allows the PIT to be the "gold standard" in time that it is designed to be. The
wake up interrupt, then needs to come from an independent timer. My patch
requires a local APIC for this. Patch is available at
http://sourceforge.net/projects/high-res-timers/

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-19 23:08:55

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Pavel Machek <[email protected]> [050119 14:06]:
> Hi!
>
> > > > As this patch is related to the VST/High-Res timers, there
> > > > are probably various things that can be merged. I have not
> > > > yet looked at what all could be merged.
> > > >
> > > > I'd appreciate some comments and testing!
> > >
> > > Good news is that it does seem to reduce number of interrupts. Bad
> > > news is that time now runs faster (like "sleep 10" finishes in ~5
> > > seconds) and that I could not measure any difference in power
> > > consumption.
> >
> > Thanks for trying it out. I have quite accurate time here on my
> > systems, and sleep works as it should. I wonder what's happening on
> > your system? If you have a chance, could you please post the results
> > from following simple tests?
>
> On patched 2.6.11-rc1:
>
> [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
>
> root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> PCI: Setting latency timer of device 0000:00:11.5 to 64
> dyn-tick: Enabling dynamic tick timer
> dyn-tick: Timer using dynamic tick

Thanks. Looks like you're running on PIT only, I guess my patch
currently breaks PIT (and possibly HPET) No dmesg message for "
"Using XXX for high-res timesource".

Tony

2005-01-19 23:21:42

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* George Anzinger <[email protected]> [050119 15:00]:
>
> I don't think you will ever get good time if you EVER reprogramm the PIT.
> That is why the VST patch on sourceforge does NOT touch the PIT, it only
> turns off the interrupt by interrupting the interrupt path (not changing
> the PIT). This allows the PIT to be the "gold standard" in time that it is
> designed to be. The wake up interrupt, then needs to come from an
> independent timer. My patch requires a local APIC for this. Patch is
> available at http://sourceforge.net/projects/high-res-timers/

Well on my test systems I have pretty good accurate time. But I agree,
PIT is not the best option for interrupt. It should be possible to use
other interrupt sources as well.

It should not matter where the timer interrupt comes from, as long as
it comes when programmed. Updating time should be separate from timer
interrupts. Currently we have a problem where time is tied to the
timer interrupt.

I'll take a look at your patch again, and check out the APIC part.

Regards,

Tony

2005-01-19 23:29:29

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, 2005-01-19 at 14:59 -0800, George Anzinger wrote:
> I don't think you will ever get good time if you EVER reprogramm the PIT.

Why not ? If you have a continous time source, which keeps track of
"ticks" regardless the CPU state, why should PIT reprogramming be evil ?

tglx


2005-01-19 23:52:28

by john stultz

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Thu, 2005-01-20 at 00:26 +0100, Thomas Gleixner wrote:
> On Wed, 2005-01-19 at 14:59 -0800, George Anzinger wrote:
> > I don't think you will ever get good time if you EVER reprogramm the PIT.
>
> Why not ? If you have a continous time source, which keeps track of
> "ticks" regardless the CPU state, why should PIT reprogramming be evil ?

That's a big if. The problem is that while the PIT has its problems
(such as lost ticks), it runs at a known frequency and is reasonably
accurate. Time sources like the TSC have the problem that it doesn't run
at a known frequency, and thus we have to calibrate it (usually using
the PIT). Unfortunately this calibration is not extremely accurate
(George can go on to the reasons why), which causes the TSC to be a poor
stand alone time source.

That said, the PIT is a poor time source as well, as it does loose ticks
and is very slow to access. ACPI PM and HPET are better as they don't
have the lost tick problem, but they are still off chip and slower to
access then the TSC.

For an example of your ideal continuous timesource, check out the
timebase on PPC/PPC64. Other arches also have similar well behaved time
hardware.

thanks
-john



2005-01-19 23:52:27

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > > > As this patch is related to the VST/High-Res timers, there
> > > > > are probably various things that can be merged. I have not
> > > > > yet looked at what all could be merged.
> > > > >
> > > > > I'd appreciate some comments and testing!
> > > >
> > > > Good news is that it does seem to reduce number of interrupts. Bad
> > > > news is that time now runs faster (like "sleep 10" finishes in ~5
> > > > seconds) and that I could not measure any difference in power
> > > > consumption.
> > >
> > > Thanks for trying it out. I have quite accurate time here on my
> > > systems, and sleep works as it should. I wonder what's happening on
> > > your system? If you have a chance, could you please post the results
> > > from following simple tests?
> >
> > On patched 2.6.11-rc1:
> >
> > [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
> >
> > root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> > PCI: Setting latency timer of device 0000:00:11.5 to 64
> > dyn-tick: Enabling dynamic tick timer
> > dyn-tick: Timer using dynamic tick
>
> Thanks. Looks like you're running on PIT only, I guess my patch
> currently breaks PIT (and possibly HPET) No dmesg message for "
> "Using XXX for high-res timesource".

Okay, so I set CONFIG_HPET. CONFIG_X86_TSC was already set, I wonder
why the code did not use it?
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-19 23:57:03

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Pavel Machek <[email protected]> [050119 15:47]:
> Hi!
>
> > > > > > As this patch is related to the VST/High-Res timers, there
> > > > > > are probably various things that can be merged. I have not
> > > > > > yet looked at what all could be merged.
> > > > > >
> > > > > > I'd appreciate some comments and testing!
> > > > >
> > > > > Good news is that it does seem to reduce number of interrupts. Bad
> > > > > news is that time now runs faster (like "sleep 10" finishes in ~5
> > > > > seconds) and that I could not measure any difference in power
> > > > > consumption.
> > > >
> > > > Thanks for trying it out. I have quite accurate time here on my
> > > > systems, and sleep works as it should. I wonder what's happening on
> > > > your system? If you have a chance, could you please post the results
> > > > from following simple tests?
> > >
> > > On patched 2.6.11-rc1:
> > >
> > > [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
> > >
> > > root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> > > PCI: Setting latency timer of device 0000:00:11.5 to 64
> > > dyn-tick: Enabling dynamic tick timer
> > > dyn-tick: Timer using dynamic tick
> >
> > Thanks. Looks like you're running on PIT only, I guess my patch
> > currently breaks PIT (and possibly HPET) No dmesg message for "
> > "Using XXX for high-res timesource".
>
> Okay, so I set CONFIG_HPET. CONFIG_X86_TSC was already set, I wonder
> why the code did not use it?

Can you try with no CONFIG_HPET and CONFIG_X86_TCS or X86_PM_TIMER?
I don't have hardware with HPET, so I have not looked at it.

Tony

2005-01-20 00:02:26

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > Thanks for trying it out. I have quite accurate time here on my
> > > systems, and sleep works as it should. I wonder what's happening on
> > > your system? If you have a chance, could you please post the results
> > > from following simple tests?
> >
> > On patched 2.6.11-rc1:
> >
> > [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
> >
> > root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> > PCI: Setting latency timer of device 0000:00:11.5 to 64
> > dyn-tick: Enabling dynamic tick timer
> > dyn-tick: Timer using dynamic tick
>
> Thanks. Looks like you're running on PIT only, I guess my patch
> currently breaks PIT (and possibly HPET) No dmesg message for "
> "Using XXX for high-res timesource".

This machine definitely has TSC... What do I have to do in .config to
make it do something interesting? My .config is:
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!


Attachments:
(No filename) (1.11 kB)
config.gz (9.90 kB)
Download all attachments

2005-01-20 00:14:23

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

--- config.orig 2005-01-19 16:05:16.000000000 -0800
+++ config 2005-01-19 16:06:07.000000000 -0800
@@ -103,7 +103,7 @@
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_INTEL_USERCOPY=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_HPET_TIMER=y
+# CONFIG_HPET_TIMER is not set
CONFIG_NO_IDLE_HZ=y
# CONFIG_SMP is not set
CONFIG_PREEMPT=y
@@ -169,7 +169,7 @@
CONFIG_ACPI_POWER=y
CONFIG_ACPI_PCI=y
CONFIG_ACPI_SYSTEM=y
-# CONFIG_X86_PM_TIMER is not set
+CONFIG_X86_PM_TIMER=y
# CONFIG_ACPI_CONTAINER is not set

#


Attachments:
(No filename) (1.15 kB)
patch-config-no-hpet (501.00 B)
Download all attachments

2005-01-20 00:28:37

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Tony Lindgren wrote:
> * George Anzinger <[email protected]> [050119 15:00]:
>
>>I don't think you will ever get good time if you EVER reprogramm the PIT.
>>That is why the VST patch on sourceforge does NOT touch the PIT, it only
>>turns off the interrupt by interrupting the interrupt path (not changing
>>the PIT). This allows the PIT to be the "gold standard" in time that it is
>>designed to be. The wake up interrupt, then needs to come from an
>>independent timer. My patch requires a local APIC for this. Patch is
>>available at http://sourceforge.net/projects/high-res-timers/
>
>
> Well on my test systems I have pretty good accurate time. But I agree,
> PIT is not the best option for interrupt. It should be possible to use
> other interrupt sources as well.
>
> It should not matter where the timer interrupt comes from, as long as
> it comes when programmed. Updating time should be separate from timer
> interrupts. Currently we have a problem where time is tied to the
> timer interrupt.

In the HRT code time is most correctly stated as wall_time +
get_arch_cycles_since(wall_jiffies) (plus conversion or two:)). This is some
what removed from the tick interrupt, but is resynced to that interrupt more or
less each interrupt.

A second issue is trying to get the jiffies update as close to the run of the
timer list as possible. Without this we have no hope of high res timers.

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-20 00:39:38

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Thomas Gleixner wrote:
> On Wed, 2005-01-19 at 14:59 -0800, George Anzinger wrote:
>
>>I don't think you will ever get good time if you EVER reprogramm the PIT.
>
>
> Why not ? If you have a continous time source, which keeps track of
> "ticks" regardless the CPU state, why should PIT reprogramming be evil ?

First it takes too long. Second, you are (usually) programming it to run at
1/HZ but you do this somewhere between the ticks (and, likely you don't really
know where between them you are). This last means, on the average, that you
lose 1/2 a tick time, i.e. the tick interrupt will happen 1/2 a tick late for
each reprogramm done.

If you say, well, lets just use the TSC (or some other timer) you have the
problem that in x86 boxes those don't rely on "rocks" selected for time keeping,
so you have an inaccurate clock to this degree. Also, in the case of the TSC,
at boot time we "try" to figure out how many cycles of TSC it takes to do a PIT
cycle by "looking" as the PIT in a loop while we catch the TSC. Problem is that
the I/O sync needed to look at the PIT is several TSC cycles long and we don't
really know how close we got. Even using the max PIT time of around 50 ms the
error on my 800 MHZ PII is 10 or more TSC cycles.

At one point I tried to get the PIT to sync back up by doing a short cycle
followed by the normal cycle. I.e. I loaded the counter for the time remaining
in the jiffie, started the PIT and then loaded the 1/HZ latch count on top of
it. The spec says the new count should be loaded by the chip when the current
one expires. This sort of worked, but I still got feedback on clock drift.
Also, there are some PITs out there that don't do this correctly. And in the
load part, you have to wait for the first program to start prior to loading the
second one. This is a busy loop waiting for an I/O event, i.e. much too long.

We should also keep in mind that we really want the timer tick to happen as
close as possible to the jiffies++ as possible. Especially if we are doing high
res timers, any delay here will show up as late timers.
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-20 00:57:10

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > > > Thanks for trying it out. I have quite accurate time here on my
> > > > > systems, and sleep works as it should. I wonder what's happening on
> > > > > your system? If you have a chance, could you please post the results
> > > > > from following simple tests?
> > > >
> > > > On patched 2.6.11-rc1:
> > > >
> > > > [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
> > > >
> > > > root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> > > > PCI: Setting latency timer of device 0000:00:11.5 to 64
> > > > dyn-tick: Enabling dynamic tick timer
> > > > dyn-tick: Timer using dynamic tick
> > >
> > > Thanks. Looks like you're running on PIT only, I guess my patch
> > > currently breaks PIT (and possibly HPET) No dmesg message for "
> > > "Using XXX for high-res timesource".
> >
> > This machine definitely has TSC... What do I have to do in .config to
> > make it do something interesting? My .config is:
>
> I suspect it's the HPET_TIMER, see below. CONFIG_X86_PM_TIMER is
> optional, otherwise TSC is used.

Okay, so I tried to measure power consumption. Patched kernel seems to
be about 200mW better. That's on 22W... I'll see if I can tweak ACPI
somehow to decrease it a bit more.
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-20 00:57:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!


> > > Thanks. Looks like you're running on PIT only, I guess my patch
> > > currently breaks PIT (and possibly HPET) No dmesg message for "
> > > "Using XXX for high-res timesource".
> >
> > This machine definitely has TSC... What do I have to do in .config to
> > make it do something interesting? My .config is:
>
> I suspect it's the HPET_TIMER, see below. CONFIG_X86_PM_TIMER is
> optional, otherwise TSC is used.

Now it seems to work okay, see below.

root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -t 10 tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done |
uniq
PCI: Setting latency timer of device 0000:00:11.5 to 64
dyn-tick: Enabling dynamic tick timer
dyn-tick: Timer using dynamic tick
20 Jan 01:39:45 ntpdate[1365]: step time server 195.113.144.238 offset -3.449324 sec
20 Jan 01:40:00 ntpdate[1371]: adjust time server 195.113.144.238 offset 0.005790 sec
20 Jan 01:40:15 ntpdate[1375]: adjust time server 195.113.144.238 offset 0.024061 sec
20 Jan 01:40:31 ntpdate[1380]: adjust time server 195.113.144.238 offset 0.004277 sec
20 Jan 01:40:46 ntpdate[1384]: adjust time server 195.113.144.238 offset 0.000283 sec
Thu Jan 20 01:40:56 CET 2005
Thu Jan 20 01:41:06 CET 2005
Thu Jan 20 01:41:06 CET 2005
Thu Jan 20 01:41:07 CET 2005
Thu Jan 20 01:41:08 CET 2005
Thu Jan 20 01:41:09 CET 2005
Thu Jan 20 01:41:10 CET 2005
Thu Jan 20 01:41:11 CET 2005

Timer interrupts are no longer that common, good.

root@amd:~# cat /proc/interrupts ; sleep 10; cat /proc/interrupts
CPU0
0: 16390 IO-APIC-edge timer
1: 646 IO-APIC-edge i8042
10: 49 IO-APIC-level acpi
12: 69 IO-APIC-edge i8042
14: 1688 IO-APIC-edge ide0
15: 14 IO-APIC-edge ide1
17: 1 IO-APIC-level yenta
19: 2 IO-APIC-level ohci1394
21: 1252 IO-APIC-level ehci_hcd, uhci_hcd, uhci_hcd, uhci_hcd
22: 0 IO-APIC-level VIA8233
23: 3 IO-APIC-level eth0
NMI: 0
LOC: 20306
ERR: 0
MIS: 0
CPU0
0: 16586 IO-APIC-edge timer
1: 647 IO-APIC-edge i8042
10: 49 IO-APIC-level acpi
12: 69 IO-APIC-edge i8042
14: 1744 IO-APIC-edge ide0
15: 14 IO-APIC-edge ide1
17: 1 IO-APIC-level yenta
19: 2 IO-APIC-level ohci1394
21: 1337 IO-APIC-level ehci_hcd, uhci_hcd, uhci_hcd, uhci_hcd
22: 0 IO-APIC-level VIA8233
23: 3 IO-APIC-level eth0
NMI: 0
LOC: 20647
ERR: 0
MIS: 0

Pavel
> --- config.orig 2005-01-19 16:05:16.000000000 -0800
> +++ config 2005-01-19 16:06:07.000000000 -0800
> @@ -103,7 +103,7 @@
> CONFIG_X86_GOOD_APIC=y
> CONFIG_X86_INTEL_USERCOPY=y
> CONFIG_X86_USE_PPRO_CHECKSUM=y
> -CONFIG_HPET_TIMER=y
> +# CONFIG_HPET_TIMER is not set
> CONFIG_NO_IDLE_HZ=y
> # CONFIG_SMP is not set
> CONFIG_PREEMPT=y
> @@ -169,7 +169,7 @@
> CONFIG_ACPI_POWER=y
> CONFIG_ACPI_PCI=y
> CONFIG_ACPI_SYSTEM=y
> -# CONFIG_X86_PM_TIMER is not set
> +CONFIG_X86_PM_TIMER=y
> # CONFIG_ACPI_CONTAINER is not set
>
> #


--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-20 03:18:16

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, 19 Jan 2005 14:59:20 PST, George Anzinger said:

> allows the PIT to be the "gold standard" in time that it is designed to be. The
> wake up interrupt, then needs to come from an independent timer. My patch
> requires a local APIC for this. Patch is available at
> http://sourceforge.net/projects/high-res-timers/

Foo. and me with a BIOS-borked LAPIC. Anybody have helpful hints on how to
get Dell to fix a laptop BIOS(*), or are you all too busy dying of hysterical laughter?

Dell Latitude C840, running bios A13 (most recent). I *know* it was borked
around A08, and the bios release notes haven't mentioned fixing it. I wonder
if it's worth re-checking anyhow....


Attachments:
(No filename) (226.00 B)

2005-01-20 04:03:11

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Tue, 18 Jan 2005, Tony Lindgren wrote:

> Hi all,
>
> Attached is the dynamic tick patch for x86 to play with
> as I promised in few threads earlier on this list.[1][2]
>
> The dynamic tick patch does following:
>
> - Separates timer interrupts from updating system time
>
> - Allows updating time from other interrupts in addition
> to timer interrupt
>
> - Makes timer tick dynamic
>
> - Allows power management modules to take advantage of the
> idle time inbetween skipped ticks
>
> - Might help with the whistling caps?

This doesn't seem to cover the local APIC timer, what do you do about the
1kHz tick which it's programmed to do?

2005-01-20 05:57:18

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Wed, 2005-01-19 at 15:45 -0800, john stultz wrote:
> On Thu, 2005-01-20 at 00:26 +0100, Thomas Gleixner wrote:
> > On Wed, 2005-01-19 at 14:59 -0800, George Anzinger wrote:
> > > I don't think you will ever get good time if you EVER reprogramm the PIT.
> >
> > Why not ? If you have a continous time source, which keeps track of
> > "ticks" regardless the CPU state, why should PIT reprogramming be evil ?
>
> That's a big if. The problem is that while the PIT has its problems
> (such as lost ticks), it runs at a known frequency and is reasonably
> accurate. Time sources like the TSC have the problem that it doesn't run
> at a known frequency, and thus we have to calibrate it (usually using
> the PIT). Unfortunately this calibration is not extremely accurate
> (George can go on to the reasons why), which causes the TSC to be a poor
> stand alone time source.
>
> That said, the PIT is a poor time source as well, as it does loose ticks
> and is very slow to access. ACPI PM and HPET are better as they don't
> have the lost tick problem, but they are still off chip and slower to
> access then the TSC.

And they aren't available on every board - especially not on embedded
ones.

> For an example of your ideal continuous timesource, check out the
> timebase on PPC/PPC64. Other arches also have similar well behaved time
> hardware.

Yes, I'm aware of that. Unfortunately we live in the x86 universe.

tglx


2005-01-20 07:39:51

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Pavel Machek <[email protected]> [050119 16:56]:
> Hi!
>
> > > > > > Thanks for trying it out. I have quite accurate time here on my
> > > > > > systems, and sleep works as it should. I wonder what's happening on
> > > > > > your system? If you have a chance, could you please post the results
> > > > > > from following simple tests?
> > > > >
> > > > > On patched 2.6.11-rc1:
> > > > >
> > > > > [Heh, clock is two times too fast, perhaps that makes ntpdate fail? -- yes.
> > > > >
> > > > > root@amd:~# dmesg | grep -i time; for i in 1 2 3 4 5; do ntpdate -b tak.cesnet.cz && sleep 10; done ; date && sleep 10 && date; while [ 1 ]; do date; done | uniq
> > > > > PCI: Setting latency timer of device 0000:00:11.5 to 64
> > > > > dyn-tick: Enabling dynamic tick timer
> > > > > dyn-tick: Timer using dynamic tick
> > > >
> > > > Thanks. Looks like you're running on PIT only, I guess my patch
> > > > currently breaks PIT (and possibly HPET) No dmesg message for "
> > > > "Using XXX for high-res timesource".
> > >
> > > This machine definitely has TSC... What do I have to do in .config to
> > > make it do something interesting? My .config is:
> >
> > I suspect it's the HPET_TIMER, see below. CONFIG_X86_PM_TIMER is
> > optional, otherwise TSC is used.
>
> Okay, so I tried to measure power consumption. Patched kernel seems to
> be about 200mW better. That's on 22W... I'll see if I can tweak ACPI
> somehow to decrease it a bit more.

Good to hear you finally got it to work, and can enjoy the extra few
milliseconds of battery life :) Now the real problem is what to do
with the idle to take advantage of the extra idle time... I'm not
convinced that hlt or ACPI C2/C3 actually sleep through it. Maybe
there's something more that can be done?

Tony

2005-01-20 08:06:29

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* George Anzinger <[email protected]> [050119 16:25]:
> Tony Lindgren wrote:
> >* George Anzinger <[email protected]> [050119 15:00]:
> >
> >>I don't think you will ever get good time if you EVER reprogramm the PIT.
> >>That is why the VST patch on sourceforge does NOT touch the PIT, it only
> >>turns off the interrupt by interrupting the interrupt path (not changing
> >>the PIT). This allows the PIT to be the "gold standard" in time that it
> >>is designed to be. The wake up interrupt, then needs to come from an
> >>independent timer. My patch requires a local APIC for this. Patch is
> >>available at http://sourceforge.net/projects/high-res-timers/
> >
> >
> >Well on my test systems I have pretty good accurate time. But I agree,
> >PIT is not the best option for interrupt. It should be possible to use
> >other interrupt sources as well.
> >
> >It should not matter where the timer interrupt comes from, as long as
> >it comes when programmed. Updating time should be separate from timer
> >interrupts. Currently we have a problem where time is tied to the
> >timer interrupt.
>
> In the HRT code time is most correctly stated as wall_time +
> get_arch_cycles_since(wall_jiffies) (plus conversion or two:)). This is
> some what removed from the tick interrupt, but is resynced to that
> interrupt more or less each interrupt.

That sounds very accurate :)

> A second issue is trying to get the jiffies update as close to the run of
> the timer list as possible. Without this we have no hope of high res
> timers.

OK. But if the timer interrupt is separated from updating the time,
the next timer interrupt should be programmable to happen exactly
when a HRT timer needs it, right?

Hmm, how about using a pool of programmable timers available on the
system for the timer interrupts and HRT? Or is one interrupt source
always enough?

Tony

2005-01-20 23:10:55

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Tony Lindgren wrote:
> * George Anzinger <[email protected]> [050119 16:25]:
>
>>Tony Lindgren wrote:
>>
>>>* George Anzinger <[email protected]> [050119 15:00]:
>>>
>>>
>>>>I don't think you will ever get good time if you EVER reprogramm the PIT.
>>>>That is why the VST patch on sourceforge does NOT touch the PIT, it only
>>>>turns off the interrupt by interrupting the interrupt path (not changing
>>>>the PIT). This allows the PIT to be the "gold standard" in time that it
>>>>is designed to be. The wake up interrupt, then needs to come from an
>>>>independent timer. My patch requires a local APIC for this. Patch is
>>>>available at http://sourceforge.net/projects/high-res-timers/
>>>
>>>
>>>Well on my test systems I have pretty good accurate time. But I agree,
>>>PIT is not the best option for interrupt. It should be possible to use
>>>other interrupt sources as well.
>>>
>>>It should not matter where the timer interrupt comes from, as long as
>>>it comes when programmed. Updating time should be separate from timer
>>>interrupts. Currently we have a problem where time is tied to the
>>>timer interrupt.
>>
>>In the HRT code time is most correctly stated as wall_time +
>>get_arch_cycles_since(wall_jiffies) (plus conversion or two:)). This is
>>some what removed from the tick interrupt, but is resynced to that
>>interrupt more or less each interrupt.
>
>
> That sounds very accurate :)
>
>
>>A second issue is trying to get the jiffies update as close to the run of
>>the timer list as possible. Without this we have no hope of high res
>>timers.
>
>
> OK. But if the timer interrupt is separated from updating the time,
> the next timer interrupt should be programmable to happen exactly
> when a HRT timer needs it, right?

First, HRT uses a two phase system of timing. The first phase is the normal
timer list expires the timer. The timer is then handed to the high res code
which keeps a list of timers that are to expire prior to the next jiffie. An
interrupt is scheduled to make this happen. Depending on the hardware
available, this can come from the same timer or a different timer. For example
on x86 systems with a local apic we use the apic timer to generate this
interrupt. It triggers either a tasklet for UP or SMP with out per cpu timers
or a soft irq for SMP systems with per cpu timers.

What this means is that, for timers near but just after a jiffie, the run_timer
list being late can make the HR timer late.

This code on on sourceforge if you want a closer look...
>
> Hmm, how about using a pool of programmable timers available on the
> system for the timer interrupts and HRT? Or is one interrupt source
> always enough?

Hardware heaven :), but no thanks. A reliable tick generator for the jiffies
timer and one additional timer (or one per cpu) works well in the x86.

If you have something like the PPC where you can mess with the timer with out
loosing time, that works well also. The correct formulation would be a "clock"
that can be read quickly and a timer tied to the same "rock" that uses the same
count units as the clock. PARISC has a counter that just counts and a compare
register. When they are equal an interrupt is generated. That is a nice set up.

Now the X86 is bad and has little hope of being fixed for these reasons:
a.) the TSC is fast and easy to read but its not clocked at any given frequency
and, on some platforms, it changes without notifying the software.
b.) the PIT and the PMTIMER are both in I/O space and so take forever to access.
c.) All three of these use different units (but at least the PMTIMER is
(supposed to be) related to the PIT clock.
d.) the HPET, again is in I/O space. I suspect that it uses a reasonable "rock"
but, as I understand it, it knocks out the PIT and, of course it uses units
unrelated to all the others.

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-21 17:36:56

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* George Anzinger <[email protected]> [050120 15:10]:
> Tony Lindgren wrote:
> >* George Anzinger <[email protected]> [050119 16:25]:
> >
> >>Tony Lindgren wrote:
> >>
> >>>* George Anzinger <[email protected]> [050119 15:00]:
> >>>
> >>>
> >>>>I don't think you will ever get good time if you EVER reprogramm the
> >>>>PIT. That is why the VST patch on sourceforge does NOT touch the PIT,
> >>>>it only turns off the interrupt by interrupting the interrupt path (not
> >>>>changing the PIT). This allows the PIT to be the "gold standard" in
> >>>>time that it is designed to be. The wake up interrupt, then needs to
> >>>>come from an independent timer. My patch requires a local APIC for
> >>>>this. Patch is available at
> >>>>http://sourceforge.net/projects/high-res-timers/
> >>>
> >>>
> >>>Well on my test systems I have pretty good accurate time. But I agree,
> >>>PIT is not the best option for interrupt. It should be possible to use
> >>>other interrupt sources as well.

But then again reprogramming PIT in my patch should not be that bad,
as it's not done under load.

> >>>It should not matter where the timer interrupt comes from, as long as
> >>>it comes when programmed. Updating time should be separate from timer
> >>>interrupts. Currently we have a problem where time is tied to the
> >>>timer interrupt.
> >>
> >>In the HRT code time is most correctly stated as wall_time +
> >>get_arch_cycles_since(wall_jiffies) (plus conversion or two:)). This is
> >>some what removed from the tick interrupt, but is resynced to that
> >>interrupt more or less each interrupt.
> >
> >
> >That sounds very accurate :)
> >
> >
> >>A second issue is trying to get the jiffies update as close to the run of
> >>the timer list as possible. Without this we have no hope of high res
> >>timers.
> >
> >
> >OK. But if the timer interrupt is separated from updating the time,
> >the next timer interrupt should be programmable to happen exactly
> >when a HRT timer needs it, right?
>
> First, HRT uses a two phase system of timing. The first phase is the
> normal timer list expires the timer. The timer is then handed to the high
> res code which keeps a list of timers that are to expire prior to the next
> jiffie. An interrupt is scheduled to make this happen. Depending on the
> hardware available, this can come from the same timer or a different timer.
> For example on x86 systems with a local apic we use the apic timer to
> generate this interrupt. It triggers either a tasklet for UP or SMP with
> out per cpu timers or a soft irq for SMP systems with per cpu timers.
>
> What this means is that, for timers near but just after a jiffie, the
> run_timer list being late can make the HR timer late.

Thanks for explaining that. So basically catching up with jiffies after
skipping ticks could easily delay the HRT timer.

If jiffies was calculated from hw timer, updating time after skipping
jiffies would be fast, and then this problem would go away, right?

> This code on on sourceforge if you want a closer look...

I'll take a look at it.

> >Hmm, how about using a pool of programmable timers available on the
> >system for the timer interrupts and HRT? Or is one interrupt source
> >always enough?
>
> Hardware heaven :), but no thanks. A reliable tick generator for the
> jiffies timer and one additional timer (or one per cpu) works well in the
> x86.
>
> If you have something like the PPC where you can mess with the timer with
> out loosing time, that works well also. The correct formulation would be a
> "clock" that can be read quickly and a timer tied to the same "rock" that
> uses the same count units as the clock. PARISC has a counter that just
> counts and a compare register. When they are equal an interrupt is
> generated. That is a nice set up.

Yes, many ARMs have this setup as well.

> Now the X86 is bad and has little hope of being fixed for these reasons:
> a.) the TSC is fast and easy to read but its not clocked at any given
> frequency and, on some platforms, it changes without notifying the software.
> b.) the PIT and the PMTIMER are both in I/O space and so take forever to
> access.
> c.) All three of these use different units (but at least the PMTIMER is
> (supposed to be) related to the PIT clock.
> d.) the HPET, again is in I/O space. I suspect that it uses a reasonable
> "rock" but, as I understand it, it knocks out the PIT and, of course it
> uses units unrelated to all the others.

The timers on x86 are quite messy...

Tony

2005-01-21 17:49:59

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Zwane Mwaikambo <[email protected]> [050119 20:02]:
> On Tue, 18 Jan 2005, Tony Lindgren wrote:
>
> > Hi all,
> >
> > Attached is the dynamic tick patch for x86 to play with
> > as I promised in few threads earlier on this list.[1][2]
> >
> > The dynamic tick patch does following:
> >
> > - Separates timer interrupts from updating system time
> >
> > - Allows updating time from other interrupts in addition
> > to timer interrupt
> >
> > - Makes timer tick dynamic
> >
> > - Allows power management modules to take advantage of the
> > idle time inbetween skipped ticks
> >
> > - Might help with the whistling caps?
>
> This doesn't seem to cover the local APIC timer, what do you do about the
> 1kHz tick which it's programmed to do?

Sorry for the delay in replaying. Thanks for pointing that out, I
don't know yet what to do with the local APIC timer. Have to look at
more.

Tony

2005-01-21 18:28:49

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Fri, 21 Jan 2005, Tony Lindgren wrote:

> > This doesn't seem to cover the local APIC timer, what do you do about the
> > 1kHz tick which it's programmed to do?
>
> Sorry for the delay in replaying. Thanks for pointing that out, I
> don't know yet what to do with the local APIC timer. Have to look at
> more.

Pavel does your test system have a Local APIC? If so that may also explain
why you didn't see a difference.

Tony, something like the following for oneshot should work, untested of
course. Perhaps you could use that for the wakeup interrupt instead?

void setup_oneshot_apic_timer(unsigned int count)
{
unsigned int lvtt, tmp_value;
unsigned long flags;

count *= calibration_result;
local_irq_save(flags);
lvtt = ~APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
apic_write_around(APIC_LVTT, lvtt);
tmp_value = apic_read(APIC_TDCR);
apic_write_around(APIC_TDCR, (tmp_value
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);

apic_write_around(APIC_TMICT, count/APIC_DIVISOR);
local_irq_restore(flags);
}

2005-01-21 18:38:35

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

* Zwane Mwaikambo <[email protected]> [050121 10:27]:
> On Fri, 21 Jan 2005, Tony Lindgren wrote:
>
> > > This doesn't seem to cover the local APIC timer, what do you do about the
> > > 1kHz tick which it's programmed to do?
> >
> > Sorry for the delay in replaying. Thanks for pointing that out, I
> > don't know yet what to do with the local APIC timer. Have to look at
> > more.
>
> Pavel does your test system have a Local APIC? If so that may also explain
> why you didn't see a difference.

Yeah, that could explain why sleep mode seems to wake up too early.

> Tony, something like the following for oneshot should work, untested of
> course. Perhaps you could use that for the wakeup interrupt instead?
>
> void setup_oneshot_apic_timer(unsigned int count)
> {
> unsigned int lvtt, tmp_value;
> unsigned long flags;
>
> count *= calibration_result;
> local_irq_save(flags);
> lvtt = ~APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
> apic_write_around(APIC_LVTT, lvtt);
> tmp_value = apic_read(APIC_TDCR);
> apic_write_around(APIC_TDCR, (tmp_value
> & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
> | APIC_TDR_DIV_16);
>
> apic_write_around(APIC_TMICT, count/APIC_DIVISOR);
> local_irq_restore(flags);
> }
>

Thanks, I'll try it out! As George also pointed out, we should use apic
timer if available. Else we can fall back on use PIT.

Tony

2005-01-21 18:54:58

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > This doesn't seem to cover the local APIC timer, what do you do about the
> > > 1kHz tick which it's programmed to do?
> >
> > Sorry for the delay in replaying. Thanks for pointing that out, I
> > don't know yet what to do with the local APIC timer. Have to look at
> > more.
>
> Pavel does your test system have a Local APIC? If so that may also explain
> why you didn't see a difference.

My systems do have APICs, but I prefer them disabled :-).
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-21 20:23:55

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Tony Lindgren wrote:
> * George Anzinger <[email protected]> [050120 15:10]:
>
>>Tony Lindgren wrote:
>>
>>>* George Anzinger <[email protected]> [050119 16:25]:
>>>
>>>
>>>>Tony Lindgren wrote:
>>>>
>>>>
>>>>>* George Anzinger <[email protected]> [050119 15:00]:
>>>>>
>>>>>
>>>>>
>>>>>>I don't think you will ever get good time if you EVER reprogramm the
>>>>>>PIT. That is why the VST patch on sourceforge does NOT touch the PIT,
>>>>>>it only turns off the interrupt by interrupting the interrupt path (not
>>>>>>changing the PIT). This allows the PIT to be the "gold standard" in
>>>>>>time that it is designed to be. The wake up interrupt, then needs to
>>>>>>come from an independent timer. My patch requires a local APIC for
>>>>>>this. Patch is available at
>>>>>>http://sourceforge.net/projects/high-res-timers/
>>>>>
>>>>>
>>>>>Well on my test systems I have pretty good accurate time. But I agree,
>>>>>PIT is not the best option for interrupt. It should be possible to use
>>>>>other interrupt sources as well.
>
>
> But then again reprogramming PIT in my patch should not be that bad,
> as it's not done under load.
>
>
>>>>>It should not matter where the timer interrupt comes from, as long as
>>>>>it comes when programmed. Updating time should be separate from timer
>>>>>interrupts. Currently we have a problem where time is tied to the
>>>>>timer interrupt.
>>>>
>>>>In the HRT code time is most correctly stated as wall_time +
>>>>get_arch_cycles_since(wall_jiffies) (plus conversion or two:)). This is
>>>>some what removed from the tick interrupt, but is resynced to that
>>>>interrupt more or less each interrupt.
>>>
>>>
>>>That sounds very accurate :)
>>>
>>>
>>>
>>>>A second issue is trying to get the jiffies update as close to the run of
>>>>the timer list as possible. Without this we have no hope of high res
>>>>timers.
>>>
>>>
>>>OK. But if the timer interrupt is separated from updating the time,
>>>the next timer interrupt should be programmable to happen exactly
>>>when a HRT timer needs it, right?
>>
>>First, HRT uses a two phase system of timing. The first phase is the
>>normal timer list expires the timer. The timer is then handed to the high
>>res code which keeps a list of timers that are to expire prior to the next
>>jiffie. An interrupt is scheduled to make this happen. Depending on the
>>hardware available, this can come from the same timer or a different timer.
>>For example on x86 systems with a local apic we use the apic timer to
>>generate this interrupt. It triggers either a tasklet for UP or SMP with
>>out per cpu timers or a soft irq for SMP systems with per cpu timers.
>>
>>What this means is that, for timers near but just after a jiffie, the
>>run_timer list being late can make the HR timer late.
>
>
> Thanks for explaining that. So basically catching up with jiffies after
> skipping ticks could easily delay the HRT timer.

Any due in the "skipped time" would be late, as would any normal timers due
during that time.
>
> If jiffies was calculated from hw timer, updating time after skipping
> jiffies would be fast, and then this problem would go away, right?

No, you are still pulling the timers out of the timer list long after they
should have expired.
>
>
>>This code on on sourceforge if you want a closer look...
>
>
> I'll take a look at it.
>
>
>>>Hmm, how about using a pool of programmable timers available on the
>>>system for the timer interrupts and HRT? Or is one interrupt source
>>>always enough?
>>
>>Hardware heaven :), but no thanks. A reliable tick generator for the
>>jiffies timer and one additional timer (or one per cpu) works well in the
>>x86.
>>
>>If you have something like the PPC where you can mess with the timer with
>>out loosing time, that works well also. The correct formulation would be a
>>"clock" that can be read quickly and a timer tied to the same "rock" that
>>uses the same count units as the clock. PARISC has a counter that just
>>counts and a compare register. When they are equal an interrupt is
>>generated. That is a nice set up.
>
>
> Yes, many ARMs have this setup as well.
>
>
>>Now the X86 is bad and has little hope of being fixed for these reasons:
>>a.) the TSC is fast and easy to read but its not clocked at any given
>>frequency and, on some platforms, it changes without notifying the software.
>>b.) the PIT and the PMTIMER are both in I/O space and so take forever to
>>access.
>>c.) All three of these use different units (but at least the PMTIMER is
>>(supposed to be) related to the PIT clock.
>>d.) the HPET, again is in I/O space. I suspect that it uses a reasonable
>>"rock" but, as I understand it, it knocks out the PIT and, of course it
>>uses units unrelated to all the others.
>
>
> The timers on x86 are quite messy...

AMEN!
>
> Tony
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-21 20:28:42

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Zwane Mwaikambo wrote:
> On Fri, 21 Jan 2005, Tony Lindgren wrote:
>
>
>>>This doesn't seem to cover the local APIC timer, what do you do about the
>>>1kHz tick which it's programmed to do?
>>
>>Sorry for the delay in replaying. Thanks for pointing that out, I
>>don't know yet what to do with the local APIC timer. Have to look at
>>more.
>
>
> Pavel does your test system have a Local APIC? If so that may also explain
> why you didn't see a difference.
>
> Tony, something like the following for oneshot should work, untested of
> course. Perhaps you could use that for the wakeup interrupt instead?
>
> void setup_oneshot_apic_timer(unsigned int count)
> {
> unsigned int lvtt, tmp_value;
> unsigned long flags;
>
> count *= calibration_result;
> local_irq_save(flags);
> lvtt = ~APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
> apic_write_around(APIC_LVTT, lvtt);
> tmp_value = apic_read(APIC_TDCR);
> apic_write_around(APIC_TDCR, (tmp_value
> & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
> | APIC_TDR_DIV_16);
>
> apic_write_around(APIC_TMICT, count/APIC_DIVISOR);
> local_irq_restore(flags);
> }

The VST patch on sourceforge (http://sourceforge.net/projects/high-res-timers/)
uses the local apic timer to do the wake up. This is the same timer that is
used for the High Res work.

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/

2005-01-21 21:25:55

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

On Fri, 21 Jan 2005, Pavel Machek wrote:

> > > > This doesn't seem to cover the local APIC timer, what do you do about the
> > > > 1kHz tick which it's programmed to do?
> > >
> > > Sorry for the delay in replaying. Thanks for pointing that out, I
> > > don't know yet what to do with the local APIC timer. Have to look at
> > > more.
> >
> > Pavel does your test system have a Local APIC? If so that may also explain
> > why you didn't see a difference.
>
> My systems do have APICs, but I prefer them disabled :-).

May i ask why? ;)

2005-01-21 21:32:19

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hi!

> > > > Sorry for the delay in replaying. Thanks for pointing that out, I
> > > > don't know yet what to do with the local APIC timer. Have to look at
> > > > more.
> > >
> > > Pavel does your test system have a Local APIC? If so that may also explain
> > > why you didn't see a difference.
> >
> > My systems do have APICs, but I prefer them disabled :-).
>
> May i ask why? ;)

Well, BIOSes tend to do strange stuff if you enter sleep state with
APIC enabled, and APIC is harder to understand than plain old
PIT. Leave APIC enabled, try to reboot it dies, etc.

PIT may be slightly slower, but not having to debug APIC problems
outweights that.

Anyway I was wrong, I do have it enabled on my main system. Ok, it
seems to work, so I'll probably leave it alone.

Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!

2005-01-21 21:43:16

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Hello George,

On Fri, 21 Jan 2005, George Anzinger wrote:

> The VST patch on sourceforge
> (http://sourceforge.net/projects/high-res-timers/) uses the local apic timer
> to do the wake up. This is the same timer that is used for the High Res work.

I've been meaning to look into it, although it's quite a bit of work going
through all the extra code from the highres timer patch.

Thanks,
Zwane

2005-01-22 07:24:40

by George Anzinger

[permalink] [raw]
Subject: Re: [PATCH] dynamic tick patch

Zwane Mwaikambo wrote:
> Hello George,
>
> On Fri, 21 Jan 2005, George Anzinger wrote:
>
>
>>The VST patch on sourceforge
>>(http://sourceforge.net/projects/high-res-timers/) uses the local apic timer
>>to do the wake up. This is the same timer that is used for the High Res work.
>
>
> I've been meaning to look into it, although it's quite a bit of work going
> through all the extra code from the highres timer patch.

Well, really all it uses is the HR timer. The rest of HRT is not really used
for VST. (Unless, of course, you are refering to the work over of the tsc timer
tick code.)

-g
>
> Thanks,
> Zwane
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/