2005-04-06 08:33:12

by Tony Lindgren

[permalink] [raw]
Subject: [PATCH] Dynamic Tick version 050406-1

diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/Kconfig 2005-04-06 01:06:59 -07:00
@@ -460,6 +460,16 @@
bool "Provide RTC interrupt"
depends on HPET_TIMER && RTC=y

+config NO_IDLE_HZ
+ bool "Dynamic Tick Timer - Skip timer ticks during idle"
+ help
+ This option enables support for skipping timer ticks when the
+ processor is idle. During system load, timer is continuous.
+ This option saves power, as it allows the system to stay in
+ idle mode longer. Currently supported timers are ACPI PM
+ timer, local APIC timer, and TSC timer. HPET timer is currently
+ not supported.
+
config SMP
bool "Symmetric multi-processing support"
---help---
diff -Nru a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
--- a/arch/i386/kernel/apic.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/apic.c 2005-04-06 01:06:59 -07:00
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/dyn-tick-timer.h>

#include <asm/atomic.h>
#include <asm/smp.h>
@@ -909,6 +910,8 @@

#define APIC_DIVISOR 16

+static u32 apic_timer_val;
+
static void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt_value, tmp_value, ver;
@@ -927,7 +930,15 @@
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);

- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ apic_timer_val = clocks/APIC_DIVISOR;
+
+#ifdef CONFIG_NO_IDLE_HZ
+ /* Local APIC timer is 24-bit */
+ if (apic_timer_val)
+ dyn_tick->max_skip = 0xffffff / apic_timer_val;
+#endif
+
+ apic_write_around(APIC_TMICT, apic_timer_val);
}

static void __init setup_APIC_timer(unsigned int clocks)
@@ -1040,6 +1051,13 @@
*/
setup_APIC_timer(calibration_result);

+#ifdef CONFIG_NO_IDLE_HZ
+ if (calibration_result)
+ dyn_tick->state |= DYN_TICK_USE_APIC;
+ else
+ printk(KERN_INFO "dyn-tick: Cannot use local APIC\n");
+#endif
+
local_irq_enable();
}

@@ -1068,6 +1086,18 @@
}
}

+#if defined(CONFIG_NO_IDLE_HZ)
+void reprogram_apic_timer(unsigned int count)
+{
+ unsigned long flags;
+
+ count *= apic_timer_val;
+ local_irq_save(flags);
+ apic_write_around(APIC_TMICT, count);
+ local_irq_restore(flags);
+}
+#endif
+
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
@@ -1160,6 +1190,7 @@

fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
{
+ unsigned long seq;
int cpu = smp_processor_id();

/*
@@ -1178,6 +1209,23 @@
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
+
+#ifdef CONFIG_NO_IDLE_HZ
+ /*
+ * Check if we need to wake up PIT interrupt handler.
+ * Otherwise just wake up local APIC timer.
+ */
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+ if (dyn_tick->skip_cpu == cpu && dyn_tick->skip > DYN_TICK_MIN_SKIP)
+ dyn_tick->interrupt(99, NULL, regs);
+ else
+ reprogram_apic_timer(1);
+ }
+ } while (read_seqretry(&xtime_lock, seq));
+#endif
+
smp_local_timer_interrupt(regs);
irq_exit();
}
diff -Nru a/arch/i386/kernel/dyn-tick.c b/arch/i386/kernel/dyn-tick.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/kernel/dyn-tick.c 2005-04-06 01:06:59 -07:00
@@ -0,0 +1,68 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.c
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+//#include <linux/module.h>
+//#include <linux/interrupt.h>
+//#include <linux/cpumask.h>
+//#include <linux/pm.h>
+#include <linux/dyn-tick-timer.h>
+//#include <asm/io.h>
+#include "dyn-tick.h"
+
+extern int dyn_tick_late_init(void);
+
+void arch_reprogram_timer(void)
+{
+ if (cpu_has_local_apic()) {
+ disable_pit_tick();
+ if (dyn_tick->state & DYN_TICK_TIMER_INT)
+ reprogram_apic_timer(dyn_tick->skip);
+ } else {
+ if (dyn_tick->state & DYN_TICK_TIMER_INT)
+ reprogram_pit_tick(dyn_tick->skip);
+ else
+ disable_pit_tick();
+ }
+}
+
+static struct dyn_tick_timer arch_dyn_tick_timer = {
+ .arch_reprogram_timer = &arch_reprogram_timer,
+};
+
+int __init dyn_tick_init(void)
+{
+ arch_dyn_tick_timer.arch_init = dyn_tick_late_init;
+ dyn_tick_register(&arch_dyn_tick_timer);
+
+ return 0;
+}
+arch_initcall(dyn_tick_init);
diff -Nru a/arch/i386/kernel/dyn-tick.h b/arch/i386/kernel/dyn-tick.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/kernel/dyn-tick.h 2005-04-06 01:06:59 -07:00
@@ -0,0 +1,33 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC)
+#define cpu_has_local_apic() (dyn_tick->state & DYN_TICK_USE_APIC)
+#else
+#define cpu_has_local_apic() 0
+#endif
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/irq.c 2005-04-06 01:06:59 -07:00
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/dyn-tick-timer.h>

DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -102,6 +103,12 @@
);
} else
#endif
+
+#ifdef CONFIG_NO_IDLE_HZ
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING) && irq != 0)
+ dyn_tick->interrupt(irq, NULL, regs);
+#endif
+
__do_IRQ(irq, regs);

irq_exit();
diff -Nru a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
--- a/arch/i386/kernel/process.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/process.c 2005-04-06 01:06:59 -07:00
@@ -160,6 +160,10 @@
if (!idle)
idle = default_idle;

+#ifdef CONFIG_NO_IDLE_HZ
+ dyn_tick_reprogram_timer();
+#endif
+
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/time.c 2005-04-06 01:06:59 -07:00
@@ -46,6 +46,7 @@
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
+#include <linux/dyn-tick-timer.h>

#include <asm/io.h>
#include <asm/smp.h>
@@ -67,6 +68,7 @@
#include <asm/arch_hooks.h>

#include "io_ports.h"
+#include "dyn-tick.h"

extern spinlock_t i8259A_lock;
int pit_latch_buggy; /* extern */
@@ -308,6 +310,66 @@
return IRQ_HANDLED;
}

+#ifdef CONFIG_NO_IDLE_HZ
+static unsigned long long last_tick;
+void reprogram_pit_tick(int jiffies_to_skip);
+extern void replace_timer_interrupt(void * new_handler);
+
+#if defined(CONFIG_NO_IDLE_HZ) && defined(CONFIG_X86_LOCAL_APIC)
+extern void reprogram_apic_timer(unsigned int count);
+#else
+void reprogram_apic_timer(unsigned int count) {}
+#endif
+
+#ifdef DEBUG
+#define dbg_dyn_tick_irq() {if (skipped && skipped < dyn_tick->skip) \
+ printk("%u/%li ", skipped, dyn_tick->skip);}
+#else
+#define dbg_dyn_tick_irq() {}
+#endif
+
+/*
+ * This interrupt handler updates the time based on number of jiffies skipped
+ * It would be somewhat more optimized to have a customa handler in each timer
+ * using hardware ticks instead of nanoseconds. Note that CONFIG_NO_IDLE_HZ
+ * currently disables timer fallback on skipped jiffies.
+ */
+irqreturn_t dyn_tick_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long flags;
+ volatile unsigned long long now;
+ unsigned int skipped = 0;
+
+ if (dyn_tick->state & DYN_TICK_DEBUG) {
+ if (irq == 0)
+ printk(".");
+ else
+ printk("%i ", irq);
+ }
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ now = cur_timer->get_hw_time();
+ while (now - last_tick >= NS_TICK_LEN) {
+ last_tick += NS_TICK_LEN;
+ cur_timer->mark_offset();
+ do_timer_interrupt(irq, NULL, regs);
+ skipped++;
+ }
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+ dbg_dyn_tick_irq();
+ dyn_tick->skip = 1;
+ if (cpu_has_local_apic())
+ reprogram_apic_timer(dyn_tick->skip);
+ reprogram_pit_tick(dyn_tick->skip);
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ dyn_tick->state &= ~DYN_TICK_SKIPPING;
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return IRQ_HANDLED;
+}
+#endif /* CONFIG_NO_IDLE_HZ */
+
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
@@ -416,7 +478,7 @@


/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
+struct sys_device device_timer = {
.id = 0,
.cls = &timer_sysclass,
};
@@ -452,6 +514,69 @@
}
#endif

+#ifdef CONFIG_NO_IDLE_HZ
+
+void disable_pit_tick(void)
+{
+ extern spinlock_t i8253_lock;
+ unsigned long flags;
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x31, PIT_MODE); /* binary, mode 1, LSB/MSB, ch 0 */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+/*
+ * Reprograms the next timer interrupt
+ * PIT timer reprogramming code taken from APM code.
+ * Note that PIT timer is a 16-bit timer, which allows max
+ * skip of only few seconds.
+ */
+void reprogram_pit_tick(int jiffies_to_skip)
+{
+ int skip;
+ extern spinlock_t i8253_lock;
+ unsigned long flags;
+
+ skip = jiffies_to_skip * LATCH;
+ if (skip > 0xffff) {
+ skip = 0xffff;
+ }
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(skip & 0xff, PIT_CH0); /* LSB */
+ outb(skip >> 8, PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+int __init dyn_tick_late_init(void)
+{
+ unsigned long flags;
+
+ if (!cur_timer->get_hw_time)
+ return -ENODEV;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ last_tick = cur_timer->get_hw_time();
+ dyn_tick->skip = 1;
+ if (!cpu_has_local_apic())
+ dyn_tick->max_skip = 0xffff/LATCH; /* PIT timer length */
+ printk(KERN_INFO "dyn-tick: Maximum ticks to skip limited to %i\n",
+ dyn_tick->max_skip);
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ if (cur_timer->late_init)
+ cur_timer->late_init();
+ dyn_tick->interrupt = dyn_tick_timer_interrupt;
+ replace_timer_interrupt(dyn_tick->interrupt);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return 0;
+}
+#endif /* CONFIG_NO_IDLE_HZ */
+
void __init time_init(void)
{
#ifdef CONFIG_HPET_TIMER
@@ -471,6 +596,15 @@

cur_timer = select_timer();
printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+
+#ifdef CONFIG_NO_IDLE_HZ
+ if (strncmp(cur_timer->name, "tsc", 3) == 0 ||
+ strncmp(cur_timer->name, "pmtmr", 3) == 0)
+ dyn_tick->state |= DYN_TICK_SUITABLE;
+ else
+ printk(KERN_INFO "dyn-tick: Cannot use timer %s\n",
+ cur_timer->name);
+#endif

time_init_hook();
}
diff -Nru a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
--- a/arch/i386/kernel/timers/timer_pm.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/timers/timer_pm.c 2005-04-06 01:06:59 -07:00
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/init.h>
+#include <linux/dyn-tick-timer.h>
#include <asm/types.h>
#include <asm/timer.h>
#include <asm/smp.h>
@@ -168,6 +169,7 @@
monotonic_base += delta * NSEC_PER_USEC;
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* convert to ticks */
delta += offset_delay;
lost = delta / (USEC_PER_SEC / HZ);
@@ -184,6 +186,7 @@
first_run = 0;
offset_delay = 0;
}
+#endif
}


@@ -238,6 +241,25 @@
return (unsigned long) offset_delay + cyc2us(delta);
}

+static unsigned long long ns_time;
+
+static unsigned long long get_hw_time_pmtmr(void)
+{
+ u32 now, delta;
+ static unsigned int last_cycles;
+ now = read_pmtmr();
+ delta = (now - last_cycles) & ACPI_PM_MASK;
+ last_cycles = now;
+ ns_time += cyc2us(delta) * NSEC_PER_USEC;
+ return ns_time;
+}
+
+static void late_init_pmtmr(void)
+{
+ ns_time = monotonic_clock_pmtmr();
+}
+
+extern irqreturn_t pmtmr_interrupt(int irq, void *dev_id, struct pt_regs *regs);

/* acpi timer_opts struct */
static struct timer_opts timer_pmtmr = {
@@ -245,7 +267,9 @@
.mark_offset = mark_offset_pmtmr,
.get_offset = get_offset_pmtmr,
.monotonic_clock = monotonic_clock_pmtmr,
+ .get_hw_time = get_hw_time_pmtmr,
.delay = delay_pmtmr,
+ .late_init = late_init_pmtmr,
};

struct init_timer_opts __initdata timer_pmtmr_init = {
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/kernel/timers/timer_tsc.c 2005-04-06 01:06:59 -07:00
@@ -112,6 +112,15 @@
return delay_at_last_interrupt + edx;
}

+static unsigned long get_hw_time_tsc(void)
+{
+ register unsigned long eax, edx;
+
+ unsigned long long hw_time;
+ rdtscll(hw_time);
+ return cycles_2_ns(hw_time);
+}
+
static unsigned long long monotonic_clock_tsc(void)
{
unsigned long long last_offset, this_offset, base;
@@ -348,6 +357,7 @@

rdtsc(last_tsc_low, last_tsc_high);

+#ifndef CONFIG_NO_IDLE_HZ
spin_lock(&i8253_lock);
outb_p(0x00, PIT_MODE); /* latch the count ASAP */

@@ -415,11 +425,14 @@
cpufreq_delayed_get();
} else
lost_count = 0;
+#endif
+
/* update the monotonic base value */
this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
monotonic_base += cycles_2_ns(this_offset - last_offset);
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* calculate delay_at_last_interrupt */
count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
@@ -430,6 +443,7 @@
*/
if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
jiffies_64++;
+#endif
}

static int __init init_tsc(char* override)
@@ -551,6 +565,7 @@
.mark_offset = mark_offset_tsc,
.get_offset = get_offset_tsc,
.monotonic_clock = monotonic_clock_tsc,
+ .get_hw_time = get_hw_time_tsc,
.delay = delay_tsc,
};

diff -Nru a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
--- a/arch/i386/mach-default/setup.c 2005-04-06 01:06:59 -07:00
+++ b/arch/i386/mach-default/setup.c 2005-04-06 01:06:59 -07:00
@@ -85,6 +85,22 @@
setup_irq(0, &irq0);
}

+/**
+ * replace_timer_interrupt - allow replacing timer interrupt handler
+ *
+ * Description:
+ * Can be used to replace timer interrupt handler with a more optimized
+ * handler. Used for enabling and disabling of CONFIG_NO_IDLE_HZ.
+ */
+void replace_timer_interrupt(void * new_handler)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ irq0.handler = new_handler;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
#ifdef CONFIG_MCA
/**
* mca_nmi_hook - hook into MCA specific NMI chain
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h 2005-04-06 01:06:59 -07:00
+++ b/include/asm-i386/timer.h 2005-04-06 01:06:59 -07:00
@@ -1,6 +1,7 @@
#ifndef _ASMi386_TIMER_H
#define _ASMi386_TIMER_H
#include <linux/init.h>
+#include <linux/interrupt.h>

/**
* struct timer_ops - used to define a timer source
@@ -21,7 +22,9 @@
void (*mark_offset)(void);
unsigned long (*get_offset)(void);
unsigned long long (*monotonic_clock)(void);
+ unsigned long long (*get_hw_time)(void);
void (*delay)(unsigned long);
+ void (*late_init)(void);
};

struct init_timer_opts {
diff -Nru a/include/linux/dyn-tick-timer.h b/include/linux/dyn-tick-timer.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/include/linux/dyn-tick-timer.h 2005-04-06 01:06:59 -07:00
@@ -0,0 +1,74 @@
+/*
+ * linux/include/linux/dyn-tick-timer.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DYN_TICK_TIMER_H
+#define _DYN_TICK_TIMER_H
+
+#include <linux/interrupt.h>
+
+#define DYN_TICK_DEBUG (1 << 31)
+#define DYN_TICK_TIMER_INT (1 << 4)
+#define DYN_TICK_USE_APIC (1 << 3)
+#define DYN_TICK_SKIPPING (1 << 2)
+#define DYN_TICK_ENABLED (1 << 1)
+#define DYN_TICK_SUITABLE (1 << 0)
+
+struct dyn_tick_state {
+ unsigned int state; /* Current state */
+ int skip_cpu; /* Skip handling processor */
+ unsigned long skip; /* Ticks to skip */
+ unsigned int max_skip; /* Max number of ticks to skip */
+ unsigned long irq_skip_mask; /* Do not update time from these irqs */
+ irqreturn_t (*interrupt)(int, void *, struct pt_regs *);
+};
+
+struct dyn_tick_timer {
+ int (*arch_init) (void);
+ void (*arch_enable) (void);
+ void (*arch_disable) (void);
+ void (*arch_reprogram_timer) (void);
+};
+
+extern struct dyn_tick_state * dyn_tick;
+extern void dyn_tick_register(struct dyn_tick_timer * new_timer);
+
+#define NS_TICK_LEN ((1 * 1000000000)/HZ)
+#define DYN_TICK_MIN_SKIP 2
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+extern unsigned long dyn_tick_reprogram_timer(void);
+
+#else
+
+#define arch_has_safe_halt() 0
+#define dyn_tick_reprogram_timer() {}
+
+
+#endif /* CONFIG_NO_IDLE_HZ */
+#endif /* _DYN_TICK_TIMER_H */
diff -Nru a/kernel/Makefile b/kernel/Makefile
--- a/kernel/Makefile 2005-04-06 01:06:59 -07:00
+++ b/kernel/Makefile 2005-04-06 01:06:59 -07:00
@@ -28,6 +28,7 @@
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
+obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick-timer.o

ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
diff -Nru a/kernel/dyn-tick-timer.c b/kernel/dyn-tick-timer.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/kernel/dyn-tick-timer.c 2005-04-06 01:06:59 -07:00
@@ -0,0 +1,254 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.c
+ *
+ * Beginnings of generic dynamic tick timer support
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/pm.h>
+#include <linux/dyn-tick-timer.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+#define DYN_TICK_VERSION "050301-1"
+
+struct dyn_tick_state dyn_tick_state;
+struct dyn_tick_state * dyn_tick = &dyn_tick_state;
+struct dyn_tick_timer * dyn_tick_cfg;
+
+static void (*orig_idle) (void) = 0;
+extern void disable_pit_tick(void);
+extern void reprogram_pit_tick(int jiffies_to_skip);
+extern void reprogram_apic_timer(unsigned int count);
+extern void reprogram_pit_tick(int jiffies_to_skip);
+static cpumask_t dyn_cpu_map;
+
+/*
+ * Arch independed code needed to reprogram next timer interrupt.
+ * Gets called from cpu_idle() before entering idle loop. Note that
+ * we want to have all processors idle before reprogramming the
+ * next timer interrupt.
+ */
+unsigned long dyn_tick_reprogram_timer(void)
+{
+ int cpu;
+ unsigned long flags;
+ cpumask_t idle_cpus;
+ unsigned long next;
+
+ if (dyn_tick->state & DYN_TICK_DEBUG)
+ printk("i");
+
+ if (!(dyn_tick->state & DYN_TICK_ENABLED))
+ return 0;
+
+ /* Check if we are already skipping ticks and can idle other cpus */
+ if (dyn_tick->state & DYN_TICK_SKIPPING) {
+ reprogram_apic_timer(dyn_tick->skip);
+ return 0;
+ }
+
+ /* Check if we can start skipping ticks */
+ write_seqlock_irqsave(&xtime_lock, flags);
+ cpu = smp_processor_id();
+ cpu_set(cpu, dyn_cpu_map);
+ cpus_and(idle_cpus, dyn_cpu_map, cpu_online_map);
+ if (cpus_equal(idle_cpus, cpu_online_map)) {
+ next = next_timer_interrupt();
+ if (jiffies > next) {
+ //printk("Too late? next: %lu jiffies: %lu\n",
+ // next, jjiffies);
+ dyn_tick->skip = 1;
+ } else
+ dyn_tick->skip = next_timer_interrupt() - jiffies;
+ if (dyn_tick->skip > DYN_TICK_MIN_SKIP) {
+ if (dyn_tick->skip > dyn_tick->max_skip)
+ dyn_tick->skip = dyn_tick->max_skip;
+
+ dyn_tick_cfg->arch_reprogram_timer();
+
+ dyn_tick->skip_cpu = cpu;
+ dyn_tick->state |= DYN_TICK_SKIPPING;
+ }
+ cpus_clear(dyn_cpu_map);
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return dyn_tick->skip;
+}
+
+void __init dyn_tick_register(struct dyn_tick_timer * arch_timer)
+{
+ dyn_tick_cfg = arch_timer;
+ printk(KERN_INFO "dyn-tick: Registering dynamic tick timer v%s\n",
+ DYN_TICK_VERSION);
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Sysfs interface
+ * ---------------------------------------------------------------------------
+ */
+
+extern struct sys_device device_timer;
+
+static ssize_t show_dyn_tick_state(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "suitable:\t%i\n"
+ "enabled:\t%i\n"
+ "skipping:\t%i\n"
+ "using APIC:\t%i\n"
+ "int enabled:\t%i\n"
+ "debug:\t\t%i\n",
+ dyn_tick->state & DYN_TICK_SUITABLE,
+ (dyn_tick->state & DYN_TICK_ENABLED) >> 1,
+ (dyn_tick->state & DYN_TICK_SKIPPING) >> 2,
+ (dyn_tick->state & DYN_TICK_USE_APIC) >> 3,
+ (dyn_tick->state & DYN_TICK_TIMER_INT) >> 4,
+ (dyn_tick->state & DYN_TICK_DEBUG) >> 31);
+}
+
+static ssize_t set_dyn_tick_state(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable) {
+ if (dyn_tick_cfg->arch_enable)
+ dyn_tick_cfg->arch_enable();
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ } else {
+ if (dyn_tick_cfg->arch_disable)
+ dyn_tick_cfg->arch_disable();
+ dyn_tick->state &= ~DYN_TICK_ENABLED;
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_state, 0644, show_dyn_tick_state,
+ set_dyn_tick_state);
+
+static ssize_t show_dyn_tick_int(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%i\n",
+ (dyn_tick->state & DYN_TICK_TIMER_INT) >> 4);
+}
+
+static ssize_t set_dyn_tick_int(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable)
+ dyn_tick->state |= DYN_TICK_TIMER_INT;
+ else
+ dyn_tick->state &= ~DYN_TICK_TIMER_INT;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_int, 0644, show_dyn_tick_int, set_dyn_tick_int);
+
+static ssize_t show_dyn_tick_dbg(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%i\n",
+ (dyn_tick->state & DYN_TICK_DEBUG) >> 31);
+}
+
+static ssize_t set_dyn_tick_dbg(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable)
+ dyn_tick->state |= DYN_TICK_DEBUG;
+ else
+ dyn_tick->state &= ~DYN_TICK_DEBUG;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_dbg, 0644, show_dyn_tick_dbg, set_dyn_tick_dbg);
+
+/*
+ * ---------------------------------------------------------------------------
+ * Init functions
+ * ---------------------------------------------------------------------------
+ */
+
+static int __init dyn_tick_early_init(void)
+{
+ dyn_tick->state |= DYN_TICK_TIMER_INT;
+}
+
+subsys_initcall(dyn_tick_early_init);
+
+/*
+ * We need to initialize dynamic tick after calibrate delay
+ */
+static int __init dyn_tick_late_init(void)
+{
+ int ret = 0;
+
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_state);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_int);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_dbg);
+
+ if (dyn_tick_cfg->arch_init == NULL ||
+ !(dyn_tick->state & DYN_TICK_SUITABLE))
+ return -ENODEV;
+
+ ret = dyn_tick_cfg->arch_init();
+ if (ret != 0) {
+ printk(KERN_WARNING "dyn-tick: Init failed\n");
+ return -ENODEV;
+ }
+
+ printk(KERN_INFO "dyn-tick: Timer using dynamic tick\n");
+
+ return ret;
+}
+
+late_initcall(dyn_tick_late_init);


Attachments:
(No filename) (1.08 kB)
patch-dynamic-tick-2.6.12-rc2-050406-1 (28.36 kB)
Download all attachments

2005-04-06 21:17:44

by Frank Sorenson

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

[4294667.296000] Linux version 2.6.12-rc2-fs3 ([email protected]) (gcc version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3)) #3 Wed Apr 6 13:14:48 MDT 2005
[4294667.296000] BIOS-provided physical RAM map:
[4294667.296000] BIOS-e820: 0000000000000000 - 000000000009f000 (usable)
[4294667.296000] BIOS-e820: 000000000009f000 - 00000000000a0000 (reserved)
[4294667.296000] BIOS-e820: 0000000000100000 - 000000005ffae000 (usable)
[4294667.296000] BIOS-e820: 000000005ffae000 - 0000000060000000 (reserved)
[4294667.296000] BIOS-e820: 00000000feda0000 - 00000000fee00000 (reserved)
[4294667.296000] BIOS-e820: 00000000ffb00000 - 0000000100000000 (reserved)
[4294667.296000] 639MB HIGHMEM available.
[4294667.296000] 896MB LOWMEM available.
[4294667.296000] DMI 2.3 present.
[4294667.296000] ACPI: PM-Timer IO Port: 0x808
[4294667.296000] Allocating PCI resources starting at 60000000 (gap: 60000000:9eda0000)
[4294667.296000] Built 1 zonelists
[4294667.296000] Kernel command line: ro root=LABEL=/1 vga=794 nmi_watchdog=1 lapic console=tty0 console=ttyUSB0,9600 psmouse.proto=exps i8k.ignore_dmi:bool=true [email protected]/eth0,[email protected]/ single
[4294667.296000] Unknown boot option `i8k.ignore_dmi:bool=true': ignoring
[4294667.296000] netconsole: local port 6665
[4294667.296000] netconsole: local IP 128.187.171.101
[4294667.296000] netconsole: interface eth0
[4294667.296000] netconsole: remote port 5515[4294667.296000] netconsole: remote IP 128.187.171.102
[4294667.[4294667.296000] Console: colour dummy device 80x25
[4294667.298[4294667.472000] Capability LSM initialized as secondary
[429466[4294667.472000] Checking 'hlt' instruction... OK.
[4294667.4830[4294667.616000] checking if image is initramfs... it is
[429466[4294667.750000] Completing Region/Field/Buffer/Package initiali[4294667.881000] PCI: Transparent bridge - 0000:00:1e.0
[4294667[4294668.309000] pnp: PnP ACPI: found 10 devices
[4294668.310000[4294668.534000] pnp: 00:01: ioport range 0x808-0x80f could not [4294668.607000] audit(1112798526.606:0): initialized
[4294668.6[4294668.625000] ACPI: PCI Interrupt Link [LNKA] enabled at IRQ [4294668.947000] * connector 1 of type 2 (CRT) : 2320
[4294668[4294671.384000] radeonfb: Monitor 1 type LCD found
[4294671.384[4294671.384000] 320 x 400
[4294671.384000] 320 x 400
[4294671[4294671.384000] 1024 x 768
[4294671.384000] 1280 x 1024
[4294[4294671.384000] Setting up default mode based on panel info
[42[4294671.492000] fb1: VESA VGA frame buffer device
[4294671.4980[4294673.099000] agpgart: AGP aperture is 128M @ 0xe8000000
[429[4294673.156000] ACPI: PCI Interrupt Link [LNKB] enabled at IRQ [4294673.205000] PPP generic driver version 2.4.2
[4294673.20800[4294676.218000] b44: eth0: Link is up at 100 Mbps, full duplex.[4294677.477000] ICH4: not 100% native mode: will probe irqs lat[4294682.079000] hda: cache flushes supported
[4294682.081000] [4294682.253000] PCI: Enabling device 0000:02:01.0 (0000 -> 0002[4294682.646000] Badness in device_release at drivers/base/core.[4294682.657000] Databook TCIC-2 PCMCIA probe: not found.
[42946[4294682.683000] hub 1-0:1.0: 6 ports detected
[4294682.705000] [4294682.781000] ACPI: PCI Interrupt 0000:00:1d.1[B] -> Link [LN[4294682.919000] uhci_hcd 0000:00:1d.2: irq 11, io base 0x0000bf[4294683.124000] usbcore: registered new driver usbhid
[4294683.[4294683.141000] drivers/usb/serial/belkin_sa.c: USB Belkin Seri[4294683.155000] drivers/usb/serial/usb-serial.c: USB Serial sup[4294683.169000] drivers/usb/serial/usb-serial.c: USB Serial sup[4294683.183000] drivers/usb/serial/usb-serial.c: USB Serial sup[4294683.198000] drivers/usb/serial/keyspan.c: v1.1.4:Keyspan US[4294683.211000] drivers/usb/serial/usb-serial.c: USB Serial sup[4294685.808000] drivers/usb/serial/whiteheat.c: USB ConnectTech[4294685.833000] md: md driver 0.90.1 MAX_MD_DEVS=256, MD_SB_DIS[4294685.966000] input: PS/2 Generic Mouse on isa0060/serio1
[4294686.655000] intel8x0_measure_ac97_clock: measured 49315 usecs
[4294686.656000] intel8x0: clocking to 48000
[4294686.668000] ACPI: PCI Interrupt 0000:00:1f.6[B] -> Link [LNKB] -> GSI 7 (level, low) -> IRQ 7
[4294686.771000] MC'97 1 converters and GPIO not ready (0xff00)
[4294686.777000] ALSA device list:
[4294686.778000] #0: Intel 82801DB-ICH4 with STAC9750,51 at 0xf8fff800, irq 7
[4294686.779000] #1: Intel 82801DB-ICH4 Modem at 0xd400, irq 7
[4294686.780000] NET: Registered protocol family 2
[4294686.791000] IP: routing cache hash table of 4096 buckets, 128Kbytes
[4294686.792000] TCP established hash table entries: 262144 (order: 9, 2097152 bytes)
[4294686.796000] TCP bind hash table entries: 65536 (order: 8, 1835008 bytes)
[4294686.801000] TCP: Hash tables configured (established 262144 bind 65536)
[4294686.802000] IPv4 over IPv4 tunneling driver
[4294686.804000] GRE over IPv4 tunneling driver
[4294686.806000] Initializing IPsec netlink so[4294686.815000] NET: Registered protocol family 17
[4294688.655000] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[4294688.656000] printing eip:
[4294688.657000] c077f818
[4294688.659000] *pde = 00000000
[4294688.660000] Oops: 0000 [#1]
[4294688.661000] PREEMPT
[4294688.662000] Modules linked in:
[4294688.663000] CPU: 0
[4294688.663000] EIP: 0060:[<c077f818>] Not tainted VLI
[4294688.663000] EFLAGS: 00010202 (2.6.12-rc2-fs3)
[4294688.666000] EIP is at dyn_tick_late_init+0x38/0x80
[4294688.667000] eax: 00000000 ebx: c079f0c0 ecx: 00000000 edx: f7c15d4c
[4294688.668000] esi: f7f02000 edi: 00000000 ebp: f7f03fb8 esp: f7f03fb4
[4294688.669000] ds: 007b es: 007b ss: 0068
[4294688.670000] Process swapper (pid: 1, threadinfo=f7f02000 task=f7f01830)
[4294688.670000] Stack: c077a0e2 f7f03fd8 c076a956 c019bde8 c01002a0 00000000 c01002a0 00000000
[4294688.671000] 00000000 f7f03fec c01002d5 0000007b 0000007b ffffffff 00000000 c0101365
[4294688.672000] 00000000 00000000 00000000
[4294688.673000] Call Trace:
[4294688.675000] [<c0104bfa>] show_stack+0x7a/0x90
[4294688.676000] [<c0104d79>] show_registers+0x149/0x1c0
[4294688.677000] [<c0104fea>] die+0x14a/0x2d0
[4294688.678000] [<c011e3ee>] do_page_fault+0x44e/0x633
[4294688.679000] [<c01046e3>] error_code+0x4f/0x54
[4294688.680000] [<c076a956>] do_initcalls+0x56/0xc0
[4294688.681000] [<c01002d5>] init+0x35/0x110
[4294688.682000] [<c0101365>] kernel_thread_helper+0x5/0x10
[4294688.683000] Code: 83 ec 04 e8 3b 6b c0 ff ba 14 b7<7>eth0: no IPv6 routers present
[4294703.238000] md: stopping all md devices.


Attachments:
kermit-log (6.38 kB)

2005-04-07 08:25:42

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

--- a/kernel/dyn-tick-timer.c 2005-03-01 16:41:05 -08:00
+++ b/kernel/dyn-tick-timer.c 2005-04-07 00:57:30 -07:00
@@ -232,10 +232,6 @@
{
int ret = 0;

- ret = sysdev_create_file(&device_timer, &attr_dyn_tick_state);
- ret = sysdev_create_file(&device_timer, &attr_dyn_tick_int);
- ret = sysdev_create_file(&device_timer, &attr_dyn_tick_dbg);
-
if (dyn_tick_cfg->arch_init == NULL ||
!(dyn_tick->state & DYN_TICK_SUITABLE))
return -ENODEV;
@@ -245,6 +241,10 @@
printk(KERN_WARNING "dyn-tick: Init failed\n");
return -ENODEV;
}
+
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_state);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_int);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_dbg);

printk(KERN_INFO "dyn-tick: Timer using dynamic tick\n");


Attachments:
(No filename) (580.00 B)
patch-dyntick-init-fix (806.00 B)
Download all attachments

2005-04-07 09:27:23

by Alexander Nyberg

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

> > > Here's an updated dyn-tick patch. Some minor fixes:
> >
> > Doesn't look so good here. I get this with 2.6.12-rc2 (plus a few other patches).
> > Disabling Dynamic Tick makes everything happy again (it boots).
> >
> > [4294688.655000] Unable to handle kernel NULL pointer dereference at virtual address 00000000
>
> Thanks for trying it out. What kind of hardware do you have? Does it
> have HPET? It looks like no suitable timer for dyn-tick is found...
> Maybe the following patch helps?


===== arch/i386/kernel/Makefile 1.67 vs edited =====
--- 1.67/arch/i386/kernel/Makefile 2005-01-26 06:21:13 +01:00
+++ edited/arch/i386/kernel/Makefile 2005-04-07 11:21:19 +02:00
@@ -32,6 +32,7 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick.o

EXTRA_AFLAGS := -traditional



2005-04-07 21:49:30

by Frank Sorenson

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

[4294667.296000] Linux version 2.6.12-rc2-dyntick ([email protected]) (gcc version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3)) #2 Thu Apr 7 13:12:16 MDT 2005
[4294667.296000] BIOS-provided physical RAM map:
[4294667.296000] BIOS-e820: 0000000000000000 - 000000000009f000 (usable)
[4294667.296000] BIOS-e820: 000000000009f000 - 00000000000a0000 (reserved)
[4294667.296000] BIOS-e820: 0000000000100000 - 000000005ffae000 (usable)
[4294667.296000] BIOS-e820: 000000005ffae000 - 0000000060000000 (reserved)
[4294667.296000] BIOS-e820: 00000000feda0000 - 00000000fee00000 (reserved)
[4294667.296000] BIOS-e820: 00000000ffb00000 - 0000000100000000 (reserved)
[4294667.296000] 639MB HIGHMEM available.
[4294667.296000] 896MB LOWMEM available.
[4294667.296000] DMI 2.3 present.
[4294667.296000] ACPI: PM-Timer IO Port: 0x808
[4294667.296000] Allocating PCI resources starting at 60000000 (gap: 60000000:9eda0000)
[4294667.296000] Built 1 zonelists
[4294667.296000] Kernel command line: ro root=LABEL=/1 vga=794 lapic console=tty0 console=ttyUSB0,9600 psmouse.proto=exps i8k.ignore_dmi:bool=true [email protected]/eth0,[email protected]/ single
[4294667.296000] Unknown boot option `i8k.ignore_dmi:bool=true': ignoring
[4294667.296000] netconsole: local port 6665
[4294667.296000] netconsole: local IP 128.187.171.101
[4294667.296000] netconsole: interface eth0
[4294667.296000] netconsole: remote port 5515
[4294667.296000] netconsole: remote IP 128.187.171.102
[4294667.296000] netcon[4294667.297000] Dentry cache hash table entries: 131072 (order:[4294667.471000] Mount-cache hash table entries: 512
[4294667.47[4294667.482000] tbxface-0118 [02] acpi_load_tables : ACPI[4294667.684000] dyn-tick: Registering dynamic tick timer v05030[4294667.783000] Executing all Device _STA and_INI methods:.....[4294667.959000] ACPI: PCI Interrupt Link [LNKB] (IRQs 5 7) *11
[4294668.299000] options: [pci] [cardbus] [pm]
[4294668.30000[4294668.523000] pnp: 00:02: ioport range 0x806-0x807 has been r[4294668.597000] Total HugeTLB memory allocated, 0
[4294668.5970[4294668.616000] ACPI: PCI Interrupt 0000:01:00.0[A] -> Link [LN[4294668.938000] Starting monitor auto detection...
[4294669.301[4294671.375000] radeonfb: Monitor 2 type no found
[4294671.3750[4294671.375000] 320 x 480
[4294671.375000] 400 x 600
[4294671[4294671.375000] 1280 x 800
[4294671.375000] 1440 x 900
[42946[4294671.375000] vStart = 1201, vEnd = 1204, vTotal = 1250
[4294[4294672.578000] ACPI: Lid Switch [LID]
[4294672.578000] ACPI: P[4294673.083000] ipmi device interface version v33
[4294673.0830[4294673.129000] ACPI: PCI Interrupt 0000:00:1f.6[B] -> Link [LN[4294673.177000] PPP Deflate Compression module registered
[4294[4294677.438000] Uniform Multi-Platform E-IDE driver Revision: 7[4294677.714000] hda: HTS726060M9AT00, ATA DISK drive
[4294678.3[4294682.145000] Uniform CD-ROM driver Revision: 3.20
[4294682.3[4294682.399000] Yenta: CardBus bridge found at 0000:02:01.0 [10[4294682.783000] [<c02a9cbc>] kref_put+0x3c/0x80
[4294682.78400[4294682.805000] ACPI: PCI Interrupt 0000:00:1d.7[D] -> Link [LN[4294682.842000] uhci_hcd 0000:00:1d.0: Intel Corporation 82801D[4294682.979000] uhci_hcd 0000:00:1d.1: irq 11, io base 0x0000bf[4294683.060000] drivers/usb/class/bluetty.c: USB Bluetooth supp[4294683.264000] usbcore: registered new driver usbserial_generi[4294683.278000] drivers/usb/serial/cp2101.c: Silicon Labs CP210[4294683.292000] drivers/usb/serial/usb-serial.c: USB Serial sup[4294683.306000] drivers/usb/serial/io_ti.c: Edgeport USB Serial[4294683.319000] drivers/usb/serial/ftdi_sio.c: v1.4.1:USB FTDI [4294683.334000] usbcore: registered new driver keyspan_pda
[429[4294685.914000] usbcore: registered new driver pl2303
[4294685.[4294685.928000] I2O subsystem v$Rev$
[4294685.930000] i2o: max [4294685.951000] Advanced Linux Sound Architecture Driver Versio[4294686.081000] input: PS/2 Generic Mouse on isa0060/serio1
[4294686.769000] intel8x0_measure_ac97_clock: measured 49355 usecs
[4294686.770000] intel8x0: clocking to 48000
[4294686.781000] ACPI: PCI Interrupt 0000:00:1f.6[B] -> Link [LNKB] -> GSI 7 (level, low) -> IRQ 7
[4294686.884000] MC'97 1 converters and GPIO not ready (0xff00)
[4294686.890000] ALSA device list:
[4294686.891000] #0: Intel 82801DB-ICH4 with STAC9750,51 at 0xf8fff800, irq 7
[4294686.892000] #1: Intel 82801DB-ICH4 Modem at 0xd400, irq 7
[4294686.893000] NET: Registered protocol family 2
[4294686.904000] IP: routing cache hash table of 4096 buckets, 128Kbytes
[4294686.905000] TCP established hash table entries: 262144 (order: 9, 2097152 bytes)
[4294686.909000] TCP bind hash table entries: 65536 (order: 8, 1835008 bytes)
[4294686.914000] TCP: Hash tables configured (established 262144 bind 65536)
[4294686.915000] IPv4 over IPv4 tunneling driver
[4294686.917000] GRE over IPv4[4294686.919000] Initializing IPsec netlink socket
[4294686.9200[4294688.767000] dyn-tick: Maximum ticks to skip limited to 2693


Attachments:
config-2.6.12-rc2 (43.96 kB)
dyntick.log (4.84 kB)
Download all attachments

2005-04-07 22:22:16

by Frank Sorenson

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Frank Sorenson wrote:
> Tony Lindgren wrote:
>
>>Thanks for trying it out. What kind of hardware do you have? Does it
>>have HPET? It looks like no suitable timer for dyn-tick is found...
>>Maybe the following patch helps?
>>
>>Tony
>
>
> Does 'different crash' qualify as "helping"? :)

Update:
The patch does seem to fix the crash. This "different crash" I
mentioned appears to be related to the netconsole I was using (serial
console produces stairstepping text, netconsole seems to duplicate
lines--go figure). Without netconsole, dynamic tick appears to be
working, so I'm not sure whether this is a netconsole bug or a dynamic
tick bug.

While dynamic tick no longer panics, with dynamic tick, my system slows
to whatever is slower than a crawl. It now takes 6 minutes 50 seconds
to boot all the way up, compared to 1 minute 35 seconds with my 2.6.12
kernel without the dynamic tick patch. I'm not sure where this slowdown
is occurring yet.

Frank
- --
Frank Sorenson - KD7TZK
Systems Manager, Computer Science Department
Brigham Young University
[email protected]
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCVbJHaI0dwg4A47wRAmijAKCRgg9MTxrrNWKanMmmSS010BTWdgCeNMnJ
4YJWhHAcizMgZNH/+643Hvk=
=w9Ii
-----END PGP SIGNATURE-----

2005-04-08 06:22:43

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

* Alexander Nyberg <[email protected]> [050407 02:31]:
> > > > Here's an updated dyn-tick patch. Some minor fixes:
> > >
> > > Doesn't look so good here. I get this with 2.6.12-rc2 (plus a few other patches).
> > > Disabling Dynamic Tick makes everything happy again (it boots).
> > >
> > > [4294688.655000] Unable to handle kernel NULL pointer dereference at virtual address 00000000
> >
> > Thanks for trying it out. What kind of hardware do you have? Does it
> > have HPET? It looks like no suitable timer for dyn-tick is found...
> > Maybe the following patch helps?
>
>
> ===== arch/i386/kernel/Makefile 1.67 vs edited =====
> --- 1.67/arch/i386/kernel/Makefile 2005-01-26 06:21:13 +01:00
> +++ edited/arch/i386/kernel/Makefile 2005-04-07 11:21:19 +02:00
> @@ -32,6 +32,7 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o
> obj-$(CONFIG_HPET_TIMER) += time_hpet.o
> obj-$(CONFIG_EFI) += efi.o efi_stub.o
> obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
> +obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick.o
>
> EXTRA_AFLAGS := -traditional

Ah, that explains :) Thanks!

Tony

2005-04-08 06:26:55

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Dynamic Tick version 050406-1

* Frank Sorenson <[email protected]> [050407 15:21]:
> Frank Sorenson wrote:
> > Tony Lindgren wrote:
> >
> >>Thanks for trying it out. What kind of hardware do you have? Does it
> >>have HPET? It looks like no suitable timer for dyn-tick is found...
> >>Maybe the following patch helps?
> >>
> >>Tony
> >
> >
> > Does 'different crash' qualify as "helping"? :)
>
> Update:
> The patch does seem to fix the crash. This "different crash" I
> mentioned appears to be related to the netconsole I was using (serial
> console produces stairstepping text, netconsole seems to duplicate
> lines--go figure). Without netconsole, dynamic tick appears to be
> working, so I'm not sure whether this is a netconsole bug or a dynamic
> tick bug.

This might be because time does not run correctly, see below.

> While dynamic tick no longer panics, with dynamic tick, my system slows
> to whatever is slower than a crawl. It now takes 6 minutes 50 seconds
> to boot all the way up, compared to 1 minute 35 seconds with my 2.6.12
> kernel without the dynamic tick patch. I'm not sure where this slowdown
> is occurring yet.

I think I have an idea on what's going on; Your system does not wake to
APIC interrupt, and the system timer updates time only on other interrupts.
I'm experiencing the same on a loaner ThinkPad T30.

I'll try to do another patch today. Meanwhile it now should work
without lapic in cmdline.

Tony

2005-04-08 07:57:44

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

diff -Nru a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/Kconfig 2005-04-08 00:43:41 -07:00
@@ -460,6 +460,26 @@
bool "Provide RTC interrupt"
depends on HPET_TIMER && RTC=y

+config NO_IDLE_HZ
+ bool "Dynamic Tick Timer - Skip timer ticks during idle"
+ help
+ This option enables support for skipping timer ticks when the
+ processor is idle. During system load, timer is continuous.
+ This option saves power, as it allows the system to stay in
+ idle mode longer. Currently supported timers are ACPI PM
+ timer, local APIC timer, and TSC timer. HPET timer is currently
+ not supported.
+
+config DYN_TICK_USE_APIC
+ bool "Use APIC timer instead of PIT timer"
+ help
+ This option enables using APIC timer interrupt if your hardware
+ supports it. APIC timer allows longer sleep periods compared
+ to PIT timer. Note that on some hardware disabling PIT timer
+ also disables APIC timer interrupts, and system won't run
+ properly. Symptoms include slow system boot, and time running
+ slow. If unsure, don't enable this option.
+
config SMP
bool "Symmetric multi-processing support"
---help---
diff -Nru a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/Makefile 2005-04-08 00:43:41 -07:00
@@ -30,6 +30,7 @@
obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_HPET_TIMER) += time_hpet.o
+obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o

diff -Nru a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
--- a/arch/i386/kernel/apic.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/apic.c 2005-04-08 00:43:41 -07:00
@@ -26,6 +26,7 @@
#include <linux/mc146818rtc.h>
#include <linux/kernel_stat.h>
#include <linux/sysdev.h>
+#include <linux/dyn-tick-timer.h>

#include <asm/atomic.h>
#include <asm/smp.h>
@@ -909,6 +910,8 @@

#define APIC_DIVISOR 16

+static u32 apic_timer_val;
+
static void __setup_APIC_LVTT(unsigned int clocks)
{
unsigned int lvtt_value, tmp_value, ver;
@@ -927,7 +930,15 @@
& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
| APIC_TDR_DIV_16);

- apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+ apic_timer_val = clocks/APIC_DIVISOR;
+
+#ifdef CONFIG_NO_IDLE_HZ
+ /* Local APIC timer is 24-bit */
+ if (apic_timer_val)
+ dyn_tick->max_skip = 0xffffff / apic_timer_val;
+#endif
+
+ apic_write_around(APIC_TMICT, apic_timer_val);
}

static void __init setup_APIC_timer(unsigned int clocks)
@@ -1040,6 +1051,13 @@
*/
setup_APIC_timer(calibration_result);

+#ifdef CONFIG_NO_IDLE_HZ
+ if (calibration_result)
+ dyn_tick->state |= DYN_TICK_USE_APIC;
+ else
+ printk(KERN_INFO "dyn-tick: Cannot use local APIC\n");
+#endif
+
local_irq_enable();
}

@@ -1068,6 +1086,18 @@
}
}

+#if defined(CONFIG_NO_IDLE_HZ)
+void reprogram_apic_timer(unsigned int count)
+{
+ unsigned long flags;
+
+ count *= apic_timer_val;
+ local_irq_save(flags);
+ apic_write_around(APIC_TMICT, count);
+ local_irq_restore(flags);
+}
+#endif
+
/*
* the frequency of the profiling timer can be changed
* by writing a multiplier value into /proc/profile.
@@ -1160,6 +1190,7 @@

fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
{
+ unsigned long seq;
int cpu = smp_processor_id();

/*
@@ -1178,6 +1209,23 @@
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter();
+
+#ifdef CONFIG_NO_IDLE_HZ
+ /*
+ * Check if we need to wake up PIT interrupt handler.
+ * Otherwise just wake up local APIC timer.
+ */
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+ if (dyn_tick->skip_cpu == cpu && dyn_tick->skip > DYN_TICK_MIN_SKIP)
+ dyn_tick->interrupt(99, NULL, regs);
+ else
+ reprogram_apic_timer(1);
+ }
+ } while (read_seqretry(&xtime_lock, seq));
+#endif
+
smp_local_timer_interrupt(regs);
irq_exit();
}
diff -Nru a/arch/i386/kernel/dyn-tick.c b/arch/i386/kernel/dyn-tick.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/kernel/dyn-tick.c 2005-04-08 00:43:41 -07:00
@@ -0,0 +1,64 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.c
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dyn-tick-timer.h>
+#include "dyn-tick.h"
+
+extern int dyn_tick_arch_init(void);
+
+void arch_reprogram_timer(void)
+{
+ if (cpu_has_local_apic()) {
+ disable_pit_tick();
+ if (dyn_tick->state & DYN_TICK_TIMER_INT)
+ reprogram_apic_timer(dyn_tick->skip);
+ } else {
+ if (dyn_tick->state & DYN_TICK_TIMER_INT)
+ reprogram_pit_tick(dyn_tick->skip);
+ else
+ disable_pit_tick();
+ }
+}
+
+static struct dyn_tick_timer arch_dyn_tick_timer = {
+ .arch_reprogram_timer = &arch_reprogram_timer,
+};
+
+int __init dyn_tick_init(void)
+{
+ arch_dyn_tick_timer.arch_init = dyn_tick_arch_init;
+ dyn_tick_register(&arch_dyn_tick_timer);
+
+ return 0;
+}
+arch_initcall(dyn_tick_init);
diff -Nru a/arch/i386/kernel/dyn-tick.h b/arch/i386/kernel/dyn-tick.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/arch/i386/kernel/dyn-tick.h 2005-04-08 00:43:41 -07:00
@@ -0,0 +1,33 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if defined(CONFIG_DYN_TICK_USE_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_X86_UP_APIC))
+#define cpu_has_local_apic() (dyn_tick->state & DYN_TICK_USE_APIC)
+#else
+#define cpu_has_local_apic() 0
+#endif
diff -Nru a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
--- a/arch/i386/kernel/irq.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/irq.c 2005-04-08 00:43:41 -07:00
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/dyn-tick-timer.h>

DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -102,6 +103,12 @@
);
} else
#endif
+
+#ifdef CONFIG_NO_IDLE_HZ
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING) && irq != 0)
+ dyn_tick->interrupt(irq, NULL, regs);
+#endif
+
__do_IRQ(irq, regs);

irq_exit();
diff -Nru a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
--- a/arch/i386/kernel/process.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/process.c 2005-04-08 00:43:41 -07:00
@@ -160,6 +160,10 @@
if (!idle)
idle = default_idle;

+#ifdef CONFIG_NO_IDLE_HZ
+ dyn_tick_reprogram_timer();
+#endif
+
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/time.c 2005-04-08 00:43:41 -07:00
@@ -46,6 +46,7 @@
#include <linux/bcd.h>
#include <linux/efi.h>
#include <linux/mca.h>
+#include <linux/dyn-tick-timer.h>

#include <asm/io.h>
#include <asm/smp.h>
@@ -67,6 +68,7 @@
#include <asm/arch_hooks.h>

#include "io_ports.h"
+#include "dyn-tick.h"

extern spinlock_t i8259A_lock;
int pit_latch_buggy; /* extern */
@@ -308,6 +310,66 @@
return IRQ_HANDLED;
}

+#ifdef CONFIG_NO_IDLE_HZ
+static unsigned long long last_tick;
+void reprogram_pit_tick(int jiffies_to_skip);
+extern void replace_timer_interrupt(void * new_handler);
+
+#if defined(CONFIG_NO_IDLE_HZ) && defined(CONFIG_X86_LOCAL_APIC)
+extern void reprogram_apic_timer(unsigned int count);
+#else
+void reprogram_apic_timer(unsigned int count) {}
+#endif
+
+#ifdef DEBUG
+#define dbg_dyn_tick_irq() {if (skipped && skipped < dyn_tick->skip) \
+ printk("%u/%li ", skipped, dyn_tick->skip);}
+#else
+#define dbg_dyn_tick_irq() {}
+#endif
+
+/*
+ * This interrupt handler updates the time based on number of jiffies skipped
+ * It would be somewhat more optimized to have a customa handler in each timer
+ * using hardware ticks instead of nanoseconds. Note that CONFIG_NO_IDLE_HZ
+ * currently disables timer fallback on skipped jiffies.
+ */
+irqreturn_t dyn_tick_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+ unsigned long flags;
+ volatile unsigned long long now;
+ unsigned int skipped = 0;
+
+ if (dyn_tick->state & DYN_TICK_DEBUG) {
+ if (irq == 0)
+ printk(".");
+ else
+ printk("%i ", irq);
+ }
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ now = cur_timer->get_hw_time();
+ while (now - last_tick >= NS_TICK_LEN) {
+ last_tick += NS_TICK_LEN;
+ cur_timer->mark_offset();
+ do_timer_interrupt(irq, NULL, regs);
+ skipped++;
+ }
+ if (dyn_tick->state & (DYN_TICK_ENABLED | DYN_TICK_SKIPPING)) {
+ dbg_dyn_tick_irq();
+ dyn_tick->skip = 1;
+ if (cpu_has_local_apic())
+ reprogram_apic_timer(dyn_tick->skip);
+ reprogram_pit_tick(dyn_tick->skip);
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ dyn_tick->state &= ~DYN_TICK_SKIPPING;
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return IRQ_HANDLED;
+}
+#endif /* CONFIG_NO_IDLE_HZ */
+
/* not static: needed by APM */
unsigned long get_cmos_time(void)
{
@@ -416,7 +478,7 @@


/* XXX this driverfs stuff should probably go elsewhere later -john */
-static struct sys_device device_timer = {
+struct sys_device device_timer = {
.id = 0,
.cls = &timer_sysclass,
};
@@ -452,6 +514,82 @@
}
#endif

+#ifdef CONFIG_NO_IDLE_HZ
+
+/*
+ * REVISIT: Looks like on p3 APIC timer keeps running if PIT mode
+ * is changed. On p4, changing PIT mode seems to kill
+ * APIC timer interrupts. Same thing with disabling PIT
+ * interrupt.
+ */
+void disable_pit_tick(void)
+{
+ extern spinlock_t i8253_lock;
+ unsigned long flags;
+ spin_lock_irqsave(&i8253_lock, flags);
+ //irq_desc[0].handler->disable(0);
+ outb_p(0x32, PIT_MODE); /* binary, mode 1, LSB/MSB, ch 0 */
+#if 0
+ outb_p(0xff, PIT_CH0); /* LSB */
+ outb(0xff, PIT_CH0); /* MSB */
+#endif
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+/*
+ * Reprograms the next timer interrupt
+ * PIT timer reprogramming code taken from APM code.
+ * Note that PIT timer is a 16-bit timer, which allows max
+ * skip of only few seconds.
+ */
+void reprogram_pit_tick(int jiffies_to_skip)
+{
+ int skip;
+ extern spinlock_t i8253_lock;
+ unsigned long flags;
+
+ skip = jiffies_to_skip * LATCH;
+ if (skip > 0xffff)
+ skip = 0xffff;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ outb_p(skip & 0xff, PIT_CH0); /* LSB */
+ outb(skip >> 8, PIT_CH0); /* MSB */
+ //irq_desc[0].handler->enable(0);
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+int __init dyn_tick_arch_init(void)
+{
+ unsigned long flags;
+
+ if (!cur_timer->get_hw_time) {
+ printk(KERN_ERR "dyn-tick: Timer does not have get_hw_time!\n");
+ return -ENODEV;
+ }
+ write_seqlock_irqsave(&xtime_lock, flags);
+ last_tick = cur_timer->get_hw_time();
+ dyn_tick->skip = 1;
+ if (!cpu_has_local_apic())
+ dyn_tick->max_skip = 0xffff/LATCH; /* PIT timer length */
+ printk(KERN_INFO "dyn-tick: Maximum ticks to skip limited to %i\n",
+ dyn_tick->max_skip);
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ if (cur_timer->late_init)
+ cur_timer->late_init();
+ dyn_tick->interrupt = dyn_tick_timer_interrupt;
+ replace_timer_interrupt(dyn_tick->interrupt);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return 0;
+}
+#endif /* CONFIG_NO_IDLE_HZ */
+
void __init time_init(void)
{
#ifdef CONFIG_HPET_TIMER
@@ -471,6 +609,17 @@

cur_timer = select_timer();
printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
+
+#ifdef CONFIG_NO_IDLE_HZ
+ if (strncmp(cur_timer->name, "tsc", 3) == 0 ||
+ strncmp(cur_timer->name, "pmtmr", 3) == 0) {
+ dyn_tick->state |= DYN_TICK_SUITABLE;
+ printk(KERN_INFO "dyn-tick: Found suitable timer: %s\n",
+ cur_timer->name);
+ } else
+ printk(KERN_ERR "dyn-tick: Cannot use timer %s\n",
+ cur_timer->name);
+#endif

time_init_hook();
}
diff -Nru a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
--- a/arch/i386/kernel/timers/timer_pm.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/timers/timer_pm.c 2005-04-08 00:43:41 -07:00
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/init.h>
+#include <linux/dyn-tick-timer.h>
#include <asm/types.h>
#include <asm/timer.h>
#include <asm/smp.h>
@@ -168,6 +169,7 @@
monotonic_base += delta * NSEC_PER_USEC;
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* convert to ticks */
delta += offset_delay;
lost = delta / (USEC_PER_SEC / HZ);
@@ -184,6 +186,7 @@
first_run = 0;
offset_delay = 0;
}
+#endif
}


@@ -238,6 +241,25 @@
return (unsigned long) offset_delay + cyc2us(delta);
}

+static unsigned long long ns_time;
+
+static unsigned long long get_hw_time_pmtmr(void)
+{
+ u32 now, delta;
+ static unsigned int last_cycles;
+ now = read_pmtmr();
+ delta = (now - last_cycles) & ACPI_PM_MASK;
+ last_cycles = now;
+ ns_time += cyc2us(delta) * NSEC_PER_USEC;
+ return ns_time;
+}
+
+static void late_init_pmtmr(void)
+{
+ ns_time = monotonic_clock_pmtmr();
+}
+
+extern irqreturn_t pmtmr_interrupt(int irq, void *dev_id, struct pt_regs *regs);

/* acpi timer_opts struct */
static struct timer_opts timer_pmtmr = {
@@ -245,7 +267,9 @@
.mark_offset = mark_offset_pmtmr,
.get_offset = get_offset_pmtmr,
.monotonic_clock = monotonic_clock_pmtmr,
+ .get_hw_time = get_hw_time_pmtmr,
.delay = delay_pmtmr,
+ .late_init = late_init_pmtmr,
};

struct init_timer_opts __initdata timer_pmtmr_init = {
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/kernel/timers/timer_tsc.c 2005-04-08 00:43:41 -07:00
@@ -112,6 +112,15 @@
return delay_at_last_interrupt + edx;
}

+static unsigned long get_hw_time_tsc(void)
+{
+ register unsigned long eax, edx;
+
+ unsigned long long hw_time;
+ rdtscll(hw_time);
+ return cycles_2_ns(hw_time);
+}
+
static unsigned long long monotonic_clock_tsc(void)
{
unsigned long long last_offset, this_offset, base;
@@ -348,6 +357,7 @@

rdtsc(last_tsc_low, last_tsc_high);

+#ifndef CONFIG_NO_IDLE_HZ
spin_lock(&i8253_lock);
outb_p(0x00, PIT_MODE); /* latch the count ASAP */

@@ -415,11 +425,14 @@
cpufreq_delayed_get();
} else
lost_count = 0;
+#endif
+
/* update the monotonic base value */
this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
monotonic_base += cycles_2_ns(this_offset - last_offset);
write_sequnlock(&monotonic_lock);

+#ifndef CONFIG_NO_IDLE_HZ
/* calculate delay_at_last_interrupt */
count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
@@ -430,6 +443,7 @@
*/
if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
jiffies_64++;
+#endif
}

static int __init init_tsc(char* override)
@@ -551,6 +565,7 @@
.mark_offset = mark_offset_tsc,
.get_offset = get_offset_tsc,
.monotonic_clock = monotonic_clock_tsc,
+ .get_hw_time = get_hw_time_tsc,
.delay = delay_tsc,
};

diff -Nru a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c
--- a/arch/i386/mach-default/setup.c 2005-04-08 00:43:41 -07:00
+++ b/arch/i386/mach-default/setup.c 2005-04-08 00:43:41 -07:00
@@ -85,6 +85,22 @@
setup_irq(0, &irq0);
}

+/**
+ * replace_timer_interrupt - allow replacing timer interrupt handler
+ *
+ * Description:
+ * Can be used to replace timer interrupt handler with a more optimized
+ * handler. Used for enabling and disabling of CONFIG_NO_IDLE_HZ.
+ */
+void replace_timer_interrupt(void * new_handler)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ irq0.handler = new_handler;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+}
+
#ifdef CONFIG_MCA
/**
* mca_nmi_hook - hook into MCA specific NMI chain
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h 2005-04-08 00:43:41 -07:00
+++ b/include/asm-i386/timer.h 2005-04-08 00:43:41 -07:00
@@ -1,6 +1,7 @@
#ifndef _ASMi386_TIMER_H
#define _ASMi386_TIMER_H
#include <linux/init.h>
+#include <linux/interrupt.h>

/**
* struct timer_ops - used to define a timer source
@@ -21,7 +22,9 @@
void (*mark_offset)(void);
unsigned long (*get_offset)(void);
unsigned long long (*monotonic_clock)(void);
+ unsigned long long (*get_hw_time)(void);
void (*delay)(unsigned long);
+ void (*late_init)(void);
};

struct init_timer_opts {
diff -Nru a/include/linux/dyn-tick-timer.h b/include/linux/dyn-tick-timer.h
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/include/linux/dyn-tick-timer.h 2005-04-08 00:43:41 -07:00
@@ -0,0 +1,74 @@
+/*
+ * linux/include/linux/dyn-tick-timer.h
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DYN_TICK_TIMER_H
+#define _DYN_TICK_TIMER_H
+
+#include <linux/interrupt.h>
+
+#define DYN_TICK_DEBUG (1 << 31)
+#define DYN_TICK_TIMER_INT (1 << 4)
+#define DYN_TICK_USE_APIC (1 << 3)
+#define DYN_TICK_SKIPPING (1 << 2)
+#define DYN_TICK_ENABLED (1 << 1)
+#define DYN_TICK_SUITABLE (1 << 0)
+
+struct dyn_tick_state {
+ unsigned int state; /* Current state */
+ int skip_cpu; /* Skip handling processor */
+ unsigned long skip; /* Ticks to skip */
+ unsigned int max_skip; /* Max number of ticks to skip */
+ unsigned long irq_skip_mask; /* Do not update time from these irqs */
+ irqreturn_t (*interrupt)(int, void *, struct pt_regs *);
+};
+
+struct dyn_tick_timer {
+ int (*arch_init) (void);
+ void (*arch_enable) (void);
+ void (*arch_disable) (void);
+ void (*arch_reprogram_timer) (void);
+};
+
+extern struct dyn_tick_state * dyn_tick;
+extern void dyn_tick_register(struct dyn_tick_timer * new_timer);
+
+#define NS_TICK_LEN ((1 * 1000000000)/HZ)
+#define DYN_TICK_MIN_SKIP 2
+
+#ifdef CONFIG_NO_IDLE_HZ
+
+extern unsigned long dyn_tick_reprogram_timer(void);
+
+#else
+
+#define arch_has_safe_halt() 0
+#define dyn_tick_reprogram_timer() {}
+
+
+#endif /* CONFIG_NO_IDLE_HZ */
+#endif /* _DYN_TICK_TIMER_H */
diff -Nru a/kernel/Makefile b/kernel/Makefile
--- a/kernel/Makefile 2005-04-08 00:43:41 -07:00
+++ b/kernel/Makefile 2005-04-08 00:43:41 -07:00
@@ -28,6 +28,7 @@
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
+obj-$(CONFIG_NO_IDLE_HZ) += dyn-tick-timer.o

ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
diff -Nru a/kernel/dyn-tick-timer.c b/kernel/dyn-tick-timer.c
--- /dev/null Wed Dec 31 16:00:00 196900
+++ b/kernel/dyn-tick-timer.c 2005-04-08 00:43:41 -07:00
@@ -0,0 +1,256 @@
+/*
+ * linux/arch/i386/kernel/dyn-tick.c
+ *
+ * Beginnings of generic dynamic tick timer support
+ *
+ * Copyright (C) 2004 Nokia Corporation
+ * Written by Tony Lindgen <[email protected]> and
+ * Tuukka Tikkanen <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/pm.h>
+#include <linux/dyn-tick-timer.h>
+#include <asm/io.h>
+
+#include "io_ports.h"
+
+#define DYN_TICK_VERSION "050301-1"
+
+struct dyn_tick_state dyn_tick_state;
+struct dyn_tick_state * dyn_tick = &dyn_tick_state;
+struct dyn_tick_timer * dyn_tick_cfg;
+
+static void (*orig_idle) (void) = 0;
+extern void disable_pit_tick(void);
+extern void reprogram_pit_tick(int jiffies_to_skip);
+extern void reprogram_apic_timer(unsigned int count);
+extern void reprogram_pit_tick(int jiffies_to_skip);
+static cpumask_t dyn_cpu_map;
+
+/*
+ * Arch independed code needed to reprogram next timer interrupt.
+ * Gets called from cpu_idle() before entering idle loop. Note that
+ * we want to have all processors idle before reprogramming the
+ * next timer interrupt.
+ */
+unsigned long dyn_tick_reprogram_timer(void)
+{
+ int cpu;
+ unsigned long flags;
+ cpumask_t idle_cpus;
+ unsigned long next;
+
+ if (dyn_tick->state & DYN_TICK_DEBUG)
+ printk("i");
+
+ if (!(dyn_tick->state & DYN_TICK_ENABLED))
+ return 0;
+
+ /* Check if we are already skipping ticks and can idle other cpus */
+ if (dyn_tick->state & DYN_TICK_SKIPPING) {
+ reprogram_apic_timer(dyn_tick->skip);
+ return 0;
+ }
+
+ /* Check if we can start skipping ticks */
+ write_seqlock_irqsave(&xtime_lock, flags);
+ cpu = smp_processor_id();
+ cpu_set(cpu, dyn_cpu_map);
+ cpus_and(idle_cpus, dyn_cpu_map, cpu_online_map);
+ if (cpus_equal(idle_cpus, cpu_online_map)) {
+ next = next_timer_interrupt();
+ if (jiffies > next) {
+ //printk("Too late? next: %lu jiffies: %lu\n",
+ // next, jjiffies);
+ dyn_tick->skip = 1;
+ } else
+ dyn_tick->skip = next_timer_interrupt() - jiffies;
+ if (dyn_tick->skip > DYN_TICK_MIN_SKIP) {
+ if (dyn_tick->skip > dyn_tick->max_skip)
+ dyn_tick->skip = dyn_tick->max_skip;
+
+ dyn_tick_cfg->arch_reprogram_timer();
+
+ dyn_tick->skip_cpu = cpu;
+ dyn_tick->state |= DYN_TICK_SKIPPING;
+ }
+ cpus_clear(dyn_cpu_map);
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return dyn_tick->skip;
+}
+
+void __init dyn_tick_register(struct dyn_tick_timer * arch_timer)
+{
+ dyn_tick_cfg = arch_timer;
+ printk(KERN_INFO "dyn-tick: Registering dynamic tick timer v%s\n",
+ DYN_TICK_VERSION);
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Sysfs interface
+ * ---------------------------------------------------------------------------
+ */
+
+extern struct sys_device device_timer;
+
+static ssize_t show_dyn_tick_state(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "suitable:\t%i\n"
+ "enabled:\t%i\n"
+ "skipping:\t%i\n"
+ "using APIC:\t%i\n"
+ "int enabled:\t%i\n"
+ "debug:\t\t%i\n",
+ dyn_tick->state & DYN_TICK_SUITABLE,
+ (dyn_tick->state & DYN_TICK_ENABLED) >> 1,
+ (dyn_tick->state & DYN_TICK_SKIPPING) >> 2,
+ (dyn_tick->state & DYN_TICK_USE_APIC) >> 3,
+ (dyn_tick->state & DYN_TICK_TIMER_INT) >> 4,
+ (dyn_tick->state & DYN_TICK_DEBUG) >> 31);
+}
+
+static ssize_t set_dyn_tick_state(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable) {
+ if (dyn_tick_cfg->arch_enable)
+ dyn_tick_cfg->arch_enable();
+ dyn_tick->state |= DYN_TICK_ENABLED;
+ } else {
+ if (dyn_tick_cfg->arch_disable)
+ dyn_tick_cfg->arch_disable();
+ dyn_tick->state &= ~DYN_TICK_ENABLED;
+ }
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_state, 0644, show_dyn_tick_state,
+ set_dyn_tick_state);
+
+static ssize_t show_dyn_tick_int(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%i\n",
+ (dyn_tick->state & DYN_TICK_TIMER_INT) >> 4);
+}
+
+static ssize_t set_dyn_tick_int(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable)
+ dyn_tick->state |= DYN_TICK_TIMER_INT;
+ else
+ dyn_tick->state &= ~DYN_TICK_TIMER_INT;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_int, 0644, show_dyn_tick_int, set_dyn_tick_int);
+
+static ssize_t show_dyn_tick_dbg(struct sys_device *dev, char *buf)
+{
+ return sprintf(buf, "%i\n",
+ (dyn_tick->state & DYN_TICK_DEBUG) >> 31);
+}
+
+static ssize_t set_dyn_tick_dbg(struct sys_device *dev, const char * buf,
+ ssize_t count)
+{
+ unsigned long flags;
+ unsigned int enable = simple_strtoul(buf, NULL, 2);
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+ if (enable)
+ dyn_tick->state |= DYN_TICK_DEBUG;
+ else
+ dyn_tick->state &= ~DYN_TICK_DEBUG;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ return count;
+}
+
+static SYSDEV_ATTR(dyn_tick_dbg, 0644, show_dyn_tick_dbg, set_dyn_tick_dbg);
+
+/*
+ * ---------------------------------------------------------------------------
+ * Init functions
+ * ---------------------------------------------------------------------------
+ */
+
+static int __init dyn_tick_early_init(void)
+{
+ dyn_tick->state |= DYN_TICK_TIMER_INT;
+}
+
+subsys_initcall(dyn_tick_early_init);
+
+/*
+ * We need to initialize dynamic tick after calibrate delay
+ */
+static int __init dyn_tick_late_init(void)
+{
+ int ret = 0;
+
+ if (dyn_tick_cfg == NULL || dyn_tick_cfg->arch_init == NULL ||
+ !(dyn_tick->state & DYN_TICK_SUITABLE)) {
+ printk(KERN_ERR, "dyn-tick: No suitable timer found\n");
+ return -ENODEV;
+ }
+
+ ret = dyn_tick_cfg->arch_init();
+ if (ret != 0) {
+ printk(KERN_ERR "dyn-tick: Init failed\n");
+ return -ENODEV;
+ }
+
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_state);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_int);
+ ret = sysdev_create_file(&device_timer, &attr_dyn_tick_dbg);
+
+ printk(KERN_INFO "dyn-tick: Timer using dynamic tick\n");
+
+ return ret;
+}
+
+late_initcall(dyn_tick_late_init);


Attachments:
(No filename) (534.00 B)
patch-dynamic-tick-2.6.12-rc2-050408-1 (29.76 kB)
Download all attachments

2005-04-08 08:50:20

by Frank Sorenson

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Tony Lindgren wrote:
| * Tony Lindgren <[email protected]> [050407 23:28]:
|
|>I think I have an idea on what's going on; Your system does not wake to
|>APIC interrupt, and the system timer updates time only on other
interrupts.
|>I'm experiencing the same on a loaner ThinkPad T30.
|>
|>I'll try to do another patch today. Meanwhile it now should work
|>without lapic in cmdline.
|
|
| Following is an updated patch. Anybody having trouble, please try
| disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
|
| I'm hoping this might work on Pavel's machine too?
|
| Tony

This updated patch seems to work just fine on my machine with lapic on
the cmdline and CONFIG_DYN_TICK_USE_APIC disabled.

Also, you were correct that removing lapic from the cmdline allowed the
previous version to run at full speed.

Now, how can I tell if the patch is doing its thing? What should I be
seeing? :)

Functionally, it looks like it's working. There were a number of
compiler warnings you might wish to fix before calling it good. Such as
"initialization from incompatible pointer type" several times in
dyn-tick-timer.c and a "too many arguments for format" in
dyn_tick_late_init.

Frank
- --
Frank Sorenson - KD7TZK
Systems Manager, Computer Science Department
Brigham Young University
[email protected]
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCVkWDaI0dwg4A47wRAgzOAKCHcx8p59ZbihYtZJ84p62v2rMauQCfUuzz
D7O98hHvjtTa/CvFHHtJe4c=
=G2I/
-----END PGP SIGNATURE-----

2005-04-08 09:18:40

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

* Frank Sorenson <[email protected]> [050408 01:49]:
> Tony Lindgren wrote:
> | * Tony Lindgren <[email protected]> [050407 23:28]:
> |
> |>I think I have an idea on what's going on; Your system does not wake to
> |>APIC interrupt, and the system timer updates time only on other
> interrupts.
> |>I'm experiencing the same on a loaner ThinkPad T30.
> |>
> |>I'll try to do another patch today. Meanwhile it now should work
> |>without lapic in cmdline.
> |
> |
> | Following is an updated patch. Anybody having trouble, please try
> | disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> |
> | I'm hoping this might work on Pavel's machine too?
> |
> | Tony
>
> This updated patch seems to work just fine on my machine with lapic on
> the cmdline and CONFIG_DYN_TICK_USE_APIC disabled.
>
> Also, you were correct that removing lapic from the cmdline allowed the
> previous version to run at full speed.

Cool.

> Now, how can I tell if the patch is doing its thing? What should I be
> seeing? :)

Download pmstats from http://www.muru.com/linux/dyntick/, you may
need to edit it a bit for correct ACPI battery values. But it should
show you HZ during idle and load. I believe idle still does not go
to ACPI C3 with dyn-tick though...

Then you might as well run timetest from same location too to make
sure your clock keeps correct time.

> Functionally, it looks like it's working. There were a number of
> compiler warnings you might wish to fix before calling it good. Such as
> "initialization from incompatible pointer type" several times in
> dyn-tick-timer.c and a "too many arguments for format" in
> dyn_tick_late_init.

Yeah, I'll fix those...

Tony

2005-04-08 10:31:16

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

Hi!

> > I think I have an idea on what's going on; Your system does not wake to
> > APIC interrupt, and the system timer updates time only on other interrupts.
> > I'm experiencing the same on a loaner ThinkPad T30.
> >
> > I'll try to do another patch today. Meanwhile it now should work
> > without lapic in cmdline.
>
> Following is an updated patch. Anybody having trouble, please try
> disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
>
> I'm hoping this might work on Pavel's machine too?

The "volume hang" was explained: I was using CPU frequency scaling, it
probably did not like that. After disabling CPU frequency scaling, it
seems to work ok:

Pavel

pavel@Elf:~$ cat /proc/interrupts ; sleep 1 ; cat /proc/interrupts
CPU0
0: 33288 XT-PIC timer
1: 1021 XT-PIC i8042
2: 0 XT-PIC cascade
9: 2 XT-PIC acpi
10: 94036 XT-PIC yenta, yenta, ehci_hcd:usb1,
uhci_hcd:usb2, uhci_hcd:usb3, uhci_hcd:usb4
11: 3941 XT-PIC Intel 82801DB-ICH4, eth0
12: 17 XT-PIC i8042
14: 5119 XT-PIC ide0
NMI: 0
LOC: 0
ERR: 0
MIS: 0
CPU0
0: 33568 XT-PIC timer
1: 1022 XT-PIC i8042
2: 0 XT-PIC cascade
9: 2 XT-PIC acpi
10: 94323 XT-PIC yenta, yenta, ehci_hcd:usb1,
uhci_hcd:usb2, uhci_hcd:usb3, uhci_hcd:usb4
11: 3951 XT-PIC Intel 82801DB-ICH4, eth0
12: 17 XT-PIC i8042
14: 5192 XT-PIC ide0
NMI: 0
LOC: 0
ERR: 0
MIS: 0
pavel@Elf:~$


--
Boycott Kodak -- for their patent abuse against Java.

2005-04-08 10:55:02

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

* Pavel Machek <[email protected]> [050408 03:30]:
> Hi!
>
> > > I think I have an idea on what's going on; Your system does not wake to
> > > APIC interrupt, and the system timer updates time only on other interrupts.
> > > I'm experiencing the same on a loaner ThinkPad T30.
> > >
> > > I'll try to do another patch today. Meanwhile it now should work
> > > without lapic in cmdline.
> >
> > Following is an updated patch. Anybody having trouble, please try
> > disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> >
> > I'm hoping this might work on Pavel's machine too?
>
> The "volume hang" was explained: I was using CPU frequency scaling, it
> probably did not like that. After disabling CPU frequency scaling, it
> seems to work ok:

OK, good. I assume this was the same machine that did not work with
any of the earlier patches?

Tony

2005-04-08 11:34:59

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

Frank Sorenson wrote:
> Tony Lindgren wrote:
> | * Tony Lindgren <[email protected]> [050407 23:28]:
> |
> |>I think I have an idea on what's going on; Your system does not wake to
> |>APIC interrupt, and the system timer updates time only on other
> interrupts.
> |>I'm experiencing the same on a loaner ThinkPad T30.
> |>
> |>I'll try to do another patch today. Meanwhile it now should work
> |>without lapic in cmdline.
> |
> |
> | Following is an updated patch. Anybody having trouble, please try
> | disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> |
> | I'm hoping this might work on Pavel's machine too?
> |
> | Tony
>
> This updated patch seems to work just fine on my machine with lapic on
> the cmdline and CONFIG_DYN_TICK_USE_APIC disabled.
>
> Also, you were correct that removing lapic from the cmdline allowed the
> previous version to run at full speed.
>
> Now, how can I tell if the patch is doing its thing? What should I be
> seeing? :)
>
> Functionally, it looks like it's working. There were a number of
> compiler warnings you might wish to fix before calling it good. Such as
> "initialization from incompatible pointer type" several times in
> dyn-tick-timer.c and a "too many arguments for format" in
> dyn_tick_late_init.
>

Here are some figures about idle/C-states:

Passing bm_history=0xF to processor module makes it going into C3 and deeper.
Passing lower values, deeper states are reached more often, but system could freeze:

bm_activity=0x4
bus master activity: fefffffd
states:
C1: type[C1] promotion[C2] demotion[--] latency[001] usage[00000010]
*C2: type[C2] promotion[C3] demotion[C1] latency[001] usage[00007183]
C3: type[C3] promotion[C4] demotion[C2] latency[085] usage[00000515]
C4: type[C3] promotion[--] demotion[C3] latency[185] usage[00000330]

bm_activity=0x1
bus master activity: ffff7ffd
states:
C1: type[C1] promotion[C2] demotion[--] latency[001] usage[00000010]
*C2: type[C2] promotion[C3] demotion[C1] latency[001] usage[00005495]
C3: type[C3] promotion[C4] demotion[C2] latency[085] usage[00000537]
C4: type[C3] promotion[--] demotion[C3] latency[185] usage[00000472]


Figures NO_IDLE_HZ disabled, HZ=1000 (max sleep 1ms)
(Don't trust the figures too much, there probably are little bugs...):

Active C0/C1 state:
Total(ms): 145
Usage: 20205
Failures: 0
Maximum(us): 1967
Average(us): 7
Sleep C2 state:
Total(ms): 19306
Usage: 20074
Failures: 0
Maximum(us): 1275 (-> strange max should be 1000us)
Average(us): 961
Sleep C3 state:
Total(ms): 34
Usage: 131
Failures: 0
Maximum(us): 984
Average(us): 259
Measures based on ACPI PM timer reads

Total switches between C-states: 20205
Switches between C-states per second: 1063 per second
Total measure time (s): 19
Total measure time (based on starting measures) (s): 20


Figures NO_IDLE_HZ enabled, processor.bm_history=0xF HZ=1000:
(Don't trust the figures too much, there probably are little bugs...):

Active C0/C1 state:
Total(ms): 81
Usage: 4659
Failures: 0
Maximum(us): 1608
Average(us): 17
Sleep C2 state:
Total(ms): 71108
Usage: 4241
Failures: 0
Maximum(us): 49921
Average(us): 16766
Sleep C3 state:
Total(ms): 219
Usage: 167
Failures: 0
Maximum(us): 28296
Average(us): 1311
Sleep C4 state:
Total(ms): 374
Usage: 251
Failures: 0
Maximum(us): 18870
Average(us): 1490
Measures based on ACPI PM timer reads

Total switches between C-states: 4659
Switches between C-states per second: 65 per second
Total measure time (s): 71
Total measure time (based on starting measures) (s): 75


I buffer C-state times in an array and write them to /dev/cstX.
>From there I calc the stats from userspace.

Tony: If you like I can send you the patch and dump prog for
http://www.muru.com/linux/dyntick/ ?

I try to find a better algorithm (directly adjust slept time to
C-state latency or something) for NO_IDLE_HZ (hints are very welcome)
and try to come up with new figures soon.


Thomas

2005-04-08 11:56:16

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

* Thomas Renninger <[email protected]> [050408 04:34]:
>
> Here are some figures about idle/C-states:
>
> Passing bm_history=0xF to processor module makes it going into C3 and deeper.
> Passing lower values, deeper states are reached more often, but system could freeze:

Hmm, I wonder why it freezes? Is it ACPI issue or related to dyn-tick?

> Figures NO_IDLE_HZ disabled, HZ=1000 (max sleep 1ms)
...
> Total switches between C-states: 20205
> Switches between C-states per second: 1063 per second
>
> Figures NO_IDLE_HZ enabled, processor.bm_history=0xF HZ=1000:
...
> Total switches between C-states: 4659
> Switches between C-states per second: 65 per second

The reduction in C state changes should produce some power savings,
assuming the C states do something...

> I buffer C-state times in an array and write them to /dev/cstX.
> From there I calc the stats from userspace.
>
> Tony: If you like I can send you the patch and dump prog for
> http://www.muru.com/linux/dyntick/ ?

Yeah, that would nice to have!

> I try to find a better algorithm (directly adjust slept time to
> C-state latency or something) for NO_IDLE_HZ (hints are very welcome)
> and try to come up with new figures soon.

I suggest we modify idle so we can call it with the estimated sleep
length in usecs. Then the idle loop can directly decide when to go to
C2 or C3 depening on the estimated sleep length.

Tony

2005-04-08 12:25:03

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

Hi!

> > > > I think I have an idea on what's going on; Your system does not wake to
> > > > APIC interrupt, and the system timer updates time only on other interrupts.
> > > > I'm experiencing the same on a loaner ThinkPad T30.
> > > >
> > > > I'll try to do another patch today. Meanwhile it now should work
> > > > without lapic in cmdline.
> > >
> > > Following is an updated patch. Anybody having trouble, please try
> > > disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> > >
> > > I'm hoping this might work on Pavel's machine too?
> >
> > The "volume hang" was explained: I was using CPU frequency scaling, it
> > probably did not like that. After disabling CPU frequency scaling, it
> > seems to work ok:
>
> OK, good. I assume this was the same machine that did not work with
> any of the earlier patches?

I do not have *that* machine near me just now, but I'll try it.

Pavel

--
Boycott Kodak -- for their patent abuse against Java.

2005-04-08 12:59:17

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

Tony Lindgren wrote:
> * Thomas Renninger <[email protected]> [050408 04:34]:
>>Here are some figures about idle/C-states:
>>
>>Passing bm_history=0xF to processor module makes it going into C3 and deeper.
>>Passing lower values, deeper states are reached more often, but system could freeze:
>
> Hmm, I wonder why it freezes? Is it ACPI issue or related to dyn-tick?
>
It's an ACPI issue.
As far as I understand: If there has been bus master activity in the last
xx(~30?!?) ms, C3 and deeper sleep states must not be triggered.
If running into it, the system just freezes without any further output
or response.

>>Figures NO_IDLE_HZ disabled, HZ=1000 (max sleep 1ms)
> ...
>>Total switches between C-states: 20205
>>Switches between C-states per second: 1063 per second
>>
>>Figures NO_IDLE_HZ enabled, processor.bm_history=0xF HZ=1000:
> ...
>>Total switches between C-states: 4659
>>Switches between C-states per second: 65 per second
>
> The reduction in C state changes should produce some power savings,
> assuming the C states do something...
>
I heard on this machine battery lasts half an hour longer since
C4 state is used, hopefully we can get some more minutes by using it
more often and longer ...

>>I buffer C-state times in an array and write them to /dev/cstX.
>>From there I calc the stats from userspace.
>>
>>Tony: If you like I can send you the patch and dump prog for
>>http://www.muru.com/linux/dyntick/ ?
>
> Yeah, that would nice to have!

-> I'll send you privately.
>
>>I try to find a better algorithm (directly adjust slept time to
>>C-state latency or something) for NO_IDLE_HZ (hints are very welcome)
>>and try to come up with new figures soon.
>
> I suggest we modify idle so we can call it with the estimated sleep
> length in usecs. Then the idle loop can directly decide when to go to
> C2 or C3 depening on the estimated sleep length.

The sleep time history could be enough?

I don't know how to calc C1 state sleep time (from drivers/acpi/processor_idle.c):
/*
* TBD: Can't get time duration while in C1, as resumes
* go to an ISR rather than here. Need to instrument
* base interrupt handler.
*/

It probably would help to go to deeper states faster.

Whatabout reprogramming timer interrupt for C1 (latency==0), so that it comes out after e.g. 1 ms again.
If it really stayed sleeping for 1ms, 5 times, the machine is really idle and deeper
states are adjusted after sleep time and C-state latency...
(Or only disable timer interrupt after C1 slept long enough X times?)

Thomas

2005-04-08 21:44:03

by Frank Sorenson

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Tony Lindgren wrote:
> * Frank Sorenson <[email protected]> [050408 01:49]:
>>This updated patch seems to work just fine on my machine with lapic on
>>the cmdline and CONFIG_DYN_TICK_USE_APIC disabled.
>>
>>Also, you were correct that removing lapic from the cmdline allowed the
>>previous version to run at full speed.
>
>
> Cool.
>
>
>>Now, how can I tell if the patch is doing its thing? What should I be
>>seeing? :)
>
>
> Download pmstats from http://www.muru.com/linux/dyntick/, you may
> need to edit it a bit for correct ACPI battery values. But it should
> show you HZ during idle and load. I believe idle still does not go
> to ACPI C3 with dyn-tick though...
>
> Then you might as well run timetest from same location too to make
> sure your clock keeps correct time.

Seems to be going up when under load, and down when idle, so I suppose
it's working :) The clock is only a little jittery, but not more than
I'd expect across the network, so it looks like it's keeping time okay.

Would it be possible to determine whether the system will wake to the
APIC interrupt at system boot, rather than hardcoded in the config?
After you explained the problem, I noticed that creating my own
interrupts (holding down a key on the keyboard for example) kept the
system moving and not slow. For example, something like this (sorry, I
don't know the code well enough yet to attempt to code it myself):

set the APIC timer to fire in X
set another timer/interrupt to fire in 2X
wait for the interrupt
if (time_elapsed >= 2X) disable the APIC timer
else APIC timer should work

Or, determine which timer woke us up, etc.

Thanks,
Frank
- --
Frank Sorenson - KD7TZK
Systems Manager, Computer Science Department
Brigham Young University
[email protected]
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)
Comment: Using GnuPG with Thunderbird - http://enigmail.mozdev.org

iD8DBQFCVvriaI0dwg4A47wRAhhyAJ928wgPEY/9X4KmyJcsaJ+WZk0XRQCfTfcj
x3yKiwYOhMac/SQ7El9N0q0=
=2QVB
-----END PGP SIGNATURE-----

2005-04-09 08:10:19

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

On Fri, Apr 08, 2005 at 03:42:59PM -0600, Frank Sorenson wrote:
> Tony Lindgren wrote:
> >
> > Then you might as well run timetest from same location too to make
> > sure your clock keeps correct time.
>
> Seems to be going up when under load, and down when idle, so I suppose
> it's working :) The clock is only a little jittery, but not more than
> I'd expect across the network, so it looks like it's keeping time okay.

Good.

> Would it be possible to determine whether the system will wake to the
> APIC interrupt at system boot, rather than hardcoded in the config?
> After you explained the problem, I noticed that creating my own
> interrupts (holding down a key on the keyboard for example) kept the
> system moving and not slow. For example, something like this (sorry, I
> don't know the code well enough yet to attempt to code it myself):
>
> set the APIC timer to fire in X
> set another timer/interrupt to fire in 2X
> wait for the interrupt
> if (time_elapsed >= 2X) disable the APIC timer
> else APIC timer should work
>
> Or, determine which timer woke us up, etc.

Yeah, I was thinking that too. But maybe there's some way of stopping
PIT interrupts while keeping APIC timer interrupts running on all chips.
It seems to work OK on my P3 boxes, but seems to fail on newer machines.

BTW, stopping PIT interrupts (like the HRT VST patch does) seems to
kill APIC timer interrupts too, the same way as reprogamming PIT does.
Or maybe there's something else that needs to be done to get APIC
interrupts going after PIT interrupts are disabled.

Tony

2005-04-09 08:22:58

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

On Fri, Apr 08, 2005 at 02:58:50PM +0200, Thomas Renninger wrote:
> Tony Lindgren wrote:
> > * Thomas Renninger <[email protected]> [050408 04:34]:
> >>Here are some figures about idle/C-states:
> >>
> >>Passing bm_history=0xF to processor module makes it going into C3 and deeper.
> >>Passing lower values, deeper states are reached more often, but system could freeze:
> >
> > Hmm, I wonder why it freezes? Is it ACPI issue or related to dyn-tick?
> >
> It's an ACPI issue.

OK

> As far as I understand: If there has been bus master activity in the last
> xx(~30?!?) ms, C3 and deeper sleep states must not be triggered.
> If running into it, the system just freezes without any further output
> or response.

OK

> >>Figures NO_IDLE_HZ disabled, HZ=1000 (max sleep 1ms)
> > ...
> >>Total switches between C-states: 20205
> >>Switches between C-states per second: 1063 per second
> >>
> >>Figures NO_IDLE_HZ enabled, processor.bm_history=0xF HZ=1000:
> > ...
> >>Total switches between C-states: 4659
> >>Switches between C-states per second: 65 per second
> >
> > The reduction in C state changes should produce some power savings,
> > assuming the C states do something...
> >
> I heard on this machine battery lasts half an hour longer since
> C4 state is used, hopefully we can get some more minutes by using it
> more often and longer ...

Yeah, it would be interesting to know how much of a difference it makes.

> >>I buffer C-state times in an array and write them to /dev/cstX.
> >>From there I calc the stats from userspace.
> >>
> >>Tony: If you like I can send you the patch and dump prog for
> >>http://www.muru.com/linux/dyntick/ ?
> >
> > Yeah, that would nice to have!
>
> -> I'll send you privately.

OK

> >>I try to find a better algorithm (directly adjust slept time to
> >>C-state latency or something) for NO_IDLE_HZ (hints are very welcome)
> >>and try to come up with new figures soon.
> >
> > I suggest we modify idle so we can call it with the estimated sleep
> > length in usecs. Then the idle loop can directly decide when to go to
> > C2 or C3 depening on the estimated sleep length.
>
> The sleep time history could be enough?

Well we already know when the next timer interrupt is scheduled to
happen, so make use of that information would make the state selection
easy. And we should probably at some point also account for the wake-up
latency so we can program the timer a bit early depending on the sleep
state.

> I don't know how to calc C1 state sleep time (from drivers/acpi/processor_idle.c):
> /*
> * TBD: Can't get time duration while in C1, as resumes
> * go to an ISR rather than here. Need to instrument
> * base interrupt handler.
> */
>
> It probably would help to go to deeper states faster.

Yes, we should be able to go directly to deeper states with the next
timer interrupt value.

> Whatabout reprogramming timer interrupt for C1 (latency==0), so that it comes out after e.g. 1 ms again.
> If it really stayed sleeping for 1ms, 5 times, the machine is really idle and deeper
> states are adjusted after sleep time and C-state latency...
> (Or only disable timer interrupt after C1 slept long enough X times?)

I'm not sure if I follow this one... But if we know the next timer
interrupt is 500ms away, we should go directly to C3/C4, and no
other calculations should be needed.

Regards,

Tony

2005-04-14 13:00:44

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

Hi!

> > > > I think I have an idea on what's going on; Your system does not wake to
> > > > APIC interrupt, and the system timer updates time only on other interrupts.
> > > > I'm experiencing the same on a loaner ThinkPad T30.
> > > >
> > > > I'll try to do another patch today. Meanwhile it now should work
> > > > without lapic in cmdline.
> > >
> > > Following is an updated patch. Anybody having trouble, please try
> > > disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> > >
> > > I'm hoping this might work on Pavel's machine too?
> >
> > The "volume hang" was explained: I was using CPU frequency scaling, it
> > probably did not like that. After disabling CPU frequency scaling, it
> > seems to work ok:
>
> OK, good. I assume this was the same machine that did not work with
> any of the earlier patches

I did testing on that machine today, and yes it works okay if I disable the
NO_IDLE_HZ_USE_APIC (or how is it called) option. Time problems are gone.
Pavel
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms

2005-04-14 19:43:07

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1

On Sat, Apr 09, 2005 at 11:56:08AM +0200, Pavel Machek wrote:
> Hi!
>
> > > > > I think I have an idea on what's going on; Your system does not wake to
> > > > > APIC interrupt, and the system timer updates time only on other interrupts.
> > > > > I'm experiencing the same on a loaner ThinkPad T30.
> > > > >
> > > > > I'll try to do another patch today. Meanwhile it now should work
> > > > > without lapic in cmdline.
> > > >
> > > > Following is an updated patch. Anybody having trouble, please try
> > > > disabling CONFIG_DYN_TICK_USE_APIC Kconfig option.
> > > >
> > > > I'm hoping this might work on Pavel's machine too?
> > >
> > > The "volume hang" was explained: I was using CPU frequency scaling, it
> > > probably did not like that. After disabling CPU frequency scaling, it
> > > seems to work ok:
> >
> > OK, good. I assume this was the same machine that did not work with
> > any of the earlier patches
>
> I did testing on that machine today, and yes it works okay if I disable the
> NO_IDLE_HZ_USE_APIC (or how is it called) option. Time problems are gone.

That's great!

Tony

2005-04-19 14:58:03

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Here are some figures (I used your pmstats):

The machine is a Pentium M 2.00 GHz, supporting C0-C4 processor power states.
The machine run at 2.00 GHz all the time.
A lot of modules (pcmcia, usb, ...) where loaded, services that could
produce load where stopped -> processor is mostly idle.

_____________________________________________________________________________________
*Running with 1000Hz:*

_No processor module:_

Average current the last 100 seconds: *2289mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_No_module_loaded)


_passing bm_history=0xFFFFFFFF (default) to processor module:_

Average current the last 470 seconds: *1986mA* (also measured better values ~1800, does battery level play a role?!?)
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FFFFFFFF)


_passing bm_history=0xFF to processor module:_

Average current the last 190 seconds: *1757mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FF)
(Usage count could be bogus, as some invokations could not succeed if bm has currently been active).
_____________________________________________________________________________________

*Running with CONFIG_NO_IDLE_HZ:*
Patched with http://www.muru.com/linux/dyntick/patches/patch-dynamic-tick-2.6.12-rc2-050408-1.gz
(With the c-state patch attached applied)

_No processor module:_

Average current the last 80 seconds: *2262mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_No_module_loaded)

idle_ms == 40, bm_promote_bs == 30
Average current the last 160 seconds: *1507mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_40_bm_30)

idle_ms == 100, bm_promote_bs == 30
Average current the last 80 seconds: *1466mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_100_bm_30)

idle_ms == 40, bm_promote_bs == 50
Average current the last 150 seconds: *1481mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_40_bm_30)

idle_ms == 40, bm_promote_bs == 10
Average current the last 330 seconds: *1474mA*
(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_40_bm_10)

Hmm, parameters do not influence at all ... (idle_ms should only comes in when switching between idle/not idle).
_____________________________________________________________________________________


The measures are based on the /proc/acpi/battery/*/* info and are not very accurate, but could give an overall picture.

Thomas

P.S.: Not tested, because I have no x86_64 C3 machine, but the patch should also work reliable with Andi's dyn_tick patch
for x86_64 machines.

Tony: I modified your pmstats to produce an average current value: ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/pmstats


Attachments:
dynamic_tick_cstate_patch_final.diff (10.03 kB)

2005-04-19 15:28:04

by Dominik Brodowski

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Hi,

On Tue, Apr 19, 2005 at 04:56:56PM +0200, Thomas Renninger wrote:
> If CONFIG_IDLE_HZ is set, the c-state will be evaluated on
> three control values (averages of the last 4 measures):
>
> a) idle_ms -> if machine was active for longer than this
> value (avg), the machine is assumed to not be idle
> and C1 will be triggered.
>
> b) bm_promote_ms -> if the avg bus master activity is below
> this threshold, C2 is invoked.
>
> c) sleep_avg (no module param) -> the average sleep time of the
> last four C2 (or higher) invokations.
> If a and b does not apply, a C-state will be searched that has
> the highest latency, but still has a latency below the latest
> C2 (or higher) sleeping time and average sleeping time value.

I think that we don't need this complication:

> +//#define DEBUG 1
> +#ifdef DEBUG
> +#define myPrintk(string, args...) printk(KERN_INFO ""string, ##args);
> +#else
> +#define myPrintk(string, args...) {};
> +#endif

Please don't do that... dprintk() is much more common. Also, then don't
comment dprintk() out below in the patch...

> if (pr->flags.bm_check) {
> - u32 bm_status = 0;
> - unsigned long diff = jiffies - pr->power.bm_check_timestamp;
> -
> - if (diff > 32)
> - diff = 32;
> -
> - while (diff) {
> - /* if we didn't get called, assume there was busmaster activity */
> - diff--;
> - if (diff)
> - pr->power.bm_activity |= 0x1;
> - pr->power.bm_activity <<= 1;
> - }

"All" we need to do is to update the "diff". Without dynamic ticks, if the
idle loop didn't get called each jiffy, it was a big hint that there was so
much activity in between, and if there is activity, there is most likely
also bus master activity, or at least more work to do, so interrupt activity
is likely. Therefore we assume there was bm_activity even if there was none.

Now, we do know the jiffy value when we started sleeping. If we use
ticks_elapsed(t1, t2), convert it to jiffies, and do
diff = jiffies - (pr->power.bm_check_timestamp + last_sleep_jiffies);
it should work. I wrote a quick patch to do that, but it locked up my
notebook, so it is most likely broken; hopefully I'll find some time to debug
it, if somebody does it earlier, that'd be great, though.

Thanks,
Dominik


Only assume busmaster activity on non-idle ticks if we didn't sleep until
that jiffy. Needed for dyn-idle.

Signed-off-by: Dominik Brodowski <[email protected]>

--- linux/drivers/acpi/processor_idle.c.original 2005-04-10 20:04:12.000000000 +0200
+++ linux/drivers/acpi/processor_idle.c 2005-04-10 20:14:33.000000000 +0200
@@ -120,6 +120,14 @@
return ((0xFFFFFFFF - t1) + t2);
}

+static inline u32
+ticks_to_jiffies (u32 pm_ticks)
+{
+ pm_ticks *= 286;
+ pm_ticks = (pm_ticks >> 10);
+ return (pm_ticks / (USEC_PER_SEC / HZ));
+}
+

static void
acpi_processor_power_activate (
@@ -169,7 +177,7 @@
struct acpi_processor_cx *cx = NULL;
struct acpi_processor_cx *next_state = NULL;
int sleep_ticks = 0;
- u32 t1, t2 = 0;
+ u32 t1, t2, td = 0;

pr = processors[_smp_processor_id()];
if (!pr)
@@ -201,11 +209,13 @@
* for demotion.
*/
if (pr->flags.bm_check) {
- u32 bm_status = 0;
- unsigned long diff = jiffies - pr->power.bm_check_timestamp;
+ u32 bm_status = 0;
+ long diff = jiffies - pr->power.bm_check_timestamp;

if (diff > 32)
diff = 32;
+ else if (diff < 0)
+ diff = 0;

while (diff) {
/* if we didn't get called, assume there was busmaster activity */
@@ -293,7 +303,9 @@
/* Re-enable interrupts */
local_irq_enable();
/* Compute time (ticks) that we were actually asleep */
- sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+ td = ticks_elapsed(t1, t2);
+ sleep_ticks = td - cx->latency_ticks - C2_OVERHEAD;
+ pr->power.bm_check_timestamp += ticks_to_jiffies(td);
break;

case ACPI_STATE_C3:
@@ -312,7 +324,9 @@
/* Re-enable interrupts */
local_irq_enable();
/* Compute time (ticks) that we were actually asleep */
- sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
+ td = ticks_elapsed(t1, t2);
+ sleep_ticks = td - cx->latency_ticks - C3_OVERHEAD;
+ pr->power.bm_check_timestamp += ticks_to_jiffies(td);
break;

default:

2005-04-19 21:04:35

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Reducing the CC'd people a bit ...

Dominik Brodowski wrote:
> Hi,
>
> On Tue, Apr 19, 2005 at 04:56:56PM +0200, Thomas Renninger wrote:
>>If CONFIG_IDLE_HZ is set, the c-state will be evaluated on
>>three control values (averages of the last 4 measures):
>>
>>a) idle_ms -> if machine was active for longer than this
>> value (avg), the machine is assumed to not be idle
>> and C1 will be triggered.
>>
>>b) bm_promote_ms -> if the avg bus master activity is below
>> this threshold, C2 is invoked.
>>
>>c) sleep_avg (no module param) -> the average sleep time of the
>> last four C2 (or higher) invokations.
>> If a and b does not apply, a C-state will be searched that has
>> the highest latency, but still has a latency below the latest
>> C2 (or higher) sleeping time and average sleeping time value.
>
> I think that we don't need this complication:
>
>>+//#define DEBUG 1
>>+#ifdef DEBUG
>>+#define myPrintk(string, args...) printk(KERN_INFO ""string, ##args);
>>+#else
>>+#define myPrintk(string, args...) {};
>>+#endif
>
> Please don't do that... dprintk() is much more common. Also, then don't
> comment dprintk() out below in the patch...
>
Ok, this patch is far from perfect, I am happy that it finally runs that nice on
my machine.

>> if (pr->flags.bm_check) {
>>- u32 bm_status = 0;
>>- unsigned long diff = jiffies - pr->power.bm_check_timestamp;
>>-
>>- if (diff > 32)
>>- diff = 32;
>>-
>>- while (diff) {
>>- /* if we didn't get called, assume there was busmaster activity */
>>- diff--;
>>- if (diff)
>>- pr->power.bm_activity |= 0x1;
>>- pr->power.bm_activity <<= 1;
>>- }
>
> "All" we need to do is to update the "diff". Without dynamic ticks, if the
> idle loop didn't get called each jiffy, it was a big hint that there was so
> much activity in between, and if there is activity, there is most likely
> also bus master activity, or at least more work to do, so interrupt activity
> is likely. Therefore we assume there was bm_activity even if there was none.
>
If I understand this right you want at least wait 32 (or whatever value) ms if there was bm activity,
before it is allowed to trigger C3/C4?

I think the problem is (at least I made the experience with this particular machine)
that bm activity comes very often and regularly (each 30-150ms?).

I think the approach to directly adjust the latency to a deeper sleep state if the
average bus master and OS activity is low is very efficient.

Because I don't consider whether there was bm_activity the last ms, I only
consider the average, it seems to happen that I try to trigger
C3/C4 when there is just something copied and some bm active ?!? Therefore, it seems to happen
that triggering C3/C4 fails (sleep_ticks < 0). The value of failures is getting smaller if I increase
the limit for average bm activity before triggering C3/C4 (bm_promote_ms must be smaller than average bm activity),
but it never will reach zero.

The patch is useless if these failures end up in system freezes on other machines...
AFAIK there were a lot of freeze problems with C-states? Don't know, it works here.

The problem with the old approach is, that after (doesn't matter C1-Cx) sleep and dyn_idle_tick,
the chance to wake up because of bm activity is very likely.
You enter idle() again -> there was bm_activity -> C2. Wake up after e.g. 50ms, because
of bm_activity again (bm_sts bit set) -> stay in C2, wake up after 40ms -> bm activity...
You only have the chance to get into deeper states if the sleeps are interrupted by an interrupt, not bm activity.

I also thought about only reprogram timer if C1/C2 was successful x times and no bm activity was detected,
same mechanism as now, then only reprogram timer (dyn tick) for deeper sleep states -> like that, you
still can be sure the last x ms was no bm activity bit set before going to deep sleeps.
But I don't know how to do it.

> Now, we do know the jiffy value when we started sleeping. If we use
> ticks_elapsed(t1, t2), convert it to jiffies, and do
> diff = jiffies - (pr->power.bm_check_timestamp + last_sleep_jiffies);
> it should work. I wrote a quick patch to do that, but it locked up my
> notebook, so it is most likely broken; hopefully I'll find some time to debug
> it, if somebody does it earlier, that'd be great, though.
>
> Thanks,
> Dominik
>
>
> Only assume busmaster activity on non-idle ticks if we didn't sleep until
> that jiffy. Needed for dyn-idle.
>
> Signed-off-by: Dominik Brodowski <[email protected]>
>
> --- linux/drivers/acpi/processor_idle.c.original 2005-04-10 20:04:12.000000000 +0200
> +++ linux/drivers/acpi/processor_idle.c 2005-04-10 20:14:33.000000000 +0200
> @@ -120,6 +120,14 @@
> return ((0xFFFFFFFF - t1) + t2);
> }
>
> +static inline u32
> +ticks_to_jiffies (u32 pm_ticks)
> +{
> + pm_ticks *= 286;
> + pm_ticks = (pm_ticks >> 10);
> + return (pm_ticks / (USEC_PER_SEC / HZ));
> +}
> +
>
> static void
> acpi_processor_power_activate (
> @@ -169,7 +177,7 @@
> struct acpi_processor_cx *cx = NULL;
> struct acpi_processor_cx *next_state = NULL;
> int sleep_ticks = 0;
> - u32 t1, t2 = 0;
> + u32 t1, t2, td = 0;
>
> pr = processors[_smp_processor_id()];
> if (!pr)
> @@ -201,11 +209,13 @@
> * for demotion.
> */
> if (pr->flags.bm_check) {
> - u32 bm_status = 0;
> - unsigned long diff = jiffies - pr->power.bm_check_timestamp;
> + u32 bm_status = 0;
> + long diff = jiffies - pr->power.bm_check_timestamp;
>
> if (diff > 32)
> diff = 32;
> + else if (diff < 0)
> + diff = 0;
>
> while (diff) {
> /* if we didn't get called, assume there was busmaster activity */
> @@ -293,7 +303,9 @@
> /* Re-enable interrupts */
> local_irq_enable();
> /* Compute time (ticks) that we were actually asleep */
> - sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
> + td = ticks_elapsed(t1, t2);
> + sleep_ticks = td - cx->latency_ticks - C2_OVERHEAD;
> + pr->power.bm_check_timestamp += ticks_to_jiffies(td);
> break;
>
> case ACPI_STATE_C3:
> @@ -312,7 +324,9 @@
> /* Re-enable interrupts */
> local_irq_enable();
> /* Compute time (ticks) that we were actually asleep */
> - sleep_ticks = ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
> + td = ticks_elapsed(t1, t2);
> + sleep_ticks = td - cx->latency_ticks - C3_OVERHEAD;
> + pr->power.bm_check_timestamp += ticks_to_jiffies(td);
> break;
>
> default:

Hmm, I can give it a shot the next days ...

You could also test whether it was bm activity here that caused the end of sleep (it should be in most cases):

acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &bm_wakeup, ACPI_MTX_DO_NOT_LOCK);
if (bm_wakeup){
printk(KERN_INFO "Woke up from C3 from bus master activity after %d ticks\n", td);
acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1, ACPI_MTX_DO_NOT_LOCK);
/* also reset bm_sts bit ?!? */
pr->power.bm_activity++;
}
else{
printk(KERN_INFO "Did not wake up from C3 from bus master activity\n");
}
pr->power.bm_check_timestamp += ticks_to_jiffies(td);

Hmm I wonder what the difference is after waking up and checking for bm_rld or bm_sts bit ...

Thomas

2005-04-19 21:10:35

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Hi!

> The machine is a Pentium M 2.00 GHz, supporting C0-C4 processor power states.
> The machine run at 2.00 GHz all the time.
..
> _passing bm_history=0xFFFFFFFF (default) to processor module:_
>
> Average current the last 470 seconds: *1986mA* (also measured better
> values ~1800, does battery level play a role?!?)

Probably yes. If voltage changes, 2000mA means different ammount of power.


> (cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FFFFFFFF)
>
>
> _passing bm_history=0xFF to processor module:_
>
> Average current the last 190 seconds: *1757mA*
> (cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FF)
> (Usage count could be bogus, as some invokations could not succeed
> if bm has currently been active).

Ok.

> idle_ms == 100, bm_promote_bs == 30
> Average current the last 80 seconds: *1466mA*
> (cmp.
> ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_100_bm_30)

Very nice indeed. That seems like ~5W saved, right? That might give
you one more hour of battery life....
Pavel

--
Boycott Kodak -- for their patent abuse against Java.

2005-04-20 11:48:12

by Dominik Brodowski

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

On Tue, Apr 19, 2005 at 11:03:30PM +0200, Thomas Renninger wrote:
> > "All" we need to do is to update the "diff". Without dynamic ticks, if the
> > idle loop didn't get called each jiffy, it was a big hint that there was so
> > much activity in between, and if there is activity, there is most likely
> > also bus master activity, or at least more work to do, so interrupt activity
> > is likely. Therefore we assume there was bm_activity even if there was none.
> >
> If I understand this right you want at least wait 32 (or whatever value) ms if there was bm activity,
> before it is allowed to trigger C3/C4?

That's the theory of operation of the current algorithm. I think that we
should do that small change to the current algorithm which allows us to keep
C3/C4 working with dyn-idle first, and then think of a very small abstraction
layer to test different idle algroithms, and -- possibly -- use different
ones for different usages.

> I think the problem is (at least I made the experience with this particular
> machine) that bm activity comes very often and regularly (each 30-150ms?).
>
> I think the approach to directly adjust the latency to a deeper sleep state if the
> average bus master and OS activity is low is very efficient.
>
> Because I don't consider whether there was bm_activity the last ms, I only
> consider the average, it seems to happen that I try to trigger
> C3/C4 when there is just something copied and some bm active ?!?

I don't think that this is perfect behaviour: if the system is idle, and
there is _currently_ bus master activity, the CPU should be put into C1 or
C2 type sleep. If you select C3 and actually enter it, you're risking
DMA issues, AFAICS.

> The patch is useless if these failures end up in system freezes on
> other machines...

I know that my patch is useless in its current form, but I wanted to share
it as a different way of doing things.

> The problem with the old approach is, that after (doesn't matter C1-Cx)
> sleep and dyn_idle_tick, the chance to wake up because of bm activity is
> very likely.
> You enter idle() again -> there was bm_activity -> C2. Wake up after e.g.
> 50ms, because of bm_activity again (bm_sts bit set) -> stay in C2, wake up
> after 40ms -> bm activity... You only have the chance to get into deeper
> states if the sleeps are interrupted by an interrupt, not bm activity.

That's a side-effect, indeed. However: if there _is_ bus master activity, we
must not enter C3, AFAICS.

Dominik

2005-04-20 11:58:31

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Hi!

> > Because I don't consider whether there was bm_activity the last ms, I only
> > consider the average, it seems to happen that I try to trigger
> > C3/C4 when there is just something copied and some bm active ?!?
>
> I don't think that this is perfect behaviour: if the system is idle, and
> there is _currently_ bus master activity, the CPU should be put into C1 or
> C2 type sleep. If you select C3 and actually enter it, you're risking
> DMA issues, AFAICS.

What kinds of DMA issues? Waiting 32msec or so is only heuristic; it
can go wrong any time. It would be really bad if it corrupted data or
something like that.
Pavel

--
Boycott Kodak -- for their patent abuse against Java.

2005-04-20 12:01:39

by Dominik Brodowski

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

On Wed, Apr 20, 2005 at 01:57:39PM +0200, Pavel Machek wrote:
> Hi!
>
> > > Because I don't consider whether there was bm_activity the last ms, I only
> > > consider the average, it seems to happen that I try to trigger
> > > C3/C4 when there is just something copied and some bm active ?!?
> >
> > I don't think that this is perfect behaviour: if the system is idle, and
> > there is _currently_ bus master activity, the CPU should be put into C1 or
> > C2 type sleep. If you select C3 and actually enter it, you're risking
> > DMA issues, AFAICS.
>
> What kinds of DMA issues? Waiting 32msec or so is only heuristic; it
> can go wrong any time. It would be really bad if it corrupted data or
> something like that.

loop()
a) bus mastering activity is going on at the very moment
b) the CPU is entering C3
c) the CPU is woken out of C3 because of bus mastering activity

the repeated delay between b) and c) might be problematic, as can be seen
by the comment in processor_idle.c:

* TBD: A better policy might be to fallback to the demotion
* state (use it for this quantum only) istead of
* demoting -- and rely on duration as our sole demotion
* qualification. This may, however, introduce DMA
* issues (e.g. floppy DMA transfer overrun/underrun).
*/

I'm not so worried about floppy DMA but about the ipw2x00 issues here.

Dominik

2005-04-20 12:11:14

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Hi!

> > > > Because I don't consider whether there was bm_activity the last ms, I only
> > > > consider the average, it seems to happen that I try to trigger
> > > > C3/C4 when there is just something copied and some bm active ?!?
> > >
> > > I don't think that this is perfect behaviour: if the system is idle, and
> > > there is _currently_ bus master activity, the CPU should be put into C1 or
> > > C2 type sleep. If you select C3 and actually enter it, you're risking
> > > DMA issues, AFAICS.
> >
> > What kinds of DMA issues? Waiting 32msec or so is only heuristic; it
> > can go wrong any time. It would be really bad if it corrupted data or
> > something like that.
>
> loop()
> a) bus mastering activity is going on at the very moment
> b) the CPU is entering C3
> c) the CPU is woken out of C3 because of bus mastering activity
>
> the repeated delay between b) and c) might be problematic, as can be seen
> by the comment in processor_idle.c:
>
> * TBD: A better policy might be to fallback to the demotion
> * state (use it for this quantum only) istead of
> * demoting -- and rely on duration as our sole demotion
> * qualification. This may, however, introduce DMA
> * issues (e.g. floppy DMA transfer overrun/underrun).
> */
>
> I'm not so worried about floppy DMA but about the ipw2x00 issues here.

Like "ipw2x00 looses packets" if this happens too often?

Pavel
--
Boycott Kodak -- for their patent abuse against Java.

2005-04-20 12:15:03

by Dominik Brodowski

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Hi,

On Wed, Apr 20, 2005 at 02:08:46PM +0200, Pavel Machek wrote:
> Like "ipw2x00 looses packets" if this happens too often?

See "PCI latency error if C3 enabled" on http://ipw2100.sf.net -- it causes
network instability, frequent firmware restarts.

Dominik

2005-04-20 12:42:00

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Dominik Brodowski wrote:
> On Tue, Apr 19, 2005 at 11:03:30PM +0200, Thomas Renninger wrote:
>>>"All" we need to do is to update the "diff". Without dynamic ticks, if the
>>>idle loop didn't get called each jiffy, it was a big hint that there was so
>>>much activity in between, and if there is activity, there is most likely
>>>also bus master activity, or at least more work to do, so interrupt activity
>>>is likely. Therefore we assume there was bm_activity even if there was none.
>>>
>>If I understand this right you want at least wait 32 (or whatever value) ms if there was bm activity,
>>before it is allowed to trigger C3/C4?
>
> That's the theory of operation of the current algorithm. I think that we
> should do that small change to the current algorithm which allows us to keep
> C3/C4 working with dyn-idle first, and then think of a very small abstraction
> layer to test different idle algroithms, and -- possibly -- use different
> ones for different usages.
>
>>I think the problem is (at least I made the experience with this particular
>>machine) that bm activity comes very often and regularly (each 30-150ms?).
>>
>>I think the approach to directly adjust the latency to a deeper sleep state if the
>>average bus master and OS activity is low is very efficient.
>>
>>Because I don't consider whether there was bm_activity the last ms, I only
>>consider the average, it seems to happen that I try to trigger
>>C3/C4 when there is just something copied and some bm active ?!?
>
> I don't think that this is perfect behaviour: if the system is idle, and
> there is _currently_ bus master activity, the CPU should be put into C1 or
> C2 type sleep. If you select C3 and actually enter it, you're risking
> DMA issues, AFAICS.
>
On my system triggering C3/C4 is just ignored (sleep_ticks < 0).
These ignorings (C3/C4 failures) seem to directly depend on how much bm_activity
there actually is.
With the current method (wait at least 30 ms if there was bm activity before
triggering C3/C4) these failures never happened.
As mentioned using bm_promotion_ms you can lower the failures, but never reach zero.
If these failures lead to system freezes on other systems, my next sentence is valid
(I meant my patch).

>>The patch is useless if these failures end up in system freezes on
>>other machines...
>
> I know that my patch is useless in its current form, but I wanted to share
> it as a different way of doing things.
>
>>The problem with the old approach is, that after (doesn't matter C1-Cx)
>>sleep and dyn_idle_tick, the chance to wake up because of bm activity is
>>very likely.
>>You enter idle() again -> there was bm_activity -> C2. Wake up after e.g.
>>50ms, because of bm_activity again (bm_sts bit set) -> stay in C2, wake up
>>after 40ms -> bm activity... You only have the chance to get into deeper
>>states if the sleeps are interrupted by an interrupt, not bm activity.
>
> That's a side-effect, indeed. However: if there _is_ bus master activity, we
> must not enter C3, AFAICS.
>

What about a mixed approach: only reprogram timer if you want to go to deeper
sleeping states (C3-Cx) when bm activity comes in place?

It's the only way you can say: the last xy ms there was no bm activity (use bm_history),
now it's safe to sleep and also be efficient: don't sleep forever in C1/C2 -> bm_sts bit
will probably be set afterwards and you need to wait another xy ms in C1/C2
-> endless loop ...

Like that the timer is only disabled where it is really useful, on C3-Cx machines
(or are there other cases?).


Thomas

2005-04-20 20:02:35

by Tony Lindgren

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

* Pavel Machek <[email protected]> [050419 14:10]:
> Hi!
>
> > The machine is a Pentium M 2.00 GHz, supporting C0-C4 processor power states.
> > The machine run at 2.00 GHz all the time.
> ..
> > _passing bm_history=0xFFFFFFFF (default) to processor module:_
> >
> > Average current the last 470 seconds: *1986mA* (also measured better
> > values ~1800, does battery level play a role?!?)
>
> Probably yes. If voltage changes, 2000mA means different ammount of power.

Thomas, thanks for doing all the stats and patches to squeeze some
real power savings out of this! :)

We should display both average mA and average Watts with pmstats.
BTW, I've posted Thomas' version of pmstats as pmstats-0.2.gz to
muru.com also.

> > (cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FFFFFFFF)
> >
> >
> > _passing bm_history=0xFF to processor module:_
> >
> > Average current the last 190 seconds: *1757mA*
> > (cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FF)
> > (Usage count could be bogus, as some invokations could not succeed
> > if bm has currently been active).
>
> Ok.
>
> > idle_ms == 100, bm_promote_bs == 30
> > Average current the last 80 seconds: *1466mA*
> > (cmp.
> > ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_100_bm_30)
>
> Very nice indeed. That seems like ~5W saved, right? That might give
> you one more hour of battery life....

Depending on your battery capacity. But looking at the average Watts
on the first 8 lines of the two stats above:

1000_HZ_bm_history_FFFFFFFF:
(21.43 + 23.32 + 23.32 + 21.71 + 21.71 + 23.84 + 23.84 + 22.62) / 8
= 22.724W

tony_dyn_tick_processor_idle_100_bm_30:
(16.07 + 16.07 + 16.00 + 16.00 + 16.08 + 16.08 + 16.29 + 16.29) / 8
= 16.11W

And then comparing these two:
22.72 / 16.11 = 1.4103

So according to my calculations this should provide about 1.4 times
longer battery life compared to what you were getting earlier...
That is assuming system is mostly idle, of course.

Tony


2005-04-21 08:33:01

by Thomas Renninger

[permalink] [raw]
Subject: Re: [PATCH] Updated: Dynamic Tick version 050408-1 - C-state measures

Tony Lindgren wrote:
> * Pavel Machek <[email protected]> [050419 14:10]:
>>Hi!
>>
>>>The machine is a Pentium M 2.00 GHz, supporting C0-C4 processor power states.
>>>The machine run at 2.00 GHz all the time.
>>..
>>>_passing bm_history=0xFFFFFFFF (default) to processor module:_
>>>
>>>Average current the last 470 seconds: *1986mA* (also measured better
>>>values ~1800, does battery level play a role?!?)
>>Probably yes. If voltage changes, 2000mA means different ammount of power.
>
> Thomas, thanks for doing all the stats and patches to squeeze some
> real power savings out of this! :)
>
> We should display both average mA and average Watts with pmstats.
> BTW, I've posted Thomas' version of pmstats as pmstats-0.2.gz to
> muru.com also.
>
>>>(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FFFFFFFF)
>>>
>>>
>>>_passing bm_history=0xFF to processor module:_
>>>
>>>Average current the last 190 seconds: *1757mA*
>>>(cmp. ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/1000_HZ_bm_history_FF)
>>>(Usage count could be bogus, as some invokations could not succeed
>>>if bm has currently been active).
>>Ok.
>>
>>>idle_ms == 100, bm_promote_bs == 30
>>>Average current the last 80 seconds: *1466mA*
>>>(cmp.
>>>ftp://ftp.suse.com/pub/people/trenn/dyn_tick_c_states/measures_C4_machine/tony_dyn_tick_processor_idle_100_bm_30)
>>Very nice indeed. That seems like ~5W saved, right? That might give
>>you one more hour of battery life....
>
> Depending on your battery capacity. But looking at the average Watts
> on the first 8 lines of the two stats above:
>
> 1000_HZ_bm_history_FFFFFFFF:
> (21.43 + 23.32 + 23.32 + 21.71 + 21.71 + 23.84 + 23.84 + 22.62) / 8
> = 22.724W
>
> tony_dyn_tick_processor_idle_100_bm_30:
> (16.07 + 16.07 + 16.00 + 16.00 + 16.08 + 16.08 + 16.29 + 16.29) / 8
> = 16.11W
>
> And then comparing these two:
> 22.72 / 16.11 = 1.4103
>
> So according to my calculations this should provide about 1.4 times
> longer battery life compared to what you were getting earlier...
> That is assuming system is mostly idle, of course.
>
Be aware that speedstep was off (2.0 GHz). When CPU frequency is controlled
you won't have that much enhancement anymore ...

Thomas