2003-06-30 22:17:46

by john stultz

[permalink] [raw]
Subject: [PATCH SET - 0/3] lost tick fixes

Andrew,
This set of patches fixes up the time doubling issues seen on laptops
with speedstep as well as other systems when HZ=100 or calibrate_tsc()
wasn't precise enough. No last-minute hand-editing of the diffs were
done and I've tested each step to insure they compile and boot. ;) The
patches in the order they should be applied are:

Patch 1: rename-timer_A1
o Your cleanups to the time code.
o Replaces your current rename-timer patch in your tree.
o Removes dependency on lost-tick-speedset-fix.

Patch 2: lost-tick-speedstep-fix_A1
o Detects speedstep time trouble and falls back to the PIT
o Replaces the lost-tick-speedstep-fix patch in your tree
o Adds dependency on rename-timer-A1

Patch 3: lost-tick-corner-fix_A0
o Fixes corner case where we detect an overflow between time source
reads.
o Depends on lost-tick-speedstep-fix_A1

Please consider for inclusion in your tree.

thanks
-john



2003-06-30 22:18:53

by john stultz

[permalink] [raw]
Subject: [PATCH SET - 1/3] linux-2.5.73_rename-timer_A1

This patch is a modification to your rename-timer patch in 2.5.73-mm2
which renames the bad "timer" variable to "cur_timer" and moves externs
to the .h files. This revision removes the dependency on the
lost-tick-speedstep-fix patch, allowing these changes to land first.
Please consider for inclusion in your tree.

thanks
-john

arch/i386/kernel/io_apic.c | 2 +-
arch/i386/kernel/time.c | 29 +++++++++++++----------------
arch/i386/kernel/timers/timer.c | 6 ------
arch/i386/lib/delay.c | 2 +-
include/asm-i386/timer.h | 12 ++++++++++++
5 files changed, 27 insertions(+), 24 deletions(-)



diff -Nru a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
--- a/arch/i386/kernel/io_apic.c Mon Jun 30 13:38:44 2003
+++ b/arch/i386/kernel/io_apic.c Mon Jun 30 13:38:44 2003
@@ -35,6 +35,7 @@
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
+#include <asm/timer.h>

#include <mach_apic.h>

@@ -2052,7 +2053,6 @@
*/
static inline void check_timer(void)
{
- extern int timer_ack;
int pin1, pin2;
int vector;

diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c Mon Jun 30 13:38:44 2003
+++ b/arch/i386/kernel/time.c Mon Jun 30 13:38:44 2003
@@ -80,8 +80,7 @@
spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL(i8253_lock);

-extern struct timer_opts timer_none;
-struct timer_opts* timer = &timer_none;
+struct timer_opts *cur_timer = &timer_none;

/*
* This version of gettimeofday has microsecond resolution
@@ -93,14 +92,14 @@
unsigned long usec, sec;

do {
+ unsigned long lost;
+
seq = read_seqbegin(&xtime_lock);

- usec = timer->get_offset();
- {
- unsigned long lost = jiffies - wall_jiffies;
- if (lost)
- usec += lost * (1000000 / HZ);
- }
+ usec = cur_timer->get_offset();
+ lost = jiffies - wall_jiffies;
+ if (lost)
+ usec += lost * (1000000 / HZ);
sec = xtime.tv_sec;
usec += (xtime.tv_nsec / 1000);
} while (read_seqretry(&xtime_lock, seq));
@@ -126,7 +125,7 @@
* wall time. Discover what correction gettimeofday() would have
* made, and then undo it!
*/
- tv->tv_nsec -= timer->get_offset() * NSEC_PER_USEC;
+ tv->tv_nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
tv->tv_nsec -= (jiffies - wall_jiffies) * TICK_NSEC;

while (tv->tv_nsec < 0) {
@@ -180,7 +179,7 @@
*/
unsigned long long monotonic_clock(void)
{
- return timer->monotonic_clock();
+ return cur_timer->monotonic_clock();
}
EXPORT_SYMBOL(monotonic_clock);

@@ -189,7 +188,8 @@
* timer_interrupt() needs to keep up the real-time clock,
* as well as call the "do_timer()" routine every clocktick
*/
-static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static inline void do_timer_interrupt(int irq, void *dev_id,
+ struct pt_regs *regs)
{
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
@@ -259,7 +259,7 @@
*/
write_seqlock(&xtime_lock);

- timer->mark_offset();
+ cur_timer->mark_offset();

do_timer_interrupt(irq, NULL, regs);

@@ -301,16 +301,13 @@

device_initcall(time_init_device);

-
void __init time_init(void)
{
-
xtime.tv_sec = get_cmos_time();
wall_to_monotonic.tv_sec = -xtime.tv_sec;
xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
wall_to_monotonic.tv_nsec = -xtime.tv_nsec;

-
- timer = select_timer();
+ cur_timer = select_timer();
time_init_hook();
}
diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
--- a/arch/i386/kernel/timers/timer.c Mon Jun 30 13:38:44 2003
+++ b/arch/i386/kernel/timers/timer.c Mon Jun 30 13:38:44 2003
@@ -3,12 +3,6 @@
#include <linux/string.h>
#include <asm/timer.h>

-/* list of externed timers */
-extern struct timer_opts timer_pit;
-extern struct timer_opts timer_tsc;
-#ifdef CONFIG_X86_CYCLONE_TIMER
-extern struct timer_opts timer_cyclone;
-#endif
/* list of timers, ordered by preference, NULL terminated */
static struct timer_opts* timers[] = {
#ifdef CONFIG_X86_CYCLONE_TIMER
diff -Nru a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
--- a/arch/i386/lib/delay.c Mon Jun 30 13:38:44 2003
+++ b/arch/i386/lib/delay.c Mon Jun 30 13:38:44 2003
@@ -25,7 +25,7 @@

void __delay(unsigned long loops)
{
- timer->delay(loops);
+ cur_timer->delay(loops);
}

inline void __const_udelay(unsigned long xloops)
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h Mon Jun 30 13:38:44 2003
+++ b/include/asm-i386/timer.h Mon Jun 30 13:38:44 2003
@@ -25,4 +25,16 @@
/* Modifiers for buggy PIT handling */

extern int pit_latch_buggy;
+
+extern struct timer_opts *cur_timer;
+extern int timer_ack;
+
+/* list of externed timers */
+extern struct timer_opts timer_none;
+extern struct timer_opts timer_pit;
+extern struct timer_opts timer_tsc;
+#ifdef CONFIG_X86_CYCLONE_TIMER
+extern struct timer_opts timer_cyclone;
+#endif
+
#endif



2003-06-30 22:21:18

by john stultz

[permalink] [raw]
Subject: [PATCH SET - 3/3] linux-2.5.73_lost-tick-corner-fix_A0

This patch catches a corner case in the lost-tick compensation code.
There is a check to see if we overflowed between reads of the two time
sources, however should the high res time source be slightly slower then
what we calibrated, its possible to trigger this code when no ticks have
been lost. This patch adds an extra check to insure we have seen more
then one tick before we check for this overflow. This seems to resolve
the remaining "time doubling" issues that I've seen reported. This patch
applies ontop of lost-tick-speedstep-fix_A1.

Please consider for inclusion in your tree.

thanks
-john

timer_cyclone.c | 2 +-
timer_tsc.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
--- a/arch/i386/kernel/timers/timer_cyclone.c Mon Jun 30 13:53:56 2003
+++ b/arch/i386/kernel/timers/timer_cyclone.c Mon Jun 30 13:53:56 2003
@@ -88,7 +88,7 @@
* between cyclone and pit reads (as noted when
* usec delta is > 90% # of usecs/tick)
*/
- if (abs(delay - delay_at_last_interrupt) > (900000/HZ))
+ if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
jiffies++;
}

diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c Mon Jun 30 13:53:56 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c Mon Jun 30 13:53:56 2003
@@ -205,7 +205,7 @@
* between tsc and pit reads (as noted when
* usec delta is > 90% # of usecs/tick)
*/
- if (abs(delay - delay_at_last_interrupt) > (900000/HZ))
+ if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
jiffies++;
}




2003-06-30 22:20:53

by john stultz

[permalink] [raw]
Subject: [PATCH SET - 2/3] linux-2.5.73_lost-tick-speedstep-fix_A1

This patch is a modification of the lost-tick-speedstep-fix patch in
2.5.73-mm2.

The patch tries to resolve issues caused by running the TSC based lost
tick compensation code on CPUs that change frequency (speedstep, etc).

Should the CPU be in slow mode when calibrate_tsc() executes, the kernel
will assume we have so many cycles per tick. Later when the cpu speeds
up, the kernel will start noting that too many cycles have past since
the last interrupt. Since this can occasionally happen, the lost tick
compensation code then tries to fix this by incrementing jiffies. Thus
every tick we end up incrementing jiffies many times, causing timers to
expire too quickly and time to rush ahead.

This patch detects when there has been 100 consecutive interrupts where
we had to compensate for lost ticks. If this occurs, we spit out a
warning and fall back to using the PIT as a time source.

I've tested this on my speedstep enabled laptop with success, and others
laptop users seeing this problem have reported it works for them. Also
to ensure we don't fall back to the slower PIT too quickly, I tested the
code on a system I have that looses ~30 ticks about every second and it
can still manage to use the TSC as a good time source.

This solves most of the "time doubling" problems seen on laptops.
Additionally this revision has been modified to use the cleanups made in
rename-timer_A1.

Please consider for inclusion in your tree.

thanks
-john

arch/i386/kernel/timers/timer.c | 9 +++++++++
arch/i386/kernel/timers/timer_tsc.c | 13 ++++++++++++-
include/asm-i386/timer.h | 1 +
3 files changed, 22 insertions(+), 1 deletion(-)

diff -Nru a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
--- a/arch/i386/kernel/timers/timer.c Mon Jun 30 13:52:43 2003
+++ b/arch/i386/kernel/timers/timer.c Mon Jun 30 13:52:43 2003
@@ -23,6 +23,15 @@
}
__setup("clock=", clock_setup);

+
+/* The chosen timesource has been found to be bad.
+ * Fall back to a known good timesource (the PIT)
+ */
+void clock_fallback(void)
+{
+ cur_timer = &timer_pit;
+}
+
/* iterates through the list of timers, returning the first
* one that initializes successfully.
*/
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c Mon Jun 30 13:52:43 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c Mon Jun 30 13:52:43 2003
@@ -124,6 +124,7 @@
int countmp;
static int count1 = 0;
unsigned long long this_offset, last_offset;
+ static int lost_count = 0;

write_lock(&monotonic_lock);
last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
@@ -178,9 +179,19 @@
delta += delay_at_last_interrupt;
lost = delta/(1000000/HZ);
delay = delta%(1000000/HZ);
- if (lost >= 2)
+ if (lost >= 2) {
jiffies += lost-1;

+ /* sanity check to ensure we're not always loosing ticks */
+ if (lost_count++ > 100) {
+ printk(KERN_WARNING "Loosing too many ticks!\n");
+ printk(KERN_WARNING "TSC cannot be used as a timesource."
+ " (Are you running with SpeedStep?)\n");
+ printk(KERN_WARNING "Falling back to a sane timesource.\n");
+ clock_fallback();
+ }
+ } else
+ lost_count = 0;
/* update the monotonic base value */
this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
monotonic_base += cycles_2_ns(this_offset - last_offset);
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h Mon Jun 30 13:52:43 2003
+++ b/include/asm-i386/timer.h Mon Jun 30 13:52:43 2003
@@ -21,6 +21,7 @@
#define TICK_SIZE (tick_nsec / 1000)

extern struct timer_opts* select_timer(void);
+extern void clock_fallback(void);

/* Modifiers for buggy PIT handling */