Joel, All,
This patch moves the get_cycles() implementation into the timer_opts
subsystem. This patch corrects issues between the hangcheck-timer code
and systems running with timer_cyclone. As an extra bonus, it removes
the CONFIG_X86_TSC #ifdef in get_cycles replacing it with
timer->get_cycles().
Comments flames and suggestions welcome.
thanks
-john
diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
--- a/arch/i386/kernel/timers/timer_cyclone.c Fri Feb 7 12:18:21 2003
+++ b/arch/i386/kernel/timers/timer_cyclone.c Fri Feb 7 12:18:21 2003
@@ -161,6 +161,16 @@
now = cyclone_timer[0];
} while ((now-bclock) < loops);
}
+
+static unsigned long long get_cycles_cyclone(void)
+{
+ unsigned long long ret;
+ ret = cyclone_timer[1]; /* store high 8 bits */
+ ret = (ret & 0xFF) << 32; /* mask and shift them up */
+ ret = ret | cyclone_timer[0]; /* store lower 32 bits */
+ return ret;
+}
+
/************************************************************/
/* cyclone timer_opts struct */
@@ -169,4 +179,5 @@
.mark_offset = mark_offset_cyclone,
.get_offset = get_offset_cyclone,
.delay = delay_cyclone,
+ .get_cycles = get_cycles_cyclone,
};
diff -Nru a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
--- a/arch/i386/kernel/timers/timer_none.c Fri Feb 7 12:18:21 2003
+++ b/arch/i386/kernel/timers/timer_none.c Fri Feb 7 12:18:21 2003
@@ -28,10 +28,16 @@
:"0" (loops));
}
+static unsigned long long get_cycles_none(void)
+{
+ return 0ULL;
+}
+
/* tsc timer_opts struct */
struct timer_opts timer_none = {
.init = init_none,
.mark_offset = mark_offset_none,
.get_offset = get_offset_none,
.delay = delay_none,
+ .get_cycles = get_cycles_none,
};
diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c Fri Feb 7 12:18:21 2003
+++ b/arch/i386/kernel/timers/timer_pit.c Fri Feb 7 12:18:21 2003
@@ -135,6 +135,11 @@
return count;
}
+static unsigned long long get_cycles_pit(void)
+{
+ return 0ULL;
+}
+
/* tsc timer_opts struct */
struct timer_opts timer_pit = {
@@ -142,4 +147,5 @@
.mark_offset = mark_offset_pit,
.get_offset = get_offset_pit,
.delay = delay_pit,
+ .get_cycles = get_cycles_pit,
};
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c Fri Feb 7 12:18:21 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c Fri Feb 7 12:18:21 2003
@@ -118,6 +118,13 @@
} while ((now-bclock) < loops);
}
+static unsigned long long get_cycles_tsc(void)
+{
+ unsigned long long ret;
+ rdtscll(ret);
+ return ret;
+}
+
/* ------ Calibrate the TSC -------
* Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
* Too much 64-bit arithmetic here to do this cleanly in C, and for
@@ -319,4 +326,5 @@
.mark_offset = mark_offset_tsc,
.get_offset = get_offset_tsc,
.delay = delay_tsc,
+ .get_cycles = get_cycles_tsc,
};
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h Fri Feb 7 12:18:21 2003
+++ b/include/asm-i386/timer.h Fri Feb 7 12:18:21 2003
@@ -15,6 +15,7 @@
void (*mark_offset)(void);
unsigned long (*get_offset)(void);
void (*delay)(unsigned long);
+ unsigned long long (*get_cycles)(void);
};
#define TICK_SIZE (tick_nsec / 1000)
diff -Nru a/include/asm-i386/timex.h b/include/asm-i386/timex.h
--- a/include/asm-i386/timex.h Fri Feb 7 12:18:21 2003
+++ b/include/asm-i386/timex.h Fri Feb 7 12:18:21 2003
@@ -7,7 +7,7 @@
#define _ASMi386_TIMEX_H
#include <linux/config.h>
-#include <asm/msr.h>
+#include <asm/timer.h>
#ifdef CONFIG_MELAN
# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
@@ -37,17 +37,10 @@
typedef unsigned long long cycles_t;
extern cycles_t cacheflush_time;
-
+extern struct timer_opts* timer;
static inline cycles_t get_cycles (void)
{
-#ifndef CONFIG_X86_TSC
- return 0;
-#else
- unsigned long long ret;
-
- rdtscll(ret);
- return ret;
-#endif
+ return timer->get_cycles();
}
extern unsigned long cpu_khz;
john stultz <[email protected]> writes:
> Joel, All,
>
> This patch moves the get_cycles() implementation into the timer_opts
> subsystem. This patch corrects issues between the hangcheck-timer code
> and systems running with timer_cyclone. As an extra bonus, it removes
> the CONFIG_X86_TSC #ifdef in get_cycles replacing it with
> timer->get_cycles().
>
> Comments flames and suggestions welcome.
Is this really a good idea? get_cycles is normally not used to get accurate
time, but just to get random numbers for /dev/random or quickly estimate
some code (scheduler used to use it for that). I think the TSC even when
unsynchronized is better for that than an external timer which potentially
lower resolution and long access time.
If you really added it I would at least change the random device to
use the old macro.
-Andi
On Fri, 2003-02-07 at 13:19, Andi Kleen wrote:
> john stultz <[email protected]> writes:
> > This patch moves the get_cycles() implementation into the timer_opts
> > subsystem. This patch corrects issues between the hangcheck-timer code
> > and systems running with timer_cyclone. As an extra bonus, it removes
> > the CONFIG_X86_TSC #ifdef in get_cycles replacing it with
> > timer->get_cycles().
>
> Is this really a good idea? get_cycles is normally not used to get accurate
> time, but just to get random numbers for /dev/random or quickly estimate
> some code (scheduler used to use it for that). I think the TSC even when
> unsynchronized is better for that than an external timer which potentially
> lower resolution and long access time.
>
> If you really added it I would at least change the random device to
> use the old macro.
I'm sorry, I'm not seeing get_cycles used in /dev/random (although they
do make a direct call to rdtsc if its available - which is fine, since
the TSCs I deal with daily just give random values anyway :). So I don't
believe that is a concern.
Additionally, the hangcheck timer code does calculate time (although,
not system time) using get_cycles, and this gives them a good
abstraction so the right thing happens on the right box.
Let me know if you have any other concerns.
thanks
-john
> I'm sorry, I'm not seeing get_cycles used in /dev/random (although they
> do make a direct call to rdtsc if its available - which is fine, since
Hmm, perhaps it was only a proposed patch that wasn't merged.
Anyways my point stands - get_cycles should be only used when no
wall time is needed, so I see no reason to complicate it and slow
it down with external timers.
-Andi
On Fri, 2003-02-07 at 16:18, Andi Kleen wrote:
> Anyways my point stands - get_cycles should be only used when no
> wall time is needed, so I see no reason to complicate it and slow
> it down with external timers.
Because it (or some similar form) is needed. The hangcheck timer code
needs a method for reading a hard clock.
I've talked it over with Joel and it became obvious that there was no
efficient and clean way to get do_gettimeofday to appropriately handle
the situation where we loose 60 seconds worth of ticks. Thus Joel needs
raw access to a hardware clock, so he used get_cycles() and
loops_per_jiffy to calculate a time interval.
However this doesn't work on systems w/o a synced TSC, so by simply
making get_cycles() indirect to the TSC or cyclone implementation allows
it to do the right thing. It only minorly (extra pointer dereference and
call) affects non summit based boxes and returns (albeit slowly)
non-random values on summit based boxes.
Now maybe if you'd really rather not change get_cycles, hangcheck_timer
could instead use a different interface to do the same indirection.
Would you have any gripes with that?
thanks for the feedback.
-john
> However this doesn't work on systems w/o a synced TSC, so by simply
Why not? This shouldn't be performance critical and you can make
it monotonous with an additional variable + lock if backwards jumps
should be a problem.
Also the variations between non synced TSCs should be far below
any watchdog's radar screen.
-Andi
>> However this doesn't work on systems w/o a synced TSC, so by simply
>
> Why not? This shouldn't be performance critical and you can make
> it monotonous with an additional variable + lock if backwards jumps
> should be a problem.
>
> Also the variations between non synced TSCs should be far below
> any watchdog's radar screen.
Not true. They'll drift further and further apart over time.
Even a 0.01% crystal difference will eventually kill you.
And if that isn't bad enough think about what happens when I run 180 MHz
processors in one node, and 900MHz in another.
You really can't make any assumptions about TSC sync on boxes where
they're not synced.
M.
On Fri, 2003-02-07 at 17:52, Andi Kleen wrote:
> > However this doesn't work on systems w/o a synced TSC, so by simply
>
> Why not? This shouldn't be performance critical and you can make
> it monotonous with an additional variable + lock if backwards jumps
> should be a problem.
>
That sounds horrible! The extra locking and variable reading is going to
kill most of the performance concerns you have about reading an
alternate time source.
I'm not sure I understand your resistance to using an alternate clock
for get_cycles(). Could you better explain your problem with it?
thanks
-john
On Fri, Feb 07, 2003 at 06:14:43PM -0800, john stultz wrote:
> On Fri, 2003-02-07 at 17:52, Andi Kleen wrote:
> > > However this doesn't work on systems w/o a synced TSC, so by simply
> >
> > Why not? This shouldn't be performance critical and you can make
> > it monotonous with an additional variable + lock if backwards jumps
> > should be a problem.
> >
>
> That sounds horrible! The extra locking and variable reading is going to
> kill most of the performance concerns you have about reading an
> alternate time source.
>
> I'm not sure I understand your resistance to using an alternate clock
> for get_cycles(). Could you better explain your problem with it?
I want to keep get_cycles() as a very fast primitive useful for benchmarking
etc. and the random device. Accessing the southbridge would make it magnitudes
slower.
Regarding the watchdog: what it basically wants is a POSIX
CLOCK_MONOTONIC clock. This isn't currently implemented by Linux,
but I expect it will be eventually because it's really useful for a lot
of applications who just need an increasing time stamp in user space,
and who do not want to fight ntpd for this. One example for such
an application is the X server who needs this for its internal
event sequencing.
Implementing it based on the current time infrastructure is very easy -
you just do not add xtime and wall jiffies in, but only jiffies.
I don't think doing any special hacks and complicating get_cycles()
for it is the right way. Just implement a new monotonic clock primitive
(and eventually export it to user space too)
-Andi
On Fri, 2003-02-07 at 18:29, Andi Kleen wrote:
> On Fri, Feb 07, 2003 at 06:14:43PM -0800, john stultz wrote:
> > I'm not sure I understand your resistance to using an alternate clock
> > for get_cycles(). Could you better explain your problem with it?
>
> I want to keep get_cycles() as a very fast primitive useful for benchmarking
> etc. and the random device. Accessing the southbridge would make it magnitudes
> slower.
Ok, fair enough. But maybe a comment needs to be added to the get_cycles
to notify folks of the issues that surround it on systems without a
synced TSC?
> Regarding the watchdog: what it basically wants is a POSIX
> CLOCK_MONOTONIC clock. This isn't currently implemented by Linux,
> but I expect it will be eventually because it's really useful for a lot
> of applications who just need an increasing time stamp in user space,
> and who do not want to fight ntpd for this. One example for such
> an application is the X server who needs this for its internal
> event sequencing.
>
> Implementing it based on the current time infrastructure is very easy -
> you just do not add xtime and wall jiffies in, but only jiffies.
Actually, we can't do this, because if interrupts are being blocked
jiffies won't change. That is why I'm trying to provide hardware clock
access to hangcheck. It just so happened it was using get_cycles() as an
interface and it seemed appropriate for moving to timer_ops.
> I don't think doing any special hacks and complicating get_cycles()
> for it is the right way. Just implement a new monotonic clock primitive
> (and eventually export it to user space too)
Ok, so you're telling me to use another interface. I'm down with that
(sometimes I just need it spelled out). Name suggestions?
read_hw_timer?
thanks
-john
On Fri, Feb 07, 2003 at 06:42:21PM -0800, john stultz wrote:
> > of applications who just need an increasing time stamp in user space,
> > and who do not want to fight ntpd for this. One example for such
> > an application is the X server who needs this for its internal
> > event sequencing.
> >
> > Implementing it based on the current time infrastructure is very easy -
> > you just do not add xtime and wall jiffies in, but only jiffies.
>
> Actually, we can't do this, because if interrupts are being blocked
> jiffies won't change. That is why I'm trying to provide hardware clock
> access to hangcheck. It just so happened it was using get_cycles() as an
> interface and it seemed appropriate for moving to timer_ops.
monotonic clock can use the hw clock too. It just shouldn't use anything
manipulated by settimeofday() et.al.
>
> > I don't think doing any special hacks and complicating get_cycles()
> > for it is the right way. Just implement a new monotonic clock primitive
> > (and eventually export it to user space too)
>
> Ok, so you're telling me to use another interface. I'm down with that
> (sometimes I just need it spelled out). Name suggestions?
> read_hw_timer?
monotonic_clock()
-Andi
On Sat, 8 Feb 2003, Andi Kleen wrote:
> Regarding the watchdog: what it basically wants is a POSIX
> CLOCK_MONOTONIC clock. This isn't currently implemented by Linux,
> but I expect it will be eventually because it's really useful for a lot
> of applications who just need an increasing time stamp in user space,
> and who do not want to fight ntpd for this. One example for such
> an application is the X server who needs this for its internal
> event sequencing.
>
> Implementing it based on the current time infrastructure is very easy -
> you just do not add xtime and wall jiffies in, but only jiffies.
>
> I don't think doing any special hacks and complicating get_cycles()
> for it is the right way. Just implement a new monotonic clock primitive
> (and eventually export it to user space too)
That seems to be the right way to go, rather than slow get_cycles() have a
separate functionality which does what you need. Didn't know about
CLOCK_MONOTONIC by that name, but I agree it's useful in various places.
--
bill davidsen <[email protected]>
CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.