2003-03-11 19:33:33

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

All,
Here is the next rev of my monotonic-clock patch. This version uses
scaled math to avoid the costly 64 bit divide at interrupt time. Big
thanks to George Anzinger for the suggestion.

This patch, written with the advice of Joel Becker, addresses a problem
with the hangcheck-timer. The basic problem is that the hangcheck-timer
code (Required for Oracle) needs a accurate hard clock which can be used
to detect OS stalls (due to udelay() or pci bus hangs) that would cause
system time to skew (its sort of a sanity check that insures the
system's notion of time is accurate). However, currently they are using
get_cycles() to fetch the cpu's TSC register, thus this does not work on
systems w/o a synced TSC. As suggested by Andi Kleen (see thread here:
http://www.uwsg.iu.edu/hypermail/linux/kernel/0302.0/1234.html ) I've
worked with Joel and others to implement the monotonic_clock()
interface.

This interface returns a unsigned long long representing the number of
nanoseconds that has passed since time_init().

Future plans to the interface include properly handling cpu_freq changes
and porting to the different arches.

Comments, suggestions and flames welcome.

thanks
-john



Attachments:
signature.asc (232.00 B)
This is a digitally signed message part

2003-03-11 19:34:41

by john stultz

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

<sigh> Patch below.

thanks
-john


diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c Tue Mar 11 11:24:04 2003
+++ b/arch/i386/kernel/time.c Tue Mar 11 11:24:04 2003
@@ -138,6 +138,19 @@
clock_was_set();
}

+unsigned long long monotonic_clock(void)
+{
+ unsigned long long ret;
+ unsigned long seq;
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ ret = timer->monotonic_clock();
+ } while (read_seqretry(&xtime_lock, seq));
+ return ret;
+}
+EXPORT_SYMBOL(monotonic_clock);
+
+
/*
* In order to set the CMOS clock precisely, set_rtc_mmss has to be
* called 500 ms after the second nowtime has started, because when
diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
--- a/arch/i386/kernel/timers/timer_cyclone.c Tue Mar 11 11:24:04 2003
+++ b/arch/i386/kernel/timers/timer_cyclone.c Tue Mar 11 11:24:04 2003
@@ -27,19 +27,24 @@
#define CYCLONE_MPMC_OFFSET 0x51D0
#define CYCLONE_MPCS_OFFSET 0x51A8
#define CYCLONE_TIMER_FREQ 100000000
-
+#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /*40 bit mask*/
int use_cyclone = 0;

static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_timer;
+static u32 last_cyclone_low;
+static u32 last_cyclone_high;
+static unsigned long long monotonic_base;

static void mark_offset_cyclone(void)
{
int count;
+ unsigned long long this_offset, last_offset;
+ last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+
spin_lock(&i8253_lock);
/* quickly read the cyclone timer */
- if(cyclone_timer)
- last_cyclone_timer = cyclone_timer[0];
+ last_cyclone_high = cyclone_timer[1];
+ last_cyclone_low = cyclone_timer[0];

/* calculate delay_at_last_interrupt */
outb_p(0x00, 0x43); /* latch the count ASAP */
@@ -50,6 +55,10 @@

count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+ /* update the monotonic base value */
+ this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+ monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
}

static unsigned long get_offset_cyclone(void)
@@ -63,7 +72,7 @@
offset = cyclone_timer[0];

/* .. relative to previous jiffy */
- offset = offset - last_cyclone_timer;
+ offset = offset - last_cyclone_low;

/* convert cyclone ticks to microseconds */
/* XXX slow, can we speed this up? */
@@ -73,6 +82,21 @@
return delay_at_last_interrupt + offset;
}

+static unsigned long long monotonic_clock_cyclone(void)
+{
+
+ u32 now_low = cyclone_timer[0];
+ u32 now_high = cyclone_timer[1];
+ unsigned long long last_offset, this_offset;
+ unsigned long long ret;
+ last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
+ this_offset = ((unsigned long long)now_high<<32)|now_low;
+
+ ret = monotonic_base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
+ ret = ret * (1000000000 / CYCLONE_TIMER_FREQ);
+ return ret;
+}
+
static int init_cyclone(void)
{
u32* reg;
@@ -190,5 +214,6 @@
.init = init_cyclone,
.mark_offset = mark_offset_cyclone,
.get_offset = get_offset_cyclone,
+ .monotonic_clock = monotonic_clock_cyclone,
.delay = delay_cyclone,
};
diff -Nru a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
--- a/arch/i386/kernel/timers/timer_none.c Tue Mar 11 11:24:04 2003
+++ b/arch/i386/kernel/timers/timer_none.c Tue Mar 11 11:24:04 2003
@@ -15,6 +15,11 @@
return 0;
}

+static unsigned long long monotonic_clock_none(void)
+{
+ return 0;
+}
+
static void delay_none(unsigned long loops)
{
int d0;
@@ -33,5 +38,6 @@
.init = init_none,
.mark_offset = mark_offset_none,
.get_offset = get_offset_none,
+ .monotonic_clock = monotonic_clock_none,
.delay = delay_none,
};
diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c Tue Mar 11 11:24:04 2003
+++ b/arch/i386/kernel/timers/timer_pit.c Tue Mar 11 11:24:04 2003
@@ -27,6 +27,11 @@
/* nothing needed */
}

+static unsigned long long monotonic_clock_pit(void)
+{
+ return 0;
+}
+
static void delay_pit(unsigned long loops)
{
int d0;
@@ -141,5 +146,6 @@
.init = init_pit,
.mark_offset = mark_offset_pit,
.get_offset = get_offset_pit,
+ .monotonic_clock = monotonic_clock_pit,
.delay = delay_pit,
};
diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c Tue Mar 11 11:24:04 2003
+++ b/arch/i386/kernel/timers/timer_tsc.c Tue Mar 11 11:24:04 2003
@@ -23,6 +23,38 @@
static int delay_at_last_interrupt;

static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
+static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
+static unsigned long long monotonic_base;
+
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ * ns = cycles * (10^3 / cpu_mhz)
+ *
+ * Then we use scaling math (suggested by [email protected]) to get:
+ * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ * [email protected] "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen*/
+
+static inline set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+ cyc2ns_scale = (1000 * (1<<CYC2NS_SCALE_FACTOR))/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+

/* Cached *multiplier* to convert TSC counts to microseconds.
* (see the equation below).
@@ -60,11 +92,25 @@
return delay_at_last_interrupt + edx;
}

+static unsigned long long monotonic_clock_tsc(void)
+{
+ unsigned long long last_offset, this_offset;
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+
+ /* Read the Time Stamp Counter */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return monotonic_base + cycles_2_ns(this_offset - last_offset);
+}
+
static void mark_offset_tsc(void)
{
int count;
int countmp;
static int count1=0, count2=LATCH;
+ unsigned long long this_offset, last_offset;
+ last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
/*
* It is important that these two operations happen almost at
* the same time. We do the RDTSC stuff first, since it's
@@ -79,7 +125,7 @@

/* read Pentium cycle counter */

- rdtscl(last_tsc_low);
+ rdtsc(last_tsc_low, last_tsc_high);

spin_lock(&i8253_lock);
outb_p(0x00, 0x43); /* latch the count ASAP */
@@ -104,6 +150,11 @@

count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+
+ /* update the monotonic base value */
+ this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
+ monotonic_base += cycles_2_ns(this_offset - last_offset);
+
}

static void delay_tsc(unsigned long loops)
@@ -293,6 +344,7 @@
"0" (eax), "1" (edx));
printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
}
+ set_cyc2ns_scale(cpu_khz/1000);
return 0;
}
}
@@ -326,5 +378,6 @@
.init = init_tsc,
.mark_offset = mark_offset_tsc,
.get_offset = get_offset_tsc,
+ .monotonic_clock = monotonic_clock_tsc,
.delay = delay_tsc,
};
diff -Nru a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
--- a/drivers/char/hangcheck-timer.c Tue Mar 11 11:24:04 2003
+++ b/drivers/char/hangcheck-timer.c Tue Mar 11 11:24:04 2003
@@ -78,11 +78,13 @@
static struct timer_list hangcheck_ticktock =
TIMER_INITIALIZER(hangcheck_fire, 0, 0);

+extern unsigned long long monotonic_clock(void);
+
static void hangcheck_fire(unsigned long data)
{
unsigned long long cur_tsc, tsc_diff;

- cur_tsc = get_cycles();
+ cur_tsc = monotonic_clock();

if (cur_tsc > hangcheck_tsc)
tsc_diff = cur_tsc - hangcheck_tsc;
@@ -98,7 +100,7 @@
}
}
mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
- hangcheck_tsc = get_cycles();
+ hangcheck_tsc = monotonic_clock();
}


@@ -108,10 +110,10 @@
VERSION_STR, hangcheck_tick, hangcheck_margin);

hangcheck_tsc_margin = hangcheck_margin + hangcheck_tick;
- hangcheck_tsc_margin *= HZ;
- hangcheck_tsc_margin *= current_cpu_data.loops_per_jiffy;
+ hangcheck_tsc_margin *= 1000000000;
+

- hangcheck_tsc = get_cycles();
+ hangcheck_tsc = monotonic_clock();
mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));

return 0;
diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h Tue Mar 11 11:24:04 2003
+++ b/include/asm-i386/timer.h Tue Mar 11 11:24:04 2003
@@ -14,6 +14,7 @@
int (*init)(void);
void (*mark_offset)(void);
unsigned long (*get_offset)(void);
+ unsigned long long (*monotonic_clock)(void);
void (*delay)(unsigned long);
};






Attachments:
signature.asc (232.00 B)
This is a digitally signed message part

2003-03-11 21:38:07

by George Anzinger

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

john,

Some comments below on the scaling.

On a related note, I would like to extend the CLOCK_MONOTONIC code to
the same res as CLOCK_REALTIME in the POSIX clocks and timers patch.
The patch uses jiffies_64 for CLOCK_MONOTONIC, so what I would like to
do is use get_offset() to fill in the sub_jiffies part. Is this
function available (i.e. timer->get_offset()) on all archs?

It seems to me that the lost jiffies should be rolled into
get_offset(). Have you considered doing this?

-g

john stultz wrote:
> <sigh> Patch below.
>
> thanks
> -john
>
>
> diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
> --- a/arch/i386/kernel/time.c Tue Mar 11 11:24:04 2003
> +++ b/arch/i386/kernel/time.c Tue Mar 11 11:24:04 2003
> @@ -138,6 +138,19 @@
> clock_was_set();
> }
>
> +unsigned long long monotonic_clock(void)
> +{
> + unsigned long long ret;
> + unsigned long seq;
> + do {
> + seq = read_seqbegin(&xtime_lock);
> + ret = timer->monotonic_clock();
> + } while (read_seqretry(&xtime_lock, seq));
> + return ret;
> +}
> +EXPORT_SYMBOL(monotonic_clock);
> +
> +
> /*
> * In order to set the CMOS clock precisely, set_rtc_mmss has to be
> * called 500 ms after the second nowtime has started, because when
> diff -Nru a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
> --- a/arch/i386/kernel/timers/timer_cyclone.c Tue Mar 11 11:24:04 2003
> +++ b/arch/i386/kernel/timers/timer_cyclone.c Tue Mar 11 11:24:04 2003
> @@ -27,19 +27,24 @@
> #define CYCLONE_MPMC_OFFSET 0x51D0
> #define CYCLONE_MPCS_OFFSET 0x51A8
> #define CYCLONE_TIMER_FREQ 100000000
> -
> +#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /*40 bit mask*/
> int use_cyclone = 0;
>
> static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
> -static u32 last_cyclone_timer;
> +static u32 last_cyclone_low;
> +static u32 last_cyclone_high;
> +static unsigned long long monotonic_base;
>
> static void mark_offset_cyclone(void)
> {
> int count;
> + unsigned long long this_offset, last_offset;
> + last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> +
> spin_lock(&i8253_lock);
> /* quickly read the cyclone timer */
> - if(cyclone_timer)
> - last_cyclone_timer = cyclone_timer[0];
> + last_cyclone_high = cyclone_timer[1];
> + last_cyclone_low = cyclone_timer[0];
>
> /* calculate delay_at_last_interrupt */
> outb_p(0x00, 0x43); /* latch the count ASAP */
> @@ -50,6 +55,10 @@
>
> count = ((LATCH-1) - count) * TICK_SIZE;
> delay_at_last_interrupt = (count + LATCH/2) / LATCH;
> +
> + /* update the monotonic base value */
> + this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> + monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
> }
>
> static unsigned long get_offset_cyclone(void)
> @@ -63,7 +72,7 @@
> offset = cyclone_timer[0];
>
> /* .. relative to previous jiffy */
> - offset = offset - last_cyclone_timer;
> + offset = offset - last_cyclone_low;
>
> /* convert cyclone ticks to microseconds */
> /* XXX slow, can we speed this up? */
> @@ -73,6 +82,21 @@
> return delay_at_last_interrupt + offset;
> }
>
> +static unsigned long long monotonic_clock_cyclone(void)
> +{
> +
> + u32 now_low = cyclone_timer[0];
> + u32 now_high = cyclone_timer[1];
> + unsigned long long last_offset, this_offset;
> + unsigned long long ret;
> + last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
> + this_offset = ((unsigned long long)now_high<<32)|now_low;
> +
> + ret = monotonic_base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
> + ret = ret * (1000000000 / CYCLONE_TIMER_FREQ);
> + return ret;
> +}
> +
> static int init_cyclone(void)
> {
> u32* reg;
> @@ -190,5 +214,6 @@
> .init = init_cyclone,
> .mark_offset = mark_offset_cyclone,
> .get_offset = get_offset_cyclone,
> + .monotonic_clock = monotonic_clock_cyclone,
> .delay = delay_cyclone,
> };
> diff -Nru a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
> --- a/arch/i386/kernel/timers/timer_none.c Tue Mar 11 11:24:04 2003
> +++ b/arch/i386/kernel/timers/timer_none.c Tue Mar 11 11:24:04 2003
> @@ -15,6 +15,11 @@
> return 0;
> }
>
> +static unsigned long long monotonic_clock_none(void)
> +{
> + return 0;
> +}
> +
> static void delay_none(unsigned long loops)
> {
> int d0;
> @@ -33,5 +38,6 @@
> .init = init_none,
> .mark_offset = mark_offset_none,
> .get_offset = get_offset_none,
> + .monotonic_clock = monotonic_clock_none,
> .delay = delay_none,
> };
> diff -Nru a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
> --- a/arch/i386/kernel/timers/timer_pit.c Tue Mar 11 11:24:04 2003
> +++ b/arch/i386/kernel/timers/timer_pit.c Tue Mar 11 11:24:04 2003
> @@ -27,6 +27,11 @@
> /* nothing needed */
> }
>
> +static unsigned long long monotonic_clock_pit(void)
> +{
> + return 0;
> +}
> +
> static void delay_pit(unsigned long loops)
> {
> int d0;
> @@ -141,5 +146,6 @@
> .init = init_pit,
> .mark_offset = mark_offset_pit,
> .get_offset = get_offset_pit,
> + .monotonic_clock = monotonic_clock_pit,
> .delay = delay_pit,
> };
> diff -Nru a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
> --- a/arch/i386/kernel/timers/timer_tsc.c Tue Mar 11 11:24:04 2003
> +++ b/arch/i386/kernel/timers/timer_tsc.c Tue Mar 11 11:24:04 2003
> @@ -23,6 +23,38 @@
> static int delay_at_last_interrupt;
>
> static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
> +static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
> +static unsigned long long monotonic_base;
> +
> +
> +/* convert from cycles(64bits) => nanoseconds (64bits)
> + * basic equation:
> + * ns = cycles / (freq / ns_per_sec)
> + * ns = cycles * (ns_per_sec / freq)
> + * ns = cycles * (10^9 / (cpu_mhz * 10^6))
> + * ns = cycles * (10^3 / cpu_mhz)
> + *
> + * Then we use scaling math (suggested by [email protected]) to get:
> + * ns = cycles * (10^3 * SC / cpu_mhz) / SC
> + * ns = cycles * cyc2ns_scale / SC
> + *
> + * And since SC is a constant power of two, we can convert the div
> + * into a shift.
> + * [email protected] "math is hard, lets go shopping!"
> + */
> +static unsigned long cyc2ns_scale;
> +#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen*/
> +
> +static inline set_cyc2ns_scale(unsigned long cpu_mhz)
> +{
> + cyc2ns_scale = (1000 * (1<<CYC2NS_SCALE_FACTOR))/cpu_mhz;
The function would be:
div_sc_n(const N, unsigned long a, unsigned long b);
returns (a << N) / b

The only advantage to this would be the ability to handle a u64 as a
result of the (a<<b) (assumes the div moves all significant bits to a
u32).
+
> +}
> +
> +static inline unsigned long long cycles_2_ns(unsigned long long cyc)
> +{
> + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;

I think this is a problem. cyc is u64 so you are doing a 64 bit mpy
which C will do by draging in the 64-bit math lib, something we don't
want in the kernel.

Since you want u64=u64*u32 the best the package can do is:
mpy_ll_X_l_ll(unsigned long long mpy1, unsigned long mpy2);
returns (unsigned long long)(mpy1 * mpy2)

This requires two mpy in the best of cases. The actual code leans on
the mpy_l_X_l_ll() which does u64=u32*u32. Here is what it looks like
in sc_math.h:
mpy_ll_X_l_ll(unsigned long long mpy1, unsigned long mpy2)
{
unsigned long long result = mpy_l_X_l_ll((unsigned long)mpy1, mpy2);
result += (mpy_l_X_l_ll((long)(mpy1 >> 32), mpy2) << 32);
return result;
}



> +}
> +
>
> /* Cached *multiplier* to convert TSC counts to microseconds.
> * (see the equation below).
> @@ -60,11 +92,25 @@
> return delay_at_last_interrupt + edx;
> }
>
> +static unsigned long long monotonic_clock_tsc(void)
> +{
> + unsigned long long last_offset, this_offset;
> + last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> +
> + /* Read the Time Stamp Counter */
> + rdtscll(this_offset);
> +
> + /* return the value in ns */
> + return monotonic_base + cycles_2_ns(this_offset - last_offset);
> +}
> +
> static void mark_offset_tsc(void)
> {
> int count;
> int countmp;
> static int count1=0, count2=LATCH;
> + unsigned long long this_offset, last_offset;
> + last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> /*
> * It is important that these two operations happen almost at
> * the same time. We do the RDTSC stuff first, since it's
> @@ -79,7 +125,7 @@
>
> /* read Pentium cycle counter */
>
> - rdtscl(last_tsc_low);
> + rdtsc(last_tsc_low, last_tsc_high);
>
> spin_lock(&i8253_lock);
> outb_p(0x00, 0x43); /* latch the count ASAP */
> @@ -104,6 +150,11 @@
>
> count = ((LATCH-1) - count) * TICK_SIZE;
> delay_at_last_interrupt = (count + LATCH/2) / LATCH;
> +
> + /* update the monotonic base value */
> + this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
> + monotonic_base += cycles_2_ns(this_offset - last_offset);
> +
> }
>
> static void delay_tsc(unsigned long loops)
> @@ -293,6 +344,7 @@
> "0" (eax), "1" (edx));
> printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
> }
> + set_cyc2ns_scale(cpu_khz/1000);
> return 0;
> }
> }
> @@ -326,5 +378,6 @@
> .init = init_tsc,
> .mark_offset = mark_offset_tsc,
> .get_offset = get_offset_tsc,
> + .monotonic_clock = monotonic_clock_tsc,
> .delay = delay_tsc,
> };
> diff -Nru a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
> --- a/drivers/char/hangcheck-timer.c Tue Mar 11 11:24:04 2003
> +++ b/drivers/char/hangcheck-timer.c Tue Mar 11 11:24:04 2003
> @@ -78,11 +78,13 @@
> static struct timer_list hangcheck_ticktock =
> TIMER_INITIALIZER(hangcheck_fire, 0, 0);
>
> +extern unsigned long long monotonic_clock(void);
> +
> static void hangcheck_fire(unsigned long data)
> {
> unsigned long long cur_tsc, tsc_diff;
>
> - cur_tsc = get_cycles();
> + cur_tsc = monotonic_clock();
>
> if (cur_tsc > hangcheck_tsc)
> tsc_diff = cur_tsc - hangcheck_tsc;
> @@ -98,7 +100,7 @@
> }
> }
> mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
> - hangcheck_tsc = get_cycles();
> + hangcheck_tsc = monotonic_clock();
> }
>
>
> @@ -108,10 +110,10 @@
> VERSION_STR, hangcheck_tick, hangcheck_margin);
>
> hangcheck_tsc_margin = hangcheck_margin + hangcheck_tick;
> - hangcheck_tsc_margin *= HZ;
> - hangcheck_tsc_margin *= current_cpu_data.loops_per_jiffy;
> + hangcheck_tsc_margin *= 1000000000;
> +
>
> - hangcheck_tsc = get_cycles();
> + hangcheck_tsc = monotonic_clock();
> mod_timer(&hangcheck_ticktock, jiffies + (hangcheck_tick*HZ));
>
> return 0;
> diff -Nru a/include/asm-i386/timer.h b/include/asm-i386/timer.h
> --- a/include/asm-i386/timer.h Tue Mar 11 11:24:04 2003
> +++ b/include/asm-i386/timer.h Tue Mar 11 11:24:04 2003
> @@ -14,6 +14,7 @@
> int (*init)(void);
> void (*mark_offset)(void);
> unsigned long (*get_offset)(void);
> + unsigned long long (*monotonic_clock)(void);
> void (*delay)(unsigned long);
> };
>
>
>
>
>

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2003-03-11 21:51:57

by john stultz

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

On Tue, 2003-03-11 at 13:47, george anzinger wrote:
> Some comments below on the scaling.

Thanks, I'll try to digest your comments and get back to you.


> On a related note, I would like to extend the CLOCK_MONOTONIC code to
> the same res as CLOCK_REALTIME in the POSIX clocks and timers patch.
> The patch uses jiffies_64 for CLOCK_MONOTONIC, so what I would like to
> do is use get_offset() to fill in the sub_jiffies part. Is this
> function available (i.e. timer->get_offset()) on all archs?


Nope, the timer_opts structure is i386 only. Further, the need for the
monotonic_clock() interface is because timer->get_offset() only returns
32bits of information, which on a 2Ghz cpu is only ~2 seconds worth of
time. We need multiple minutes worth of time to be returned, thus the 64
bit return of monotonic_clock.

I considered making get_offset() return a 64bit value, but worried that
the cost of the 64bit math would hurt gettimeofday too much to be worth
it. So rather then complicate a heavily used function to handle a very
rare case, we decided to implement a new interface that doesn't need to
be as fast as gettimeofday, but can handle long periods of time w/o
interrupts.


> It seems to me that the lost jiffies should be rolled into
> get_offset(). Have you considered doing this?

I'm not sure I'm following this? get_offset returns the amount of time
since mark_offset() was called(last interrupt). The lost-jiffies
compensation code I added uses get_offset() to detect how many jiffies
should have passed. How do you suggest rolling it into get_offset?

thanks
-john



Attachments:
signature.asc (232.00 B)
This is a digitally signed message part

2003-03-11 22:29:53

by George Anzinger

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

john stultz wrote:
> On Tue, 2003-03-11 at 13:47, george anzinger wrote:
>
>>Some comments below on the scaling.
>
>
> Thanks, I'll try to digest your comments and get back to you.
>
>
>
>>On a related note, I would like to extend the CLOCK_MONOTONIC code to
>>the same res as CLOCK_REALTIME in the POSIX clocks and timers patch.
>>The patch uses jiffies_64 for CLOCK_MONOTONIC, so what I would like to
>>do is use get_offset() to fill in the sub_jiffies part. Is this
>>function available (i.e. timer->get_offset()) on all archs?
>
>
>
> Nope, the timer_opts structure is i386 only. Further, the need for the
> monotonic_clock() interface is because timer->get_offset() only returns
> 32bits of information, which on a 2Ghz cpu is only ~2 seconds worth of
> time. We need multiple minutes worth of time to be returned, thus the 64
> bit return of monotonic_clock.
>
> I considered making get_offset() return a 64bit value, but worried that
> the cost of the 64bit math would hurt gettimeofday too much to be worth
> it. So rather then complicate a heavily used function to handle a very
> rare case, we decided to implement a new interface that doesn't need to
> be as fast as gettimeofday, but can handle long periods of time w/o
> interrupts.
>
>
>
>>It seems to me that the lost jiffies should be rolled into
>>get_offset(). Have you considered doing this?
>
>
> I'm not sure I'm following this? get_offset returns the amount of time
> since mark_offset() was called(last interrupt). The lost-jiffies
> compensation code I added uses get_offset() to detect how many jiffies
> should have passed. How do you suggest rolling it into get_offset?

I must have confused you. I am woking on a get time of day sort of
thing. In time.c, the gettimeofday code calls get_offset() and then
adds in lost ticks (ticks clocked by the PIT interrupt but not yet
rolled into the wall clock (xtime). I was thinking that get_offset
might be defined to add this its result.

But, back to the problem I am trying to solve. The posixtimers code
is in the common kernel and needs the result returned by get_offset
OR, we could define a new function, get_monotonictimeofday(), which
returns the jiffies since boot + get_offset() + pending ticks (i.e. it
would be the same as gettimeofday except it would use jiffies_64
instead of xtime to get its result. The format would be a timespec,
i.e. the same as xtime.

This translates directly into a system call and is also used in the
timers code to convert from wall clock time to jiffies time for timers.

Either way, we have a bit of a mess due to the arch dependency. I
don't really care which way it goes, but I do think it should be
resolved in 2.5.

thanks
--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2003-03-11 22:52:08

by john stultz

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

On Tue, 2003-03-11 at 14:39, george anzinger wrote:

> I must have confused you. I am woking on a get time of day sort of
> thing. In time.c, the gettimeofday code calls get_offset() and then
> adds in lost ticks (ticks clocked by the PIT interrupt but not yet
> rolled into the wall clock (xtime). I was thinking that get_offset
> might be defined to add this its result.

I'm still not quite following that. But as long as we're both pointing
at the same code and grunting in agreement I think I'll just let it
slide ;)


> But, back to the problem I am trying to solve. The posixtimers code
> is in the common kernel and needs the result returned by get_offset
> OR, we could define a new function, get_monotonictimeofday(), which
> returns the jiffies since boot + get_offset() + pending ticks (i.e. it
> would be the same as gettimeofday except it would use jiffies_64
> instead of xtime to get its result. The format would be a timespec,
> i.e. the same as xtime.


Actually, what is the difference between the call you're trying to
implement and monotonic_clock() (outside of the timespec return)? Could
you point me to the specific code you are describing? It sounds like
we're working on basically the same solution from two different angles.


> This translates directly into a system call and is also used in the
> timers code to convert from wall clock time to jiffies time for timers.
>
> Either way, we have a bit of a mess due to the arch dependency. I
> don't really care which way it goes, but I do think it should be
> resolved in 2.5.

Well, if the generic interfaces aren't providing what you need, then a
new interface needs to be considered. This is precisely what the
hangcheck-timer code ran into, and is why we're working on this
monotonic_clock() code (which is intended be arch independent in the
future).

thanks
-john


Attachments:
signature.asc (232.00 B)
This is a digitally signed message part

2003-03-11 23:35:01

by George Anzinger

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

john stultz wrote:
> On Tue, 2003-03-11 at 14:39, george anzinger wrote:
>
>
>>I must have confused you. I am woking on a get time of day sort of
>>thing. In time.c, the gettimeofday code calls get_offset() and then
>>adds in lost ticks (ticks clocked by the PIT interrupt but not yet
>>rolled into the wall clock (xtime). I was thinking that get_offset
>>might be defined to add this its result.
>
>
> I'm still not quite following that. But as long as we're both pointing
> at the same code and grunting in agreement I think I'll just let it
> slide ;)
>
>
>
>>But, back to the problem I am trying to solve. The posixtimers code
>>is in the common kernel and needs the result returned by get_offset
>>OR, we could define a new function, get_monotonictimeofday(), which
>>returns the jiffies since boot + get_offset() + pending ticks (i.e. it
>>would be the same as gettimeofday except it would use jiffies_64
>>instead of xtime to get its result. The format would be a timespec,
>>i.e. the same as xtime.
>
>
>
> Actually, what is the difference between the call you're trying to
> implement and monotonic_clock() (outside of the timespec return)? Could
> you point me to the specific code you are describing? It sounds like
> we're working on basically the same solution from two different angles.

The code is in .../kernel/posix-timers.c look for:
do_posix_clock_monotonic_gettime(struct timespec *tp)

It currently just converts jiffies_64, but it needs to add the sub
jiffie get_offset() to do the right thing.

As to the difference, my function returns time to the user and is used
to set up timers and even clock_nanosleep. It must be something that
ticks at the same rate at the wall clock. It is not clear that a full
TSC clock does this, i.e. I suspect (nay, I KNOW) there is drift
between the two.
>
>
>
>>This translates directly into a system call and is also used in the
>>timers code to convert from wall clock time to jiffies time for timers.
>>
>>Either way, we have a bit of a mess due to the arch dependency. I
>>don't really care which way it goes, but I do think it should be
>>resolved in 2.5.
>
>
> Well, if the generic interfaces aren't providing what you need, then a
> new interface needs to be considered. This is precisely what the
> hangcheck-timer code ran into, and is why we're working on this
> monotonic_clock() code (which is intended be arch independent in the
> future).
>
Right. As is the above. It is already in the common kernel. We do
need the get_offset() extension, however.

--
George Anzinger [email protected]
High-res-timers: http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml

2003-03-12 02:46:43

by Jim Houston

[permalink] [raw]
Subject: Re: [RFC][PATCH] linux-2.5.64_monotonic-clock_A1

Hi John,

I like the idea of making the monotonic clock use the same
mechanism as the normal timeofday clock.

When I was doing my alternative Posix timers patch, I modified
your get_offset() mechanism to return nanoseconds and added a
"struct time_spec ytime" which was updated in the same place as
xtime but was never set.

You might have a look at my patch archived here:
http://marc.theaimsgroup.com/?l=linux-kernel&m=104006731324824&w=2

Look for the function do_gettime_sinceboot_ns(). You don't have to
keep the monotonic time in cycles. It
would be nice if the timeofday clock was defined as the monotonic
clock + an offset.

Also, if I had one of those cyclone counters, I would never look at
the PIT again.

Jim Houston - Concurrent Computer Corp.