2005-09-30 00:42:23

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 1/11] Reduced NTP rework (part 1)

All,
With Roman's suggestions, I've been working on reducing the footprint
of my timeofday patches. This is the first of two patches that reworks
some of the interrupt time NTP adjustments so that it could be re-used
with the timeofday patches. The motivation of the change is to logically
separate the code which adjusts xtime and the code that decides, based
on the NTP state variables, how much per tick to adjust xtime.

Thus this patch should not affect the existing behavior, but just
separate the logical functionality so it can be re-used.

This patch applies on top of my ntp-shift-right patch I posted awhile
back.

thanks
-john

timer.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------------
1 files changed, 55 insertions(+), 33 deletions(-)

linux-2.6.14-rc2_timeofday-ntp-part1_B6.patch
============================================
diff --git a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -620,6 +620,7 @@ static long time_adj; /* tick adjust (
long time_reftime; /* time at last adjustment (s) */
long time_adjust;
long time_next_adjust;
+long time_adjust_step; /* per tick time_adjust step */

/*
* this routine handles the overflow of the microsecond field
@@ -742,46 +743,46 @@ static void second_overflow(void)
#endif
}

-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+
+static void ntp_advance(unsigned long interval_ns)
{
- long time_adjust_step, delta_nsec;
+ static unsigned long interval_sum;

- if ( (time_adjust_step = time_adjust) != 0 ) {
- /* We are doing an adjtime thing.
- *
- * Prepare time_adjust_step to be within bounds.
- * Note that a positive time_adjust means we want the clock
- * to run faster.
- *
- * Limit the amount of the step to be in the range
- * -tickadj .. +tickadj
- */
- time_adjust_step = min(time_adjust_step, (long)tickadj);
- time_adjust_step = max(time_adjust_step, (long)-tickadj);
+ /* increment the interval sum */
+ interval_sum += interval_ns;

- /* Reduce by this step the amount of time left */
- time_adjust -= time_adjust_step;
- }
- delta_nsec = tick_nsec + time_adjust_step * 1000;
- /*
- * Advance the phase, once it gets to one microsecond, then
- * advance the tick more.
- */
- time_phase += time_adj;
- if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
- long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
- time_phase -= ltemp << (SHIFT_SCALE - 10);
- delta_nsec += ltemp;
+ /* calculate the per tick singleshot adjtime adjustment step */
+ while (interval_ns >= tick_nsec) {
+ time_adjust_step = time_adjust;
+ if (time_adjust_step) {
+ /* We are doing an adjtime thing.
+ *
+ * Prepare time_adjust_step to be within bounds.
+ * Note that a positive time_adjust means we want the clock
+ * to run faster.
+ *
+ * Limit the amount of the step to be in the range
+ * -tickadj .. +tickadj
+ */
+ time_adjust_step = min(time_adjust_step, (long)tickadj);
+ time_adjust_step = max(time_adjust_step, (long)-tickadj);
+
+ /* Reduce by this step the amount of time left */
+ time_adjust -= time_adjust_step;
+ }
+ interval_ns -= tick_nsec;
}
- xtime.tv_nsec += delta_nsec;
- time_interpolator_update(delta_nsec);

/* Changes by adjtime() do not take effect till next tick. */
- if (time_next_adjust != 0) {
+ if (time_next_adjust) {
time_adjust = time_next_adjust;
time_next_adjust = 0;
}
+
+ while (interval_sum > NSEC_PER_SEC) {
+ interval_sum -= NSEC_PER_SEC;
+ second_overflow();
+ }
}

/*
@@ -793,14 +794,35 @@ static void update_wall_time_one_tick(vo
*/
static void update_wall_time(unsigned long ticks)
{
+ long delta_nsec;
+
do {
ticks--;
- update_wall_time_one_tick();
+
+ /* Calculate the nsec delta using the
+ * precomputed NTP adjustments:
+ * tick_nsec, time_adjust_step, time_adj
+ */
+ delta_nsec = tick_nsec + time_adjust_step * 1000;
+ /*
+ * Advance the phase, once it gets to one microsecond, then
+ * advance the tick more.
+ */
+ time_phase += time_adj;
+ if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
+ long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
+ time_phase -= ltemp << (SHIFT_SCALE - 10);
+ delta_nsec += ltemp;
+ }
+
+ xtime.tv_nsec += delta_nsec;
if (xtime.tv_nsec >= 1000000000) {
xtime.tv_nsec -= 1000000000;
xtime.tv_sec++;
- second_overflow();
}
+ ntp_advance(tick_nsec);
+ time_interpolator_update(delta_nsec);
+
} while (ticks);
}




2005-09-30 00:44:24

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 2/11] Reduced NTP rework (part 2)

All,
Here is the second of two patches which try to minimize my ntp rework
patches.

This patch further changes the interrupt time NTP code, breaking out the
leapsecond processing and introduces an accessor to a shifted ppm
adjustment value. For correctness, I've also introduced a new lock, the
ntp_lock, which protects the NTP state machine when accessing it from my
timekeeping code (which does not use the xtime_lock).

Again, this patch should not affect the existing behavior, but just
separate the logical functionality so it can be re-used by my timeofday
patches.

thanks
-john

include/linux/timex.h | 24 ++++++
kernel/time.c | 25 ++++---
kernel/timer.c | 175 ++++++++++++++++++++++++++++++++++----------------
3 files changed, 159 insertions(+), 65 deletions(-)

linux-2.6.14-rc2_timeofday-ntp-part2_B6.patch
============================================
diff --git a/include/linux/timex.h b/include/linux/timex.h
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -260,6 +260,7 @@ extern long pps_calcnt; /* calibration
extern long pps_errcnt; /* calibration errors */
extern long pps_stbcnt; /* stability limit exceeded */

+extern seqlock_t ntp_lock;
/**
* ntp_clear - Clears the NTP state variables
*
@@ -267,10 +268,14 @@ extern long pps_stbcnt; /* stability li
*/
static inline void ntp_clear(void)
{
+ unsigned long flags;
+ write_seqlock_irqsave(&ntp_lock, flags);
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
+ write_sequnlock_irqrestore(&ntp_lock, flags);
+
}

/**
@@ -282,6 +287,25 @@ static inline int ntp_synced(void)
return !(time_status & STA_UNSYNC);
}

+/**
+ * ntp_get_ppm_adjustment - Returns Shifted PPM adjustment
+ *
+ */
+long ntp_get_ppm_adjustment(void);
+
+/**
+ * ntp_advance - Advances the NTP state machine by interval_ns
+ *
+ */
+void ntp_advance(unsigned long interval_ns);
+
+/**
+ * ntp_leapsecond - NTP leapsecond processing code.
+ *
+ */
+int ntp_leapsecond(struct timespec now);
+
+
/* Required to safely shift negative values */
#define shift_right(x, s) ({ \
__typeof__(x) __x = (x); \
diff --git a/kernel/time.c b/kernel/time.c
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -231,7 +231,9 @@ int do_adjtimex(struct timex *txc)
{
long ltemp, mtemp, save_adjust;
int result;
-
+ unsigned long flags;
+ struct timespec now_ts;
+ unsigned long seq;
/* In order to modify anything, you gotta be super-user! */
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
@@ -254,7 +256,13 @@ int do_adjtimex(struct timex *txc)
txc->tick > 1100000/USER_HZ)
return -EINVAL;

- write_seqlock_irq(&xtime_lock);
+ do { /* save off current xtime */
+ seq = read_seqbegin(&xtime_lock);
+ now_ts = xtime;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ write_seqlock_irqsave(&ntp_lock, flags);
+
result = time_state; /* mostly `TIME_OK' */

/* Save for later - semantics of adjtime is to return old value */
@@ -331,9 +339,9 @@ int do_adjtimex(struct timex *txc)
*/

if (time_status & STA_FREQHOLD || time_reftime == 0)
- time_reftime = xtime.tv_sec;
- mtemp = xtime.tv_sec - time_reftime;
- time_reftime = xtime.tv_sec;
+ time_reftime = now_ts.tv_sec;
+ mtemp = now_ts.tv_sec - time_reftime;
+ time_reftime = now_ts.tv_sec;
if (time_status & STA_FLL) {
if (mtemp >= MINSEC) {
ltemp = (time_offset / mtemp) << (SHIFT_USEC -
@@ -392,7 +400,7 @@ leave: if ((time_status & (STA_UNSYNC|ST
txc->calcnt = pps_calcnt;
txc->errcnt = pps_errcnt;
txc->stbcnt = pps_stbcnt;
- write_sequnlock_irq(&xtime_lock);
+ write_sequnlock_irqrestore(&ntp_lock, flags);
do_gettimeofday(&txc->time);
notify_arch_cmos_timer();
return(result);
@@ -509,10 +517,7 @@ int do_settimeofday (struct timespec *tv
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);

- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
+ ntp_clear();
time_interpolator_reset();
}
write_sequnlock_irq(&xtime_lock);
diff --git a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -613,7 +613,6 @@ long time_tolerance = MAXFREQ; /* frequ
long time_precision = 1; /* clock precision (us) */
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
-static long time_phase; /* phase offset (scaled us) */
long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
/* frequency offset (scaled ppm)*/
static long time_adj; /* tick adjust (scaled 1 / HZ) */
@@ -622,6 +621,86 @@ long time_adjust;
long time_next_adjust;
long time_adjust_step; /* per tick time_adjust step */

+long total_sppm; /* shifted ppm sum of all NTP adjustments */
+long offset_adj_ppm;
+long tick_adj_ppm;
+long singleshot_adj_ppm;
+
+#define MAX_SINGLESHOT_ADJ 500 /* (ppm) */
+#define SEC_PER_DAY 86400
+
+/* NTP lock, protects NTP state machine */
+seqlock_t ntp_lock = SEQLOCK_UNLOCKED;
+
+/**
+ * ntp_leapsecond - NTP leapsecond processing code.
+ * now: the current time
+ *
+ * Returns the number of seconds (-1, 0, or 1) that
+ * should be added to the current time to properly
+ * adjust for leapseconds.
+ */
+int ntp_leapsecond(struct timespec now)
+{
+ unsigned long flags;
+ /*
+ * Leap second processing. If in leap-insert state at
+ * the end of the day, the system clock is set back one
+ * second; if in leap-delete state, the system clock is
+ * set ahead one second.
+ */
+ static time_t leaptime = 0;
+ int ret = 0;
+
+ write_seqlock_irqsave(&ntp_lock, flags);
+ switch (time_state) {
+
+ case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
+
+ /* calculate end of today (23:59:59)*/
+ leaptime = now.tv_sec + SEC_PER_DAY -
+ (now.tv_sec % SEC_PER_DAY) - 1;
+ break;
+
+ case TIME_INS:
+ /* Once we are at (or past) leaptime, insert the second */
+ if (now.tv_sec >= leaptime) {
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
+ ret = -1;
+ }
+ break;
+
+ case TIME_DEL:
+ /* Once we are at (or past) leaptime, delete the second */
+ if (now.tv_sec >= leaptime) {
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
+ ret = 1;
+ }
+ break;
+
+ case TIME_OOP:
+ /* Wait for the end of the leap second*/
+ if (now.tv_sec > (leaptime + 1))
+ time_state = TIME_WAIT;
+ time_state = TIME_WAIT;
+ break;
+
+ case TIME_WAIT:
+ if (!(time_status & (STA_INS | STA_DEL)))
+ time_state = TIME_OK;
+ break;
+ }
+
+ write_sequnlock_irqrestore(&ntp_lock, flags);
+ return 0;
+}
+
/*
* this routine handles the overflow of the microsecond field
*
@@ -643,59 +722,6 @@ static void second_overflow(void)
}

/*
- * Leap second processing. If in leap-insert state at
- * the end of the day, the system clock is set back one
- * second; if in leap-delete state, the system clock is
- * set ahead one second. The microtime() routine or
- * external clock driver will insure that reported time
- * is always monotonic. The ugly divides should be
- * replaced.
- */
- switch (time_state) {
-
- case TIME_OK:
- if (time_status & STA_INS)
- time_state = TIME_INS;
- else if (time_status & STA_DEL)
- time_state = TIME_DEL;
- break;
-
- case TIME_INS:
- if (xtime.tv_sec % 86400 == 0) {
- xtime.tv_sec--;
- wall_to_monotonic.tv_sec++;
- /* The timer interpolator will make time change gradually instead
- * of an immediate jump by one second.
- */
- time_interpolator_update(-NSEC_PER_SEC);
- time_state = TIME_OOP;
- clock_was_set();
- printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
- }
- break;
-
- case TIME_DEL:
- if ((xtime.tv_sec + 1) % 86400 == 0) {
- xtime.tv_sec++;
- wall_to_monotonic.tv_sec--;
- /* Use of time interpolator for a gradual change of time */
- time_interpolator_update(NSEC_PER_SEC);
- time_state = TIME_WAIT;
- clock_was_set();
- printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
- }
- break;
-
- case TIME_OOP:
- time_state = TIME_WAIT;
- break;
-
- case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
- }
-
- /*
* Compute the phase adjustment for the next second. In
* PLL mode, the offset is reduced by a fixed factor
* times the time constant. In FLL mode the offset is
@@ -712,6 +738,13 @@ static void second_overflow(void)
time_offset -= ltemp;
time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);

+ offset_adj_ppm = shift_right(ltemp, SHIFT_UPDATE); /* ppm */
+
+ /* first calculate usec/user_tick offset */
+ tick_adj_ppm = ((USEC_PER_SEC + USER_HZ/2)/USER_HZ) - tick_usec;
+ /* multiply by user_hz to get usec/sec => ppm */
+ tick_adj_ppm *= USER_HZ;
+
/*
* Compute the frequency estimate and additional phase
* adjustment due to frequency error for the next
@@ -744,9 +777,20 @@ static void second_overflow(void)
}


-static void ntp_advance(unsigned long interval_ns)
+/**
+ * ntp_get_ppm_adjustment - Returns Shifted PPM adjustment
+ *
+ */
+long ntp_get_ppm_adjustment(void)
+{
+ return total_sppm;
+}
+
+void ntp_advance(unsigned long interval_ns)
{
static unsigned long interval_sum;
+ unsigned long flags;
+ write_seqlock_irqsave(&ntp_lock, flags);

/* increment the interval sum */
interval_sum += interval_ns;
@@ -772,6 +816,7 @@ static void ntp_advance(unsigned long in
}
interval_ns -= tick_nsec;
}
+ singleshot_adj_ppm = time_adjust_step*(1000000/HZ); /* usec/tick => ppm */

/* Changes by adjtime() do not take effect till next tick. */
if (time_next_adjust) {
@@ -783,6 +828,15 @@ static void ntp_advance(unsigned long in
interval_sum -= NSEC_PER_SEC;
second_overflow();
}
+
+ /* calculate the total continuous ppm adjustment */
+ total_sppm = time_freq; /* already shifted by SHIFT_USEC */
+ total_sppm += offset_adj_ppm << SHIFT_USEC;
+ total_sppm += tick_adj_ppm << SHIFT_USEC;
+ total_sppm += singleshot_adj_ppm << SHIFT_USEC;
+
+ write_sequnlock_irqrestore(&ntp_lock, flags);
+
}

/*
@@ -795,6 +849,7 @@ static void ntp_advance(unsigned long in
static void update_wall_time(unsigned long ticks)
{
long delta_nsec;
+ static long time_phase; /* phase offset (scaled us) */

do {
ticks--;
@@ -817,8 +872,18 @@ static void update_wall_time(unsigned lo

xtime.tv_nsec += delta_nsec;
if (xtime.tv_nsec >= 1000000000) {
+ int leapsecond;
xtime.tv_nsec -= 1000000000;
xtime.tv_sec++;
+ /* process leapsecond */
+ leapsecond = ntp_leapsecond(xtime);
+ if (leapsecond) {
+ xtime.tv_sec += leapsecond;
+ wall_to_monotonic.tv_sec -= leapsecond;
+ /* Use of time interpolator for a gradual change of time */
+ time_interpolator_update(leapsecond*NSEC_PER_SEC);
+ clock_was_set();
+ }
}
ntp_advance(tick_nsec);
time_interpolator_update(delta_nsec);


2005-09-30 00:45:24

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 3/11] timesource management code

All,
This patch introduces the timesource management infrastructure. A
timesource is a driver-like architecture generic abstraction of a free
running counter. This patch defines the timesource structure, and
provides management code for registering, selecting, accessing and
scaling timesources. The timesource structure is influenced by the
time_interpolator code, although I feel it has a cleaner interface and
avoids preserving system state in the timesource structure.

Additionally, this patch includes the trivial jiffies timesource, a
lowest common denominator timesource, provided mainly for use as an
example.

This patch applies ontop of my ntp cleanup patchset.

Since this patch provides the groundwork for the generic timeofday core,
it will not function without the generic timeofday patches to follow.


thanks
-john

Documentation/kernel-parameters.txt | 7
drivers/Makefile | 1
drivers/timesource/Makefile | 1
drivers/timesource/jiffies.c | 74 ++++++++++
include/linux/timesource.h | 172 +++++++++++++++++++++++
kernel/timesource.c | 259 ++++++++++++++++++++++++++++++++++++
6 files changed, 513 insertions(+), 1 deletion(-)

linux-2.6.14-rc2_timeofday-timesource-core_B6.patch
============================================
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -52,6 +52,7 @@ restrictions referred to are that the re
MTD MTD support is enabled.
NET Appropriate network support is enabled.
NUMA NUMA support is enabled.
+ GENERIC_TOD The generic timeofday code is enabled.
NFS Appropriate NFS support is enabled.
OSS OSS sound support is enabled.
PARIDE The ParIDE subsystem is enabled.
@@ -323,7 +324,7 @@ running once the system is up.
Default value is set via a kernel config option.
Value can be changed at runtime via /selinux/checkreqprot.

- clock= [BUGS=IA-32, HW] gettimeofday timesource override.
+ clock= [BUGS=IA-32, HW] gettimeofday timesource override. [Deprecated]
Forces specified timesource (if avaliable) to be used
when calculating gettimeofday(). If specicified timesource
is not avalible, it defaults to PIT.
@@ -1462,6 +1463,10 @@ running once the system is up.

time Show timing data prefixed to each printk message line

+ timesource= [GENERIC_TOD] Override the default timesource
+ Override the default timesource and use the timesource
+ with the name specified.
+
tipar.timeout= [HW,PPT]
Set communications timeout in tenths of a second
(default 15).
diff --git a/drivers/Makefile b/drivers/Makefile
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -67,3 +67,4 @@ obj-$(CONFIG_INFINIBAND) += infiniband/
obj-$(CONFIG_SGI_IOC4) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
+obj-$(CONFIG_GENERICTOD) += timesource/
diff --git a/drivers/timesource/Makefile b/drivers/timesource/Makefile
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/Makefile
@@ -0,0 +1 @@
+obj-y += jiffies.o
diff --git a/drivers/timesource/jiffies.c b/drivers/timesource/jiffies.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/jiffies.c
@@ -0,0 +1,74 @@
+/***********************************************************************
+* linux/drivers/timesource/jiffies.c
+*
+* This file contains the jiffies based time source.
+*
+* Copyright (C) 2004, 2005 IBM, John Stultz ([email protected])
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+************************************************************************/
+#include <linux/timesource.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+
+/* The Jiffies based timesource is the lowest common
+ * denominator time source which should function on
+ * all systems. It has the same coarse resolution as
+ * the timer interrupt frequency HZ and it suffers
+ * inaccuracies caused by missed or lost timer
+ * interrupts and the inability for the timer
+ * interrupt hardware to accuratly tick at the
+ * requested HZ value. It is also not reccomended
+ * for "tick-less" systems.
+ */
+#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
+
+/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
+ * conversion, the .shift value could be zero. However
+ * this would make NTP adjustments impossible as they are
+ * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to
+ * shift both the nominator and denominator the same
+ * amount, and give ntp adjustments in units of 1/2^8
+ *
+ * The value 8 is somewhat carefully chosen, as anything
+ * larger can result in overflows. NSEC_PER_JIFFY grows as
+ * HZ shrinks, so values greater then 8 overflow 32bits when
+ * HZ=100.
+ */
+#define JIFFIES_SHIFT 8
+
+static cycle_t jiffies_read(void)
+{
+ cycle_t ret = get_jiffies_64();
+ return ret;
+}
+
+struct timesource_t timesource_jiffies = {
+ .name = "jiffies",
+ .priority = 0, /* lowest priority*/
+ .type = TIMESOURCE_FUNCTION,
+ .read_fnct = jiffies_read,
+ .mask = (cycle_t)-1,
+ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* See above for details */
+ .shift = JIFFIES_SHIFT,
+};
+
+static int __init init_jiffies_timesource(void)
+{
+ register_timesource(&timesource_jiffies);
+ return 0;
+}
+module_init(init_jiffies_timesource);
diff --git a/include/linux/timesource.h b/include/linux/timesource.h
new file mode 100644
--- /dev/null
+++ b/include/linux/timesource.h
@@ -0,0 +1,172 @@
+/* linux/include/linux/timesource.h
+ *
+ * This file contains the structure definitions for timesources.
+ *
+ * If you are not a timesource, or the time of day code, you should
+ * not be including this file!
+ */
+#ifndef _LINUX_TIMESOURCE_H
+#define _LINUX_TIMESOURCE_H
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <asm/io.h>
+#include <asm/div64.h>
+
+/* struct timesource_t:
+ * Provides mostly state-free accessors to the underlying hardware.
+ *
+ * name: ptr to timesource name
+ * priority: priority value for selection (higher is better)
+ * To avoid priority inflation the following
+ * list should give you a guide as to how
+ * to assign your timesource a priority
+ * 1-99: Unfit for real use
+ * Only available for bootup and testing purposes.
+ * 100-199: Base level usability.
+ * Functional for real use, but not desired.
+ * 200-299: Good.
+ * A correct and usable timesource.
+ * 300-399: Desired.
+ * A reasonably fast and accurate timesource.
+ * 400-499: Perfect
+ * The ideal timesource. A must-use where available.
+ * type: defines timesource type
+ * @read_fnct: returns a cycle value
+ * ptr: ptr to MMIO'ed counter
+ * mask: bitmask for two's complement
+ * subtraction of non 64 bit counters
+ * mult: cycle to nanosecond multiplier
+ * shift: cycle to nanosecond divisor (power of two)
+ * @update_callback: called when safe to alter timesource values
+ */
+struct timesource_t {
+ char* name;
+ int priority;
+ enum {
+ TIMESOURCE_FUNCTION,
+ TIMESOURCE_CYCLES,
+ TIMESOURCE_MMIO_32,
+ TIMESOURCE_MMIO_64
+ } type;
+ cycle_t (*read_fnct)(void);
+ void __iomem *mmio_ptr;
+ cycle_t mask;
+ u32 mult;
+ u32 shift;
+ int (*update_callback)(void);
+};
+
+
+/* Helper functions that converts a khz counter
+ * frequency to a timsource multiplier, given the
+ * timesource shift value
+ */
+static inline u32 timesource_khz2mult(u32 khz, u32 shift_constant)
+{
+ /* khz = cyc/(Million ns)
+ * mult/2^shift = ns/cyc
+ * mult = ns/cyc * 2^shift
+ * mult = 1Million/khz * 2^shift
+ * mult = 1000000 * 2^shift / khz
+ * mult = (1000000<<shift) / khz
+ */
+ u64 tmp = ((u64)1000000) << shift_constant;
+ tmp += khz/2; /* round for do_div */
+ do_div(tmp, khz);
+ return (u32)tmp;
+}
+
+/* Helper functions that converts a hz counter
+ * frequency to a timsource multiplier, given the
+ * timesource shift value
+ */
+static inline u32 timesource_hz2mult(u32 hz, u32 shift_constant)
+{
+ /* hz = cyc/(Billion ns)
+ * mult/2^shift = ns/cyc
+ * mult = ns/cyc * 2^shift
+ * mult = 1Billion/hz * 2^shift
+ * mult = 1000000000 * 2^shift / hz
+ * mult = (1000000000<<shift) / hz
+ */
+ u64 tmp = ((u64)1000000000) << shift_constant;
+ tmp += hz/2; /* round for do_div */
+ do_div(tmp, hz);
+ return (u32)tmp;
+}
+
+
+#ifndef readq
+/* Provide an a way to atomically read a u64 on a 32bit arch */
+static inline unsigned long long timesource_readq(void __iomem *addr)
+{
+ u32 low, high;
+ /* loop is required to make sure we get an atomic read */
+ do {
+ high = readl(addr+4);
+ low = readl(addr);
+ } while (high != readl(addr+4));
+
+ return low | (((unsigned long long)high) << 32LL);
+}
+#else
+#define timesource_readq(x) readq(x)
+#endif
+
+
+/* read_timesource():
+ * Uses the timesource to return the current cycle_t value
+ */
+static inline cycle_t read_timesource(struct timesource_t *ts)
+{
+ switch (ts->type) {
+ case TIMESOURCE_MMIO_32:
+ return (cycle_t)readl(ts->mmio_ptr);
+ case TIMESOURCE_MMIO_64:
+ return (cycle_t)timesource_readq(ts->mmio_ptr);
+ case TIMESOURCE_CYCLES:
+ return (cycle_t)get_cycles();
+ default:/* case: TIMESOURCE_FUNCTION */
+ return ts->read_fnct();
+ }
+}
+
+/* cyc2ns():
+ * Uses the timesource and ntp ajdustment interval to
+ * convert cycle_ts to nanoseconds.
+ */
+static inline nsec_t cyc2ns(struct timesource_t *ts, int ntp_adj, cycle_t cycles)
+{
+ u64 ret;
+ ret = (u64)cycles;
+ ret *= (ts->mult + ntp_adj);
+ ret >>= ts->shift;
+ return (nsec_t)ret;
+}
+
+/* cyc2ns_rem():
+ * Uses the timesource and ntp ajdustment interval to
+ * convert cycle_ts to nanoseconds. Add in remainder portion
+ * which is stored in ns<<ts->shift units and save the new
+ * remainder off.
+ */
+static inline nsec_t cyc2ns_rem(struct timesource_t *ts, int ntp_adj, cycle_t cycles, u64* rem)
+{
+ u64 ret;
+ ret = (u64)cycles;
+ ret *= (ts->mult + ntp_adj);
+ if (rem) {
+ ret += *rem;
+ *rem = ret & ((1<<ts->shift)-1);
+ }
+ ret >>= ts->shift;
+ return (nsec_t)ret;
+}
+
+/* used to install a new time source */
+void register_timesource(struct timesource_t*);
+void reselect_timesource(void);
+struct timesource_t* get_next_timesource(void);
+#endif
diff --git a/kernel/timesource.c b/kernel/timesource.c
new file mode 100644
--- /dev/null
+++ b/kernel/timesource.c
@@ -0,0 +1,259 @@
+/*********************************************************************
+* linux/kernel/timesource.c
+*
+* This file contains the functions which manage timesource drivers.
+*
+* Copyright (C) 2004, 2005 IBM, John Stultz ([email protected])
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+* TODO WishList:
+* o Allow timesource drivers to be unregistered
+* o get rid of timesource_jiffies extern
+**********************************************************************/
+
+#include <linux/timesource.h>
+#include <linux/sysdev.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#define MAX_TIMESOURCES 10
+
+
+/* XXX - Would like a better way for initializing curr_timesource */
+extern struct timesource_t timesource_jiffies;
+
+/*[Timesource internal variables]---------
+ * curr_timesource:
+ * currently selected timesource. Initialized to timesource_jiffies.
+ * next_timesource:
+ * pending next selected timesource.
+ * timesource_list:
+ * array of pointers pointing to registered timesources
+ * timesource_list_counter:
+ * value which counts the number of registered timesources
+ * timesource_lock:
+ * protects manipulations to curr_timesource and next_timesource
+ * and the timesource_list
+ */
+static struct timesource_t *curr_timesource = &timesource_jiffies;
+static struct timesource_t *next_timesource;
+static struct timesource_t *timesource_list[MAX_TIMESOURCES];
+static int timesource_list_counter;
+static seqlock_t timesource_lock = SEQLOCK_UNLOCKED;
+
+static char override_name[32];
+
+/**
+ * get_next_timesource - Returns the selected timesource
+ *
+ */
+struct timesource_t* get_next_timesource(void)
+{
+ write_seqlock(&timesource_lock);
+ if (next_timesource) {
+ curr_timesource = next_timesource;
+ next_timesource = NULL;
+ }
+ write_sequnlock(&timesource_lock);
+
+ return curr_timesource;
+}
+
+/**
+ * select_timesource - Finds the best registered timesource.
+ *
+ * Private function. Must have a writelock on timesource_lock
+ * when called.
+ */
+static struct timesource_t* select_timesource(void)
+{
+ struct timesource_t* best = timesource_list[0];
+ int i;
+
+ for (i=0; i < timesource_list_counter; i++) {
+ /* Check for override */
+ if ((override_name[0] != 0) &&
+ (strlen(override_name)
+ == strlen(timesource_list[i]->name)) &&
+ (!strncmp(timesource_list[i]->name, override_name,
+ strlen(override_name)))) {
+ best = timesource_list[i];
+ break;
+ }
+ /* Pick the highest priority */
+ if (timesource_list[i]->priority > best->priority)
+ best = timesource_list[i];
+ }
+ return best;
+}
+
+/**
+ * register_timesource - Used to install new timesources
+ * @t: timesource to be registered
+ *
+ */
+void register_timesource(struct timesource_t* t)
+{
+ char* error_msg = 0;
+ int i;
+ write_seqlock(&timesource_lock);
+
+ /* check if timesource is already registered */
+ for (i=0; i < timesource_list_counter; i++)
+ if (!strncmp(timesource_list[i]->name, t->name, strlen(t->name))){
+ error_msg = "Already registered!";
+ break;
+ }
+
+ /* check that the list isn't full */
+ if (timesource_list_counter >= MAX_TIMESOURCES)
+ error_msg = "Too many timesources!";
+
+ if(!error_msg)
+ timesource_list[timesource_list_counter++] = t;
+ else
+ printk("register_timesource: Cannot register %s. %s\n",
+ t->name, error_msg);
+
+ /* select next timesource */
+ next_timesource = select_timesource();
+
+ write_sequnlock(&timesource_lock);
+}
+EXPORT_SYMBOL(register_timesource);
+
+
+/**
+ * reselect_timesource - Rescan list for next timesource
+ *
+ * A quick helper function to be used if a timesource
+ * changes its priority. Forces the timesource list to
+ * be re-scaned for the best timesource.
+ */
+void reselect_timesource(void)
+{
+ write_seqlock(&timesource_lock);
+ next_timesource = select_timesource();
+ write_sequnlock(&timesource_lock);
+}
+
+/**
+ * sysfs_show_timesources - sysfs interface for listing timesource
+ * @dev: unused
+ * @buf: char buffer to be filled with timesource list
+ *
+ * Provides sysfs interface for listing registered timesources
+ */
+static ssize_t sysfs_show_timesources(struct sys_device *dev, char *buf)
+{
+ int i;
+ char* curr = buf;
+ write_seqlock(&timesource_lock);
+ for(i=0; i < timesource_list_counter; i++) {
+ /* Mark current timesource w/ a star */
+ if (timesource_list[i] == curr_timesource)
+ curr += sprintf(curr, "*");
+ curr += sprintf(curr, "%s ",timesource_list[i]->name);
+ }
+ write_sequnlock(&timesource_lock);
+
+ curr += sprintf(curr, "\n");
+ return curr - buf;
+}
+
+/**
+ * sysfs_override_timesource - interface for manually overriding timesource
+ * @dev: unused
+ * @buf: name of override timesource
+ *
+ *
+ * Takes input from sysfs interface for manually overriding
+ * the default timesource selction
+ */
+static ssize_t sysfs_override_timesource(struct sys_device *dev,
+ const char *buf, size_t count)
+{
+ /* check to avoid underflow later */
+ if (strlen(buf) == 0)
+ return count;
+
+ write_seqlock(&timesource_lock);
+
+ /* copy the name given */
+ strncpy(override_name, buf, strlen(buf)-1);
+ override_name[strlen(buf)-1] = 0;
+
+ /* see if we can find it */
+ next_timesource = select_timesource();
+
+ write_sequnlock(&timesource_lock);
+ return count;
+}
+
+/* Sysfs setup bits:
+ */
+static SYSDEV_ATTR(timesource, 0600, sysfs_show_timesources, sysfs_override_timesource);
+
+static struct sysdev_class timesource_sysclass = {
+ set_kset_name("timesource"),
+};
+
+static struct sys_device device_timesource = {
+ .id = 0,
+ .cls = &timesource_sysclass,
+};
+
+static int init_timesource_sysfs(void)
+{
+ int error = sysdev_class_register(&timesource_sysclass);
+ if (!error) {
+ error = sysdev_register(&device_timesource);
+ if (!error)
+ error = sysdev_create_file(&device_timesource, &attr_timesource);
+ }
+ return error;
+}
+device_initcall(init_timesource_sysfs);
+
+
+/**
+ * boot_override_timesource - boot time override
+ * @str: override name
+ *
+ * Takes a timesource= boot argument and uses it
+ * as the timesource override name
+ */
+static int __init boot_override_timesource(char* str)
+{
+ if (str)
+ strlcpy(override_name, str, sizeof(override_name));
+ return 1;
+}
+__setup("timesource=", boot_override_timesource);
+
+/**
+ * boot_override_clock - Compatibility layer for deprecated boot option
+ * @str: override name
+ *
+ * DEPRECATED! Takes a clock= boot argument and uses it
+ * as the timesource override name
+ */
+static int __init boot_override_clock(char* str)
+{
+ printk("Warning! clock= boot option is deprecated.\n");
+ return boot_override_timesource(str);
+}
+__setup("clock=", boot_override_clock);


2005-09-30 00:46:38

by john stultz

[permalink] [raw]
Subject: [PATCH 4/11] generic timeofday core subsystem

All,
This patch implements my generic timeofday framework. This common
infrastructure is intended to be used by any arch to reduce code
duplication. Hopefully it will allow generic bugs to be fixed once. And
with many architectures sharing the same time source, this allows
drivers to be written once for multiple architectures.

One major change with this code is that it delineate the lines between
the soft-timer subsystem and the time-of-day subsystem, allowing for a
more direct mapping between a hardware timesource counter and the time
of day. This allows for lost ticks or other software delays to not
affect timekeeping.

Features of this design:
========================

o Splits time of day management from timer interrupts
o Consolidates a large amount of code
o Generic algorithms which use time-source drivers chosen at runtime
o More consistent and readable code

For an older description on the rework, see here:
http://lwn.net/Articles/120850/ (Many thanks to the LWN team for that
easy to understand writeup!)

This patch implements the architecture independent portion of the
generic time of day subsystem. Included below is timeofday.c (which
includes all the time of day management and accessor functions), and
minimal hooks into arch independent code. This patch does not remove the
current timekeeping code, allowing architectures to move over when they
are ready.

This patch applies on top of my timesource management patch.

The patch does nothing without at least minimal architecture specific
hooks (i386, x86-64 and other architecture examples to follow), and it
should be able to be applied to a tree without affecting the existing
code.

Finally I'd like to thank the following people who have contributed
ideas, criticism, testing and code that has helped shape this work:

George Anzinger, Nish Aravamudan, Max Asbock, Dominik Brodowski, Thomas
Gleixner Darren Hart, Christoph Lameter, Matt Mackal, Keith Mannthey,
Martin Schwidefsky, Frank Sorenson, Ulrich Windl, Darrick Wong, Roman
Zippel and any others whom I've accidentally forgotten.

thanks
-john


drivers/char/hangcheck-timer.c | 5
include/asm-generic/timeofday.h | 26 +
include/linux/time.h | 4
include/linux/timeofday.h | 83 +++++
include/linux/timex.h | 2
init/main.c | 2
kernel/Makefile | 1
kernel/time.c | 40 ++
kernel/timeofday.c | 575 ++++++++++++++++++++++++++++++++++++++++
kernel/timer.c | 4
10 files changed, 742 insertions(+)

linux-2.6.14-rc2_timeofday-core_B6.patch
============================================
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -49,6 +49,7 @@
#include <linux/delay.h>
#include <asm/uaccess.h>
#include <linux/sysrq.h>
+#include <linux/timeofday.h>


#define VERSION_STR "0.9.0"
@@ -130,8 +131,12 @@ __setup("hcheck_dump_tasks", hangcheck_p
#endif

#ifdef HAVE_MONOTONIC
+#ifndef CONFIG_GENERICTOD
extern unsigned long long monotonic_clock(void);
#else
+#define monotonic_clock() do_monotonic_clock()
+#endif
+#else
static inline unsigned long long monotonic_clock(void)
{
# ifdef __s390__
diff --git a/include/asm-generic/timeofday.h b/include/asm-generic/timeofday.h
new file mode 100644
--- /dev/null
+++ b/include/asm-generic/timeofday.h
@@ -0,0 +1,26 @@
+/* linux/include/asm-generic/timeofday.h
+ *
+ * This file contains the asm-generic interface
+ * to the arch specific calls used by the time of day subsystem
+ */
+#ifndef _ASM_GENERIC_TIMEOFDAY_H
+#define _ASM_GENERIC_TIMEOFDAY_H
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <asm/div64.h>
+#ifdef CONFIG_GENERICTOD
+
+/* Required externs */
+extern nsec_t read_persistent_clock(void);
+extern void sync_persistent_clock(struct timespec ts);
+
+#ifdef CONFIG_GENERICTOD_VSYSCALL
+extern void arch_update_vsyscall_gtod(nsec_t wall_time, cycle_t offset_base,
+ struct timesource_t* timesource, int ntp_adj);
+#else
+#define arch_update_vsyscall_gtod(x,y,z,w) {}
+#endif /* CONFIG_GENERICTOD_VSYSCALL */
+
+#endif /* CONFIG_GENERICTOD */
+#endif
diff --git a/include/linux/time.h b/include/linux/time.h
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -27,6 +27,10 @@ struct timezone {

#ifdef __KERNEL__

+/* timeofday base types */
+typedef s64 nsec_t;
+typedef unsigned long cycle_t;
+
/* Parameters used to convert the timespec values */
#define MSEC_PER_SEC (1000L)
#define USEC_PER_SEC (1000000L)
diff --git a/include/linux/timeofday.h b/include/linux/timeofday.h
new file mode 100644
--- /dev/null
+++ b/include/linux/timeofday.h
@@ -0,0 +1,83 @@
+/* linux/include/linux/timeofday.h
+ *
+ * This file contains the interface to the time of day subsystem
+ */
+#ifndef _LINUX_TIMEOFDAY_H
+#define _LINUX_TIMEOFDAY_H
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <asm/div64.h>
+
+#ifdef CONFIG_GENERICTOD
+/* Public definitions */
+extern nsec_t do_monotonic_clock(void);
+extern void do_gettimeofday(struct timeval *tv);
+extern void getnstimeofday(struct timespec *ts);
+extern int do_settimeofday(struct timespec *tv);
+extern void timeofday_init(void);
+
+/* Inline helper functions */
+static inline struct timeval ns_to_timeval(nsec_t ns)
+{
+ struct timeval tv;
+ tv.tv_sec = div_long_long_rem(ns, NSEC_PER_SEC, &tv.tv_usec);
+ tv.tv_usec = (tv.tv_usec + NSEC_PER_USEC/2) / NSEC_PER_USEC;
+ return tv;
+}
+
+static inline struct timespec ns_to_timespec(nsec_t ns)
+{
+ struct timespec ts;
+ ts.tv_sec = div_long_long_rem(ns, NSEC_PER_SEC, &ts.tv_nsec);
+ return ts;
+}
+
+static inline nsec_t timespec_to_ns(struct timespec* ts)
+{
+ nsec_t ret;
+ ret = ((nsec_t)ts->tv_sec) * NSEC_PER_SEC;
+ ret += ts->tv_nsec;
+ return ret;
+}
+
+static inline nsec_t timeval_to_ns(struct timeval* tv)
+{
+ nsec_t ret;
+ ret = ((nsec_t)tv->tv_sec) * NSEC_PER_SEC;
+ ret += tv->tv_usec * NSEC_PER_USEC;
+ return ret;
+}
+
+static inline void normalize_timespec(struct timespec *ts)
+{
+ while ((unsigned long)ts->tv_nsec > NSEC_PER_SEC) {
+ ts->tv_nsec -= NSEC_PER_SEC;
+ ts->tv_sec++;
+ }
+}
+
+static inline struct timespec timespec_add_ns(struct timespec a, nsec_t ns)
+{
+ while(ns > NSEC_PER_SEC) {
+ normalize_timespec(&a);
+ ns -= NSEC_PER_SEC;
+ a.tv_nsec += NSEC_PER_SEC;
+ }
+ a.tv_nsec += ns;
+ normalize_timespec(&a);
+
+ return a;
+}
+
+
+#ifndef ARCH_IS_TICK_BASED
+#define arch_getoffset() (0)
+#else
+extern nsec_t arch_getoffset(void);
+#endif
+
+#else /* CONFIG_GENERICTOD */
+#define timeofday_init()
+#endif /* CONFIG_GENERICTOD */
+#endif /* _LINUX_TIMEOFDAY_H */
diff --git a/include/linux/timex.h b/include/linux/timex.h
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -313,6 +313,7 @@ int ntp_leapsecond(struct timespec now);
__x < 0 ? -(-__x >> __s) : __x >> __s; \
})

+#ifndef CONFIG_GENERICTOD

#ifdef CONFIG_TIME_INTERPOLATION

@@ -368,6 +369,7 @@ time_interpolator_reset(void)
}

#endif /* !CONFIG_TIME_INTERPOLATION */
+#endif /* !CONFIG_GENERICTOD */

#endif /* KERNEL */

diff --git a/init/main.c b/init/main.c
--- a/init/main.c
+++ b/init/main.c
@@ -47,6 +47,7 @@
#include <linux/rmap.h>
#include <linux/mempolicy.h>
#include <linux/key.h>
+#include <linux/timeofday.h>
#include <net/sock.h>

#include <asm/io.h>
@@ -486,6 +487,7 @@ asmlinkage void __init start_kernel(void
pidhash_init();
init_timers();
softirq_init();
+ timeofday_init();
time_init();

/*
diff --git a/kernel/Makefile b/kernel/Makefile
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y = sched.o fork.o exec_domain.o
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o

+obj-$(CONFIG_GENERICTOD) += timeofday.o timesource.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
diff --git a/kernel/time.c b/kernel/time.c
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -38,6 +38,7 @@

#include <asm/uaccess.h>
#include <asm/unistd.h>
+#include <linux/timeofday.h>

/*
* The timezone where the local system is located. Used as a default by some
@@ -128,6 +129,7 @@ asmlinkage long sys_gettimeofday(struct
* as real UNIX machines always do it. This avoids all headaches about
* daylight saving times and warping kernel clocks.
*/
+#ifndef CONFIG_GENERICTOD
static inline void warp_clock(void)
{
write_seqlock_irq(&xtime_lock);
@@ -137,6 +139,18 @@ static inline void warp_clock(void)
write_sequnlock_irq(&xtime_lock);
clock_was_set();
}
+#else /* !CONFIG_GENERICTOD */
+/* XXX - this is somewhat cracked out and should
+ be checked [email protected]
+*/
+static inline void warp_clock(void)
+{
+ struct timespec ts;
+ getnstimeofday(&ts);
+ ts.tv_sec += sys_tz.tz_minuteswest * 60;
+ do_settimeofday(&ts);
+}
+#endif /* !CONFIG_GENERICTOD */

/*
* In case for some reason the CMOS clock has not already been running
@@ -481,6 +495,7 @@ struct timespec timespec_trunc(struct ti
}
EXPORT_SYMBOL(timespec_trunc);

+#ifndef CONFIG_GENERICTOD
#ifdef CONFIG_TIME_INTERPOLATION
void getnstimeofday (struct timespec *tv)
{
@@ -564,6 +579,31 @@ void getnstimeofday(struct timespec *tv)
}
#endif

+/**
+ * do_monotonic_clock - Returns monotonically increasing nanoseconds
+ *
+ * Returns the monotonically increasing number of nanoseconds
+ * since the system booted.
+ */
+nsec_t do_monotonic_clock(void)
+{
+ struct timespec ts, mo;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ getnstimeofday(&ts);
+ mo = wall_to_monotonic;
+ } while(read_seqretry(&xtime_lock, seq));
+
+ ts.tv_sec += mo.tv_sec;
+ ts.tv_nsec += mo.tv_nsec;
+
+ return ((u64)ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
+}
+
+#endif /* !CONFIG_GENERICTOD */
+
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
diff --git a/kernel/timeofday.c b/kernel/timeofday.c
new file mode 100644
--- /dev/null
+++ b/kernel/timeofday.c
@@ -0,0 +1,575 @@
+/*********************************************************************
+* linux/kernel/timeofday.c
+*
+* This file contains the functions which access and manage
+* the system's time of day functionality.
+*
+* Copyright (C) 2003, 2004, 2005 IBM, John Stultz ([email protected])
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+* TODO WishList:
+* o See XXX's below.
+**********************************************************************/
+
+#include <linux/timeofday.h>
+#include <linux/timesource.h>
+#include <linux/timex.h>
+#include <linux/timer.h>
+#include <linux/module.h>
+#include <linux/sched.h> /* Needed for capable() */
+#include <linux/sysdev.h>
+#include <linux/jiffies.h>
+#include <asm/timeofday.h>
+
+/* only run periodic_hook every 50ms */
+#define PERIODIC_INTERVAL_MS 50
+
+/*[Nanosecond based variables]
+ * system_time:
+ * Monotonically increasing counter of the number of nanoseconds
+ * since boot.
+ * wall_time_offset:
+ * Offset added to system_time to provide accurate time-of-day
+ */
+static nsec_t system_time;
+static nsec_t wall_time_offset;
+
+/*[timespec based variables]
+ * wall_time_ts:
+ * Timespec holding the current wall time. This is redundent
+ * but necessary because we have both nsec_t and timespec
+ * interfaces and the conversion between is costly.
+ * monotonic_time_offset_ts:
+ * Timespec used to keep wall_to_monotonic synced. Hopefully
+ * could be removed once wall_to_monotonic is killed.
+ */
+static struct timespec wall_time_ts;
+static struct timespec monotonic_time_offset_ts;
+
+/*[Cycle based variables]
+ * cycle_last:
+ * Value of the timesource at the last timeofday_periodic_hook()
+ * (adjusted only minorly to account for rounded off cycles)
+ */
+static cycle_t cycle_last;
+
+/* Used for fixed interval conversions
+ * XXX - this is ugly here, can we clean it up
+ * or find a better home for it?
+ */
+#define INTERVAL_LEN 1000000
+static struct interval_t {
+ cycle_t cycles;
+ nsec_t nsecs;
+ u64 remainder;
+} interval;
+
+static struct interval_t calculate_interval(struct timesource_t *t,
+ long adj, unsigned long length_nsec)
+{
+ struct interval_t ret;
+ u64 tmp;
+
+ tmp = length_nsec;
+ tmp <<= t->shift;
+ do_div(tmp, t->mult+adj);
+
+ ret.cycles = (cycle_t)tmp;
+ if(ret.cycles == 0)
+ ret.cycles = 1;
+
+ ret.nsecs = cyc2ns(t, adj, ret.cycles);
+
+ return ret;
+}
+
+/*[Time source data]
+ * timesource:
+ * current timesource pointer
+ */
+static struct timesource_t *timesource;
+
+/*[NTP adjustment]
+ * ntp_adj:
+ * value of the current ntp adjustment,
+ * stored in timesource multiplier units.
+ */
+int ntp_adj;
+
+/*[Locks]
+ * system_time_lock:
+ * generic lock for all locally scoped time values
+ */
+static seqlock_t system_time_lock = SEQLOCK_UNLOCKED;
+
+
+/*[Suspend/Resume info]
+ * time_suspend_state:
+ * variable that keeps track of suspend state
+ * suspend_start:
+ * start of the suspend call
+ */
+static enum {
+ TIME_RUNNING,
+ TIME_SUSPENDED
+} time_suspend_state = TIME_RUNNING;
+
+static nsec_t suspend_start;
+
+/* [Soft-Timers]
+ * timeofday_timer:
+ * soft-timer used to call timeofday_periodic_hook()
+ */
+struct timer_list timeofday_timer;
+
+
+/* [Functions]
+ */
+
+
+/**
+ * update_legacy_time_values - Used to sync legacy time values
+ *
+ * Private function. Used to sync legacy time values to
+ * current timeofday. Assumes we have the system_time_lock.
+ * Hopefully someday this function can be removed.
+ */
+static void update_legacy_time_values(void)
+{
+ unsigned long flags;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ xtime = wall_time_ts;
+ wall_to_monotonic = monotonic_time_offset_ts;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ /* since time state has changed, notify vsyscall code */
+ arch_update_vsyscall_gtod(system_time + wall_time_offset, cycle_last,
+ timesource, ntp_adj);
+}
+
+
+/**
+ * __get_nsec_offset - Returns nanoseconds since last call to periodic_hook
+ *
+ * private function, must hold system_time_lock lock when being
+ * called. Returns the number of nanoseconds since the
+ * last call to timeofday_periodic_hook() (adjusted by NTP scaling)
+ */
+static inline nsec_t __get_nsec_offset(void)
+{
+ nsec_t ns_offset;
+ cycle_t cycle_now, cycle_delta;
+
+ /* read timesource */
+ cycle_now = read_timesource(timesource);
+
+ /* calculate the delta since the last timeofday_periodic_hook */
+ cycle_delta = (cycle_now - cycle_last) & timesource->mask;
+
+ /* convert to nanoseconds */
+ ns_offset = cyc2ns(timesource, ntp_adj, cycle_delta);
+
+ /* Special case for jiffies tick/offset based systems
+ * add arch specific offset
+ */
+ ns_offset += arch_getoffset();
+
+ return ns_offset;
+}
+
+/**
+ * __monotonic_clock - Returns monotonically increasing nanoseconds
+ *
+ * private function, must hold system_time_lock lock when being
+ * called. Returns the monotonically increasing number of
+ * nanoseconds since the system booted (adjusted by NTP scaling)
+ */
+static inline nsec_t __monotonic_clock(void)
+{
+ return system_time + __get_nsec_offset();
+}
+
+
+/**
+ * do_monotonic_clock - Returns monotonically increasing nanoseconds
+ *
+ * Returns the monotonically increasing number of nanoseconds
+ * since the system booted via __monotonic_clock()
+ */
+nsec_t do_monotonic_clock(void)
+{
+ nsec_t ret;
+ unsigned long seq;
+
+ /* atomically read __monotonic_clock() */
+ do {
+ seq = read_seqbegin(&system_time_lock);
+
+ ret = __monotonic_clock();
+
+ } while (read_seqretry(&system_time_lock, seq));
+
+ return ret;
+}
+
+
+
+/**
+ * getnstimeofday - Returns the time of day in a timespec
+ * @ts: pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec
+ * XXX - For consistency should be renamed
+ * later to do_getnstimeofday()
+ */
+void getnstimeofday(struct timespec *ts)
+{
+ struct timespec now_ts;
+ nsec_t nsecs;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&system_time_lock);
+
+ now_ts = wall_time_ts;
+ nsecs = __get_nsec_offset();
+
+ } while (read_seqretry(&system_time_lock, seq));
+
+ *ts = timespec_add_ns(now_ts, nsecs);
+}
+EXPORT_SYMBOL(getnstimeofday);
+
+
+/**
+ * do_gettimeofday - Returns the time of day in a timeval
+ * @tv: pointer to the timeval to be set
+ *
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+ struct timespec now_ts;
+ getnstimeofday(&now_ts);
+ tv->tv_sec = now_ts.tv_sec;
+ tv->tv_usec = now_ts.tv_nsec/1000;
+}
+
+EXPORT_SYMBOL(do_gettimeofday);
+
+
+/**
+ * do_settimeofday - Sets the time of day
+ * @tv: pointer to the timespec that will be used to set the time
+ *
+ */
+int do_settimeofday(struct timespec *tv)
+{
+ unsigned long flags;
+ nsec_t newtime;
+
+ newtime = timespec_to_ns(tv);
+
+ write_seqlock_irqsave(&system_time_lock, flags);
+
+ wall_time_offset = newtime - __monotonic_clock();
+
+ wall_time_ts = ns_to_timespec(system_time + wall_time_offset);
+
+ monotonic_time_offset_ts = ns_to_timespec(wall_time_offset);
+ set_normalized_timespec(&monotonic_time_offset_ts,
+ -monotonic_time_offset_ts.tv_sec, -monotonic_time_offset_ts.tv_nsec);
+
+ ntp_clear();
+
+ update_legacy_time_values();
+
+ write_sequnlock_irqrestore(&system_time_lock, flags);
+
+ /* signal posix-timers about time change */
+ clock_was_set();
+
+ return 0;
+}
+EXPORT_SYMBOL(do_settimeofday);
+
+
+/**
+ * timeofday_suspend_hook - allows the timeofday subsystem to be shutdown
+ * @dev: unused
+ * state: unused
+ *
+ * This function allows the timeofday subsystem to
+ * be shutdown for a period of time. Usefull when
+ * going into suspend/hibernate mode. The code is
+ * very similar to the first half of
+ * timeofday_periodic_hook().
+ */
+static int timeofday_suspend_hook(struct sys_device *dev, pm_message_t state)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&system_time_lock, flags);
+
+ BUG_ON(time_suspend_state != TIME_RUNNING);
+
+ /* First off, save suspend start time
+ * then quickly call __monotonic_clock.
+ * These two calls hopefully occur quickly
+ * because the difference between reads will
+ * accumulate as time drift on resume.
+ */
+ suspend_start = read_persistent_clock();
+ system_time = __monotonic_clock();
+
+ time_suspend_state = TIME_SUSPENDED;
+
+ write_sequnlock_irqrestore(&system_time_lock, flags);
+ return 0;
+}
+
+
+/**
+ * timeofday_resume_hook - Resumes the timeofday subsystem.
+ * @dev: unused
+ *
+ * This function resumes the timeofday subsystem
+ * from a previous call to timeofday_suspend_hook.
+ */
+static int timeofday_resume_hook(struct sys_device *dev)
+{
+ nsec_t suspend_end, suspend_time;
+ unsigned long flags;
+
+ write_seqlock_irqsave(&system_time_lock, flags);
+
+ BUG_ON(time_suspend_state != TIME_SUSPENDED);
+
+ /* Read persistent clock to mark the end of
+ * the suspend interval then rebase the
+ * cycle_last to current timesource value.
+ * Again, time between these two calls will
+ * not be accounted for and will show up as
+ * time drift.
+ */
+ suspend_end = read_persistent_clock();
+ cycle_last = read_timesource(timesource);
+
+ suspend_time = suspend_end - suspend_start;
+
+ system_time += suspend_time;
+ wall_time_ts = ns_to_timespec(system_time + wall_time_offset);
+
+ ntp_clear();
+
+ time_suspend_state = TIME_RUNNING;
+
+ update_legacy_time_values();
+
+ write_sequnlock_irqrestore(&system_time_lock, flags);
+
+ /* signal posix-timers about time change */
+ clock_was_set();
+
+ return 0;
+}
+
+/* sysfs resume/suspend bits */
+static struct sysdev_class timeofday_sysclass = {
+ .resume = timeofday_resume_hook,
+ .suspend = timeofday_suspend_hook,
+ set_kset_name("timeofday"),
+};
+static struct sys_device device_timer = {
+ .id = 0,
+ .cls = &timeofday_sysclass,
+};
+static int timeofday_init_device(void)
+{
+ int error = sysdev_class_register(&timeofday_sysclass);
+ if (!error)
+ error = sysdev_register(&device_timer);
+ return error;
+}
+device_initcall(timeofday_init_device);
+
+/**
+ * timeofday_periodic_hook - Does periodic update of timekeeping values.
+ * unused: unused
+ *
+ * Calculates the delta since the last call,
+ * updates system time and clears the offset.
+ *
+ * Called via timeofday_timer.
+ */
+static void timeofday_periodic_hook(unsigned long unused)
+{
+ unsigned long flags;
+
+ cycle_t cycle_now, cycle_delta;
+ nsec_t delta_nsec = 0;
+ static u64 remainder;
+
+ long leapsecond;
+ struct timesource_t* next;
+
+ int ppm;
+ static int ppm_last;
+
+ int something_changed = 0;
+ struct timesource_t old_timesource;
+
+ write_seqlock_irqsave(&system_time_lock, flags);
+
+ /* read time source & calc time since last call*/
+ cycle_now = read_timesource(timesource);
+ cycle_delta = (cycle_now - cycle_last) & timesource->mask;
+
+ /* update system_time */
+ while(cycle_delta > interval.cycles) {
+ delta_nsec += interval.nsecs;
+ cycle_delta -= interval.cycles;
+ cycle_last += interval.cycles;
+ /* XXX - Don't forget remainder accounting */
+ }
+ system_time += delta_nsec;
+ wall_time_ts = timespec_add_ns(wall_time_ts, delta_nsec);
+
+ cycle_last &= timesource->mask;
+
+ /* advance the ntp state machine by ns interval*/
+ ntp_advance(delta_nsec);
+
+ /* do ntp leap second processing*/
+ leapsecond = ntp_leapsecond(wall_time_ts);
+ if (leapsecond) {
+ wall_time_offset += leapsecond * NSEC_PER_SEC;
+ wall_time_ts.tv_sec += leapsecond;
+ monotonic_time_offset_ts.tv_sec -= leapsecond;
+ }
+
+ /* sync the persistent clock */
+ if (ntp_synced())
+ sync_persistent_clock(wall_time_ts);
+
+ /* if necessary, switch timesources */
+ next = get_next_timesource();
+ if (next != timesource) {
+ /* immediately set new cycle_last */
+ cycle_last = read_timesource(next);
+ cycle_now = cycle_last;
+ /* swap timesources */
+ old_timesource = *timesource;
+ timesource = next;
+ printk(KERN_INFO "Time: %s timesource has been installed.\n",
+ timesource->name);
+ ntp_clear();
+ ntp_adj = 0;
+ remainder = 0;
+ something_changed = 1;
+ }
+
+ /* now is a safe time, so allow timesource to adjust
+ * itself (for example: to make cpufreq changes).
+ */
+ if(timesource->update_callback) {
+ /* since timesource state might change, keep a copy */
+ old_timesource = *timesource;
+ if(timesource->update_callback())
+ something_changed = 1;
+ }
+
+ /* check for new PPM adjustment */
+ ppm = ntp_get_ppm_adjustment();
+ if (ppm_last != ppm) {
+ old_timesource = *timesource;
+ something_changed = 1;
+ }
+
+ /* if something changed, recalculate the ntp adjustment value */
+ if (something_changed) {
+ u64 mult_adj;
+
+ /* accumulate the current leftover cycles using old_timesoruce */
+ if (cycle_delta) {
+ delta_nsec = cyc2ns(&old_timesource, ntp_adj, cycle_delta);
+ system_time += delta_nsec;
+ wall_time_ts = timespec_add_ns(wall_time_ts, delta_nsec);
+ ntp_advance(delta_nsec);
+ cycle_last = cycle_now;
+ }
+
+ /* XXX - we can probably optimize most of this away */
+ ppm_last = ppm;
+ mult_adj = abs(ppm);
+ mult_adj = (mult_adj * timesource->mult)>>SHIFT_USEC;
+ mult_adj += 1000000/2; /* round for div*/
+ do_div(mult_adj, 1000000);
+ if (ppm < 0)
+ ntp_adj = -(int)mult_adj;
+ else
+ ntp_adj = (int)mult_adj;
+
+ interval = calculate_interval(timesource, ntp_adj, INTERVAL_LEN);
+ }
+
+ update_legacy_time_values();
+
+ write_sequnlock_irqrestore(&system_time_lock, flags);
+
+ /* Set us up to go off on the next interval */
+ mod_timer(&timeofday_timer,
+ jiffies + msecs_to_jiffies(PERIODIC_INTERVAL_MS));
+}
+
+
+/**
+ * timeofday_init - Initializes time variables
+ *
+ */
+void __init timeofday_init(void)
+{
+ unsigned long flags;
+ write_seqlock_irqsave(&system_time_lock, flags);
+
+ /* initialize the timesource variable */
+ timesource = get_next_timesource();
+
+ /* clear and initialize offsets */
+ cycle_last = read_timesource(timesource);
+ wall_time_offset = read_persistent_clock();
+ wall_time_ts = ns_to_timespec(system_time + wall_time_offset);
+ monotonic_time_offset_ts = ns_to_timespec(wall_time_offset);
+ set_normalized_timespec(&monotonic_time_offset_ts,
+ -monotonic_time_offset_ts.tv_sec, -monotonic_time_offset_ts.tv_nsec);
+
+ /* clear NTP scaling factor & state machine */
+ ntp_adj = 0;
+ ntp_clear();
+ interval = calculate_interval(timesource, ntp_adj, INTERVAL_LEN);
+
+ /* initialize legacy time values */
+ update_legacy_time_values();
+
+ write_sequnlock_irqrestore(&system_time_lock, flags);
+
+ /* Install timeofday_periodic_hook timer */
+ init_timer(&timeofday_timer);
+ timeofday_timer.function = timeofday_periodic_hook;
+ timeofday_timer.expires = jiffies + 1;
+ add_timer(&timeofday_timer);
+
+ return;
+}
diff --git a/kernel/timer.c b/kernel/timer.c
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -839,6 +839,7 @@ void ntp_advance(unsigned long interval_

}

+#ifndef CONFIG_GENERICTOD
/*
* Using a loop looks inefficient, but "ticks" is
* usually just one (we shouldn't be losing ticks,
@@ -890,6 +891,9 @@ static void update_wall_time(unsigned lo

} while (ticks);
}
+#else /* !CONFIG_GENERICTOD */
+#define update_wall_time(x)
+#endif /* !CONFIG_GENERICTOD */

/*
* Called from the timer interrupt handler to charge one tick to the current


2005-09-30 00:48:26

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 5/11] generic timeofday i386 arch specific changes, part 1

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the first of six, is just a simple
cleanup for the i386 arch in preparation of moving the the generic
timeofday infrastructure. It simply moves some code from timer_pit.c to
i8259.c.

It applies on top of my timeofday-core patch. This patch is part the
timeofday-arch-i386 patchset, so without the following parts it is not
expected to compile (although just this one should).


thanks
-john

i8259.c | 40 ++++++++++++++++++++++++++++++++++++++++
time.c | 1 -
timers/timer_pit.c | 13 -------------
3 files changed, 40 insertions(+), 14 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part1_B6.patch
============================================
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <asm/arch_hooks.h>
#include <asm/i8259.h>
+#include <asm/i8253.h>

#include <io_ports.h>

@@ -396,6 +397,45 @@ void __init init_ISA_irqs (void)
}
}

+void setup_pit_timer(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
+ outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
+ udelay(10);
+ outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
+ udelay(10);
+ outb(LATCH >> 8 , PIT_CH0); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
+}
+
+static int timer_resume(struct sys_device *dev)
+{
+ setup_pit_timer();
+ return 0;
+}
+
+static struct sysdev_class timer_sysclass = {
+ set_kset_name("timer_pit"),
+ .resume = timer_resume,
+};
+
+static struct sys_device device_timer = {
+ .id = 0,
+ .cls = &timer_sysclass,
+};
+
+static int __init init_timer_sysfs(void)
+{
+ int error = sysdev_class_register(&timer_sysclass);
+ if (!error)
+ error = sysdev_register(&device_timer);
+ return error;
+}
+
+device_initcall(init_timer_sysfs);
+
void __init init_IRQ(void)
{
int i;
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -404,7 +404,6 @@ static int timer_resume(struct sys_devic
if (is_hpet_enabled())
hpet_reenable();
#endif
- setup_pit_timer();
sec = get_cmos_time() + clock_cmos_diff;
sleep_length = (get_cmos_time() - sleep_start) * HZ;
write_seqlock_irqsave(&xtime_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -161,16 +161,3 @@ struct init_timer_opts __initdata timer_
.init = init_pit,
.opts = &timer_pit,
};
-
-void setup_pit_timer(void)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8 , PIT_CH0); /* MSB */
- spin_unlock_irqrestore(&i8253_lock, flags);
-}


2005-09-30 00:49:12

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 6/11] generic timeofday i386 arch specific changes, part 2

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the second of six, is a cleanup patch
for the i386 arch in preparation of moving the the generic timeofday
infrastructure. It moves some code from timer_tsc.c to a new tsc.c file.

It applies on top of my timeofday-arch-i386-part1 patch. This patch is
part the timeofday-arch-i386 patchset, so without the following parts it
is not expected to compile.

thanks
-john

arch/i386/kernel/Makefile | 2
arch/i386/kernel/timers/common.c | 84 ----------
arch/i386/kernel/timers/timer_tsc.c | 207 -------------------------
arch/i386/kernel/tsc.c | 298 ++++++++++++++++++++++++++++++++++++
include/asm-i386/timex.h | 34 ----
include/asm-i386/tsc.h | 44 +++++
6 files changed, 344 insertions(+), 325 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part2_B6.patch
============================================
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
- doublefault.o quirks.o i8237.o
+ doublefault.o quirks.o i8237.o tsc.o

obj-y += cpu/
obj-y += timers/
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
--- a/arch/i386/kernel/timers/common.c
+++ b/arch/i386/kernel/timers/common.c
@@ -14,66 +14,6 @@

#include "mach_timer.h"

-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)
-
-unsigned long calibrate_tsc(void)
-{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
- mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
- }
-
- /*
- * The CTC wasn't reliable: we got a hit on the very first read,
- * or the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
-bad_ctc:
- return 0;
-}
-
#ifdef CONFIG_HPET_TIMER
/* ------ Calibrate the TSC using HPET -------
* Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
@@ -146,27 +86,3 @@ unsigned long read_timer_tsc(void)
rdtscl(retval);
return retval;
}
-
-
-/* calculate cpu_khz */
-void init_cpu_khz(void)
-{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
-}
-
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -32,10 +32,6 @@ static unsigned long hpet_last;
static struct timer_opts timer_tsc;
#endif

-static inline void cpufreq_delayed_get(void);
-
-int tsc_disable __devinitdata = 0;
-
static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;
@@ -45,34 +41,6 @@ static unsigned long last_tsc_high; /* m
static unsigned long long monotonic_base;
static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;

-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by [email protected]) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * [email protected] "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
static int count2; /* counter for mark_offset_tsc() */

/* Cached *multiplier* to convert TSC counts to microseconds.
@@ -130,29 +98,6 @@ static unsigned long long monotonic_cloc
return base + cycles_2_ns(this_offset - last_offset);
}

-/*
- * Scheduler clock - returns current time in nanosec units.
- */
-unsigned long long sched_clock(void)
-{
- unsigned long long this_offset;
-
- /*
- * In the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
- */
-#ifndef CONFIG_NUMA
- if (!use_tsc)
-#endif
- /* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return cycles_2_ns(this_offset);
-}

static void delay_tsc(unsigned long loops)
{
@@ -217,127 +162,6 @@ static void mark_offset_tsc_hpet(void)
#endif


-#ifdef CONFIG_CPU_FREQ
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(void *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static inline void cpufreq_delayed_get(void)
-{
- if (cpufreq_init && !cpufreq_delayed_issched) {
- cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
- schedule_work(&cpufreq_delayed_get_work);
- }
-}
-
-/* If the CPU frequency is scaled, TSC-based delays will need a different
- * loops_per_jiffy value to function properly.
- */
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
-static unsigned int cpu_khz_ref = 0;
-#endif
-
-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
-{
- struct cpufreq_freqs *freq = data;
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_seqlock_irq(&xtime_lock);
- if (!ref_freq) {
- ref_freq = freq->old;
- loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
-#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
- cpu_khz_ref = cpu_khz;
-#endif
- }
-
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
- (val == CPUFREQ_RESUMECHANGE)) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-#ifndef CONFIG_SMP
- if (cpu_khz)
- cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
- if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
- set_cyc2ns_scale(cpu_khz/1000);
- }
- }
-#endif
- }
-
- if (val != CPUFREQ_RESUMECHANGE)
- write_sequnlock_irq(&xtime_lock);
-
- return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
- .notifier_call = time_cpufreq_notifier
-};
-
-
-static int __init cpufreq_tsc(void)
-{
- int ret;
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
- ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (!ret)
- cpufreq_init = 1;
- return ret;
-}
-core_initcall(cpufreq_tsc);
-
-#else /* CONFIG_CPU_FREQ */
-static inline void cpufreq_delayed_get(void) { return; }
-#endif
-
-int recalibrate_cpu_khz(void)
-{
-#ifndef CONFIG_SMP
- unsigned int cpu_khz_old = cpu_khz;
-
- if (cpu_has_tsc) {
- init_cpu_khz();
- cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
- return 0;
- } else
- return -ENODEV;
-#else
- return -ENODEV;
-#endif
-}
-EXPORT_SYMBOL(recalibrate_cpu_khz);

static void mark_offset_tsc(void)
{
@@ -543,37 +367,6 @@ static int __init init_tsc(char* overrid
return -ENODEV;
}

-static int tsc_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer)
- hpet_last = hpet_readl(HPET_COUNTER);
-#endif
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-#ifndef CONFIG_X86_TSC
-/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
- * in cpu/common.c */
-static int __init tsc_setup(char *str)
-{
- tsc_disable = 1;
- return 1;
-}
-#else
-static int __init tsc_setup(char *str)
-{
- printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC.\n");
- return 1;
-}
-#endif
-__setup("notsc", tsc_setup);
-


/************************************************************/
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
new file mode 100644
--- /dev/null
+++ b/arch/i386/kernel/tsc.c
@@ -0,0 +1,298 @@
+/*
+ * This code largely moved from arch/i386/kernel/timer/timer_tsc.c
+ * which was originally moved from arch/i386/kernel/time.c.
+ * See comments there for proper credits.
+ */
+
+#include <linux/init.h>
+#include <linux/timex.h>
+#include <linux/cpufreq.h>
+#include <asm/io.h>
+#include "mach_timer.h"
+
+int tsc_disable __initdata = 0;
+#ifndef CONFIG_X86_TSC
+/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
+ * in cpu/common.c */
+static int __init tsc_setup(char *str)
+{
+ tsc_disable = 1;
+ return 1;
+}
+#else
+static int __init tsc_setup(char *str)
+{
+ printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
+ "cannot disable TSC.\n");
+ return 1;
+}
+#endif
+__setup("notsc", tsc_setup);
+
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (cur_timer->read_timer) {
+ *timer_val = cur_timer->read_timer();
+ return 0;
+ }
+ return -1;
+}
+
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ * ns = cycles * (10^3 / cpu_mhz)
+ *
+ * Then we use scaling math (suggested by [email protected]) to get:
+ * ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ * [email protected] "math is hard, lets go shopping!"
+ */
+static unsigned long cyc2ns_scale;
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{
+ cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ */
+unsigned long long sched_clock(void)
+{
+ unsigned long long this_offset;
+
+ /*
+ * In the NUMA case we dont use the TSC as they are not
+ * synchronized across all CPUs.
+ */
+#ifndef CONFIG_NUMA
+ if (!use_tsc)
+#endif
+ /* no locking but a rare wrong value is not a big deal */
+ return jiffies_64 * (1000000000 / HZ);
+
+ /* Read the Time Stamp Counter */
+ rdtscll(this_offset);
+
+ /* return the value in ns */
+ return cycles_2_ns(this_offset);
+}
+
+/* ------ Calibrate the TSC -------
+ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
+ * Too much 64-bit arithmetic here to do this cleanly in C, and for
+ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
+ * output busy loop as low as possible. We avoid reading the CTC registers
+ * directly because of the awkward 8-bit access mechanism of the 82C54
+ * device.
+ */
+
+#define CALIBRATE_TIME (5 * 1000020/HZ)
+
+unsigned long calibrate_tsc(void)
+{
+ mach_prepare_counter();
+
+ {
+ unsigned long startlow, starthigh;
+ unsigned long endlow, endhigh;
+ unsigned long count;
+
+ rdtsc(startlow,starthigh);
+ mach_countup(&count);
+ rdtsc(endlow,endhigh);
+
+
+ /* Error: ECTCNEVERSET */
+ if (count <= 1)
+ goto bad_ctc;
+
+ /* 64-bit subtract - gcc just messes up with long longs */
+ __asm__("subl %2,%0\n\t"
+ "sbbl %3,%1"
+ :"=a" (endlow), "=d" (endhigh)
+ :"g" (startlow), "g" (starthigh),
+ "0" (endlow), "1" (endhigh));
+
+ /* Error: ECPUTOOFAST */
+ if (endhigh)
+ goto bad_ctc;
+
+ /* Error: ECPUTOOSLOW */
+ if (endlow <= CALIBRATE_TIME)
+ goto bad_ctc;
+
+ __asm__("divl %2"
+ :"=a" (endlow), "=d" (endhigh)
+ :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
+
+ return endlow;
+ }
+
+ /*
+ * The CTC wasn't reliable: we got a hit on the very first read,
+ * or the CPU was so fast/slow that the quotient wouldn't fit in
+ * 32 bits..
+ */
+bad_ctc:
+ return 0;
+}
+
+int recalibrate_cpu_khz(void)
+{
+#ifndef CONFIG_SMP
+ unsigned long cpu_khz_old = cpu_khz;
+
+ if (cpu_has_tsc) {
+ init_cpu_khz();
+ cpu_data[0].loops_per_jiffy =
+ cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_khz_old,
+ cpu_khz);
+ return 0;
+ } else
+ return -ENODEV;
+#else
+ return -ENODEV;
+#endif
+}
+EXPORT_SYMBOL(recalibrate_cpu_khz);
+
+
+/* calculate cpu_khz */
+void init_cpu_khz(void)
+{
+ if (cpu_has_tsc) {
+ unsigned long tsc_quotient = calibrate_tsc();
+ if (tsc_quotient) {
+ /* report CPU clock rate in Hz.
+ * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+ * clock/second. Our precision is about 100 ppm.
+ */
+ { unsigned long eax=0, edx=1000;
+ __asm__("divl %2"
+ :"=a" (cpu_khz), "=d" (edx)
+ :"r" (tsc_quotient),
+ "0" (eax), "1" (edx));
+ printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
+ }
+ }
+ }
+}
+
+
+#ifdef CONFIG_CPU_FREQ
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(void *v)
+{
+ unsigned int cpu;
+ for_each_online_cpu(cpu) {
+ cpufreq_get(cpu);
+ }
+ cpufreq_delayed_issched = 0;
+}
+
+/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
+ * to verify the CPU frequency the timing core thinks the CPU is running
+ * at is still correct.
+ */
+void cpufreq_delayed_get(void)
+{
+ if (cpufreq_init && !cpufreq_delayed_issched) {
+ cpufreq_delayed_issched = 1;
+ printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+ schedule_work(&cpufreq_delayed_get_work);
+ }
+}
+
+/* If the CPU frequency is scaled, TSC-based delays will need a different
+ * loops_per_jiffy value to function properly.
+ */
+
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+#ifndef CONFIG_SMP
+static unsigned long fast_gettimeoffset_ref = 0;
+static unsigned long cpu_khz_ref = 0;
+#endif
+
+static int
+time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_RESUMECHANGE)
+ write_seqlock_irq(&xtime_lock);
+ if (!ref_freq) {
+ ref_freq = freq->old;
+ loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
+#ifndef CONFIG_SMP
+ fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
+ cpu_khz_ref = cpu_khz;
+#endif
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+ (val == CPUFREQ_RESUMECHANGE)) {
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+ cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+#ifndef CONFIG_SMP
+ if (cpu_khz)
+ cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+ if (use_tsc) {
+ if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
+ fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
+ set_cyc2ns_scale(cpu_khz/1000);
+ }
+ }
+#endif
+ }
+
+ if (val != CPUFREQ_RESUMECHANGE)
+ write_sequnlock_irq(&xtime_lock);
+
+ return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+ .notifier_call = time_cpufreq_notifier
+};
+
+
+static int __init cpufreq_tsc(void)
+{
+ int ret;
+ INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
+ ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (!ret)
+ cpufreq_init = 1;
+ return ret;
+}
+core_initcall(cpufreq_tsc);
+
+#else /* CONFIG_CPU_FREQ */
+void cpufreq_delayed_get(void) { return; }
+#endif
diff --git a/include/asm-i386/timex.h b/include/asm-i386/timex.h
--- a/include/asm-i386/timex.h
+++ b/include/asm-i386/timex.h
@@ -8,6 +8,7 @@

#include <linux/config.h>
#include <asm/processor.h>
+#include <asm/tsc.h>

#ifdef CONFIG_X86_ELAN
# define CLOCK_TICK_RATE 1189200 /* AMD Elan has different frequency! */
@@ -16,39 +17,6 @@
#endif


-/*
- * Standard way to access the cycle counter on i586+ CPUs.
- * Currently only used on SMP.
- *
- * If you really have a SMP machine with i486 chips or older,
- * compile for that, and this will just always return zero.
- * That's ok, it just means that the nicer scheduling heuristics
- * won't work for you.
- *
- * We only use the low 32 bits, and we'd simply better make sure
- * that we reschedule before that wraps. Scheduling at least every
- * four billion cycles just basically sounds like a good idea,
- * regardless of how fast the machine is.
- */
-typedef unsigned long long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
- unsigned long long ret=0;
-
-#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- return 0;
-#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
- rdtscll(ret);
-#endif
- return ret;
-}
-
-extern unsigned int cpu_khz;
-
extern int read_current_timer(unsigned long *timer_value);
#define ARCH_HAS_READ_CURRENT_TIMER 1

diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
new file mode 100644
--- /dev/null
+++ b/include/asm-i386/tsc.h
@@ -0,0 +1,44 @@
+/*
+ * linux/include/asm-i386/tsc.h
+ *
+ * i386 TSC related functions
+ */
+#ifndef _ASM_i386_TSC_H
+#define _ASM_i386_TSC_H
+
+#include <linux/config.h>
+#include <asm/processor.h>
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have a SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is.
+ */
+typedef unsigned long long cycles_t;
+
+static inline cycles_t get_cycles (void)
+{
+ unsigned long long ret=0;
+
+#ifndef CONFIG_X86_TSC
+ if (!cpu_has_tsc)
+ return 0;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+ rdtscll(ret);
+#endif
+ return ret;
+}
+
+extern unsigned int cpu_khz;
+#endif


2005-09-30 00:49:57

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 7/11] generic timeofday i386 arch specific changes, part 3

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the third of six, reworks some of the
code in the new tsc.c file, adding some new interfaces and hooks to use
these new interfaces appropriately.

It applies on top of my timeofday-arch-i386-part2 patch. This patch is
part the timeofday-arch-i386 patchset, so without the following parts it
is not expected to compile.

thanks
-john

arch/i386/kernel/setup.c | 1
arch/i386/kernel/tsc.c | 192 ++++++++++++++--------------
drivers/acpi/processor_idle.c | 6
include/asm-i386/mach-default/mach_timer.h | 4
include/asm-i386/mach-summit/mach_mpparse.h | 2
include/asm-i386/tsc.h | 6
6 files changed, 118 insertions(+), 93 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part3_B6.patch
============================================
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -1612,6 +1612,7 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ tsc_init();
}

#include "setup_arch_post.h"
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -5,11 +5,17 @@
*/

#include <linux/init.h>
-#include <linux/timex.h>
#include <linux/cpufreq.h>
+#include <asm/tsc.h>
#include <asm/io.h>
#include "mach_timer.h"

+/* On some systems the TSC frequency does not
+ * change with the cpu frequency. So we need
+ * an extra value to store the TSC freq
+ */
+unsigned int tsc_khz;
+
int tsc_disable __initdata = 0;
#ifndef CONFIG_X86_TSC
/* disable flag for tsc. Takes effect by clearing the TSC cpu flag
@@ -32,15 +38,42 @@ __setup("notsc", tsc_setup);

int read_current_timer(unsigned long *timer_val)
{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
+ if (!tsc_disable && cpu_khz) {
+ rdtscl(*timer_val);
return 0;
}
return -1;
}

+/* Code to mark and check if the TSC is unstable
+ * due to cpufreq or due to unsynced TSCs
+ */
+static int tsc_unstable;
+int check_tsc_unstable(void)
+{
+ return tsc_unstable;
+}
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+
+/* Code to compensate for C3 stalls */
+static u64 tsc_c3_offset;
+void tsc_c3_compensate(unsigned long usecs)
+{
+ u64 cycles = (usecs * tsc_khz)/1000;
+ tsc_c3_offset += cycles;
+}
+
+u64 tsc_read_c3_time(void)
+{
+ return tsc_c3_offset;
+}

-/* convert from cycles(64bits) => nanoseconds (64bits)
+/* Accellerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
* ns = cycles * (ns_per_sec / freq)
@@ -80,76 +113,54 @@ unsigned long long sched_clock(void)
* synchronized across all CPUs.
*/
#ifndef CONFIG_NUMA
- if (!use_tsc)
+ if (!cpu_khz || check_tsc_unstable())
#endif
/* no locking but a rare wrong value is not a big deal */
- return jiffies_64 * (1000000000 / HZ);
+ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);

/* Read the Time Stamp Counter */
rdtscll(this_offset);
+ this_offset += tsc_read_c3_time();

/* return the value in ns */
return cycles_2_ns(this_offset);
}

-/* ------ Calibrate the TSC -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
- * Too much 64-bit arithmetic here to do this cleanly in C, and for
- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
- * output busy loop as low as possible. We avoid reading the CTC registers
- * directly because of the awkward 8-bit access mechanism of the 82C54
- * device.
- */
-
-#define CALIBRATE_TIME (5 * 1000020/HZ)

-unsigned long calibrate_tsc(void)
+static unsigned long calculate_cpu_khz(void)
{
- mach_prepare_counter();
-
- {
- unsigned long startlow, starthigh;
- unsigned long endlow, endhigh;
- unsigned long count;
-
- rdtsc(startlow,starthigh);
+ unsigned long long start, end;
+ unsigned long count;
+ u64 delta64;
+ int i;
+ /* run 3 times to ensure the cache is warm */
+ for(i=0; i<3; i++) {
+ mach_prepare_counter();
+ rdtscll(start);
mach_countup(&count);
- rdtsc(endlow,endhigh);
-
-
- /* Error: ECTCNEVERSET */
- if (count <= 1)
- goto bad_ctc;
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (endlow), "=d" (endhigh)
- :"g" (startlow), "g" (starthigh),
- "0" (endlow), "1" (endhigh));
-
- /* Error: ECPUTOOFAST */
- if (endhigh)
- goto bad_ctc;
-
- /* Error: ECPUTOOSLOW */
- if (endlow <= CALIBRATE_TIME)
- goto bad_ctc;
-
- __asm__("divl %2"
- :"=a" (endlow), "=d" (endhigh)
- :"r" (endlow), "0" (0), "1" (CALIBRATE_TIME));
-
- return endlow;
+ rdtscll(end);
}
-
- /*
+ /* Error: ECTCNEVERSET
* The CTC wasn't reliable: we got a hit on the very first read,
* or the CPU was so fast/slow that the quotient wouldn't fit in
* 32 bits..
*/
-bad_ctc:
- return 0;
+ if (count <= 1)
+ return 0;
+
+ delta64 = end - start;
+
+ /* cpu freq too fast */
+ if(delta64 > (1ULL<<32))
+ return 0;
+ /* cpu freq too slow */
+ if (delta64 <= CALIBRATE_TIME_MSEC)
+ return 0;
+
+ delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+ do_div(delta64,CALIBRATE_TIME_MSEC);
+
+ return (unsigned long)delta64;
}

int recalibrate_cpu_khz(void)
@@ -158,11 +169,11 @@ int recalibrate_cpu_khz(void)
unsigned long cpu_khz_old = cpu_khz;

if (cpu_has_tsc) {
- init_cpu_khz();
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
cpu_data[0].loops_per_jiffy =
- cpufreq_scale(cpu_data[0].loops_per_jiffy,
- cpu_khz_old,
- cpu_khz);
+ cpufreq_scale(cpu_data[0].loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
return 0;
} else
return -ENODEV;
@@ -173,25 +184,22 @@ int recalibrate_cpu_khz(void)
EXPORT_SYMBOL(recalibrate_cpu_khz);


-/* calculate cpu_khz */
-void init_cpu_khz(void)
+void tsc_init(void)
{
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc();
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
- }
- }
- }
+ if(!cpu_has_tsc || tsc_disable)
+ return;
+
+ cpu_khz = calculate_cpu_khz();
+ tsc_khz = cpu_khz;
+
+ if (!cpu_khz)
+ return;
+
+ printk("Detected %lu.%03lu MHz processor.\n",
+ (unsigned long)cpu_khz / 1000,
+ (unsigned long)cpu_khz % 1000);
+
+ set_cyc2ns_scale(cpu_khz/1000);
}


@@ -211,15 +219,15 @@ static void handle_cpufreq_delayed_get(v
cpufreq_delayed_issched = 0;
}

-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
+/* if we notice cpufreq oddness, schedule a call to cpufreq_get() as it tries
* to verify the CPU frequency the timing core thinks the CPU is running
* at is still correct.
*/
-void cpufreq_delayed_get(void)
+static inline void cpufreq_delayed_get(void)
{
if (cpufreq_init && !cpufreq_delayed_issched) {
cpufreq_delayed_issched = 1;
- printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
+ printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
schedule_work(&cpufreq_delayed_get_work);
}
}
@@ -232,13 +240,11 @@ static unsigned int ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;

#ifndef CONFIG_SMP
-static unsigned long fast_gettimeoffset_ref = 0;
static unsigned long cpu_khz_ref = 0;
#endif

-static int
-time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
+static int time_cpufreq_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
{
struct cpufreq_freqs *freq = data;

@@ -248,7 +254,6 @@ time_cpufreq_notifier(struct notifier_bl
ref_freq = freq->old;
loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
#ifndef CONFIG_SMP
- fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
cpu_khz_ref = cpu_khz;
#endif
}
@@ -258,16 +263,20 @@ time_cpufreq_notifier(struct notifier_bl
(val == CPUFREQ_RESUMECHANGE)) {
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+ if (cpu_khz) {
#ifndef CONFIG_SMP
- if (cpu_khz)
cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
- if (use_tsc) {
+#endif
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
- fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
+ tsc_khz = cpu_khz;
set_cyc2ns_scale(cpu_khz/1000);
+ /* TSC based sched_clock turns
+ * to junk w/ cpufreq
+ */
+ mark_tsc_unstable();
}
}
-#endif
}

if (val != CPUFREQ_RESUMECHANGE)
@@ -289,10 +298,9 @@ static int __init cpufreq_tsc(void)
CPUFREQ_TRANSITION_NOTIFIER);
if (!ret)
cpufreq_init = 1;
+
return ret;
}
core_initcall(cpufreq_tsc);

-#else /* CONFIG_CPU_FREQ */
-void cpufreq_delayed_get(void) { return; }
#endif
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -166,6 +166,7 @@ acpi_processor_power_activate(struct acp
return;
}

+extern void tsc_c3_compensate(unsigned long usecs);
static atomic_t c3_cpu_count;

static void acpi_processor_idle(void)
@@ -334,6 +335,11 @@ static void acpi_processor_idle(void)
ACPI_MTX_DO_NOT_LOCK);
}

+#ifdef CONFIG_GENERICTOD
+ /* compensate for TSC pause */
+ tsc_c3_compensate((((t2-t1)&0xFFFFFF)*286)>>10);
+#endif
+
/* Re-enable interrupts */
local_irq_enable();
/* Compute time (ticks) that we were actually asleep */
diff --git a/include/asm-i386/mach-default/mach_timer.h b/include/asm-i386/mach-default/mach_timer.h
--- a/include/asm-i386/mach-default/mach_timer.h
+++ b/include/asm-i386/mach-default/mach_timer.h
@@ -15,7 +15,9 @@
#ifndef _MACH_TIMER_H
#define _MACH_TIMER_H

-#define CALIBRATE_LATCH (5 * LATCH)
+#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
+#define CALIBRATE_LATCH \
+ ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)

static inline void mach_prepare_counter(void)
{
diff --git a/include/asm-i386/mach-summit/mach_mpparse.h b/include/asm-i386/mach-summit/mach_mpparse.h
--- a/include/asm-i386/mach-summit/mach_mpparse.h
+++ b/include/asm-i386/mach-summit/mach_mpparse.h
@@ -30,6 +30,7 @@ static inline int mps_oem_check(struct m
(!strncmp(productid, "VIGIL SMP", 9)
|| !strncmp(productid, "EXA", 3)
|| !strncmp(productid, "RUTHLESS SMP", 12))){
+ mark_tsc_unstable();
use_cyclone = 1; /*enable cyclone-timer*/
setup_summit();
usb_early_handoff = 1;
@@ -44,6 +45,7 @@ static inline int acpi_madt_oem_check(ch
if (!strncmp(oem_id, "IBM", 3) &&
(!strncmp(oem_table_id, "SERVIGIL", 8)
|| !strncmp(oem_table_id, "EXA", 3))){
+ mark_tsc_unstable();
use_cyclone = 1; /*enable cyclone-timer*/
setup_summit();
usb_early_handoff = 1;
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h
@@ -41,4 +41,10 @@ static inline cycles_t get_cycles (void)
}

extern unsigned int cpu_khz;
+extern unsigned int tsc_khz;
+extern void tsc_init(void);
+void tsc_c3_compensate(unsigned long usecs);
+u64 tsc_read_c3_time(void);
+extern int check_tsc_unstable(void);
+extern void mark_tsc_unstable(void);
#endif


2005-09-30 00:50:45

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 8/11] generic timeofday i386 arch specific changes, part 4

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the fourth of six, renames some ACPI PM
variables.

It applies on top of my timeofday-arch-i386-part3 patch. This patch is
part the timeofday-arch-i386 patchset, so without the following parts it
is not expected to compile.

thanks
-john

boot.c | 14 ++++++++------
1 files changed, 8 insertions(+), 6 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part4_B6.patch
============================================
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -612,7 +612,8 @@ static int __init acpi_parse_hpet(unsign
#endif

#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
+u32 acpi_pmtmr_ioport;
+int acpi_pmtmr_buggy;
#endif

static int __init acpi_parse_fadt(unsigned long phys, unsigned long size)
@@ -640,14 +641,15 @@ static int __init acpi_parse_fadt(unsign
ACPI_ADR_SPACE_SYSTEM_IO)
return 0;

- pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ acpi_pmtmr_ioport = fadt->xpm_tmr_blk.address;
} else {
/* FADT rev. 1 */
- pmtmr_ioport = fadt->V1_pm_tmr_blk;
+ acpi_pmtmr_ioport = fadt->V1_pm_tmr_blk;
}
- if (pmtmr_ioport)
- printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n",
- pmtmr_ioport);
+
+ if (acpi_pmtmr_ioport)
+ printk(KERN_INFO PREFIX "PM-Timer IO Port: %#x\n", acpi_pmtmr_ioport);
+
#endif
return 0;
}


2005-09-30 00:51:28

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 9/11] generic timeofday i386 arch specific changes, part 5

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the fifth of six, converts the i386 arch
to use the generic timeofday subsystem.

It applies on top of my timeofday-arch-i386-part4 patch. This patch is
the last in the the timeofday-arch-i386 patchset, so you should be able
to build and boot a kernel after it has been applied.

Note that this patch does not provide any i386 timesources, so you will
only have the jiffies timesource. To get full replacements for the code
being removed here, the following timeofday-timesources-i386 patch will
need to be applied.

thanks
-john

arch/i386/Kconfig | 4
arch/i386/kernel/Makefile | 1
arch/i386/kernel/time.c | 219 ++++++-------------------------------------
arch/i386/lib/delay.c | 16 ++-
include/asm-i386/timeofday.h | 4
include/asm-i386/timer.h | 57 -----------
6 files changed, 55 insertions(+), 246 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part5_B6.patch
============================================
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -14,6 +14,10 @@ config X86
486, 586, Pentiums, and various instruction-set-compatible chips by
AMD, Cyrix, and others.

+config GENERICTOD
+ bool
+ default y
+
config SEMAPHORE_SLEEPERS
bool
default y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -10,7 +10,6 @@ obj-y := process.o semaphore.o signal.o
doublefault.o quirks.o i8237.o tsc.o

obj-y += cpu/
-obj-y += timers/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o
obj-$(CONFIG_MCA) += mca.o
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -56,6 +56,7 @@
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/timer.h>
+#include <asm/timeofday.h>

#include "mach_time.h"

@@ -91,8 +92,6 @@ EXPORT_SYMBOL(rtc_lock);
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);

-struct timer_opts *cur_timer __read_mostly = &timer_none;
-
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -122,99 +121,19 @@ void rtc_cmos_write(unsigned char val, u
}
EXPORT_SYMBOL(rtc_cmos_write);

-/*
- * This version of gettimeofday has microsecond resolution
- * and better than microsecond precision on fast x86 machines with TSC.
- */
-void do_gettimeofday(struct timeval *tv)
-{
- unsigned long seq;
- unsigned long usec, sec;
- unsigned long max_ntp_tick;
-
- do {
- unsigned long lost;
-
- seq = read_seqbegin(&xtime_lock);
-
- usec = cur_timer->get_offset();
- lost = jiffies - wall_jiffies;
-
- /*
- * If time_adjust is negative then NTP is slowing the clock
- * so make sure not to go into next possible interval.
- * Better to lose some accuracy than have time go backwards..
- */
- if (unlikely(time_adjust < 0)) {
- max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
- usec = min(usec, max_ntp_tick);
-
- if (lost)
- usec += lost * max_ntp_tick;
- }
- else if (unlikely(lost))
- usec += lost * (USEC_PER_SEC / HZ);
-
- sec = xtime.tv_sec;
- usec += (xtime.tv_nsec / 1000);
- } while (read_seqretry(&xtime_lock, seq));
-
- while (usec >= 1000000) {
- usec -= 1000000;
- sec++;
- }
-
- tv->tv_sec = sec;
- tv->tv_usec = usec;
-}
-
-EXPORT_SYMBOL(do_gettimeofday);
-
-int do_settimeofday(struct timespec *tv)
-{
- time_t wtm_sec, sec = tv->tv_sec;
- long wtm_nsec, nsec = tv->tv_nsec;
-
- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
- return -EINVAL;
-
- write_seqlock_irq(&xtime_lock);
- /*
- * This is revolting. We need to set "xtime" correctly. However, the
- * value in this location is the value at the most recent update of
- * wall time. Discover what correction gettimeofday() would have
- * made, and then undo it!
- */
- nsec -= cur_timer->get_offset() * NSEC_PER_USEC;
- nsec -= (jiffies - wall_jiffies) * TICK_NSEC;
-
- wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
- wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
-
- set_normalized_timespec(&xtime, sec, nsec);
- set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
-
- ntp_clear();
- write_sequnlock_irq(&xtime_lock);
- clock_was_set();
- return 0;
-}
-
-EXPORT_SYMBOL(do_settimeofday);
-
static int set_rtc_mmss(unsigned long nowtime)
{
int retval;
-
- WARN_ON(irqs_disabled());
+ unsigned long flags;

/* gets recalled with irq locally disabled */
- spin_lock_irq(&rtc_lock);
+ /* XXX - does irqsave resolve this? -johnstul */
+ spin_lock_irqsave(&rtc_lock, flags);
if (efi_enabled)
retval = efi_set_rtc_mmss(nowtime);
else
retval = mach_set_rtc_mmss(nowtime);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);

return retval;
}
@@ -222,16 +141,6 @@ static int set_rtc_mmss(unsigned long no

int timer_ack;

-/* monotonic_clock(): returns # of nanoseconds passed since time_init()
- * Note: This function is required to return accurate
- * time even in the absence of multiple timer ticks.
- */
-unsigned long long monotonic_clock(void)
-{
- return cur_timer->monotonic_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -246,11 +155,21 @@ EXPORT_SYMBOL(profile_pc);
#endif

/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
*/
-static inline void do_timer_interrupt(int irq, struct pt_regs *regs)
+irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
+ /*
+ * Here we are in the timer irq handler. We just have irqs locally
+ * disabled but we don't know if the timer_bh is running on the other
+ * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
+ * the irq version of write_lock because as just said we have irq
+ * locally disabled. -arca
+ */
+ write_seqlock(&xtime_lock);
+
#ifdef CONFIG_X86_IO_APIC
if (timer_ack) {
/*
@@ -283,27 +202,6 @@ static inline void do_timer_interrupt(in
irq = inb_p( 0x61 ); /* read the current state */
outb_p( irq|0x80, 0x61 ); /* reset the IRQ */
}
-}
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
-
- cur_timer->mark_offset();
-
- do_timer_interrupt(irq, regs);

write_sequnlock(&xtime_lock);
return IRQ_HANDLED;
@@ -327,58 +225,37 @@ unsigned long get_cmos_time(void)
}
EXPORT_SYMBOL(get_cmos_time);

-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
+/* arch specific timeofday hooks */
+nsec_t read_persistent_clock(void)
{
- struct timeval now, next;
- int fail = 1;
+ return (nsec_t)get_cmos_time() * NSEC_PER_SEC;
+}

+void sync_persistent_clock(struct timespec ts)
+{
+ static unsigned long last_rtc_update;
/*
* If we have an externally synchronized Linux clock, then update
* CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
* called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
*/
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
+ if (ts.tv_sec <= last_rtc_update + 660)
return;

- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
+ if((ts.tv_nsec / 1000) >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
+ (ts.tv_nsec / 1000) <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
+ /* horrible...FIXME */
+ if (set_rtc_mmss(ts.tv_sec) == 0)
+ last_rtc_update = ts.tv_sec;
+ else
+ last_rtc_update = ts.tv_sec - 600; /* do it again in 60 s */
}
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
}

-void notify_arch_cmos_timer(void)
-{
- mod_timer(&sync_cmos_timer, jiffies + 1);
-}
+

static long clock_cmos_diff, sleep_start;

-static struct timer_opts *last_timer;
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
/*
@@ -387,16 +264,11 @@ static int timer_suspend(struct sys_devi
clock_cmos_diff = -get_cmos_time();
clock_cmos_diff += get_seconds();
sleep_start = get_cmos_time();
- last_timer = cur_timer;
- cur_timer = &timer_none;
- if (last_timer->suspend)
- last_timer->suspend(state);
return 0;
}

static int timer_resume(struct sys_device *dev)
{
- unsigned long flags;
unsigned long sec;
unsigned long sleep_length;

@@ -406,16 +278,8 @@ static int timer_resume(struct sys_devic
#endif
sec = get_cmos_time() + clock_cmos_diff;
sleep_length = (get_cmos_time() - sleep_start) * HZ;
- write_seqlock_irqsave(&xtime_lock, flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- write_sequnlock_irqrestore(&xtime_lock, flags);
jiffies += sleep_length;
wall_jiffies += sleep_length;
- if (last_timer->resume)
- last_timer->resume();
- cur_timer = last_timer;
- last_timer = NULL;
touch_softlockup_watchdog();
return 0;
}
@@ -448,17 +312,10 @@ extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
static void __init hpet_time_init(void)
{
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
if ((hpet_enable() >= 0) && hpet_use_timer) {
printk("Using HPET for base-timer\n");
}

- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);

time_init_hook();
}
@@ -476,13 +333,5 @@ void __init time_init(void)
return;
}
#endif
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
-
- cur_timer = select_timer();
- printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name);
-
time_init_hook();
}
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -13,6 +13,7 @@
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/delay.h>
+#include <linux/timeofday.h>
#include <linux/module.h>
#include <asm/processor.h>
#include <asm/delay.h>
@@ -22,11 +23,20 @@
#include <asm/smp.h>
#endif

-extern struct timer_opts* timer;
-
+/* XXX - For now just use a simple loop delay
+ * This has cpufreq issues, but so did the old method.
+ */
void __delay(unsigned long loops)
{
- cur_timer->delay(loops);
+ int d0;
+ __asm__ __volatile__(
+ "\tjmp 1f\n"
+ ".align 16\n"
+ "1:\tjmp 2f\n"
+ ".align 16\n"
+ "2:\tdecl %0\n\tjns 2b"
+ :"=&a" (d0)
+ :"0" (loops));
}

inline void __const_udelay(unsigned long xloops)
diff --git a/include/asm-i386/timeofday.h b/include/asm-i386/timeofday.h
new file mode 100644
--- /dev/null
+++ b/include/asm-i386/timeofday.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_I386_TIMEOFDAY_H
+#define _ASM_I386_TIMEOFDAY_H
+#include <asm-generic/timeofday.h>
+#endif
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h
--- a/include/asm-i386/timer.h
+++ b/include/asm-i386/timer.h
@@ -3,68 +3,11 @@
#include <linux/init.h>
#include <linux/pm.h>

-/**
- * struct timer_ops - used to define a timer source
- *
- * @name: name of the timer.
- * @init: Probes and initializes the timer. Takes clock= override
- * string as an argument. Returns 0 on success, anything else
- * on failure.
- * @mark_offset: called by the timer interrupt.
- * @get_offset: called by gettimeofday(). Returns the number of microseconds
- * since the last timer interupt.
- * @monotonic_clock: returns the number of nanoseconds since the init of the
- * timer.
- * @delay: delays this many clock cycles.
- */
-struct timer_opts {
- char* name;
- void (*mark_offset)(void);
- unsigned long (*get_offset)(void);
- unsigned long long (*monotonic_clock)(void);
- void (*delay)(unsigned long);
- unsigned long (*read_timer)(void);
- int (*suspend)(pm_message_t state);
- int (*resume)(void);
-};
-
-struct init_timer_opts {
- int (*init)(char *override);
- struct timer_opts *opts;
-};
-
#define TICK_SIZE (tick_nsec / 1000)
-
-extern struct timer_opts* __init select_timer(void);
-extern void clock_fallback(void);
void setup_pit_timer(void);
-
/* Modifiers for buggy PIT handling */
-
extern int pit_latch_buggy;
-
-extern struct timer_opts *cur_timer;
extern int timer_ack;
-
-/* list of externed timers */
-extern struct timer_opts timer_none;
-extern struct timer_opts timer_pit;
-extern struct init_timer_opts timer_pit_init;
-extern struct init_timer_opts timer_tsc_init;
-#ifdef CONFIG_X86_CYCLONE_TIMER
-extern struct init_timer_opts timer_cyclone_init;
-#endif
-
-extern unsigned long calibrate_tsc(void);
-extern unsigned long read_timer_tsc(void);
-extern void init_cpu_khz(void);
extern int recalibrate_cpu_khz(void);
-#ifdef CONFIG_HPET_TIMER
-extern struct init_timer_opts timer_hpet_init;
-extern unsigned long calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr);
-#endif

-#ifdef CONFIG_X86_PM_TIMER
-extern struct init_timer_opts timer_pmtmr_init;
-#endif
#endif


2005-09-30 00:52:11

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 10/11] generic timeofday i386 arch specific changes, part 6

All,
The conversion of i386 to use the generic timeofday subsystem has been
split into 6 parts. This patch, the final of four, removes the old
timers/timer_opts infrastructure.

It applies on top of my timeofday-arch-i386-part5 patch. This patch is
the last in the the timeofday-arch-i386 patchset, so you should be able
to build and boot a kernel after it has been applied.

Note that this patch does not provide any i386 timesources, so you will
only have the jiffies timesource. To get full replacements for the code
being removed here, the following timeofday-timesources-i386 patch will
need to be applied.

thanks
-john

Makefile | 9 -
common.c | 88 ------------
timer.c | 75 ----------
timer_cyclone.c | 259 -------------------------------------
timer_hpet.c | 212 ------------------------------
timer_none.c | 39 -----
timer_pit.c | 163 -----------------------
timer_pm.c | 268 --------------------------------------
timer_tsc.c | 388 --------------------------------------------------------
9 files changed, 1501 deletions(-)

linux-2.6.14-rc2_timeofday-arch-i386-part6_B6.patch
============================================
diff --git a/arch/i386/kernel/timers/Makefile b/arch/i386/kernel/timers/Makefile
deleted file mode 100644
--- a/arch/i386/kernel/timers/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for x86 timers
-#
-
-obj-y := timer.o timer_none.o timer_tsc.o timer_pit.o common.o
-
-obj-$(CONFIG_X86_CYCLONE_TIMER) += timer_cyclone.o
-obj-$(CONFIG_HPET_TIMER) += timer_hpet.o
-obj-$(CONFIG_X86_PM_TIMER) += timer_pm.o
diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/common.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Common functions used across the timers go here
- */
-
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-#include <asm/timer.h>
-#include <asm/hpet.h>
-
-#include "mach_timer.h"
-
-#ifdef CONFIG_HPET_TIMER
-/* ------ Calibrate the TSC using HPET -------
- * Return 2^32 * (1 / (TSC clocks per usec)) for getting the CPU freq.
- * Second output is parameter 1 (when non NULL)
- * Set 2^32 * (1 / (tsc per HPET clk)) for delay_hpet().
- * calibrate_tsc() calibrates the processor TSC by comparing
- * it to the HPET timer of known frequency.
- * Too much 64-bit arithmetic here to do this cleanly in C
- */
-#define CALIBRATE_CNT_HPET (5 * hpet_tick)
-#define CALIBRATE_TIME_HPET (5 * KERNEL_TICK_USEC)
-
-unsigned long __devinit calibrate_tsc_hpet(unsigned long *tsc_hpet_quotient_ptr)
-{
- unsigned long tsc_startlow, tsc_starthigh;
- unsigned long tsc_endlow, tsc_endhigh;
- unsigned long hpet_start, hpet_end;
- unsigned long result, remain;
-
- hpet_start = hpet_readl(HPET_COUNTER);
- rdtsc(tsc_startlow, tsc_starthigh);
- do {
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < CALIBRATE_CNT_HPET);
- rdtsc(tsc_endlow, tsc_endhigh);
-
- /* 64-bit subtract - gcc just messes up with long longs */
- __asm__("subl %2,%0\n\t"
- "sbbl %3,%1"
- :"=a" (tsc_endlow), "=d" (tsc_endhigh)
- :"g" (tsc_startlow), "g" (tsc_starthigh),
- "0" (tsc_endlow), "1" (tsc_endhigh));
-
- /* Error: ECPUTOOFAST */
- if (tsc_endhigh)
- goto bad_calibration;
-
- /* Error: ECPUTOOSLOW */
- if (tsc_endlow <= CALIBRATE_TIME_HPET)
- goto bad_calibration;
-
- ASM_DIV64_REG(result, remain, tsc_endlow, 0, CALIBRATE_TIME_HPET);
- if (remain > (tsc_endlow >> 1))
- result++; /* rounding the result */
-
- if (tsc_hpet_quotient_ptr) {
- unsigned long tsc_hpet_quotient;
-
- ASM_DIV64_REG(tsc_hpet_quotient, remain, tsc_endlow, 0,
- CALIBRATE_CNT_HPET);
- if (remain > (tsc_endlow >> 1))
- tsc_hpet_quotient++; /* rounding the result */
- *tsc_hpet_quotient_ptr = tsc_hpet_quotient;
- }
-
- return result;
-bad_calibration:
- /*
- * the CPU was so fast/slow that the quotient wouldn't fit in
- * 32 bits..
- */
- return 0;
-}
-#endif
-
-
-unsigned long read_timer_tsc(void)
-{
- unsigned long retval;
- rdtscl(retval);
- return retval;
-}
diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer.c
+++ /dev/null
@@ -1,75 +0,0 @@
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <asm/timer.h>
-
-#ifdef CONFIG_HPET_TIMER
-/*
- * HPET memory read is slower than tsc reads, but is more dependable as it
- * always runs at constant frequency and reduces complexity due to
- * cpufreq. So, we prefer HPET timer to tsc based one. Also, we cannot use
- * timer_pit when HPET is active. So, we default to timer_tsc.
- */
-#endif
-/* list of timers, ordered by preference, NULL terminated */
-static struct init_timer_opts* __initdata timers[] = {
-#ifdef CONFIG_X86_CYCLONE_TIMER
- &timer_cyclone_init,
-#endif
-#ifdef CONFIG_HPET_TIMER
- &timer_hpet_init,
-#endif
-#ifdef CONFIG_X86_PM_TIMER
- &timer_pmtmr_init,
-#endif
- &timer_tsc_init,
- &timer_pit_init,
- NULL,
-};
-
-static char clock_override[10] __initdata;
-
-static int __init clock_setup(char* str)
-{
- if (str)
- strlcpy(clock_override, str, sizeof(clock_override));
- return 1;
-}
-__setup("clock=", clock_setup);
-
-
-/* The chosen timesource has been found to be bad.
- * Fall back to a known good timesource (the PIT)
- */
-void clock_fallback(void)
-{
- cur_timer = &timer_pit;
-}
-
-/* iterates through the list of timers, returning the first
- * one that initializes successfully.
- */
-struct timer_opts* __init select_timer(void)
-{
- int i = 0;
-
- /* find most preferred working timer */
- while (timers[i]) {
- if (timers[i]->init)
- if (timers[i]->init(clock_override) == 0)
- return timers[i]->opts;
- ++i;
- }
-
- panic("select_timer: Cannot find a suitable timer\n");
- return NULL;
-}
-
-int read_current_timer(unsigned long *timer_val)
-{
- if (cur_timer->read_timer) {
- *timer_val = cur_timer->read_timer();
- return 0;
- }
- return -1;
-}
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/* Cyclone-timer:
- * This code implements timer_ops for the cyclone counter found
- * on IBM x440, x360, and other Summit based systems.
- *
- * Copyright (C) 2002 IBM, John Stultz ([email protected])
- */
-
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-#define CYCLONE_CBAR_ADDR 0xFEB00CD0
-#define CYCLONE_PMCC_OFFSET 0x51A0
-#define CYCLONE_MPMC_OFFSET 0x51D0
-#define CYCLONE_MPCS_OFFSET 0x51A8
-#define CYCLONE_TIMER_FREQ 100000000
-#define CYCLONE_TIMER_MASK (((u64)1<<40)-1) /* 40 bit mask */
-int use_cyclone = 0;
-
-static u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
-static u32 last_cyclone_low;
-static u32 last_cyclone_high;
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* helper macro to atomically read both cyclone counter registers */
-#define read_cyclone_counter(low,high) \
- do{ \
- high = cyclone_timer[1]; low = cyclone_timer[0]; \
- } while (high != cyclone_timer[1]);
-
-
-static void mark_offset_cyclone(void)
-{
- unsigned long lost, delay;
- unsigned long delta = last_cyclone_low;
- int count;
- unsigned long long this_offset, last_offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
-
- spin_lock(&i8253_lock);
- read_cyclone_counter(last_cyclone_low,last_cyclone_high);
-
- /* read values for delay_at_last_interrupt */
- outb_p(0x00, 0x43); /* latch the count ASAP */
-
- count = inb_p(0x40); /* read the latched count */
- count |= inb(0x40) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
- spin_unlock(&i8253_lock);
-
- /* lost tick compensation */
- delta = last_cyclone_low - delta;
- delta /= (CYCLONE_TIMER_FREQ/1000000);
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2)
- jiffies_64 += lost-1;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- monotonic_base += (this_offset - last_offset) & CYCLONE_TIMER_MASK;
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
-
- /* catch corner case where tick rollover occured
- * between cyclone and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static unsigned long get_offset_cyclone(void)
-{
- u32 offset;
-
- if(!cyclone_timer)
- return delay_at_last_interrupt;
-
- /* Read the cyclone timer */
- offset = cyclone_timer[0];
-
- /* .. relative to previous jiffy */
- offset = offset - last_cyclone_low;
-
- /* convert cyclone ticks to microseconds */
- /* XXX slow, can we speed this up? */
- offset = offset/(CYCLONE_TIMER_FREQ/1000000);
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + offset;
-}
-
-static unsigned long long monotonic_clock_cyclone(void)
-{
- u32 now_low, now_high;
- unsigned long long last_offset, this_offset, base;
- unsigned long long ret;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_cyclone_high<<32)|last_cyclone_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
-
- /* Read the cyclone counter */
- read_cyclone_counter(now_low,now_high);
- this_offset = ((unsigned long long)now_high<<32)|now_low;
-
- /* convert to nanoseconds */
- ret = base + ((this_offset - last_offset)&CYCLONE_TIMER_MASK);
- return ret * (1000000000 / CYCLONE_TIMER_FREQ);
-}
-
-static int __init init_cyclone(char* override)
-{
- u32* reg;
- u32 base; /* saved cyclone base address */
- u32 pageaddr; /* page that contains cyclone_timer register */
- u32 offset; /* offset from pageaddr to cyclone_timer register */
- int i;
-
- /* check clock override */
- if (override[0] && strncmp(override,"cyclone",7))
- return -ENODEV;
-
- /*make sure we're on a summit box*/
- if(!use_cyclone) return -ENODEV;
-
- printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
-
- /* find base address */
- pageaddr = (CYCLONE_CBAR_ADDR)&PAGE_MASK;
- offset = (CYCLONE_CBAR_ADDR)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
- return -ENODEV;
- }
- base = *reg;
- if(!base){
- printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
- return -ENODEV;
- }
-
- /* setup PMCC */
- pageaddr = (base + CYCLONE_PMCC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_PMCC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* setup MPCS */
- pageaddr = (base + CYCLONE_MPCS_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPCS_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- reg = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!reg){
- printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
- return -ENODEV;
- }
- reg[0] = 0x00000001;
-
- /* map in cyclone_timer */
- pageaddr = (base + CYCLONE_MPMC_OFFSET)&PAGE_MASK;
- offset = (base + CYCLONE_MPMC_OFFSET)&(~PAGE_MASK);
- set_fixmap_nocache(FIX_CYCLONE_TIMER, pageaddr);
- cyclone_timer = (u32*)(fix_to_virt(FIX_CYCLONE_TIMER) + offset);
- if(!cyclone_timer){
- printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
- return -ENODEV;
- }
-
- /*quick test to make sure its ticking*/
- for(i=0; i<3; i++){
- u32 old = cyclone_timer[0];
- int stall = 100;
- while(stall--) barrier();
- if(cyclone_timer[0] == old){
- printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
- cyclone_timer = 0;
- return -ENODEV;
- }
- }
-
- init_cpu_khz();
-
- /* Everything looks good! */
- return 0;
-}
-
-
-static void delay_cyclone(unsigned long loops)
-{
- unsigned long bclock, now;
- if(!cyclone_timer)
- return;
- bclock = cyclone_timer[0];
- do {
- rep_nop();
- now = cyclone_timer[0];
- } while ((now-bclock) < loops);
-}
-/************************************************************/
-
-/* cyclone timer_opts struct */
-static struct timer_opts timer_cyclone = {
- .name = "cyclone",
- .mark_offset = mark_offset_cyclone,
- .get_offset = get_offset_cyclone,
- .monotonic_clock = monotonic_clock_cyclone,
- .delay = delay_cyclone,
-};
-
-struct init_timer_opts __initdata timer_cyclone_init = {
- .init = init_cyclone,
- .opts = &timer_cyclone,
-};
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-#include <asm/hpet.h>
-
-static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */
-static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */
-static unsigned long hpet_last; /* hpet counter value at last tick*/
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-/* convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- * ns = cycles / (freq / ns_per_sec)
- * ns = cycles * (ns_per_sec / freq)
- * ns = cycles * (10^9 / (cpu_mhz * 10^6))
- * ns = cycles * (10^3 / cpu_mhz)
- *
- * Then we use scaling math (suggested by [email protected]) to get:
- * ns = cycles * (10^3 * SC / cpu_mhz) / SC
- * ns = cycles * cyc2ns_scale / SC
- *
- * And since SC is a constant power of two, we can convert the div
- * into a shift.
- * [email protected] "math is hard, lets go shopping!"
- */
-static unsigned long cyc2ns_scale;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
-{
- cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
-static unsigned long long monotonic_clock_hpet(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-static unsigned long get_offset_hpet(void)
-{
- register unsigned long eax, edx;
-
- eax = hpet_readl(HPET_COUNTER);
- eax -= hpet_last; /* hpet delta */
- eax = min(hpet_tick, eax);
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- *
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- *
- * Using a mull instead of a divl saves some cycles in critical path.
- */
- ASM_MUL64_REG(eax, edx, hpet_usec_quotient, eax);
-
- /* our adjusted time offset in microseconds */
- return edx;
-}
-
-static void mark_offset_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- offset = hpet_readl(HPET_COUNTER);
- if (unlikely(((offset - hpet_last) >= (2*hpet_tick)) && (hpet_last != 0))) {
- int lost_ticks = ((offset - hpet_last) / hpet_tick) - 1;
- jiffies_64 += lost_ticks;
- }
- hpet_last = offset;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-}
-
-static void delay_hpet(unsigned long loops)
-{
- unsigned long hpet_start, hpet_end;
- unsigned long eax;
-
- /* loops is the number of cpu cycles. Convert it to hpet clocks */
- ASM_MUL64_REG(eax, loops, tsc_hpet_quotient, loops);
-
- hpet_start = hpet_readl(HPET_COUNTER);
- do {
- rep_nop();
- hpet_end = hpet_readl(HPET_COUNTER);
- } while ((hpet_end - hpet_start) < (loops));
-}
-
-static struct timer_opts timer_hpet;
-
-static int __init init_hpet(char* override)
-{
- unsigned long result, remain;
-
- /* check clock override */
- if (override[0] && strncmp(override,"hpet",4))
- return -ENODEV;
-
- if (!is_hpet_enabled())
- return -ENODEV;
-
- printk("Using HPET for gettimeofday\n");
- if (cpu_has_tsc) {
- unsigned long tsc_quotient = calibrate_tsc_hpet(&tsc_hpet_quotient);
- if (tsc_quotient) {
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- ASM_DIV64_REG(cpu_khz, edx, tsc_quotient,
- eax, edx);
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz/1000);
- }
- /* set this only when cpu_has_tsc */
- timer_hpet.read_timer = read_timer_tsc;
- }
-
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick, 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
- hpet_usec_quotient = result;
-
- return 0;
-}
-
-static int hpet_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- rdtsc(last_tsc_low, last_tsc_high);
-
- if (hpet_use_timer)
- hpet_last = hpet_readl(HPET_T0_CMP) - hpet_tick;
- else
- hpet_last = hpet_readl(HPET_COUNTER);
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_hpet __read_mostly = {
- .name = "hpet",
- .mark_offset = mark_offset_hpet,
- .get_offset = get_offset_hpet,
- .monotonic_clock = monotonic_clock_hpet,
- .delay = delay_hpet,
- .resume = hpet_resume,
-};
-
-struct init_timer_opts __initdata timer_hpet_init = {
- .init = init_hpet,
- .opts = &timer_hpet,
-};
diff --git a/arch/i386/kernel/timers/timer_none.c b/arch/i386/kernel/timers/timer_none.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_none.c
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <linux/init.h>
-#include <asm/timer.h>
-
-static void mark_offset_none(void)
-{
- /* nothing needed */
-}
-
-static unsigned long get_offset_none(void)
-{
- return 0;
-}
-
-static unsigned long long monotonic_clock_none(void)
-{
- return 0;
-}
-
-static void delay_none(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-/* none timer_opts struct */
-struct timer_opts timer_none = {
- .name = "none",
- .mark_offset = mark_offset_none,
- .get_offset = get_offset_none,
- .monotonic_clock = monotonic_clock_none,
- .delay = delay_none,
-};
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/sysdev.h>
-#include <linux/timex.h>
-#include <asm/delay.h>
-#include <asm/mpspec.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-#include <asm/i8253.h>
-
-#include "do_timer.h"
-#include "io_ports.h"
-
-static int count_p; /* counter in get_offset_pit() */
-
-static int __init init_pit(char* override)
-{
- /* check clock override */
- if (override[0] && strncmp(override,"pit",3))
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n");
-
- count_p = LATCH;
- return 0;
-}
-
-static void mark_offset_pit(void)
-{
- /* nothing needed */
-}
-
-static unsigned long long monotonic_clock_pit(void)
-{
- return 0;
-}
-
-static void delay_pit(unsigned long loops)
-{
- int d0;
- __asm__ __volatile__(
- "\tjmp 1f\n"
- ".align 16\n"
- "1:\tjmp 2f\n"
- ".align 16\n"
- "2:\tdecl %0\n\tjns 2b"
- :"=&a" (d0)
- :"0" (loops));
-}
-
-
-/* This function must be called with xtime_lock held.
- * It was inspired by Steve McCanne's microtime-i386 for BSD. -- jrs
- *
- * However, the pc-audio speaker driver changes the divisor so that
- * it gets interrupted rather more often - it loads 64 into the
- * counter rather than 11932! This has an adverse impact on
- * do_gettimeoffset() -- it stops working! What is also not
- * good is that the interval that our timer function gets called
- * is no longer 10.0002 ms, but 9.9767 ms. To get around this
- * would require using a different timing source. Maybe someone
- * could use the RTC - I know that this can interrupt at frequencies
- * ranging from 8192Hz to 2Hz. If I had the energy, I'd somehow fix
- * it so that at startup, the timer code in sched.c would select
- * using either the RTC or the 8253 timer. The decision would be
- * based on whether there was any other device around that needed
- * to trample on the 8253. I'd set up the RTC to interrupt at 1024 Hz,
- * and then do some jiggery to have a version of do_timer that
- * advanced the clock by 1/1024 s. Every time that reached over 1/100
- * of a second, then do all the old code. If the time was kept correct
- * then do_gettimeoffset could just return 0 - there is no low order
- * divider that can be accessed.
- *
- * Ideally, you would be able to use the RTC for the speaker driver,
- * but it appears that the speaker driver really needs interrupt more
- * often than every 120 us or so.
- *
- * Anyway, this needs more thought.... pjsg (1993-08-28)
- *
- * If you are really that interested, you should be reading
- * comp.protocols.time.ntp!
- */
-
-static unsigned long get_offset_pit(void)
-{
- int count;
- unsigned long flags;
- static unsigned long jiffies_p = 0;
-
- /*
- * cache volatile jiffies temporarily; we have xtime_lock.
- */
- unsigned long jiffies_t;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* timer count may underflow right here */
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
-
- /*
- * We do this guaranteed double memory access instead of a _p
- * postfix in the previous port access. Wheee, hackady hack
- */
- jiffies_t = jiffies;
-
- count |= inb_p(PIT_CH0) << 8;
-
- /* VIA686a test code... reset the latch if count > max + 1 */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- /*
- * avoiding timer inconsistencies (they are rare, but they happen)...
- * there are two kinds of problems that must be avoided here:
- * 1. the timer counter underflows
- * 2. hardware problem with the timer, not giving us continuous time,
- * the counter does small "jumps" upwards on some Pentium systems,
- * (see c't 95/10 page 335 for Neptun bug.)
- */
-
- if( jiffies_t == jiffies_p ) {
- if( count > count_p ) {
- /* the nutcase */
- count = do_timer_overflow(count);
- }
- } else
- jiffies_p = jiffies_t;
-
- count_p = count;
-
- spin_unlock_irqrestore(&i8253_lock, flags);
-
- count = ((LATCH-1) - count) * TICK_SIZE;
- count = (count + LATCH/2) / LATCH;
-
- return count;
-}
-
-
-/* tsc timer_opts struct */
-struct timer_opts timer_pit = {
- .name = "pit",
- .mark_offset = mark_offset_pit,
- .get_offset = get_offset_pit,
- .monotonic_clock = monotonic_clock_pit,
- .delay = delay_pit,
-};
-
-struct init_timer_opts __initdata timer_pit_init = {
- .init = init_pit,
- .opts = &timer_pit,
-};
diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_pm.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * (C) Dominik Brodowski <[email protected]> 2003
- *
- * Driver to use the Power Management Timer (PMTMR) available in some
- * southbridges as primary timing source for the Linux kernel.
- *
- * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
- * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
- *
- * This file is licensed under the GPL v2.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <asm/types.h>
-#include <asm/timer.h>
-#include <asm/smp.h>
-#include <asm/io.h>
-#include <asm/arch_hooks.h>
-
-#include <linux/timex.h>
-#include "mach_timer.h"
-
-/* Number of PMTMR ticks expected during calibration run */
-#define PMTMR_TICKS_PER_SEC 3579545
-#define PMTMR_EXPECTED_RATE \
- ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10))
-
-
-/* The I/O port the PMTMR resides at.
- * The location is detected during setup_arch(),
- * in arch/i386/acpi/boot.c */
-u32 pmtmr_ioport = 0;
-
-
-/* value of the Power timer at last timer interrupt */
-static u32 offset_tick;
-static u32 offset_delay;
-
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
-
-/*helper function to safely read acpi pm timesource*/
-static inline u32 read_pmtmr(void)
-{
- u32 v1=0,v2=0,v3=0;
- /* It has been reported that because of various broken
- * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
- * source is not latched, so you must read it multiple
- * times to insure a safe value is read.
- */
- do {
- v1 = inl(pmtmr_ioport);
- v2 = inl(pmtmr_ioport);
- v3 = inl(pmtmr_ioport);
- } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
- || (v3 > v1 && v3 < v2));
-
- /* mask the output to 24 bits */
- return v2 & ACPI_PM_MASK;
-}
-
-
-/*
- * Some boards have the PMTMR running way too fast. We check
- * the PMTMR rate against PIT channel 2 to catch these cases.
- */
-static int verify_pmtmr_rate(void)
-{
- u32 value1, value2;
- unsigned long count, delta;
-
- mach_prepare_counter();
- value1 = read_pmtmr();
- mach_countup(&count);
- value2 = read_pmtmr();
- delta = (value2 - value1) & ACPI_PM_MASK;
-
- /* Check that the PMTMR delta is within 5% of what we expect */
- if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
- delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
- printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
- return -1;
- }
-
- return 0;
-}
-
-
-static int init_pmtmr(char* override)
-{
- u32 value1, value2;
- unsigned int i;
-
- if (override[0] && strncmp(override,"pmtmr",5))
- return -ENODEV;
-
- if (!pmtmr_ioport)
- return -ENODEV;
-
- /* we use the TSC for delay_pmtmr, so make sure it exists */
- if (!cpu_has_tsc)
- return -ENODEV;
-
- /* "verify" this timing source */
- value1 = read_pmtmr();
- for (i = 0; i < 10000; i++) {
- value2 = read_pmtmr();
- if (value2 == value1)
- continue;
- if (value2 > value1)
- goto pm_good;
- if ((value2 < value1) && ((value2) < 0xFFF))
- goto pm_good;
- printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
- return -EINVAL;
- }
- printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
- return -ENODEV;
-
-pm_good:
- if (verify_pmtmr_rate() != 0)
- return -ENODEV;
-
- init_cpu_khz();
- return 0;
-}
-
-static inline u32 cyc2us(u32 cycles)
-{
- /* The Power Management Timer ticks at 3.579545 ticks per microsecond.
- * 1 / PM_TIMER_FREQUENCY == 0.27936511 =~ 286/1024 [error: 0.024%]
- *
- * Even with HZ = 100, delta is at maximum 35796 ticks, so it can
- * easily be multiplied with 286 (=0x11E) without having to fear
- * u32 overflows.
- */
- cycles *= 286;
- return (cycles >> 10);
-}
-
-/*
- * this gets called during each timer interrupt
- * - Called while holding the writer xtime_lock
- */
-static void mark_offset_pmtmr(void)
-{
- u32 lost, delta, last_offset;
- static int first_run = 1;
- last_offset = offset_tick;
-
- write_seqlock(&monotonic_lock);
-
- offset_tick = read_pmtmr();
-
- /* calculate tick interval */
- delta = (offset_tick - last_offset) & ACPI_PM_MASK;
-
- /* convert to usecs */
- delta = cyc2us(delta);
-
- /* update the monotonic base value */
- monotonic_base += delta * NSEC_PER_USEC;
- write_sequnlock(&monotonic_lock);
-
- /* convert to ticks */
- delta += offset_delay;
- lost = delta / (USEC_PER_SEC / HZ);
- offset_delay = delta % (USEC_PER_SEC / HZ);
-
-
- /* compensate for lost ticks */
- if (lost >= 2)
- jiffies_64 += lost - 1;
-
- /* don't calculate delay for first run,
- or if we've got less then a tick */
- if (first_run || (lost < 1)) {
- first_run = 0;
- offset_delay = 0;
- }
-}
-
-static int pmtmr_resume(void)
-{
- write_seqlock(&monotonic_lock);
- /* Assume this is the last mark offset time */
- offset_tick = read_pmtmr();
- write_sequnlock(&monotonic_lock);
- return 0;
-}
-
-static unsigned long long monotonic_clock_pmtmr(void)
-{
- u32 last_offset, this_offset;
- unsigned long long base, ret;
- unsigned seq;
-
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = offset_tick;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the pmtmr */
- this_offset = read_pmtmr();
-
- /* convert to nanoseconds */
- ret = (this_offset - last_offset) & ACPI_PM_MASK;
- ret = base + (cyc2us(ret) * NSEC_PER_USEC);
- return ret;
-}
-
-static void delay_pmtmr(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-
-/*
- * get the offset (in microseconds) from the last call to mark_offset()
- * - Called holding a reader xtime_lock
- */
-static unsigned long get_offset_pmtmr(void)
-{
- u32 now, offset, delta = 0;
-
- offset = offset_tick;
- now = read_pmtmr();
- delta = (now - offset)&ACPI_PM_MASK;
-
- return (unsigned long) offset_delay + cyc2us(delta);
-}
-
-
-/* acpi timer_opts struct */
-static struct timer_opts timer_pmtmr = {
- .name = "pmtmr",
- .mark_offset = mark_offset_pmtmr,
- .get_offset = get_offset_pmtmr,
- .monotonic_clock = monotonic_clock_pmtmr,
- .delay = delay_pmtmr,
- .read_timer = read_timer_tsc,
- .resume = pmtmr_resume,
-};
-
-struct init_timer_opts __initdata timer_pmtmr_init = {
- .init = init_pmtmr,
- .opts = &timer_pmtmr,
-};
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dominik Brodowski <[email protected]>");
-MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86");
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
deleted file mode 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ /dev/null
@@ -1,388 +0,0 @@
-/*
- * This code largely moved from arch/i386/kernel/time.c.
- * See comments there for proper credits.
- *
- * 2004-06-25 Jesper Juhl
- * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
- * failing to inline.
- */
-
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/errno.h>
-#include <linux/cpufreq.h>
-#include <linux/string.h>
-#include <linux/jiffies.h>
-
-#include <asm/timer.h>
-#include <asm/io.h>
-/* processor.h for distable_tsc flag */
-#include <asm/processor.h>
-
-#include "io_ports.h"
-#include "mach_timer.h"
-
-#include <asm/hpet.h>
-#include <asm/i8253.h>
-
-#ifdef CONFIG_HPET_TIMER
-static unsigned long hpet_usec_quotient;
-static unsigned long hpet_last;
-static struct timer_opts timer_tsc;
-#endif
-
-static int use_tsc;
-/* Number of usecs that the last interrupt was delayed */
-static int delay_at_last_interrupt;
-
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
-static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
-static unsigned long long monotonic_base;
-static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
-
-static int count2; /* counter for mark_offset_tsc() */
-
-/* Cached *multiplier* to convert TSC counts to microseconds.
- * (see the equation below).
- * Equal to 2^32 * (1 / (clocks per usec) ).
- * Initialized in time_init.
- */
-static unsigned long fast_gettimeoffset_quotient;
-
-static unsigned long get_offset_tsc(void)
-{
- register unsigned long eax, edx;
-
- /* Read the Time Stamp Counter */
-
- rdtsc(eax,edx);
-
- /* .. relative to previous jiffy (32 bits is enough) */
- eax -= last_tsc_low; /* tsc_low delta */
-
- /*
- * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
- * = (tsc_low delta) * (usecs_per_clock)
- * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
- *
- * Using a mull instead of a divl saves up to 31 clock cycles
- * in the critical path.
- */
-
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
-
- /* our adjusted time offset in microseconds */
- return delay_at_last_interrupt + edx;
-}
-
-static unsigned long long monotonic_clock_tsc(void)
-{
- unsigned long long last_offset, this_offset, base;
- unsigned seq;
-
- /* atomically read monotonic base & last_offset */
- do {
- seq = read_seqbegin(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- base = monotonic_base;
- } while (read_seqretry(&monotonic_lock, seq));
-
- /* Read the Time Stamp Counter */
- rdtscll(this_offset);
-
- /* return the value in ns */
- return base + cycles_2_ns(this_offset - last_offset);
-}
-
-
-static void delay_tsc(unsigned long loops)
-{
- unsigned long bclock, now;
-
- rdtscl(bclock);
- do
- {
- rep_nop();
- rdtscl(now);
- } while ((now-bclock) < loops);
-}
-
-#ifdef CONFIG_HPET_TIMER
-static void mark_offset_tsc_hpet(void)
-{
- unsigned long long this_offset, last_offset;
- unsigned long offset, temp, hpet_current;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
- /* read Pentium cycle counter */
-
- hpet_current = hpet_readl(HPET_COUNTER);
- rdtsc(last_tsc_low, last_tsc_high);
-
- /* lost tick compensation */
- offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
- if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
- int lost_ticks = (offset - hpet_last) / hpet_tick;
- jiffies_64 += lost_ticks;
- }
- hpet_last = hpet_current;
-
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- /*
- * Time offset = (hpet delta) * ( usecs per HPET clock )
- * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
- * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
- * Where,
- * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
- */
- delay_at_last_interrupt = hpet_current - offset;
- ASM_MUL64_REG(temp, delay_at_last_interrupt,
- hpet_usec_quotient, delay_at_last_interrupt);
-}
-#endif
-
-
-
-static void mark_offset_tsc(void)
-{
- unsigned long lost,delay;
- unsigned long delta = last_tsc_low;
- int count;
- int countmp;
- static int count1 = 0;
- unsigned long long this_offset, last_offset;
- static int lost_count = 0;
-
- write_seqlock(&monotonic_lock);
- last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- /*
- * It is important that these two operations happen almost at
- * the same time. We do the RDTSC stuff first, since it's
- * faster. To avoid any inconsistencies, we need interrupts
- * disabled locally.
- */
-
- /*
- * Interrupts are just disabled locally since the timer irq
- * has the SA_INTERRUPT flag set. -arca
- */
-
- /* read Pentium cycle counter */
-
- rdtsc(last_tsc_low, last_tsc_high);
-
- spin_lock(&i8253_lock);
- outb_p(0x00, PIT_MODE); /* latch the count ASAP */
-
- count = inb_p(PIT_CH0); /* read the latched count */
- count |= inb(PIT_CH0) << 8;
-
- /*
- * VIA686a test code... reset the latch if count > max + 1
- * from timer_pit.c - cjb
- */
- if (count > LATCH) {
- outb_p(0x34, PIT_MODE);
- outb_p(LATCH & 0xff, PIT_CH0);
- outb(LATCH >> 8, PIT_CH0);
- count = LATCH - 1;
- }
-
- spin_unlock(&i8253_lock);
-
- if (pit_latch_buggy) {
- /* get center value of last 3 time lutch */
- if ((count2 >= count && count >= count1)
- || (count1 >= count && count >= count2)) {
- count2 = count1; count1 = count;
- } else if ((count1 >= count2 && count2 >= count)
- || (count >= count2 && count2 >= count1)) {
- countmp = count;count = count2;
- count2 = count1;count1 = countmp;
- } else {
- count2 = count1; count1 = count; count = count1;
- }
- }
-
- /* lost tick compensation */
- delta = last_tsc_low - delta;
- {
- register unsigned long eax, edx;
- eax = delta;
- __asm__("mull %2"
- :"=a" (eax), "=d" (edx)
- :"rm" (fast_gettimeoffset_quotient),
- "0" (eax));
- delta = edx;
- }
- delta += delay_at_last_interrupt;
- lost = delta/(1000000/HZ);
- delay = delta%(1000000/HZ);
- if (lost >= 2) {
- jiffies_64 += lost-1;
-
- /* sanity check to ensure we're not always losing ticks */
- if (lost_count++ > 100) {
- printk(KERN_WARNING "Losing too many ticks!\n");
- printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
- printk(KERN_WARNING "Possible reasons for this are:\n");
- printk(KERN_WARNING " You're running with Speedstep,\n");
- printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
- printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
- printk(KERN_WARNING "Falling back to a sane timesource now.\n");
-
- clock_fallback();
- }
- /* ... but give the TSC a fair chance */
- if (lost_count > 25)
- cpufreq_delayed_get();
- } else
- lost_count = 0;
- /* update the monotonic base value */
- this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
- monotonic_base += cycles_2_ns(this_offset - last_offset);
- write_sequnlock(&monotonic_lock);
-
- /* calculate delay_at_last_interrupt */
- count = ((LATCH-1) - count) * TICK_SIZE;
- delay_at_last_interrupt = (count + LATCH/2) / LATCH;
-
- /* catch corner case where tick rollover occured
- * between tsc and pit reads (as noted when
- * usec delta is > 90% # of usecs/tick)
- */
- if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
- jiffies_64++;
-}
-
-static int __init init_tsc(char* override)
-{
-
- /* check clock override */
- if (override[0] && strncmp(override,"tsc",3)) {
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled()) {
- printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
- } else
-#endif
- {
- return -ENODEV;
- }
- }
-
- /*
- * If we have APM enabled or the CPU clock speed is variable
- * (CPU stops clock on HLT or slows clock to save power)
- * then the TSC timestamps may diverge by up to 1 jiffy from
- * 'real time' but nothing will break.
- * The most frequent case is that the CPU is "woken" from a halt
- * state by the timer interrupt itself, so we get 0 error. In the
- * rare cases where a driver would "wake" the CPU and request a
- * timestamp, the maximum error is < 1 jiffy. But timestamps are
- * still perfectly ordered.
- * Note that the TSC counter will be reset if APM suspends
- * to disk; this won't break the kernel, though, 'cuz we're
- * smart. See arch/i386/kernel/apm.c.
- */
- /*
- * Firstly we have to do a CPU check for chips with
- * a potentially buggy TSC. At this point we haven't run
- * the ident/bugs checks so we must run this hook as it
- * may turn off the TSC flag.
- *
- * NOTE: this doesn't yet handle SMP 486 machines where only
- * some CPU's have a TSC. Thats never worked and nobody has
- * moaned if you have the only one in the world - you fix it!
- */
-
- count2 = LATCH; /* initialize counter for mark_offset_tsc() */
-
- if (cpu_has_tsc) {
- unsigned long tsc_quotient;
-#ifdef CONFIG_HPET_TIMER
- if (is_hpet_enabled() && hpet_use_timer) {
- unsigned long result, remain;
- printk("Using TSC for gettimeofday\n");
- tsc_quotient = calibrate_tsc_hpet(NULL);
- timer_tsc.mark_offset = &mark_offset_tsc_hpet;
- /*
- * Math to calculate hpet to usec multiplier
- * Look for the comments at get_offset_tsc_hpet()
- */
- ASM_DIV64_REG(result, remain, hpet_tick,
- 0, KERNEL_TICK_USEC);
- if (remain > (hpet_tick >> 1))
- result++; /* rounding the result */
-
- hpet_usec_quotient = result;
- } else
-#endif
- {
- tsc_quotient = calibrate_tsc();
- }
-
- if (tsc_quotient) {
- fast_gettimeoffset_quotient = tsc_quotient;
- use_tsc = 1;
- /*
- * We could be more selective here I suspect
- * and just enable this for the next intel chips ?
- */
- /* report CPU clock rate in Hz.
- * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
- * clock/second. Our precision is about 100 ppm.
- */
- { unsigned long eax=0, edx=1000;
- __asm__("divl %2"
- :"=a" (cpu_khz), "=d" (edx)
- :"r" (tsc_quotient),
- "0" (eax), "1" (edx));
- printk("Detected %u.%03u MHz processor.\n",
- cpu_khz / 1000, cpu_khz % 1000);
- }
- set_cyc2ns_scale(cpu_khz/1000);
- return 0;
- }
- }
- return -ENODEV;
-}
-
-
-
-/************************************************************/
-
-/* tsc timer_opts struct */
-static struct timer_opts timer_tsc = {
- .name = "tsc",
- .mark_offset = mark_offset_tsc,
- .get_offset = get_offset_tsc,
- .monotonic_clock = monotonic_clock_tsc,
- .delay = delay_tsc,
- .read_timer = read_timer_tsc,
- .resume = tsc_resume,
-};
-
-struct init_timer_opts __initdata timer_tsc_init = {
- .init = init_tsc,
- .opts = &timer_tsc,
-};


2005-09-30 00:53:14

by john stultz

[permalink] [raw]
Subject: [RFC][PATCH 11/11] generic timeofday i386 specific timesources

All,

This patch implements the time sources shared between i386 and x86-64
(acpi_pm, cyclone, hpet, pit, tsc and tsc-interp). The patch should
apply on top of the timeofday-arch-i386-part6 patch.

The patch should be fairly straight forward, only adding the new
timesources.

thanks
-john

Makefile | 7 ++
acpi_pm.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
cyclone.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++
hpet.c | 59 ++++++++++++++++++++++
i386_pit.c | 63 ++++++++++++++++++++++++
tsc-interp.c | 112 +++++++++++++++++++++++++++++++++++++++++++
tsc.c | 92 +++++++++++++++++++++++++++++++++++
7 files changed, 623 insertions(+)

linux-2.6.14-rc2_timeofday-timesources-i386_B6.patch
============================================
diff --git a/drivers/timesource/Makefile b/drivers/timesource/Makefile
--- a/drivers/timesource/Makefile
+++ b/drivers/timesource/Makefile
@@ -1 +1,8 @@
obj-y += jiffies.o
+
+obj-$(CONFIG_X86) += tsc.o
+obj-$(CONFIG_X86) += i386_pit.o
+#XXX doesn't boot! obj-$(CONFIG_X86) += tsc-interp.o
+obj-$(CONFIG_X86_CYCLONE_TIMER) += cyclone.o
+obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o
+obj-$(CONFIG_HPET_TIMER) += hpet.o
diff --git a/drivers/timesource/acpi_pm.c b/drivers/timesource/acpi_pm.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/acpi_pm.c
@@ -0,0 +1,152 @@
+/*
+ * linux/drivers/timesource/acpi_pm.c
+ *
+ * This file contains the ACPI PM based time source.
+ *
+ * This code was largely moved from the i386 timer_pm.c file
+ * which was (C) Dominik Brodowski <[email protected]> 2003
+ * and contained the following comments:
+ *
+ * Driver to use the Power Management Timer (PMTMR) available in some
+ * southbridges as primary timing source for the Linux kernel.
+ *
+ * Based on parts of linux/drivers/acpi/hardware/hwtimer.c, timer_pit.c,
+ * timer_hpet.c, and on Arjan van de Ven's implementation for 2.4.
+ *
+ * This file is licensed under the GPL v2.
+ */
+
+
+#include <linux/timesource.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/io.h>
+
+/* Number of PMTMR ticks expected during calibration run */
+#define PMTMR_TICKS_PER_SEC 3579545
+
+#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64)))
+#include "mach_timer.h"
+#define PMTMR_EXPECTED_RATE ((PMTMR_TICKS_PER_SEC*CALIBRATE_TIME_MSEC)/1000)
+#endif
+
+/* The I/O port the PMTMR resides at.
+ * The location is detected during setup_arch(),
+ * in arch/i386/acpi/boot.c */
+extern u32 acpi_pmtmr_ioport;
+extern int acpi_pmtmr_buggy;
+
+#define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */
+
+
+static inline u32 read_pmtmr(void)
+{
+ /* mask the output to 24 bits */
+ return inl(acpi_pmtmr_ioport) & ACPI_PM_MASK;
+}
+
+static cycle_t acpi_pm_read_verified(void)
+{
+ u32 v1=0,v2=0,v3=0;
+ /* It has been reported that because of various broken
+ * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time
+ * source is not latched, so you must read it multiple
+ * times to insure a safe value is read.
+ */
+ do {
+ v1 = read_pmtmr();
+ v2 = read_pmtmr();
+ v3 = read_pmtmr();
+ } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2));
+
+ return (cycle_t)v2;
+}
+
+
+static cycle_t acpi_pm_read(void)
+{
+ return (cycle_t)read_pmtmr();
+}
+
+struct timesource_t timesource_acpi_pm = {
+ .name = "acpi_pm",
+ .priority = 200,
+ .type = TIMESOURCE_FUNCTION,
+ .read_fnct = acpi_pm_read,
+ .mask = (cycle_t)ACPI_PM_MASK,
+ .mult = 0, /*to be caluclated*/
+ .shift = 22,
+};
+
+#if (defined(CONFIG_X86) && (!defined(CONFIG_X86_64)))
+/*
+ * Some boards have the PMTMR running way too fast. We check
+ * the PMTMR rate against PIT channel 2 to catch these cases.
+ */
+static int __init verify_pmtmr_rate(void)
+{
+ u32 value1, value2;
+ unsigned long count, delta;
+
+ mach_prepare_counter();
+ value1 = read_pmtmr();
+ mach_countup(&count);
+ value2 = read_pmtmr();
+ delta = (value2 - value1) & ACPI_PM_MASK;
+
+ /* Check that the PMTMR delta is within 5% of what we expect */
+ if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 ||
+ delta > (PMTMR_EXPECTED_RATE * 21) / 20) {
+ printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE);
+ return -1;
+ }
+
+ return 0;
+}
+#else
+#define verify_pmtmr_rate() (0)
+#endif
+
+static int __init init_acpi_pm_timesource(void)
+{
+ u32 value1, value2;
+ unsigned int i;
+
+ if (!acpi_pmtmr_ioport)
+ return -ENODEV;
+
+ timesource_acpi_pm.mult = timesource_hz2mult(PMTMR_TICKS_PER_SEC,
+ timesource_acpi_pm.shift);
+
+ /* "verify" this timing source */
+ value1 = read_pmtmr();
+ for (i = 0; i < 10000; i++) {
+ value2 = read_pmtmr();
+ if (value2 == value1)
+ continue;
+ if (value2 > value1)
+ goto pm_good;
+ if ((value2 < value1) && ((value2) < 0xFFF))
+ goto pm_good;
+ printk(KERN_INFO "PM-Timer had inconsistent results: 0x%#x, 0x%#x - aborting.\n", value1, value2);
+ return -EINVAL;
+ }
+ printk(KERN_INFO "PM-Timer had no reasonable result: 0x%#x - aborting.\n", value1);
+ return -ENODEV;
+
+pm_good:
+ if (verify_pmtmr_rate() != 0)
+ return -ENODEV;
+
+ /* check to see if pmtmr is known buggy */
+ if (acpi_pmtmr_buggy) {
+ timesource_acpi_pm.read_fnct = acpi_pm_read_verified;
+ timesource_acpi_pm.priority = 110;
+ }
+
+ register_timesource(&timesource_acpi_pm);
+ return 0;
+}
+
+module_init(init_acpi_pm_timesource);
diff --git a/drivers/timesource/cyclone.c b/drivers/timesource/cyclone.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/cyclone.c
@@ -0,0 +1,138 @@
+#include <linux/timesource.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include "mach_timer.h"
+
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr*/
+#define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */
+#define CYCLONE_MPCS_OFFSET 0x51A8 /* offset to select register */
+#define CYCLONE_MPMC_OFFSET 0x51D0 /* offset to count register */
+#define CYCLONE_TIMER_FREQ 100000000
+#define CYCLONE_TIMER_MASK (0xFFFFFFFF) /* 32 bit mask */
+
+int use_cyclone = 0;
+
+struct timesource_t timesource_cyclone = {
+ .name = "cyclone",
+ .priority = 250,
+ .type = TIMESOURCE_MMIO_32,
+ .mmio_ptr = NULL, /* to be set */
+ .mask = (cycle_t)CYCLONE_TIMER_MASK,
+ .mult = 10,
+ .shift = 0,
+};
+
+static unsigned long __init calibrate_cyclone(void)
+{
+ u64 delta64;
+ unsigned long start, end;
+ unsigned long i, count;
+ unsigned long cyclone_freq_khz;
+
+ /* repeat 3 times to make sure the cache is warm */
+ for(i=0; i < 3; i++) {
+ mach_prepare_counter();
+ start = readl(timesource_cyclone.mmio_ptr);
+ mach_countup(&count);
+ end = readl(timesource_cyclone.mmio_ptr);
+ }
+
+ delta64 = end - start;
+
+ delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
+ do_div(delta64,CALIBRATE_TIME_MSEC);
+
+ cyclone_freq_khz = (unsigned long)delta64;
+
+ printk("calculated cyclone_freq: %lu khz\n", cyclone_freq_khz);
+ return cyclone_freq_khz;
+}
+
+static int __init init_cyclone_timesource(void)
+{
+ unsigned long base; /* saved value from CBAR */
+ unsigned long offset;
+ u32 __iomem* reg;
+ u32 __iomem* volatile cyclone_timer; /* Cyclone MPMC0 register */
+ unsigned long khz;
+ int i;
+
+ /*make sure we're on a summit box*/
+ if (!use_cyclone) return -ENODEV;
+
+ printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
+
+ /* find base address */
+ offset = CYCLONE_CBAR_ADDR;
+ reg = ioremap_nocache(offset, sizeof(reg));
+ if(!reg){
+ printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
+ return -ENODEV;
+ }
+ /* even on 64bit systems, this is only 32bits */
+ base = readl(reg);
+ if(!base){
+ printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
+ return -ENODEV;
+ }
+ iounmap(reg);
+
+ /* setup PMCC */
+ offset = base + CYCLONE_PMCC_OFFSET;
+ reg = ioremap_nocache(offset, sizeof(reg));
+ if(!reg){
+ printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
+ return -ENODEV;
+ }
+ writel(0x00000001,reg);
+ iounmap(reg);
+
+ /* setup MPCS */
+ offset = base + CYCLONE_MPCS_OFFSET;
+ reg = ioremap_nocache(offset, sizeof(reg));
+ if(!reg){
+ printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
+ return -ENODEV;
+ }
+ writel(0x00000001,reg);
+ iounmap(reg);
+
+ /* map in cyclone_timer */
+ offset = base + CYCLONE_MPMC_OFFSET;
+ cyclone_timer = ioremap_nocache(offset, sizeof(u64));
+ if(!cyclone_timer){
+ printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
+ return -ENODEV;
+ }
+
+ /*quick test to make sure its ticking*/
+ for(i=0; i<3; i++){
+ u32 old = readl(cyclone_timer);
+ int stall = 100;
+ while(stall--) barrier();
+ if(readl(cyclone_timer) == old){
+ printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
+ iounmap(cyclone_timer);
+ cyclone_timer = NULL;
+ return -ENODEV;
+ }
+ }
+ timesource_cyclone.mmio_ptr = cyclone_timer;
+
+ /* sort out mult/shift values */
+ khz = calibrate_cyclone();
+ timesource_cyclone.shift = 22;
+ timesource_cyclone.mult = timesource_khz2mult(khz,
+ timesource_cyclone.shift);
+
+ register_timesource(&timesource_cyclone);
+
+ return 0;
+}
+
+module_init(init_cyclone_timesource);
diff --git a/drivers/timesource/hpet.c b/drivers/timesource/hpet.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/hpet.c
@@ -0,0 +1,59 @@
+#include <linux/timesource.h>
+#include <linux/hpet.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/io.h>
+#include <asm/hpet.h>
+
+#define HPET_MASK (0xFFFFFFFF)
+#define HPET_SHIFT 22
+
+/* FSEC = 10^-15 NSEC = 10^-9 */
+#define FSEC_PER_NSEC 1000000
+
+struct timesource_t timesource_hpet = {
+ .name = "hpet",
+ .priority = 250,
+ .type = TIMESOURCE_MMIO_32,
+ .mmio_ptr = NULL,
+ .mask = (cycle_t)HPET_MASK,
+ .mult = 0, /* set below */
+ .shift = HPET_SHIFT,
+};
+
+static int __init init_hpet_timesource(void)
+{
+ unsigned long hpet_period;
+ void __iomem* hpet_base;
+ u64 tmp;
+
+ if (!hpet_address)
+ return -ENODEV;
+
+ /* calculate the hpet address */
+ hpet_base =
+ (void __iomem*)ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+ timesource_hpet.mmio_ptr = hpet_base + HPET_COUNTER;
+
+ /* calculate the frequency */
+ hpet_period = readl(hpet_base + HPET_PERIOD);
+
+
+ /* hpet period is in femto seconds per cycle
+ * so we need to convert this to ns/cyc units
+ * aproximated by mult/2^shift
+ *
+ * fsec/cyc * 1nsec/1000000fsec = nsec/cyc = mult/2^shift
+ * fsec/cyc * 1ns/1000000fsec * 2^shift = mult
+ * fsec/cyc * 2^shift * 1nsec/1000000fsec = mult
+ * (fsec/cyc << shift)/1000000 = mult
+ * (hpet_period << shift)/FSEC_PER_NSEC = mult
+ */
+ tmp = (u64)hpet_period << HPET_SHIFT;
+ do_div(tmp, FSEC_PER_NSEC);
+ timesource_hpet.mult = (u32)tmp;
+
+ register_timesource(&timesource_hpet);
+ return 0;
+}
+module_init(init_hpet_timesource);
diff --git a/drivers/timesource/i386_pit.c b/drivers/timesource/i386_pit.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/i386_pit.c
@@ -0,0 +1,63 @@
+#include <linux/timesource.h>
+#include <linux/jiffies.h>
+#include <linux/init.h>
+/* pit timesource does not build on x86-64 */
+#ifndef CONFIG_X86_64
+#include <asm/io.h>
+#include <asm/i8253.h>
+#include "io_ports.h"
+
+/* Since the PIT overflows every tick, its not very useful
+ * to just read by itself. So use jiffies to emulate a free
+ * running counter.
+ */
+
+static cycle_t pit_read(void)
+{
+ unsigned long flags, seq;
+ int count;
+ u64 jifs;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+
+ spin_lock_irqsave(&i8253_lock, flags);
+
+ outb_p(0x00, PIT_MODE); /* latch the count ASAP */
+ count = inb_p(PIT_CH0); /* read the latched count */
+ count |= inb_p(PIT_CH0) << 8;
+
+ /* VIA686a test code... reset the latch if count > max + 1 */
+ if (count > LATCH) {
+ outb_p(0x34, PIT_MODE);
+ outb_p(LATCH & 0xff, PIT_CH0);
+ outb(LATCH >> 8, PIT_CH0);
+ count = LATCH - 1;
+ }
+ spin_unlock_irqrestore(&i8253_lock, flags);
+ jifs = get_jiffies_64() - INITIAL_JIFFIES;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ count = (LATCH-1) - count;
+
+ return (cycle_t)(jifs * LATCH) + count;
+}
+
+static struct timesource_t timesource_pit = {
+ .name = "pit",
+ .priority = 110,
+ .type = TIMESOURCE_FUNCTION,
+ .read_fnct = pit_read,
+ .mask = (cycle_t)-1,
+ .mult = 0,
+ .shift = 20,
+};
+
+static int __init init_pit_timesource(void)
+{
+ timesource_pit.mult = timesource_hz2mult(CLOCK_TICK_RATE, 20);
+ register_timesource(&timesource_pit);
+ return 0;
+}
+module_init(init_pit_timesource);
+#endif
diff --git a/drivers/timesource/tsc-interp.c b/drivers/timesource/tsc-interp.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/tsc-interp.c
@@ -0,0 +1,112 @@
+/* TSC-Jiffies Interpolation timesource
+ Example interpolation timesource.
+TODO:
+ o per-cpu TSC offsets
+*/
+#include <linux/timesource.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+
+static unsigned long current_tsc_khz = 0;
+
+static seqlock_t tsc_interp_lock = SEQLOCK_UNLOCKED;
+static unsigned long tsc_then;
+static unsigned long jiffies_then;
+struct timer_list tsc_interp_timer;
+
+static unsigned long mult, shift;
+
+#define NSEC_PER_JIFFY ((((unsigned long long)NSEC_PER_SEC)<<8)/ACTHZ)
+#define SHIFT_VAL 22
+
+static cycle_t read_tsc_interp(void);
+static void tsc_interp_update_callback(void);
+
+static struct timesource_t timesource_tsc_interp = {
+ .name = "tsc-interp",
+ .priority = 150,
+ .type = TIMESOURCE_FUNCTION,
+ .read_fnct = read_tsc_interp,
+ .mask = (cycle_t)((1ULL<<32)-1),
+ .mult = 1<<SHIFT_VAL,
+ .shift = SHIFT_VAL,
+ .update_callback = tsc_interp_update_callback,
+};
+
+static void tsc_interp_sync(unsigned long unused)
+{
+ cycle_t tsc_now;
+ unsigned long jiffies_now;
+
+ do {
+ jiffies_now = jiffies;
+ rdtscll(tsc_now);
+ } while (jiffies_now != jiffies);
+
+ write_seqlock(&tsc_interp_lock);
+ jiffies_then = jiffies_now;
+ tsc_then = tsc_now;
+ write_sequnlock(&tsc_interp_lock);
+
+ mod_timer(&tsc_interp_timer, jiffies+1);
+}
+
+
+static cycle_t read_tsc_interp(void)
+{
+ cycle_t ret;
+ cycle_t now, then;
+ unsigned long jiffs_now, jiffs_then;
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&tsc_interp_lock);
+
+ jiffs_now = jiffies;
+ jiffs_then = jiffies_then;
+ then = tsc_then;
+
+ } while (read_seqretry(&tsc_interp_lock, seq));
+
+ rdtscll(now);
+ ret = (cycle_t)jiffs_then * NSEC_PER_JIFFY;
+ if (jiffs_then == jiffs_now)
+ ret += min((cycle_t)NSEC_PER_JIFFY,(cycle_t)((now - then)*mult)>> shift);
+ else
+ ret += (cycle_t)(jiffs_now - jiffs_then)*NSEC_PER_JIFFY;
+
+ return ret;
+}
+
+static void tsc_interp_update_callback(void)
+{
+ /* only update if tsc_khz has changed */
+ if (current_tsc_khz != tsc_khz){
+ current_tsc_khz = tsc_khz;
+ mult = timesource_khz2mult(current_tsc_khz, shift);
+ }
+}
+
+
+static int __init init_tsc_interp_timesource(void)
+{
+ /* TSC initialization is done in arch/i386/kernel/tsc.c */
+ if (cpu_has_tsc && tsc_khz) {
+ current_tsc_khz = tsc_khz;
+ shift = SHIFT_VAL;
+ mult = timesource_khz2mult(current_tsc_khz, shift);
+ /* setup periodic soft-timer */
+ init_timer(&tsc_interp_timer);
+ tsc_interp_timer.function = tsc_interp_sync;
+ tsc_interp_timer.expires = jiffies;
+ add_timer(&tsc_interp_timer);
+
+ register_timesource(&timesource_tsc_interp);
+ }
+ return 0;
+}
+module_init(init_tsc_interp_timesource);
diff --git a/drivers/timesource/tsc.c b/drivers/timesource/tsc.c
new file mode 100644
--- /dev/null
+++ b/drivers/timesource/tsc.c
@@ -0,0 +1,92 @@
+/* TODO:
+ * o better calibration
+ */
+
+#include <linux/timesource.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_X86_64
+#define cpu_has_tsc (1)
+#endif
+
+static unsigned long current_tsc_khz = 0;
+
+static cycle_t read_safe_tsc(void);
+static int tsc_update_callback(void);
+
+static struct timesource_t timesource_safe_tsc = {
+ .name = "c3tsc",
+ .priority = 300,
+ .type = TIMESOURCE_FUNCTION,
+ .read_fnct = read_safe_tsc,
+ .mask = (cycle_t)-1,
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+};
+
+static struct timesource_t timesource_raw_tsc = {
+ .name = "tsc",
+ .priority = 300,
+ .type = TIMESOURCE_CYCLES,
+ .mask = (cycle_t)-1,
+ .mult = 0, /* to be set */
+ .shift = 22,
+ .update_callback = tsc_update_callback,
+};
+
+static struct timesource_t timesource_tsc;
+
+static cycle_t read_safe_tsc(void)
+{
+ cycle_t ret;
+ rdtscll(ret);
+ return ret + tsc_read_c3_time();
+}
+
+static int tsc_update_callback(void)
+{
+ int change = 0;
+ /* check to see if we should switch to the safe timesource */
+ if (tsc_read_c3_time() &&
+ strncmp(timesource_tsc.name, "c3tsc", 5)) {
+ printk("Falling back to C3 safe TSC\n");
+ timesource_safe_tsc.mult = timesource_tsc.mult;
+ timesource_safe_tsc.priority = timesource_tsc.priority;
+ timesource_tsc = timesource_safe_tsc;
+ change = 1;
+ }
+
+ if (timesource_tsc.priority != 50 && check_tsc_unstable()) {
+ timesource_tsc.priority = 50;
+ reselect_timesource();
+ change = 1;
+ }
+ /* only update if tsc_khz has changed */
+ if (current_tsc_khz != tsc_khz){
+ current_tsc_khz = tsc_khz;
+ timesource_tsc.mult = timesource_khz2mult(current_tsc_khz,
+ timesource_tsc.shift);
+ change = 1;
+ }
+ return change;
+}
+
+static int __init init_tsc_timesource(void)
+{
+
+ timesource_tsc = timesource_raw_tsc;
+
+ /* TSC initialization is done in arch/i386/kernel/tsc.c */
+ if (cpu_has_tsc && tsc_khz) {
+ current_tsc_khz = tsc_khz;
+ timesource_tsc.mult = timesource_khz2mult(current_tsc_khz,
+ timesource_tsc.shift);
+ register_timesource(&timesource_tsc);
+ }
+ return 0;
+}
+
+module_init(init_tsc_timesource);
+