2007-02-01 11:29:15

by Jiri Bohac

[permalink] [raw]
Subject: [patch 7/9] Adapt the time initialization code

We need to:
- initialize the RDTSCP instruction if available
- decide which hardware timer to use as the Master Timer (MT): PM or HPET
- decide what level of TSC->MT approximation to use:
  - none (as slow as the hardware MT can get) -- "notsc" option
  - strictly monotonic (can't be done in a vsyscall) -- default
  - unguaranteed monotonicity (very fast, vsyscall) -- "nomonotonic" option

Signed-off-by: Jiri Bohac <[email protected]>

Index: linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/smpboot.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/smpboot.c
@@ -318,6 +318,8 @@ static inline void set_cpu_sibling_map(i
}
}

+extern void time_initialize_cpu(void);
+
/*
* Setup code on secondary processor (after comming out of the trampoline)
*/
@@ -345,6 +347,7 @@ void __cpuinit start_secondary(void)
enable_NMI_through_LVT0(NULL);
enable_8259A_irq(0);
}
+ time_initialize_cpu();

enable_APIC_timer();

@@ -963,6 +966,8 @@ int __cpuinit __cpu_up(unsigned int cpu)
return err;
}

+extern void time_init_gtod(void);
+
/*
* Finish the SMP boot.
*/
Index: linux-2.6.20-rc5/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc5.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc5/arch/x86_64/kernel/time.c
@@ -968,6 +968,16 @@ static struct irqaction irq0 = {
timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
};

+static void time_init_rdtsc(void)
+{
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) {
+ int p;
+ p = smp_processor_id();
+ printk(KERN_INFO "Initializing RDTSCP feature on cpu %d.\n", p);
+ write_rdtscp_aux(p);
+ }
+}
+
void __init time_init(void)
{
if (nohpet)
@@ -979,27 +989,40 @@ void __init time_init(void)
set_normalized_timespec(&wall_to_monotonic,
-xtime.tv_sec, -xtime.tv_nsec);

- if (!hpet_init())
- vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
- else
- vxtime.hpet_address = 0;
+ /* decide what to use as Master Timer: HPET or PM? */
+ if (!hpet_init()) {
+ vxtime.mt_q = (hpet_period << 32) / FSEC_PER_NSEC;
+ mt_per_tick = hpet_use_timer ?
+ hpet_tick : LATCH * 12;
+ read_master_timer = read_master_timer_hpet;
+ timename = "HPET";
+ } else
+ if (pmtmr_ioport) {
+ vxtime.mt_q = (NSEC_PER_SEC << 32) / 916363636UL;
+ mt_per_tick = (LATCH * 3) << 8;
+ read_master_timer = read_master_timer_pm;
+ timename = "PM timer";
+ } else
+ panic("No suitable Master Timer found.\n");

- if (hpet_use_timer) {
- /* set tick_nsec to use the proper rate for HPET */
- tick_nsec = TICK_NSEC_HPET;
+ /* use PIT as main timer interrupt source if we can't use HPET */
+ if (hpet_use_timer)
cpu_khz = hpet_calibrate_tsc();
- timename = "HPET";
- } else {
+ else {
pit_init();
cpu_khz = pit_calibrate_tsc();
- timename = "PIT";
}

- vxtime.mode = VXTIME_TSC;
- vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
- vxtime.last_tsc = get_cycles_sync();
- set_cyc2ns_scale(cpu_khz);
+ /* initialize the master timer */
+ set_master_timer64(0);
+
+ /* initialize the timekeeping variables of the boot CPU */
+ vxtime.cpu[0].tsc_last = get_cycles_sync();
+ vxtime.cpu[0].mt_last = vxtime.cpu[0].mt_base = 0;
+ vxtime.cpu[0].tsc_slope = vxtime.cpu[0].tsc_slope_avg = (((USEC_PER_SEC << 32) / vxtime.mt_q) << TSC_SLOPE_SCALE) / cpu_khz;
+ time_init_rdtsc();
+
+ vxtime.mode = VXTIME_TSCS; /* not sure yet if CPUs have synced TSCs */
setup_irq(0, &irq0);

#ifndef CONFIG_SMP
@@ -1037,28 +1060,71 @@ __cpuinit int unsynchronized_tsc(void)
*/
void time_init_gtod(void)
{
- char *timetype;
-
- if (unsynchronized_tsc())
- notsc = 1;
+ char *tsc_method;

if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
vgetcpu_mode = VGETCPU_RDTSCP;
else
vgetcpu_mode = VGETCPU_LSL;

- timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC";
+ if (notsc) {
+ tsc_method = "nothing";
+ vxtime.mode = VXTIME_MT;
+ }
+ else if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) {
+ if (nomonotonic) {
+ tsc_method = "RDTSCP";
+ vxtime.mode = VXTIME_TSCP;
+ }
+ else {
+ tsc_method = "RDTSCM (syscall)";
+ vxtime.mode = VXTIME_TSCM;
+ }
+ }
+ else {
+ tsc_method = "RDTSC";
vxtime.mode = VXTIME_TSC;
+#ifdef CONFIG_SMP
+ if (unsynchronized_tsc()) {
+ if (nomonotonic) {
+ tsc_method = "RDTSC (syscall)";
+ vxtime.mode = VXTIME_TSCS;
+ }
+ else {
+ tsc_method = "RDTSCM (syscall)";
+ vxtime.mode = VXTIME_TSCM;
+ }
+ }
+#endif
+ }
+

- printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n",
- vxtime_hz / 1000000, vxtime_hz % 1000000, timename, timetype);
+ printk(KERN_INFO "time.c: Using %s as master timer, %s for time caching; vsyscall %s.\n",
+ timename, tsc_method, sysctl_vsyscall ? "enabled" : "disabled");
+ printk(KERN_INFO "time.c: Using %s as interrupt source.\n",
+ hpet_use_timer ? "HPET" : "PIT");
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
- vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
- vxtime.last_tsc = get_cycles_sync();
+}

- set_cyc2ns_scale(cpu_khz);
+/*
+ * initialize the per_cpu timekeeping variables
+ * for non-boot CPUs
+ */
+void time_initialize_cpu(void *info)
+{
+ unsigned long flags;
+ int cpu = smp_processor_id();
+ u64 mt_now;
+ write_seqlock_irqsave(&xtime_lock, flags);
+ mt_now = get_master_timer64();
+ vxtime.cpu[cpu].tsc_last = get_cycles_sync();
+ vxtime.cpu[cpu].mt_last = mt_now;
+ vxtime.cpu[cpu].mt_base = mt_now;
+ vxtime.cpu[cpu].tsc_slope = vxtime.cpu[0].tsc_slope;
+ vxtime.cpu[cpu].tsc_slope_avg = vxtime.cpu[0].tsc_slope_avg;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ time_init_rdtsc();
}

__setup("report_lost_ticks", time_setup);
@@ -1098,19 +1164,38 @@ static int timer_resume(struct sys_devic
sleep_length = 0;
ctime = sleep_start;
}
- if (vxtime.hpet_address)
+
+ write_seqlock_irqsave(&xtime_lock,flags);
+
+ /* reenable the Master Timer */
+ if (read_master_timer == read_master_timer_hpet || hpet_use_timer)
hpet_reenable();
+ /* reenable PIT if used as main timer interrupt source */
else
i8254_timer_resume();

sec = ctime + clock_cmos_diff;
- write_seqlock_irqsave(&xtime_lock,flags);
xtime.tv_sec = sec;
xtime.tv_nsec = 0;
- vxtime.last_tsc = get_cycles_sync();
- write_sequnlock_irqrestore(&xtime_lock,flags);
+
+ /* re-initialize the master timer */
+ add_master_timer64(sleep_length * mt_per_tick);
+ vxtime.mt_wall += sleep_length * mt_per_tick;
+
jiffies += sleep_length;
- monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
+
+ /* re-initialize the timekeeping variables of the boot CPU */
+ vxtime.cpu[0].tsc_last = get_cycles_sync();
+ vxtime.cpu[0].mt_last = vxtime.cpu[0].mt_base = get_master_timer64();
+ /* FIXME: what speed does the cpu really start at; I doubt cpu_khz is right at this point ??!!!
+ And what speed do the non-boot cpus start at? Their timekeeping variables will probably be set wrong
+ by copying from CPU 0 in time_initialize_cpu(); Not a great deal, as they will be synced somehow,
+ but not exactly nice -JB */
+ vxtime.cpu[0].tsc_slope = vxtime.cpu[0].tsc_slope_avg =
+ (((USEC_PER_SEC << 32) / vxtime.mt_q) << TSC_SLOPE_SCALE) / cpu_khz;
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
touch_softlockup_watchdog();
return 0;
}
@@ -1402,3 +1487,11 @@ int __init notsc_setup(char *s)
}

__setup("notsc", notsc_setup);
+
+int __init nomonotonic_setup(char *s)
+{
+ nomonotonic = 1;
+ return 1;
+}
+
+__setup("nomonotonic", nomonotonic_setup);
Index: linux-2.6.20-rc5/include/linux/time.h
===================================================================
--- linux-2.6.20-rc5.orig/include/linux/time.h
+++ linux-2.6.20-rc5/include/linux/time.h
@@ -31,6 +31,7 @@ struct timezone {
#define MSEC_PER_SEC 1000L
#define USEC_PER_MSEC 1000L
#define NSEC_PER_USEC 1000L
+#define FSEC_PER_NSEC 1000000L
#define NSEC_PER_MSEC 1000000L
#define USEC_PER_SEC 1000000L
#define NSEC_PER_SEC 1000000000L

--


2007-02-01 11:36:33

by Andi Kleen

[permalink] [raw]
Subject: Re: [patch 7/9] Adapt the time initialization code


> +extern void time_initialize_cpu(void);

Never put externs into .c files. Multiple occurrences.


> +void time_initialize_cpu(void *info)
> +{
> + unsigned long flags;
> + int cpu = smp_processor_id();

Are you sure this can never preempt?

> + /* FIXME: what speed does the cpu really start at; I doubt cpu_khz is right at this point ??!!!

It should be. It comes from measurements. Unless the CPU changes frequency
behind the kernel's back, but there is nothing that can be done then.

-Andi

2007-02-01 13:38:35

by Jiri Bohac

[permalink] [raw]
Subject: Re: [patch 7/9] Adapt the time initialization code

On Thu, Feb 01, 2007 at 12:26:33PM +0100, Andi Kleen wrote:
>
> > +extern void time_initialize_cpu(void);
>
> Never put externs into .c files. Multiple occurrences.

Ok, will fix it, sorry.

> > +void time_initialize_cpu(void *info)
> > +{
> > + unsigned long flags;
> > + int cpu = smp_processor_id();
>
> Are you sure this can never preempt?

yes, preemption is explicitly disabled in start_secondary() that
calls this function.

> > + /* FIXME: what speed does the cpu really start at; I doubt cpu_khz is right at this point ??!!!
>
> It should be. It comes from measurements. Unless the CPU changes frequency
> behind the kernel's back, but there is nothing that can be done then.

Well, I'm not sure. I think the global variable cpu_khz is wrong
in the first place. This should be per-cpu, because each CPU can
have a different frequency, right?

And cpu_khz is adjusted in time_cpufreq_notifier() whenever any
CPU's frequency changes. To me it seems to be a leftover from
the times when all CPUs in a system ran at the same speed. I
think it should be killed. I just did not want to make too many
unrelated changes in one patchset.

In this particular case it doesn't matter much that the value is
probably wrong (as the comment explains)...


--
Jiri Bohac <[email protected]>
SUSE Labs, SUSE CZ