Hey all,
I've been working on improving performance of do_gettimeofday on
certain NUMA hardware (see my earlier tsc-disable posts for why the TSCs
cannot be used), and I wanted to run this by the list to make sure I'm
not doing anything too daft.
Mainly I'm concerned about my usage of bt_ioremap. Early in the boot
process, I need to map in a couple of chipset registers and poke
values into them. However one of the registers needs to be kept mapped
for the life of the system (look for "cyclone_timer" below). I couldn't
find any documentation on whether memory mapped with bt_ioremap can be
held across mem_init(). It seems to "just work" but I wanted to find out
if I really should be re-mapping the register w/ ioremap once it becomes
available.
If you see any other issues with the patch, please let me know (I do
realize it's a bit #ifdef happy, so go easy on that point).
Comments, suggestions and flames welcome
thanks
-john
PS: If you are so inclined/loony to actually try this patch, please note
that it depends on my tsc-disable_B2 patch to apply cleanly.
diff -Nru a/Documentation/Configure.help b/Documentation/Configure.help
--- a/Documentation/Configure.help Mon Jul 8 10:53:02 2002
+++ b/Documentation/Configure.help Mon Jul 8 10:53:02 2002
@@ -257,6 +257,11 @@
You will need a new lynxer.elf file to flash your firmware with - send
email to [email protected]
+IBM Enterprise X-Architecture support
+CONFIG_X86_IBMEXA
+ This option makes use of a performance counter on the Cyclone chipset
+ for calculating do_gettimeofday, greatly improving its performance.
+
IO-APIC support on uniprocessors
CONFIG_X86_UP_IOAPIC
An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
diff -Nru a/arch/i386/config.in b/arch/i386/config.in
--- a/arch/i386/config.in Mon Jul 8 10:53:02 2002
+++ b/arch/i386/config.in Mon Jul 8 10:53:02 2002
@@ -201,6 +201,7 @@
bool 'Multi-node NUMA system support (Caution: Read help first)' CONFIG_X86_NUMA
if [ "$CONFIG_X86_NUMA" = "y" ]; then
bool 'Multiquad (IBM/Sequent) NUMAQ support' CONFIG_MULTIQUAD
+ bool 'IBM Enterprise X-Architecture support' CONFIG_X86_IBMEXA
fi
fi
diff -Nru a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
--- a/arch/i386/kernel/time.c Mon Jul 8 10:53:02 2002
+++ b/arch/i386/kernel/time.c Mon Jul 8 10:53:02 2002
@@ -253,6 +253,87 @@
static unsigned long (*do_gettimeoffset)(void) = do_slow_gettimeoffset;
+
+
+#ifdef CONFIG_X86_IBMEXA
+#define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* physical address of the Cyclone base-address (CBAR) register */
+#define CYCLONE_PMCC_OFFSET 0x51A0 /* PMCC register, offset from the base value read out of CBAR */
+#define CYCLONE_MPMC_OFFSET 0x51D0 /* MPMC register (read as the timer), offset from the same base */
+#define CYCLONE_MPCS_OFFSET 0x51A8 /* MPCS register, offset from the same base */
+#define CYCLONE_TIMER_FREQ 100000000 /* counter rate used for tick->usec conversion; presumably Hz (100 MHz) - confirm */
+static u32* cyclone_timer; /* mapping of the Cyclone MPMC0 register; NULL until init_cyclone_clock() maps it */
+static void init_cyclone_clock(void)
+{
+ u32* cyclone_cbar; /* temporary mapping of the Cyclone BaseAddr (CBAR) register */
+ u32* cyclone_pmcc; /* temporary mapping of the Cyclone PMCC register */
+ u32* cyclone_mpcs; /* temporary mapping of the Cyclone MPCS register */
+ u32 base; /* value read from CBAR; the register offsets below are added to it */
+ /* Write 1 to PMCC and MPCS, then map MPMC as cyclone_timer. Any failed mapping returns early, leaving cyclone_timer NULL. */
+ /* find base address: map CBAR, read it once, unmap */
+ cyclone_cbar = bt_ioremap(CYCLONE_CBAR_ADDR, 4);
+ if(!cyclone_cbar) return;
+ base = *cyclone_cbar; /* NOTE(review): base is used unchecked; a zero/invalid CBAR value is not detected */
+ bt_iounmap(cyclone_cbar, 4);
+
+ /* setup PMCC: write 1 to its first word (presumably enables the counter - confirm against chipset docs) */
+ cyclone_pmcc = bt_ioremap(base + CYCLONE_PMCC_OFFSET, 8);
+ if(!cyclone_pmcc) return;
+ cyclone_pmcc[0] = 0x00000001;
+ bt_iounmap(cyclone_pmcc, 8);
+
+ /* setup MPCS: write 1 to its first word (presumably starts/selects the counter - confirm) */
+ cyclone_mpcs = bt_ioremap(base + CYCLONE_MPCS_OFFSET, 8);
+ if(!cyclone_mpcs) return;
+ cyclone_mpcs[0] = 0x00000001;
+ bt_iounmap(cyclone_mpcs, 8);
+
+ /* map in cyclone_timer; never unmapped. NOTE(review): confirm a bt_ioremap mapping stays valid past mem_init(), else use a permanent mapping */
+ cyclone_timer = bt_ioremap(base + CYCLONE_MPMC_OFFSET, 8);
+}
+
+static u32 last_cyclone_timer;
+
+static inline void mark_timeoffset_cyclone(void)
+{
+ int count; /* latched PIT count, then the scaled elapsed value */
+ /* Record the Cyclone counter and recompute delay_at_last_interrupt from the PIT. */
+ /* quickly read the cyclone timer, first, so the snapshot is as close to the interrupt as possible */
+ if(cyclone_timer)
+ last_cyclone_timer = cyclone_timer[0];
+
+ /* calculate delay_at_last_interrupt from the i8253; port accesses are serialized by i8253_lock */
+ spin_lock(&i8253_lock);
+ outb_p(0x00, 0x43); /* latch the count ASAP */
+
+ count = inb_p(0x40); /* read the latched count: low byte first */
+ count |= inb(0x40) << 8; /* then the high byte */
+ spin_unlock(&i8253_lock);
+
+ count = ((LATCH-1) - count) * TICK_SIZE; /* elapsed PIT ticks times TICK_SIZE; assumes the counter counts down from LATCH-1 - confirm */
+ delay_at_last_interrupt = (count + LATCH/2) / LATCH; /* divide by LATCH, rounding to nearest */
+}
+
+static unsigned long do_gettimeoffset_cyclone(void)
+{
+ u32 offset;
+ /* Returns usecs since the last timer tick: PIT delay at the tick plus Cyclone time elapsed since. */
+ if(!cyclone_timer)
+ return delay_at_last_interrupt; /* counter never mapped: report only the PIT-derived delay */
+
+ /* Read the cyclone timer */
+ offset = cyclone_timer[0];
+
+ /* .. relative to the snapshot taken at the previous timer interrupt (u32 math, so wrap-safe) */
+ offset = offset - last_cyclone_timer;
+
+ /* ticks -> usecs: 1000000/CYCLONE_TIMER_FREQ truncates to 0, so divide by ticks-per-usec (needs FREQ >= 1 MHz) */
+ offset = offset/(CYCLONE_TIMER_FREQ/1000000);
+
+ /* our adjusted time offset in microseconds */
+ return delay_at_last_interrupt + offset;
+}
+#endif /*CONFIG_X86_IBMEXA*/
+
#else
#define do_gettimeoffset() do_fast_gettimeoffset()
@@ -459,6 +540,7 @@
}
static int use_tsc;
+static int use_cyclone =1; /*XXX should be autodetected*/
/*
* This is the same as the above, except we _also_ save the current
@@ -506,6 +588,10 @@
count = ((LATCH-1) - count) * TICK_SIZE;
delay_at_last_interrupt = (count + LATCH/2) / LATCH;
}
+#ifdef CONFIG_X86_IBMEXA
+ if(use_cyclone)
+ mark_timeoffset_cyclone();
+#endif /*CONFIG_X86_IBMEXA*/
do_timer_interrupt(irq, NULL, regs);
@@ -695,6 +781,14 @@
}
}
}
+
+#ifdef CONFIG_X86_IBMEXA
+ if((!use_tsc) && use_cyclone){
+ printk("IBM EXA: Starting Cyclone Clock.\n");
+ do_gettimeoffset = do_gettimeoffset_cyclone;
+ init_cyclone_clock();
+ }
+#endif /*CONFIG_X86_IBMEXA*/
#ifdef CONFIG_VISWS
printk("Starting Cobalt Timer system clock\n");
On 08 Jul 2002 19:36:23 -0700, john stultz wrote:
> I've been working on improving performance of do_gettimeofday on
>certain NUMA hardware (see my earlier tsc-disable posts for why the TSCs
>cannot be used), and I wanted to run this by the list to make sure I'm
>not doing anything too daft.
>
>Mainly I'm concerned about my usage of bt_ioremap. Early in the boot
>processes, I need to map in a couple of chipset registers and poke
>values into them.
How early? If this is just to get gettimeofday() working, can't it
wait until the regular initcalls are done?
> However one of the registers needs to be kept mapped
>for the life of the system (look for "cyclone_timer" below). I couldn't
>find any documentation on whether memory mapped with bt_ioremap can be
>held across mem_init(). It seems to "just work" but I wanted to find out
>if I really should be re-mapping the register w/ ioremap once it becomes
>available.
If you look in include/asm-i386/fixmap.h you'll see that the BT ioremap
pages are temporary, and they do disappear once mem_init() [or whatever]
is done [pgtable.h sets VMALLOC_END to just below FIXADDR_START, which
protects the permanent fixmaps but intentionally not the temporary ones].
If it seemed to work for you then that's just luck, or perhaps it does
work in highmem kernels?
For things that must be mapped very early and continue to be mapped as
long as the kernel is up, you need a permanent fixmap. fixmap.h has a
number of permanent fixmaps defined #ifdef CONFIG_* and you can easily
add your own there, under #ifdef CONFIG_X86_IBMEXA.
/Mikael
On Tue, 2002-07-09 at 04:03, Mikael Pettersson wrote:
> On 08 Jul 2002 19:36:23 -0700, john stultz wrote:
> >Mainly I'm concerned about my usage of bt_ioremap. Early in the boot
> >processes, I need to map in a couple of chipset registers and poke
> >values into them.
>
> How early? If this is just to get gettimeofday() working, can't it
> wait until the regular initcalls are done?
I guess I might be able to pull it out and do the initialization
somewhere else, but really time_init() seems like the best spot.
> > However one of the registers needs to be kept mapped
> >for the life of the system (look for "cyclone_timer" below). I couldn't
> >find any documentation on whether memory mapped with bt_ioremap can be
> >held across mem_init(). It seems to "just work" but I wanted to find out
> >if I really should be re-mapping the register w/ ioremap once it becomes
> >available.
>
> If you look in include/asm-i386/fixmap.h you'll see that the BT ioremap
> pages are temporary, and they do disappear once mem_init() [or whatever]
> is done [pgtable.h sets VMALLOC_END to just below FIXADDR_START, which
> protects the permanent fixmaps but intensionally not the temporary ones].
> If it seemed to work for you then that's just luck, or perhaps it does
> work in highmem kernels?
>
> For things that must be mapped very early and continue to be mapped as
> long as the kernel is up, you need a permanent fixmap. fixmap.h has a
> number of permanent fixmaps defined #ifdef CONFIG_* and you can easily
> add your own there, under #ifdef CONFIG_X86_IBMEXA.
Thank you for the pointer. I gave it a whirl (see attached) and it seems
to work fine! If anyone notices that I'm still just horribly lucky and
I've done everything wrong, please let me know. :)
Thanks again for the feedback and suggestions!
-john
ps: Again, if anyone feels so inclined to try this patch, do note that
it depends on my tsc-disable patch to apply cleanly. Send me a note and
I'll fwd you my latest version.