2002-10-17 17:06:29

by Hiroshi Miura

[permalink] [raw]
Subject: NatSemi Geode improvement

Hello,

The NatSemi Geode has several features that can speed it up,
but their reset default values are set to the slow side.

I made a patch that speeds up the Geode by about 20-40%!!
The benchmark results are downloadable from http://www.da-cha.org/geode/geode_graph.sxc.
The file is in OpenOffice format.

I have used this patch with 2.4.18 and 2.4.19 for 4 months, so I think it is stable enough.


diff -urB -x .config -x '*.[oasS]' -x '*.in' -x '*.rej' -x '*.orig' linux-2.5.43-orig/arch/i386/kernel/apm.c linux-2.5.43/arch/i386/kernel/apm.c
diff -urB -x .config -x '*.[oasS]' -x '*.in' -x '*.rej' -x '*.orig' linux-2.5.43-orig/arch/i386/kernel/cpu/cyrix.c linux-2.5.43/arch/i386/kernel/cpu/cyrix.c
--- linux-2.5.43-orig/arch/i386/kernel/cpu/cyrix.c 2002-10-12 13:21:34.000000000 +0900
+++ linux-2.5.43/arch/i386/kernel/cpu/cyrix.c 2002-10-14 21:34:57.000000000 +0900
@@ -1,3 +1,7 @@
+/*
+ * NSC Geode improvments
+ * Hiroshi Miura <[email protected]>, June, 2002.
+ */
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/delay.h>
@@ -105,6 +109,65 @@
}
}

+static void __init set_cx86_reorder(void)
+{
+ unsigned char ccr3;
+ unsigned long flags;
+
+ printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ /* Load/Store Serialize to mem access disable (=reorder it) */
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+#ifdef CONFIG_NOHIGHMEM
+ /* set load/store serialize from 1GB to 4GB */
+ ccr3 |= 0xe0;
+#endif
+ setCx86(CX86_CCR3, ccr3);
+ local_irq_restore(flags);
+}
+
+static void __init set_cx86_memwb(void)
+{
+ unsigned long cr0;
+ unsigned long flags;
+
+ printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+ local_irq_save(flags);
+ /* CCR2 bit 2: unlock NW bit */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+ /* set 'Not Write-through' */
+ cr0 = 0x20000000;
+ __asm__("movl %%cr0,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr0\n"
+ : : "r" (cr0)
+ :"ax");
+ /* CCR2 bit 2: lock NW bit and set WT1 */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+ local_irq_restore(flags);
+}
+
+static void set_cx86_inc(void)
+{
+ unsigned char ccr3;
+ unsigned long flags;
+
+ printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n");
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ /* PCR1 -- Performance Control */
+ /* Incrementor on, whatever that is */
+ setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
+ /* PCR0 -- Performance Control */
+ /* Incrementor Margin 10 */
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ local_irq_restore(flags);
+}
+
static void __init init_cyrix(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
@@ -170,7 +233,7 @@
c->coma_bug = 1;
break;

- case 4: /* MediaGX/GXm */
+ case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
#ifdef CONFIG_PCI
/* It isn't really a PCI quirk directly, but the cure is the
same. The MediaGX has deep magic SMM stuff that handles the
@@ -191,26 +250,37 @@

/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
- /* Enable Natsemi MMX extensions */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+ if (((0x54 >= dir1) && (dir1 >= 0x50))
+ || (dir1 >= 0x63)) { /* NSC Geode GXlv/GXm/GX1 */
+ /* I dont know about Cyrix GXm/GXi */
+ unsigned char ccr3, ccr4;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ ccr3 = getCx86(CX86_CCR3);
+ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
+ ccr4 = getCx86(CX86_CCR4);
+ ccr4 |= (0x20 /* fpu fast */
+ | 0x10 /* dte cache */
+ | 0x08); /* mem bypass */
+ setCx86(CX86_CCR4, ccr4);
+ setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
+ local_irq_restore(flags);
+
+ set_cx86_memwb(); /* L2 cache write-back mode */
+ set_cx86_reorder(); /* re-order mem write/read */
+ set_cx86_inc(); /* incrementor enable */
+ }
+ /* Enable cxMMX extensions (GX1 Datasheet 54) */
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);

get_model_name(c); /* get CPU marketing name */
/*
* The 5510/5520 companion chips have a funky PIT
* that breaks the TSC synchronizing, so turn it off
*/
if (pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, NULL) ||
pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, NULL))
clear_bit(X86_FEATURE_TSC, c->x86_capability);
return;
}
else { /* MediaGX */
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
p = Cx86_cb+2;
c->x86_model = (dir1 & 0x20) ? 1 : 2;
#ifndef CONFIG_CS5520
clear_bit(X86_FEATURE_TSC, c->x86_capability);
#endif
}
break;

diff -urB -x .config -x '*.[oasS]' -x '*.in' -x '*.rej' -x '*.orig' linux-2.5.43-orig/include/asm-i386/processor.h linux-2.5.43/include/asm-i386/processor.h
--- linux-2.5.43-orig/include/asm-i386/processor.h 2002-10-12 13:21:05.000000000 +0900
+++ linux-2.5.43/include/asm-i386/processor.h 2002-10-14 17:15:38.000000000 +0900
@@ -91,7 +91,7 @@

extern void identify_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
-extern void dodgy_tsc(void);
+extern int pit_latch_buggy;

/*
* EFLAGS bits
@@ -229,8 +229,11 @@
#define CX86_CCR5 0xe9
#define CX86_CCR6 0xea
#define CX86_CCR7 0xeb
+#define CX86_PCR0 0x20
+#define CX86_PCR1 0xf0
#define CX86_DIR0 0xfe
#define CX86_DIR1 0xff
+#define CX86_GCR 0xb8
#define CX86_ARR_BASE 0xc4
#define CX86_RCR_BASE 0xdc


2002-10-17 17:15:14

by Dave Jones

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

On Fri, Oct 18, 2002 at 02:12:17AM +0900, Hiroshi Miura wrote:

> NatSemi Geode has a several feature to speed up,
> but reset defalut value is set to slow side.
>
> I make a patch to speed up Geode about 20-40%!!
> the benchmark result is downloadable from http://www.da-cha.org/geode/geode_graph.sxc.
> that is openoffice format.
>
> I use this patch with 2.4.18, 2.4.19 in 4 month, I think it is stable enough.

Previously these tweaks were done in userspace with the set6x86 utility.
Is there any reason that these need to be done in the kernel apart from
convenience ?

Dave

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-10-18 02:23:24

by Hiroshi Miura

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

In message "Re: NatSemi Geode improvement"
on 02/10/17, Dave Jones <[email protected]> writes:
> On Fri, Oct 18, 2002 at 02:12:17AM +0900, Hiroshi Miura wrote:
>
> > NatSemi Geode has a several feature to speed up,
> > but reset defalut value is set to slow side.
> >
> > I make a patch to speed up Geode about 20-40%!!
> > the benchmark result is downloadable from http://www.da-cha.org/geode/geode_graph.sxc.
> > that is openoffice format.
> >
> > I use this patch with 2.4.18, 2.4.19 in 4 month, I think it is stable enough.
>
> Previously these tweaks were done in userspace with the set6x86 utility.
> Is there any reason that these need to be done in the kernel apart from
> convenience ?

I have now tried using set6x86 to set these registers, and it can do most of these
except for set_cx86_memwb().

To enable memory write-back, I need to set CR0, which requires special privileges.
set6x86 cannot set CR0.

So set_cx86_memwb() needs to be done in the kernel;
the others have no reason to be done there.
Is that OK?

The following is the set_cx86_memwb() code.

diff -urB -x .config -x '*.[oasS]' -x '*.in' -x '*.rej' -x '*.orig' linux-2.5.43-orig/arch/i386/kernel/apm.c linux-2.5.43/arch/i386/kernel/apm.c
diff -urB -x .config -x '*.[oasS]' -x '*.in' -x '*.rej' -x '*.orig' linux-2.5.43-orig/arch/i386/kernel/cpu/cyrix.c linux-2.5.43/arch/i386/kernel/cpu/cyrix.c
--- linux-2.5.43-orig/arch/i386/kernel/cpu/cyrix.c 2002-10-12 13:21:34.000000000 +0900
+++ linux-2.5.43/arch/i386/kernel/cpu/cyrix.c 2002-10-14 21:34:57.000000000 +0900
@@ -1,3 +1,7 @@
+/*
+ * NSC Geode improvments
+ * Hiroshi Miura <[email protected]>, June, 2002.
+ */
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/delay.h>
@@ -105,6 +109,27 @@
}
}

+static void __init set_cx86_memwb(void)
+{
+ unsigned long cr0;
+ unsigned long flags;
+
+ printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
+ local_irq_save(flags);
+ /* CCR2 bit 2: unlock NW bit */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+ /* set 'Not Write-through' */
+ cr0 = 0x20000000;
+ __asm__("movl %%cr0,%%eax\n\t"
+ "orl %0,%%eax\n\t"
+ "movl %%eax,%%cr0\n"
+ : : "r" (cr0)
+ :"ax");
+ /* CCR2 bit 2: lock NW bit and set WT1 */
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+ local_irq_restore(flags);
+}
+
static void __init init_cyrix(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
@@ -170,7 +233,7 @@
c->coma_bug = 1;
break;

- case 4: /* MediaGX/GXm */
+ case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
#ifdef CONFIG_PCI
/* It isn't really a PCI quirk directly, but the cure is the
same. The MediaGX has deep magic SMM stuff that handles the
@@ -191,26 +250,30 @@

/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
- /* Enable Natsemi MMX extensions */
- setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+ if (((0x54 >= dir1) && (dir1 >= 0x50))
+ || (dir1 >= 0x63)) { /* NSC Geode GXlv/GXm/GX1 */
+ set_cx86_memwb(); /* L2 cache write-back mode */
+ }
+ /* Enable cxMMX extensions (GX1 Datasheet 54) */
+ setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);

get_model_name(c); /* get CPU marketing name */
/*
* The 5510/5520 companion chips have a funky PIT
* that breaks the TSC synchronizing, so turn it off
*/
if (pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, NULL) ||
pci_find_device(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, NULL))
clear_bit(X86_FEATURE_TSC, c->x86_capability);
return;
}
else { /* MediaGX */
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
p = Cx86_cb+2;
c->x86_model = (dir1 & 0x20) ? 1 : 2;
#ifndef CONFIG_CS5520
clear_bit(X86_FEATURE_TSC, c->x86_capability);
#endif
}
break;

--
Hiroshi Miura --- http://www.da-cha.org/
NTTDATA Corp. Marketing & Business Strategy Planning Dept. --- [email protected]
Key fingerprint = 9117 9407 5684 FBF1 4063 15B4 401D D077 04AB 8617
-- My hacking life is happy as the day is long

2002-10-18 12:04:15

by Dave Jones

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

On Fri, Oct 18, 2002 at 11:29:01AM +0900, Hiroshi Miura wrote:
> I try now using set6x86 to set these registers, then can do most of these
> except for set_cx86_memwb().
>
> To set the memory write-back, I need to set the CR0 which needs special previlleges.
> set6x86 cannot set CR0.
>
> the set_cx86_memwb() need to be done in the kernel
> the others has no reason to do that.
> it is ok?

It's all __init anyway, so it's ok I guess.
The added bloat for non-cyrix users is in the region of a few bytes...

My initial idea for this sort of thing was going to be to dump it
all in the early-userspace thing that Al Viro was hacking up.
Al, anything appearing in a last minute merge over the next
few days ?

Dave

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-10-18 17:49:45

by H. Peter Anvin

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

Dave Jones wrote:
> On Fri, Oct 18, 2002 at 11:29:01AM +0900, Hiroshi Miura wrote:
> > I try now using set6x86 to set these registers, then can do most of these
> > except for set_cx86_memwb().
> >
> > To set the memory write-back, I need to set the CR0 which needs special previlleges.
> > set6x86 cannot set CR0.
> >
> > the set_cx86_memwb() need to be done in the kernel
> > the others has no reason to do that.
> > it is ok?
>
> It's all __init anyway, so it's ok I guess.
> The added bloat for non-cyrix users is in the region of a few bytes...
>
> My initial idea for this sort of thing was going to be to dump it
> all in the early-userspace thing that Al Viro was hacking up.
> Al, anything appearing in a last minute merge over the next
> few days ?
>

Al has passed off the initramfs patch, and I will start integration of
klibc into the kernel build tree next week.

(Quite a lot of work has been done on klibc in isolation; it just hasn't
been part of the kernel build tree. If that means it ends up being
built separately it would be unfortunate but no disaster.)

-hpa


2002-10-19 16:17:25

by Bernhard Wesely

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

Dave Jones wrote:
> On Fri, Oct 18, 2002 at 02:12:17AM +0900, Hiroshi Miura wrote:
>
> > NatSemi Geode has a several feature to speed up,
> > but reset defalut value is set to slow side.
> >
> > I make a patch to speed up Geode about 20-40%!!
> > the benchmark result is downloadable from http://www.da-cha.org/geode/geode_graph.sxc.
> > that is openoffice format.
> >
> > I use this patch with 2.4.18, 2.4.19 in 4 month, I think it is stable enough.
>
> Previously these tweaks were done in userspace with the set6x86 utility.
> Is there any reason that these need to be done in the kernel apart from
> convenience ?
>

Maybe not, but as this patch alters the initialization of the CPU to
(seems to me) more useful values, it should be integrated into the
Kernel. Not everyone knows of "set6x86".

Just my thoughts.

> Dave
>

Bernie

2002-10-21 13:46:36

by Alan

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

> + printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
> + local_irq_save(flags);
> + ccr3 = getCx86(CX86_CCR3);
> + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
> + /* Load/Store Serialize to mem access disable (=reorder it) */
> + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
> +#ifdef CONFIG_NOHIGHMEM
> + /* set load/store serialize from 1GB to 4GB */
> + ccr3 |= 0xe0;
> +#endif
> + setCx86(CX86_CCR3, ccr3);

I don't think this is safe. You now need store fences on bus mastering
DMA. You should be able to reuse the IDT winchip code for that - I set
the winchip up for weak store ordering too, and it's a big win (I also
saw about 30% on block copies)

Alan

2002-10-22 00:36:52

by Hiroshi Miura

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

In message "Re: NatSemi Geode improvement"
on 02/10/21, Alan Cox <[email protected]> writes:
> > + printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n");
> > + local_irq_save(flags);
> > + ccr3 = getCx86(CX86_CCR3);
> > + setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
> > + /* Load/Store Serialize to mem access disable (=reorder it) */
> > + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
> > +#ifdef CONFIG_NOHIGHMEM
> > + /* set load/store serialize from 1GB to 4GB */
> > + ccr3 |= 0xe0;
> > +#endif
> > + setCx86(CX86_CCR3, ccr3);
>
> I dont think this is safe. You now need store fences on bus mastering
> DMA. You should be able to reuse the IDT winchip code for that - I set
> the winchip up for weak store ordering too, and its a big win (I also
> saw about 30% on block copies)

The Winchip C6 MCR is similar to the Intel MTRR and the Cyrix 6x86MX ARR.
The Geode has NO similar registers; it only has serialize flags for the 1-2GB, 2-3GB and 3-4GB ranges,
which are the most significant bits of CCR3. The Geode always serializes the 640KB-1MB area.
Read/write reordering and read bypassing are handled by the Geode MMU.

This means that MMIO must be mapped above 1GB, or this feature must be disabled.

for example, on my geode machine,

$ cat /proc/iomem
00000000-0009fbff : System RAM
0009fc00-0009ffff : reserved
000a0000-000bffff : Video RAM area
000c0000-000c7fff : Video ROM
000f0000-000fffff : System ROM
00100000-09d7ffff : System RAM
00100000-001e7ce2 : Kernel code
001e7ce3-0022a857 : Kernel data
10000000-10000fff : Ricoh Co Ltd RL5c475
10000000-10000fff : i82365
40010000-40010fff : Cyrix Corporation 5520 [Cognac]
60000000-60000fff : card services
e0000000-e0000fff : Compaq Computer Corporation ZFMicro Chipset USB
e0000000-e0000fff : usb-ohci
ffff0000-ffffffff : reserved
(this sample is on linux-2.4.19-pre10-ac2)

With this layout, this patch may be safe under this condition.

Anyway, this reorder setting can be done with the 'set6x86' tool,
so the user can decide in user space whether to use memory re-ordering or not.


Hiroshi Miura --- http://www.da-cha.org/
NTTDATA Corp. Marketing & Business Strategy Planning Dept. --- [email protected]
Key fingerprint = 9117 9407 5684 FBF1 4063 15B4 401D D077 04AB 8617
-- My hacking life is happy as the day is long



2002-10-22 09:38:44

by Alan

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

On Tue, 2002-10-22 at 01:12, Hiroshi Miura wrote:
> It means that mmio must map to over 1GB area or disable this feature.

You need to think about bus mastering devices as well. With re-ordering
enabled you may confuse bus master hardware by writing fields in the
wrong order (as the PCI device sees it).

This is not a big problem. On the winchip we avoid this by using locked
operations at the end of each of the PCI DMA mapping functions. I think
all that is needed is to also define CONFIG_X86_OOSTORE for a Geode
target. The kernel will then generate

lock; addl $0, 0(%%esp)

to force write ordering where it might be essential, and if OOSTORE is
defined we can safely turn on the speed up.






2002-10-22 20:52:37

by Alan

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

On Fri, 2002-10-18 at 03:29, Hiroshi Miura wrote:
> I try now using set6x86 to set these registers, then can do most of these
> except for set_cx86_memwb().

I think the original patch was the best, setting sensible defaults - as
we can make that code __init so it is free. I merged the other fixes and
made the GEODE CPU choice set OOSTORE. That means what is needed now is
to drop in the code to set the CPU to writeback, and also the code to
check that the mmio space is actually beyond 1Gbyte

2002-10-26 07:37:46

by Hiroshi Miura

[permalink] [raw]
Subject: Re: NatSemi Geode improvement

Hi,

I use the CASIO CASSIOPEIA FIVA 101s and 103s; please see http://www.da-cha.org/fiva/fiva.html
for details. I also posted these patches to the FIVA users mailing list (Japanese only), and
several tens of users use them.
I have not received any reports that it fails to run or that it crashes.

Its BIOS is based on the Award Modular BIOS v4.51PGM.

Do you have any trouble with this code?

In message "Re: NatSemi Geode improvement"
on 02/10/25, James Finnie <[email protected]> writes:
> Hi,
>
> What Geode platform is your testing on? Any what BIOS is it running??
>
> Thanks in advance,
> Kind regards,
>
> James Finnie

--
Hiroshi Miura --- http://www.da-cha.org/
NTTDATA Corp. Marketing & Business Strategy Planning Dept. --- [email protected]
Key fingerprint = 9117 9407 5684 FBF1 4063 15B4 401D D077 04AB 8617
-- My hacking life is happy as the day is long