Both the APM BIOS and PnP BIOS code use a segment hack to simulate real
mode selector 0x40 (which points to the BIOS data area at 0x00400 in
real mode). Several broken BIOSen use selector 0x40 as if they were
running in real mode, which we make work by faking up selector 0x40 in
the GDT to point to physical memory starting at 0x400. We limit the
access to the remainder of this physical page using a byte granular
limit. Rather than have this tricky code in multiple places, it makes
sense to define it in one place, and the GDT makes a very convenient
place for it. Use GDT entry 4 as the BAD_BIOS_CACHE segment.
Signed-off-by: Zachary Amsden <[email protected]>
Index: linux-2.6.14-zach-work/include/asm-i386/segment.h
===================================================================
--- linux-2.6.14-zach-work.orig/include/asm-i386/segment.h 2005-11-04 15:46:10.000000000 -0800
+++ linux-2.6.14-zach-work/include/asm-i386/segment.h 2005-11-05 00:28:05.000000000 -0800
@@ -91,6 +91,15 @@
#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
+/*
+ * Turns out the BIOS data area at 0x400 is commonly accessed from broken
+ * BIOS using real mode selector 0x40. We cache the bogus BIOS segment
+ * descriptor in a spare GDT entry and fix up its base at boot.
+ */
+#define GDT_ENTRY_BAD_BIOS_CACHE 4
+#define GDT_ENTRY_BAD_BIOS (0x40 >> 3)
+#define BAD_BIOS_AREA (0x400)
+
/* The PnP BIOS entries in the GDT */
#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
Index: linux-2.6.14-zach-work/arch/i386/kernel/head.S
===================================================================
--- linux-2.6.14-zach-work.orig/arch/i386/kernel/head.S 2005-11-04 15:46:50.000000000 -0800
+++ linux-2.6.14-zach-work/arch/i386/kernel/head.S 2005-11-05 00:28:12.000000000 -0800
@@ -487,7 +487,7 @@ ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* 0x0b reserved */
.quad 0x0000000000000000 /* 0x13 reserved */
.quad 0x0000000000000000 /* 0x1b reserved */
- .quad 0x0000000000000000 /* 0x20 unused */
+ .quad 0x0040920004000bff /* 0x20 bad bios 3072 bytes at 0x400 */
.quad 0x0000000000000000 /* 0x28 unused */
.quad 0x0000000000000000 /* 0x33 TLS entry 1 */
.quad 0x0000000000000000 /* 0x3b TLS entry 2 */
Index: linux-2.6.14-zach-work/arch/i386/kernel/apm.c
===================================================================
--- linux-2.6.14-zach-work.orig/arch/i386/kernel/apm.c 2005-11-04 15:46:50.000000000 -0800
+++ linux-2.6.14-zach-work/arch/i386/kernel/apm.c 2005-11-05 00:28:11.000000000 -0800
@@ -414,7 +414,6 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitq
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user * user_list;
static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
static char driver_version[] = "1.16ac"; /* no spaces */
@@ -593,7 +592,7 @@ static u8 apm_bios_call(u32 func, u32 eb
cpu = get_cpu();
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
- gdt[0x40 / 8] = bad_bios_desc;
+ gdt[0x40 / 8] = gdt[GDT_ENTRY_BAD_BIOS_CACHE];
local_save_flags(flags);
APM_DO_CLI;
@@ -637,7 +636,7 @@ static u8 apm_bios_call_simple(u32 func,
cpu = get_cpu();
gdt = get_cpu_gdt_table(cpu);
save_desc_40 = gdt[0x40 / 8];
- gdt[0x40 / 8] = bad_bios_desc;
+ gdt[0x40 / 8] = gdt[GDT_ENTRY_BAD_BIOS_CACHE];
local_save_flags(flags);
APM_DO_CLI;
@@ -2275,15 +2274,6 @@ static int __init apm_init(void)
pm_active = 1;
/*
- * Set up a segment that references the real mode segment 0x40
- * that extends up to the end of page zero (that we have reserved).
- * This is for buggy BIOS's that refer to (real mode) segment 0x40
- * even though they are called in protected mode.
- */
- set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
- _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-
- /*
* Set up the long jump entry point to the APM BIOS, which is called
* from inline assembly.
*/
Index: linux-2.6.14-zach-work/arch/i386/kernel/cpu/common.c
===================================================================
--- linux-2.6.14-zach-work.orig/arch/i386/kernel/cpu/common.c 2005-11-04 15:46:10.000000000 -0800
+++ linux-2.6.14-zach-work/arch/i386/kernel/cpu/common.c 2005-11-05 00:28:09.000000000 -0800
@@ -596,6 +596,15 @@ void __devinit cpu_init(void)
* and set up the GDT descriptor:
*/
memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+
+ /*
+ * Set up a segment that references the real mode segment 0x40
+ * that extends up to the end of page zero (that we have reserved).
+ * This is for buggy BIOS's that refer to (real mode) segment 0x40
+ * even though they are called in protected mode. The limit is
+ * preset, we hardwire the base here.
+ */
+ set_base(gdt[GDT_ENTRY_BAD_BIOS_CACHE], __va(BAD_BIOS_AREA));
/* Set up GDT entry for 16bit stack */
*(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
Index: linux-2.6.14-zach-work/drivers/pnp/pnpbios/bioscalls.c
===================================================================
--- linux-2.6.14-zach-work.orig/drivers/pnp/pnpbios/bioscalls.c 2005-11-04 15:46:10.000000000 -0800
+++ linux-2.6.14-zach-work/drivers/pnp/pnpbios/bioscalls.c 2005-11-05 00:28:12.000000000 -0800
@@ -70,8 +70,6 @@ set_base(per_cpu(cpu_gdt_table,cpu)[(sel
set_limit(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], size); \
} while(0)
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
-
/*
* At some point we want to use this stack frame pointer to unwind
* after PnP BIOS oopses.
@@ -107,7 +105,8 @@ static inline u16 call_pnp_bios(u16 func
cpu = get_cpu();
save_desc_40 = per_cpu(cpu_gdt_table,cpu)[0x40 / 8];
- per_cpu(cpu_gdt_table,cpu)[0x40 / 8] = bad_bios_desc;
+ per_cpu(cpu_gdt_table,cpu)[0x40 / 8] =
+ per_cpu(cpu_gdt_table,cpu)[GDT_ENTRY_BAD_BIOS_CACHE];
/* On some boxes IRQ's during PnP BIOS calls are deadly. */
spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -524,8 +523,6 @@ void pnpbios_calls_init(union pnp_bios_i
pnp_bios_callpoint.offset = header->fields.pm16offset;
pnp_bios_callpoint.segment = PNP_CS16;
- set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
- _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
for(i=0; i < NR_CPUS; i++)
{
Q2_SET_SEL(i, PNP_CS32, &pnp_bios_callfunc, 64 * 1024);
* Zachary Amsden <[email protected]> wrote:
> gdt = get_cpu_gdt_table(cpu);
> save_desc_40 = gdt[0x40 / 8];
> - gdt[0x40 / 8] = bad_bios_desc;
> + gdt[0x40 / 8] = gdt[GDT_ENTRY_BAD_BIOS_CACHE];
>
I like the cleanup, but wouldn't it be simpler to dedicate GDT entry #8
to the 0x40 descriptor, and hence be compatible with such broken BIOSes
by default? Right now entry #8 is taken up by TLS segment #2, but we
could change GDT_ENTRY_TLS_MIN from 6 to 9 and push the TLS segments to
entries 9,10,11. [ Could there be any buggy SMM code that relies on
having something at 0x40? ]
Ingo
Ingo Molnar wrote:
>* Zachary Amsden <[email protected]> wrote:
>
>
>
>> gdt = get_cpu_gdt_table(cpu);
>> save_desc_40 = gdt[0x40 / 8];
>>- gdt[0x40 / 8] = bad_bios_desc;
>>+ gdt[0x40 / 8] = gdt[GDT_ENTRY_BAD_BIOS_CACHE];
>>
>>
>>
>
>i like the cleanup, but wouldnt it be simpler to dedicate GDT entry #8
>to the 0x40 descriptor, and hence be compatible with such broken BIOSes
>by default? Right now entry #8 is taken up by TLS segment #2, but we
>could change GDT_ENTRY_TLS_MIN from 6 to 9 and push the TLS segments to
>entries 9,10,11. [ Could there be any buggy SMM code that relies on
>having something at 0x40? ]
>
>
I worry that there could be buggy userspace code that relies on having
selector 0x40 - notably Wine. So although I would like to make 0x40 the
default, it can't be guaranteed to be safe.
SMM code is safe, since it gets SMRAM mapped in on entry and has
descriptor saved state so it can mess with tables as it sees fit without
worrying about restoring anything.
Zach
* Zachary Amsden <[email protected]> wrote:
> >>- gdt[0x40 / 8] = bad_bios_desc;
> >>+ gdt[0x40 / 8] = gdt[GDT_ENTRY_BAD_BIOS_CACHE];
> >
> >i like the cleanup, but wouldnt it be simpler to dedicate GDT entry #8
> >to the 0x40 descriptor, and hence be compatible with such broken BIOSes
> >by default? Right now entry #8 is taken up by TLS segment #2, but we
> >could change GDT_ENTRY_TLS_MIN from 6 to 9 and push the TLS segments to
> >entries 9,10,11. [ Could there be any buggy SMM code that relies on
> >having something at 0x40? ]
>
> I worry that there could be buggy userspace code that relies on having
> selector 0x40 - notably Wine. So although I would like to make 0x40
> the default, can't be guaranteed.
Why use up a GDT entry then for GDT_ENTRY_BAD_BIOS_CACHE? Just put it
into a global variable, in the read-mostly section.
Ingo
On Mon, 7 Nov 2005, Zachary Amsden wrote:
>
> Both the APM BIOS and PnP BIOS code use a segment hack to simulate real
> mode selector 0x40 (which points to the BIOS data area at 0x00400 in
> real mode). Several broken BIOSen use selector 0x40 as if they were
> running in real mode, which we make work by faking up selector 0x40 in
> the GDT to point to physical memory starting at 0x400. We limit the
> access to the remainder of this physical page using a byte granular
> limit. Rather than have this tricky code in multiple places, it makes
> sense to define it in one place, and the GDT makes a very convenient
> place for it. Use GDT entry 4 as the BAD_BIOS_CACHE segment.
I'd much rather use entry 8 instead, which should just automatically mean
that selector 0x40 _always_ points to virtual address 0x400. No switching
etc..
Isn't this what Wine already has to work around, or something?
Ingo, can we move the TLS selectors upwards, or does user space perhaps
know about the current TLS layout? Wine in particular may well know ;(
Linus
Linus Torvalds wrote:
>
> On Mon, 7 Nov 2005, Zachary Amsden wrote:
>
>>Both the APM BIOS and PnP BIOS code use a segment hack to simulate real
>>mode selector 0x40 (which points to the BIOS data area at 0x00400 in
>>real mode). Several broken BIOSen use selector 0x40 as if they were
>>running in real mode, which we make work by faking up selector 0x40 in
>>the GDT to point to physical memory starting at 0x400. We limit the
>>access to the remainder of this physical page using a byte granular
>>limit. Rather than have this tricky code in multiple places, it makes
>>sense to define it in one place, and the GDT makes a very convenient
>>place for it. Use GDT entry 4 as the BAD_BIOS_CACHE segment.
>
> I'd much rather use entry 8 instead, which should just automatically mean
> that selector 0x40 _always_ points to virtual address 0x400. No switching
> etc..
>
> Isn't this what Wine already has to work around, or something?
>
> Ingo, can we move the TLS selectors upwards, or does user space perhaps
> know about the current TLS layout? Wine in particular may well know ;(
>
I thought the point was that Wine needs to point GDT entry 8 into userspace.
-hpa
Linus Torvalds wrote:
>On Mon, 7 Nov 2005, Zachary Amsden wrote:
>
>
>>Both the APM BIOS and PnP BIOS code use a segment hack to simulate real
>>mode selector 0x40 (which points to the BIOS data area at 0x00400 in
>>real mode). Several broken BIOSen use selector 0x40 as if they were
>>running in real mode, which we make work by faking up selector 0x40 in
>>the GDT to point to physical memory starting at 0x400. We limit the
>>access to the remainder of this physical page using a byte granular
>>limit. Rather than have this tricky code in multiple places, it makes
>>sense to define it in one place, and the GDT makes a very convenient
>>place for it. Use GDT entry 4 as the BAD_BIOS_CACHE segment.
>>
>>
>
>I'd much rather use entry 8 instead, which should just automatically mean
>that selector 0x40 _always_ points to virtual address 0x400. No switching
>etc..
>
>Isn't this what Wine already has to work around, or something?
>
>Ingo, can we move the TLS selectors upwards, or does user space perhaps
>know about the current TLS layout? Wine in particular may well know ;(
>
>
Looking at the Wine code, it doesn't seem to know. It uses a GDT entry
if possible in preference to an LDT entry, and appears to not care which
TLS descriptor it gets. So I think it would be safe to reserve selector
0x40 for the BIOS real mode segment and move TLS up. That's kind of a
scary change, but I don't see anything wrong with it other than the
testing implications. Hopefully DOSemu is not affected.
/***********************************************************************
* wine_ldt_alloc_fs
*
* Allocate an LDT entry for a %fs selector, reusing a global
* GDT selector if possible. Return the selector value.
*/
unsigned short wine_ldt_alloc_fs(void)
{
if (global_fs_sel == -1)
{
struct modify_ldt_s ldt_info;
int ret;
ldt_info.entry_number = -1; <--------- note it doesn't care
fill_modify_ldt_struct( &ldt_info, &null_entry );
if ((ret = set_thread_area( &ldt_info ) < 0))
{
global_fs_sel = 0; /* don't try it again */
if (errno != ENOSYS) perror( "set_thread_area" );
}
else global_fs_sel = (ldt_info.entry_number << 3) | 3;
}
if (global_fs_sel > 0) return global_fs_sel;
return wine_ldt_alloc_entries( 1 );
Linus Torvalds wrote:
>On Mon, 7 Nov 2005, Zachary Amsden wrote:
>
>
>>Both the APM BIOS and PnP BIOS code use a segment hack to simulate real
>>mode selector 0x40 (which points to the BIOS data area at 0x00400 in
>>real mode). Several broken BIOSen use selector 0x40 as if they were
>>running in real mode, which we make work by faking up selector 0x40 in
>>the GDT to point to physical memory starting at 0x400. We limit the
>>access to the remainder of this physical page using a byte granular
>>limit. Rather than have this tricky code in multiple places, it makes
>>sense to define it in one place, and the GDT makes a very convenient
>>place for it. Use GDT entry 4 as the BAD_BIOS_CACHE segment.
>>
>>
>
>I'd much rather use entry 8 instead, which should just automatically mean
>that selector 0x40 _always_ points to virtual address 0x400. No switching
>etc..
>
>Isn't this what Wine already has to work around, or something?
>
>
I have answers now to the questions:
Wine has to support allocating thread pointers for NT processes in
ntdll, so it needs a way to allocate descriptors. It doesn't seem to
care if they are LDT or GDT descriptors.
>Ingo, can we move the TLS selectors upwards, or does user space perhaps
>know about the current TLS layout? Wine in particular may well know ;(
>
>
It does not know. And DOSemu appears to only use LDT. GDT is used to
allocate a global thread area for Wine, but it has a fallback mechanism
that appears to have been built from the start to deal with varying
thread selectors rather than a fixed notion (as GDT TLS segments are not
available on 2.4). Rather convenient.
Now the million dollar question is: who uses three TLS segments? Wine
appears to use two (glibc's and its own private one), and I have no idea
what other software makes use of a third. If only two thread selectors
are needed, then the layout below does the trick. Or we could rebase the
selectors down to 0x20-0x30.
* ------- start of TLS (Thread-Local Storage) segments:
*
* 6 - TLS segment #1 [ glibc's TLS segment ]
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
* 8 - BIOS real mode segment
Zach