the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
does two things:
- it implements a second TLS entry for Wine's purposes.
Alexandre suggested that Wine would need two TLS entries, one for glibc
(in %gs), and one for the Win32 API (in %fs). The constant selector is
also a speedup for switches to/from 16-bit mode.
i left the possibility open to add even more TLS entries, but i find it
very unlikely to happen. So the code does not iterate over an array of TLS
descriptors, for performance reasons. This can be changed anytime without
affecting the userspace interface.
- the patch adds the get_thread_area() system-call.
the get_thread_area() call is needed by debuggers, to be able to read the
TLS settings of a threaded application, without having to assume anything
about what was loaded. The get_thread_area() call does not expose any
segmentation details - it returns the TLS info in the same format as
passed to the set_thread_area() call.
i've also attached tls.c which shows off both extensions. These extensions
are source and binary-compatible with any potential TLS code.
Ingo
--- linux/arch/i386/kernel/process.c.orig Wed Aug 7 19:16:45 2002
+++ linux/arch/i386/kernel/process.c Wed Aug 7 19:40:27 2002
@@ -839,6 +839,7 @@
asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
{
struct thread_struct *t = ¤t->thread;
+ struct desc_struct *desc;
int writable = 0;
int cpu;
@@ -848,21 +849,62 @@
if (flags & TLS_FLAG_WRITABLE)
writable = 1;
+ desc = &t->tls_desc1;
+ if (flags & TLS_FLAG_ENTRY2)
+ desc = &t->tls_desc2;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
+ desc->a = ((base & 0x0000ffff) << 16) | 0xffff;
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
+ desc->b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
0xf0000 | (writable << 9) | (1 << 15) |
(1 << 22) | (1 << 23) | 0x7000;
load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ if (flags & TLS_FLAG_ENTRY2)
+ return TLS_ENTRY2*8 + 3;
+ else
+ return TLS_ENTRY1*8 + 3;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) \
+( (((desc).a >> 16) & 0x0000ffff) | \
+ (((desc).b << 16) & 0x00ff0000) | \
+ ( (desc).b & 0xff000000) )
+
+#define GET_WRITABLE(desc) \
+ (((desc).b >> 9) & 0x00000001)
+
+asmlinkage int sys_get_thread_area(unsigned long *ubase, unsigned long *uflags,
+ unsigned long flags)
+{
+ struct thread_struct *thread = ¤t->thread;
+ unsigned long base, flg;
+
+ if (flags & ~TLS_FLAGS_MASK)
+ return -EINVAL;
+
+ if (flags & TLS_FLAG_ENTRY2) {
+ base = GET_BASE(thread->tls_desc2);
+ flg = GET_WRITABLE(thread->tls_desc2) | TLS_FLAG_ENTRY2;
+ } else {
+ base = GET_BASE(thread->tls_desc1);
+ flg = GET_WRITABLE(thread->tls_desc1) | TLS_FLAG_ENTRY1;
+ }
+ if (copy_to_user(ubase, &base, sizeof(base)))
+ return -EFAULT;
+ if (copy_to_user(uflags, &flg, sizeof(flg)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/entry.S.orig Wed Aug 7 19:18:33 2002
+++ linux/arch/i386/kernel/entry.S Wed Aug 7 19:18:21 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/include/asm-i386/processor.h.orig Wed Aug 7 19:22:57 2002
+++ linux/include/asm-i386/processor.h Wed Aug 7 19:27:01 2002
@@ -376,8 +376,8 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+/* TLS cached descriptors */
+ struct desc_struct tls_desc1, tls_desc2;
};
#define INIT_THREAD { \
--- linux/include/asm-i386/unistd.h.orig Wed Aug 7 19:18:45 2002
+++ linux/include/asm-i386/unistd.h Wed Aug 7 19:18:58 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
--- linux/include/asm-i386/desc.h.orig Wed Aug 7 19:20:57 2002
+++ linux/include/asm-i386/desc.h Wed Aug 7 19:51:13 2002
@@ -12,7 +12,7 @@
* 3 - kernel data segment
* 4 - user code segment <==== new cacheline
* 5 - user data segment
- * 6 - Thread-Local Storage (TLS) segment
+ * 6 - Thread-Local Storage (TLS) segment #1
* 7 - LDT
* 8 - APM BIOS support <==== new cacheline
* 9 - APM BIOS support
@@ -23,12 +23,13 @@
* 14 - PNPBIOS support
* 15 - PNPBIOS support
* 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
+ * 17 - TLS segment #2
* 18 - not used
* 19 - not used
*/
#define TSS_ENTRY 1
-#define TLS_ENTRY 6
+#define TLS_ENTRY1 6
+#define TLS_ENTRY2 17
#define LDT_ENTRY 7
/*
* The interrupt descriptor table has room for 256 idt's,
@@ -86,13 +87,16 @@
_set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define TLS_FLAGS_MASK 0x00000003
#define TLS_FLAG_WRITABLE 0x00000001
+#define TLS_FLAG_ENTRY1 0x00000000
+#define TLS_FLAG_ENTRY2 0x00000002
static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+ cpu_gdt_table[cpu][TLS_ENTRY1] = t->tls_desc1;
+ cpu_gdt_table[cpu][TLS_ENTRY2] = t->tls_desc2;
}
static inline void clear_LDT(void)
On Wed, 7 Aug 2002, Ingo Molnar wrote:
>
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
> does two things:
>
> - it implements a second TLS entry for Wine's purposes.
Guys, I really don't like how the segment map ends up getting uglier and
uglier.
I would suggest:
- move all kernel-related (and thus non-visible to user space) segments
up, and make the cacheline optimizations _there_.
- keep the TLS entries contiguous, and make sure that segment 0040 (ie
GDT entry #8) is available to a TLS entry, since if I remember
correctly, that one is also magical for old Windows binaries for all
the wrong reasons (ie it was some system data area in DOS and in
Windows 3.1)
- and for cleanliness bonus points: make the regular user data segments
just another TLS segment that just happens to have default values. If
the user wants to screw with its own segments, let it.
Then, for double extra bonus points somebody should look into whether
those damn PnP BIOS segments could be simply made to be TLS segments
during module init. I don't know if that PnP stuff is required later or
not.
Linus
On Wed, Aug 07, 2002 at 08:10:40PM +0200, Ingo Molnar wrote:
>
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
> does two things:
>
> - it implements a second TLS entry for Wine's purposes.
The sys_set_thread_area interface gets worse with every patch you post..
Why do you really need a magic multiplexer syscall (you could have just
used prctl if you don't need a sane interface..)?
What about a proper interface like:
asmlinkage int
sys_set_thread_area(int entry, unsigned long base, int writeable)
instead?
On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <[email protected]> wrote:
>
> - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> GDT entry #8) is available to a TLS entry, since if I remember
> correctly, that one is also magical for old Windows binaries for all
> the wrong reasons (ie it was some system data area in DOS and in
> Windows 3.1)
segment 0040 is used by the APM driver to work around bugs in some BIOS
implementations where some (brain-dead) BIOS writer has assume that the
BIOS data area is still available in protected mode ...
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
On Thu, 8 Aug 2002, Stephen Rothwell wrote:
> On Wed, 7 Aug 2002 11:33:23 -0700 (PDT) Linus Torvalds <[email protected]> wrote:
> >
> > - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> > GDT entry #8) is available to a TLS entry, since if I remember
> > correctly, that one is also magical for old Windows binaries for all
> > the wrong reasons (ie it was some system data area in DOS and in
> > Windows 3.1)
>
> segment 0040 is used by the APM driver to work around bugs in some BIOS
> implementations where some (brain-dead) BIOS writer has assume that the
> BIOS data area is still available in protected mode ...
Ok, sounds like that one ends up having to be a fixed segment (I wonder if
Wine can take advantage of it? looks like it is hardcoded to base 0x400,
which is probably fine for Wine anyway - just map something at the right
address - but it looks CPL0 only? Might be ok to just make it available to
user space).
Linus
Linus Torvalds <[email protected]> writes:
> Ok, sounds like that one ends up having to be a fixed segment (I wonder if
> Wine can take advantage of it? looks like it is hardcoded to base 0x400,
> which is probably fine for Wine anyway - just map something at the right
> address - but it looks CPL0 only? Might be ok to just make it available to
> user space).
Base 0x400 should work just fine for Wine, we already need to have the
BIOS data mapped there anyway, so simply making the selector available
to user space would work completely transparently for us. We are
currently trapping and emulating accesses to that selector so it
doesn't matter much whether it is protected or not, except for a small
performance gain. What would break Wine is if that selector was made
accessible to user space with a different base address, so this should
be avoided.
--
Alexandre Julliard
[email protected]
On Wed, 7 Aug 2002, Linus Torvalds wrote:
> I would suggest:
> - move all kernel-related (and thus non-visible to user space) segments
> up, and make the cacheline optimizations _there_.
> - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> GDT entry #8) is available to a TLS entry, since if I remember
> correctly, that one is also magical for old Windows binaries for all
> the wrong reasons (ie it was some system data area in DOS and in
> Windows 3.1)
> - and for cleanliness bonus points: make the regular user data segments
> just another TLS segment that just happens to have default values. If
> the user wants to screw with its own segments, let it.
i'll do this. Julliard, any additional suggestions perhaps - is GDT entry
8 the best %fs choice for Wine?
Ingo
Ingo Molnar <[email protected]> writes:
> i'll do this. Julliard, any additional suggestions perhaps - is GDT entry
> 8 the best %fs choice for Wine?
No, this one is special and has to point to 0x400, so it's actually
the only one that wouldn't work to use as %fs in Wine.
--
Alexandre Julliard
[email protected]
On Wed, 2002-08-07 at 19:33, Linus Torvalds wrote:
> - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> GDT entry #8) is available to a TLS entry, since if I remember
> correctly, that one is also magical for old Windows binaries for all
> the wrong reasons (ie it was some system data area in DOS and in
> Windows 3.1)
Lots of BIOSes (a million monkeys bashing on typewriters will write
something that passes some BIOS vendor QA in about 2 seconds) illegally
assume that 0040: points at the BIOS data segment 0040 when making APM32
calls. Sufficient that Windows makea it so and its never going to get
corrected.
> Then, for double extra bonus points somebody should look into whether
> those damn PnP BIOS segments could be simply made to be TLS segments
> during module init. I don't know if that PnP stuff is required later or
> not.
PnPBIOS has to rewrite segments as it goes for data passing. It doesnt
really matter where you stuff them though.
On Wed, 2002-08-07 at 20:33, Linus Torvalds wrote:
>
> On Wed, 7 Aug 2002, Ingo Molnar wrote:
> >
> > the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
> > does two things:
> >
> > - it implements a second TLS entry for Wine's purposes.
>
> Guys, I really don't like how the segment map ends up getting uglier and
> uglier.
>
> I would suggest:
> - move all kernel-related (and thus non-visible to user space) segments
> up, and make the cacheline optimizations _there_.
Done.
> - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> GDT entry #8) is available to a TLS entry, since if I remember
> correctly, that one is also magical for old Windows binaries for all
> the wrong reasons (ie it was some system data area in DOS and in
> Windows 3.1)
Done. Segment 0x40 set to CPL 3.
> - and for cleanliness bonus points: make the regular user data segments
> just another TLS segment that just happens to have default values. If
> the user wants to screw with its own segments, let it.
Bad idea: makes task switch slower without any practical advantage.
The user may load a TLS or LDT selector in %ds to get the same effect.
> Then, for double extra bonus points somebody should look into whether
> those damn PnP BIOS segments could be simply made to be TLS segments
> during module init. I don't know if that PnP stuff is required later or
> not.
Not sure what you mean. The current definition of TLS segments is "a
minimal number of GDT entries that are modified on task switch and that
can be set on a per-task basis so that the selectors can be loaded %fs
and %gs". How can kernel PNPBIOS segments fit in this definition?
The patch changes the descriptior layout so that LDT is in the kernel
segment cacheline, the 16-bit APM segments are together and user
segments are together. It also sets segment 0x40 CPL to 3.
__BOOT_CS and __BOOT_DS are introduced as the value of segment selectors
during boot (so that we don't have to enlarge the gdt in setup.s).
New layout:
* 0 - null
* 1 - PNPBIOS support (16->32 gate)
* 2 - boot code segment
* 3 - boot data segment
* 4 - PNPBIOS support <==== new cacheline
* 5 - PNPBIOS support
* 6 - PNPBIOS support
* 7 - PNPBIOS support
* 8 - APM BIOS support (0x400-0x1000)<==== new cacheline
* 9 - APM BIOS support
* 10 - APM BIOS support
* 11 - APM BIOS support
* 12 - kernel code segment <==== new cacheline
* 13 - kernel data segment
* 14 - TSS
* 15 - LDT
* ------- start of user segments
* 16 - user code segment <==== new cacheline
* 17 - user data segment
* 18 - Thread-Local Storage (TLS) segment #1
* 19 - Thread-Local Storage (TLS) segment #2
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/head.S b/arch/i386/boot/compressed/head.S
--- a/arch/i386/boot/compressed/head.S 2002-07-20 21:12:21.000000000 +0200
+++ b/arch/i386/boot/compressed/head.S 2002-08-08 00:14:45.000000000 +0200
@@ -31,7 +31,7 @@
startup_32:
cld
cli
- movl $(__KERNEL_DS),%eax
+ movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
@@ -74,7 +74,7 @@
popl %esi # discard address
popl %esi # real mode pointer
xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $0x100000
+ ljmp $(__BOOT_CS), $0x100000
/*
* We come here, if we were loaded high.
@@ -101,7 +101,7 @@
popl %eax # hcount
movl $0x100000,%edi
cli # make sure we don't get interrupted
- ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+ ljmp $(__BOOT_CS), $0x1000 # and jump to the move routine
/*
* Routine (template) for moving the decompressed kernel in place,
@@ -124,5 +124,5 @@
movsl
movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
- ljmp $(__KERNEL_CS), $0x100000
+ ljmp $(__BOOT_CS), $0x100000
move_routine_end:
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c
--- a/arch/i386/boot/compressed/misc.c 2002-07-20 21:11:24.000000000 +0200
+++ b/arch/i386/boot/compressed/misc.c 2002-08-07 23:48:58.000000000 +0200
@@ -299,7 +299,7 @@
struct {
long * a;
short b;
- } stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS };
+ } stack_start = { & user_stack [STACK_SIZE] , __BOOT_DS };
static void setup_normal_output_buffer(void)
{
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S
--- a/arch/i386/boot/setup.S 2002-07-20 21:11:05.000000000 +0200
+++ b/arch/i386/boot/setup.S 2002-08-08 00:14:30.000000000 +0200
@@ -801,7 +801,7 @@
subw $DELTA_INITSEG, %si
shll $4, %esi # Convert to 32-bit pointer
# NOTE: For high loaded big kernels we need a
-# jmpi 0x100000,__KERNEL_CS
+# jmpi 0x100000,__BOOT_CS
#
# but we yet haven't reloaded the CS register, so the default size
# of the target offset still is 16 bit.
@@ -812,7 +812,7 @@
.byte 0x66, 0xea # prefix + jmpi-opcode
code32: .long 0x1000 # will be set to 0x100000
# for big kernels
- .word __KERNEL_CS
+ .word __BOOT_CS
# Here's a bunch of information about your current kernel..
kernel_version: .ascii UTS_RELEASE
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
--- a/arch/i386/kernel/head.S 2002-08-07 19:03:24.000000000 +0200
+++ b/arch/i386/kernel/head.S 2002-08-08 00:08:48.000000000 +0200
@@ -46,7 +46,7 @@
* Set segments to known values
*/
cld
- movl $(__KERNEL_DS),%eax
+ movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
movl %eax,%fs
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -311,7 +306,7 @@
ENTRY(stack_start)
.long init_thread_union+8192
- .long __KERNEL_DS
+ .long __BOOT_DS
/* This is the default interrupt "handler" :-) */
int_msg:
@@ -415,31 +410,30 @@
* The Global Descriptor Table contains 20 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x0000000000000000 /* 0x00 NULL descriptor */
+ .quad 0x00c09a0000000000 /* 0x08 PNPBIOS 32-bit code */
+ .quad 0x00cf9a000000ffff /* 0x10 boot 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x18 boot 4GB data at 0x00000000 */
+ .quad 0x00809a0000000000 /* 0x20 PNPBIOS 16-bit code */
+ .quad 0x0080920000000000 /* 0x28 PNPBIOS 16-bit data */
+ .quad 0x0080920000000000 /* 0x30 PNPBIOS 16-bit data */
+ .quad 0x0080920000000000 /* 0x38 PNPBIOS 16-bit data */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
+ .quad 0x0040f20000000000 /* 0x40 APM set up for bad BIOS's */
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
- /* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+ .quad 0x00cffa000000ffff /* 0x80 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x88 user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x90 TLS1 descriptor */
+ .quad 0x0000000000000000 /* 0x98 TLS2 descriptor */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/drivers/pnp/pnpbios_core.c b/drivers/pnp/pnpbios_core.c
--- a/drivers/pnp/pnpbios_core.c 2002-08-02 01:19:05.000000000 +0200
+++ b/drivers/pnp/pnpbios_core.c 2002-08-08 00:03:13.000000000 +0200
@@ -90,12 +90,13 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
-#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
-#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
-#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
-#define PNP_TS1 (PNP_GDT+0x18) /* transfer data segment */
-#define PNP_TS2 (PNP_GDT+0x20) /* another data segment */
+#define PNP_CS32 (0x08) /* segment for calling fn */
+
+#define PNP_GDT (0x20)
+#define PNP_CS16 (PNP_GDT+0x00) /* code segment for BIOS */
+#define PNP_DS (PNP_GDT+0x08) /* data segment for BIOS */
+#define PNP_TS1 (PNP_GDT+0x10) /* transfer data segment */
+#define PNP_TS2 (PNP_GDT+0x18) /* another data segment */
/*
* These are some opcodes for a "static asmlinkage"
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/desc.h b/include/asm-i386/desc.h
--- a/include/asm-i386/desc.h 2002-08-07 21:27:54.000000000 +0200
+++ b/include/asm-i386/desc.h 2002-08-08 00:12:01.000000000 +0200
@@ -7,30 +7,31 @@
* The layout of the per-CPU GDT under Linux:
*
* 0 - null
- * 1 - TSS
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - Thread-Local Storage (TLS) segment #1
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
+ * 1 - PNPBIOS support (16->32 gate)
+ * 2 - boot code segment
+ * 3 - boot data segment
+ * 4 - PNPBIOS support <==== new cacheline
+ * 5 - PNPBIOS support
+ * 6 - PNPBIOS support
+ * 7 - PNPBIOS support
+ * 8 - APM BIOS support (0x400-0x1000)<==== new cacheline
* 9 - APM BIOS support
* 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - TLS segment #2
- * 18 - not used
- * 19 - not used
+ * 11 - APM BIOS support
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * ------- start of user segments
+ * 16 - user code segment <==== new cacheline
+ * 17 - user data segment
+ * 18 - Thread-Local Storage (TLS) segment #1
+ * 19 - Thread-Local Storage (TLS) segment #2
*/
-#define TSS_ENTRY 1
-#define TLS_ENTRY1 6
-#define TLS_ENTRY2 17
-#define LDT_ENTRY 7
+#define TSS_ENTRY 14
+#define LDT_ENTRY 15
+#define TLS_ENTRY1 18
+#define TLS_ENTRY2 19
/*
* The interrupt descriptor table has room for 256 idt's,
* the global descriptor table is dependent on the number
diff --exclude-from=/home/ldb/src/linux-exclude -urNd a/include/asm-i386/segment.h b/include/asm-i386/segment.h
--- a/include/asm-i386/segment.h 2002-07-20 21:11:11.000000000 +0200
+++ b/include/asm-i386/segment.h 2002-08-07 23:50:08.000000000 +0200
@@ -1,10 +1,13 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+#define __BOOT_CS 0x10
+#define __BOOT_DS 0x18
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define __KERNEL_CS 0x60
+#define __KERNEL_DS 0x68
+
+#define __USER_CS 0x83
+#define __USER_DS 0x8B
#endif
On 8 Aug 2002, Luca Barbieri wrote:
> > I would suggest:
> > - move all kernel-related (and thus non-visible to user space) segments
> > up, and make the cacheline optimizations _there_.
> Done.
> > - keep the TLS entries contiguous, and make sure that segment 0040 (ie
> > GDT entry #8) is available to a TLS entry, since if I remember
> > correctly, that one is also magical for old Windows binaries for all
> > the wrong reasons (ie it was some system data area in DOS and in
> > Windows 3.1)
> Done. Segment 0x40 set to CPL 3.
> > - and for cleanliness bonus points: make the regular user data segments
> > just another TLS segment that just happens to have default values. If
> > the user wants to screw with its own segments, let it.
> Bad idea: makes task switch slower without any practical advantage.
> The user may load a TLS or LDT selector in %ds to get the same effect.
your patch looks good to me - as long as we want to keep those 2 TLS
entries and nothing more. (which i believe we want.) If even more TLS
entries are to be made possible then a cleaner TLS enumeration interface
has to be used like Christoph mentioned - although i dont think we really
want that, 3 or more entries would be a stretch i think.
Ingo
> your patch looks good to me - as long as we want to keep those 2 TLS
> entries and nothing more. (which i believe we want.) If even more TLS
> entries are to be made possible then a cleaner TLS enumeration interface
> has to be used like Christoph mentioned - although i dont think we really
> want that, 3 or more entries would be a stretch i think.
I think that 2 are enough.
Flat 32-bit programs set ds=es=ss=__USER_DS and cs=__USER_CS so they
only have fs and gs left.
16-bit programs and other odd ones can use the LDT support.
As for the interface I would suggest replacing the current one with a
single interface for LDT and GDT modifications that would provide the
following parameters:
unsigned table
- LDT
- GDTAVAIL: GDT starting from first TLS
- GDTABS: GDT starting from 0
- AUTO: starts with the 2 TLS entries and proceeds with LDT
unsigned operation
- set: copy to kernel space (enlarge table if necessary). If root, don't
check validity for speed, otherwise check to ensure the user is not e.g.
putting call gates to CPL 0 code.
- set1: like set, but passes a single entry directly in the num and ptr
parameters
- get: copy from kernel space
- free: free memory and lower limits. If entry = 0 and num = ~0,
completely frees table.
- map: only for LDT and for root, allows to directly point to a user
memory range
- movekern: when support for per-task GDT is implemented, this would
allow to change the entries used for kernel entries. This would be
implemented with per-CPU IDTs and maybe dynamically generated code.
Useful for virtualization programs.
unsigned entry
- first entry affected. ~0 for first unused entry.
unsigned num
- number of entries affected
void* ptr
- pointer to read/write entries from
(table and operations may be merged)
Return value: first entry changed
e.g. libpthread would use table = AUTO, operation = set1, entry = ~0.
For the LDT things would be implemented as usual. For the GDT the
initial implementation would just modify TLS entries.
In future, support for dynamically allocated per-task GDTs could be
added.
I would implement this by adding ops to sys_modify_ldt.
BTW, tls_desc1/tls_desc2 would IMHO be better as gdt_desc[2].
I don't plan to implement this myself.
Ingo Molnar wrote:
> the attached patch (against BK-curr + Luca Barbieri's two TLS patches)
> does two things:
>
> - it implements a second TLS entry for Wine's purposes.
Oh good; I was going to ask for this. Wine isn't the only program that
wants to use its own thread-local storage mechanism and link with Glibc
at the same time.
The LDT works, but with limitations and overhead.
thanks,
-- Jamie
the attached patch cleans up the TLS code and it introduces a number of
new capabilities as well:
- move the TLS space to the first 12 GDT descriptors - kernel descriptors
come afterwards.
- make USER CS and DS just another TLS entry, which happen to have a
default value that matches the current segments. It's done in a way
that does not result in extra context-switch overhead.
- make segment 0040 available to Wine, allow the setting of 16-bit
segments. Allow full flexibility of all the safe segment variants.
- sys_set_thread_area(&info) can be both for a specific GDT entry, but it
can also trigger an 'allocation' of a yet unused TLS entry, by using
an ->entry_number of -1. It's recommended for userspace code to use the
-1 value, to make sure different libraries can nest properly.
- sys_get_thread_area(&info) can be used to read TLS entries into the same
userspace descriptor format as sys_set_thread_area() does. The new
syscalls are now actually relatively clean, and the TLS area can be
extended seemlessly.
- move KERNEL CS, DS, TSS and LDT to the same cacheline.
- clean up all the kernel descriptors to be more or less easily
modified/reordered from segment.h only, with minimal dependencies.
- move the GDT/TLS definitions to asm-i386/segment.h, to make it easier to
include the constants into assembly code and lowlevel include files.
an open issue: the context-switch code uses an optimized variant of TLS
loading - only the truly affected portions of the GDT get rewritten. But
i'm not 100% convinced this is the right way - i kept the TLS in the same
format as the GDT, so we could as well just write 96 bytes
unconditionally. That's smaller a single cacheline on modern CPUs. Doing
this would greatly simplify the code. I've mainly done this current
optimization to show that it can be done in a relatively straightforward
way, but that i dont think it's worth it. Especially since the TLS area is
3 32-byte cachelines, it should easily trigger all the memcpy fastpaths in
various CPUs. So i'd suggest to keep the tls_bytes variables only, and
thus non-TLS code would see only a single branch in the context-switch
path.
another issue: i've not gone the whole way of unifying LDT and TLS support
- we've already got compatibility code in the LDT interfaces and changing
LDTs via the TLS syscalls would only make the situation even more messy.
Nevertheless there are some new synergies between the LDT and TSS code,
which resulted in some ldt.c code reduction.
i've attached a new version of tls.c that tests the new TLS syscall
variants and shows off some of the new capabilities. TLS support works
just fine on 2.5.31 + this patch, on SMP and UP as well.
Comments?
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c Sun Aug 11 23:28:44 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c Sun Aug 11 23:28:44 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,14 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
+ clear_TLS(thread);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +474,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S Sun Aug 11 23:28:44 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S Sun Aug 11 23:28:44 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,44 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x1b TLS entry 3 */
+ .quad 0x0000000000000000 /* ... */
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000 /* ... */
+ .quad 0x0000000000000000 /* 0x5b TLS entry 11 */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
- .quad 0x00409a0000000000 /* 0x48 APM CS code */
- .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
- .quad 0x0040920000000000 /* 0x58 APM DS data */
+ .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */
+ .quad 0x00409a0000000000 /* 0x88 APM CS code */
+ .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */
+ .quad 0x0040920000000000 /* 0x98 APM DS data */
/* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00c09a0000000000 /* 0xa0 32-bit code */
+ .quad 0x00809a0000000000 /* 0xa8 16-bit code */
+ .quad 0x0080920000000000 /* 0xb0 16-bit data */
+ .quad 0x0080920000000000 /* 0xb8 16-bit data */
+ .quad 0x0080920000000000 /* 0xc0 16-bit data */
+ .quad 0x0000000000000000 /* 0xc8 not used */
+ .quad 0x0000000000000000 /* 0xd0 not used */
+ .quad 0x0000000000000000 /* 0xd8 not used */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c Sun Aug 11 23:28:44 2002
@@ -681,11 +681,9 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ if (prev->nr_tls_bytes || next->nr_tls_bytes)
+ load_TLS(prev, next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,168 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx;
+ return -ESRCH;
+}
+
+static inline int first_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+ int idx;
+
+ for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+ if (!desc_equal(array + idx, default_array + idx))
+ return idx;
+
+ return 0;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+ int idx;
+
+ for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+ if (!desc_equal(array + idx, default_array + idx))
+ return idx;
+
+ return 0;
+}
+
+#define CHECK_TLS_IDX(idx) \
+do { \
+ if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \
+ BUG(); \
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+
+ t->first_tls_byte = first_tls(t->tls_array) * 8;
+ t->last_tls_byte = (last_tls(t->tls_array) + 1) * 8;
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (t->first_tls_byte || t->last_tls_byte) {
+ CHECK_TLS_IDX(t->first_tls_byte/8);
+ CHECK_TLS_IDX(t->last_tls_byte/8-1);
+ t->nr_tls_bytes = t->last_tls_byte - t->first_tls_byte;
+ if (t->nr_tls_bytes < 0)
+ BUG();
+ if (t->nr_tls_bytes > GDT_ENTRY_TLS_ENTRIES * 8)
+ BUG();
+ } else {
+ /*
+ * If a thread has no TLS then invert the first/last
+ * range so that if we switch from (or to) a TLS-using
+ * thread then it will be the thread's TLS area that
+ * will be copied into the GDT.
+ */
+ t->nr_tls_bytes = 0;
+ t->first_tls_byte = 0;
+ t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8;
+ }
+
+ load_TLS(t, t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c Sun Aug 11 23:28:44 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c Sun Aug 11 23:28:44 2002
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S Sun Aug 11 23:28:44 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002
+++ linux/include/linux/apm_bios.h Sun Aug 11 23:28:44 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h Sun Aug 11 23:28:44 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,52 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void clear_TLS(struct thread_struct *t)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+ t->nr_tls_bytes = 0;
+ t->first_tls_byte = 0;
+ t->last_tls_byte = (GDT_ENTRY_TLS_MAX + 1) * 8;
+}
+
+static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu)
+{
+ int first_byte = min(prev->first_tls_byte, next->first_tls_byte);
+ int last_byte = max(prev->last_tls_byte, next->last_tls_byte);
+
+ memcpy((char *)(cpu_gdt_table[cpu]) + first_byte, (char *)next->tls_array + first_byte, last_byte - first_byte);
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h Sun Aug 11 23:28:44 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -376,8 +381,16 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+
+ /*
+ * cached TLS descriptors.
+ *
+ * The offset calculation is needed to not copy the whole TLS
+ * into the local GDT all the time.
+ * We count offsets in bytes to reduce context-switch overhead.
+ */
+ int nr_tls_bytes, first_tls_byte, last_tls_byte;
+ struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
};
#define INIT_THREAD { \
@@ -401,7 +414,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002
+++ linux/include/asm-i386/segment.h Sun Aug 11 23:28:44 2002
@@ -1,10 +1,84 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 1 - TLS segment #1 [ default user CS ]
+ * 2 - TLS segment #2 [ default user DS ]
+ * 3 - TLS segment #3 [ glibc's TLS segment ]
+ * 4 - TLS segment #4 [ Wine's %fs Win32 segment ]
+ * 5 - TLS segment #5
+ * 6 - TLS segment #6
+ * 7 - TLS segment #7
+ * 8 - TLS segment #8 [ segment 0040 used by Wine ]
+ * 9 - TLS segment #9
+ * 10 - TLS segment #9
+ * 11 - TLS segment #9
+ *
+ * ------- start of kernel segments, on a full cacheline:
+ *
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ *
+ * ------- these are the less performance-sensitive segments:
+ *
+ * 16 - APM BIOS support
+ * 17 - APM BIOS support
+ * 18 - APM BIOS support
+ * 19 - APM BIOS support
+ * 20 - PNPBIOS support (16->32 gate)
+ * 21 - PNPBIOS support
+ * 22 - PNPBIOS support
+ * 23 - PNPBIOS support
+ * 24 - PNPBIOS support
+ * 25 - reserved
+ * 26 - reserved
+ * 27 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES 11
+#define GDT_ENTRY_TLS_MIN 1
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE 12
+
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 25 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 28
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h Sun Aug 11 23:28:44 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
Hi Ingo,
On Sun, 11 Aug 2002 23:46:01 +0200 (CEST) Ingo Molnar <[email protected]> wrote:
>
> /*
> * The APM segments have byte granularity and their bases
> * and limits are set at run time.
> */
> - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
> - .quad 0x00409a0000000000 /* 0x48 APM CS code */
> - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
> - .quad 0x0040920000000000 /* 0x58 APM DS data */
> + .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */
> + .quad 0x00409a0000000000 /* 0x88 APM CS code */
> + .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */
> + .quad 0x0040920000000000 /* 0x98 APM DS data */
I just lost 0x40 which needs to be exactly 0x40 if it is do its job (i.e.
cope with brain dead BIOS writers using 0x40 as a segment offset in
protected mode ...
The idea is that segment 0x40 maps from physical address 0x400 to the end
of the first physical page. As a real mode program would (more or less)
expect it to.
The other three segments don't matter as longs as they are in that order
and contiguous.
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
On Mon, 12 Aug 2002, Stephen Rothwell wrote:
> > - .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
> > - .quad 0x00409a0000000000 /* 0x48 APM CS code */
> > - .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
> > - .quad 0x0040920000000000 /* 0x58 APM DS data */
> > + .quad 0x0040920000000000 /* 0x80 APM set up for bad BIOS's */
> > + .quad 0x00409a0000000000 /* 0x88 APM CS code */
> > + .quad 0x00009a0000000000 /* 0x90 APM CS 16 code (16 bit) */
> > + .quad 0x0040920000000000 /* 0x98 APM DS data */
>
> I just lost 0x40 which needs to be exactly 0x40 if it is do its job
> (i.e. cope with brain dead BIOS writers using 0x40 as a segment offset
> in protected mode ...
you can save/restore 0x40 in kernel-space if you need to no problem.
> The idea is that segment 0x40 maps from physical address 0x400 to the
> end of the first physical page. As a real mode program would (more or
> less) expect it to.
so you are using the kernel's GDT in real mode as well?
Ingo
On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <[email protected]> wrote:
>
> you can save/restore 0x40 in kernel-space if you need to no problem.
I guess I could around every BIOS call ...
Also, Alan (Cox) will say that's OK until he does APM on SMP on broken
BIOS's :-)
We could also just say that we no longer support those broken BIOS's ...
> so you are using the kernel's GDT in real mode as well?
No. The problem is that there are some BIOS's that contain code that (even
though they are called in protected mode) load 0x40 into ds and expect to
be able to reference stuff ... Causes really interesting OOPSs :-(
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
On Mon, 2002-08-12 at 09:23, Stephen Rothwell wrote:
> > you can save/restore 0x40 in kernel-space if you need to no problem.
> I guess I could around every BIOS call ...
>
> Also, Alan (Cox) will say that's OK until he does APM on SMP on broken
> BIOS's :-)
SMP actually makes no difference. I have full SMP APM working on my test
boxes now. However pre-empt and SMP are the same problem space
> We could also just say that we no longer support those broken BIOS's ...
>
> > so you are using the kernel's GDT in real mode as well?
Yes. APM calls are made by all sorts of processes.
> No. The problem is that there are some BIOS's that contain code that (even
> though they are called in protected mode) load 0x40 into ds and expect to
> be able to reference stuff ... Causes really interesting OOPSs :-(
Which does mean you can steal the old TLS value and put it back across
the calls just by changing the TLS data for that process. For that
matter on Windows emulation I thought Windows also needed 0x40 to be the
same offset as the BIOS does so can't we leave it hardwired ?
On 12 Aug 2002, Alan Cox wrote:
> > No. The problem is that there are some BIOS's that contain code that (even
> > though they are called in protected mode) load 0x40 into ds and expect to
> > be able to reference stuff ... Causes really interesting OOPSs :-(
>
> Which does mean you can steal the old TLS value and put it back across
> the calls just by changing the TLS data for that process. For that
> matter on Windows emulation I thought Windows also needed 0x40 to be the
> same offset as the BIOS does so can't we leave it hardwired ?
i have no problem with hardwiring it (and excluding it from the TLS
allocation/setting syscalls) - in fact i almost did it that way. The
question is, is the required descriptor format 100% the same for all APM
variants, Wine and Windows and DOS emulators? It would suck if we had a
bad descriptor and also removed the ability of Wine to trap 0x40 access.
but, couldnt APM use its own private GDT for real-mode calls, with 0x40
filled in properly? That would pretty much decouple things.
Ingo
On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote:
> but, couldnt APM use its own private GDT for real-mode calls, with 0x40
> filled in properly? That would pretty much decouple things.
Oh and secondly they are not actually real mode calls, they are
protected mode 32bit calls with certain segment registers set up to
point to specific things taken from the apm bios 32 interface
On Mon, 2002-08-12 at 11:49, Ingo Molnar wrote:
> but, couldnt APM use its own private GDT for real-mode calls, with 0x40
> filled in properly? That would pretty much decouple things.
That would get extremely messy when handing interrupts arriving while in
an APM bios call (which is required on many laptops). I believe the 0x40
= 0x40 assumption is identical across windows, buggy apm, buggy bios32,
buggy edd, buggy .. (you get the picture)
On 12 Aug 2002, Alan Cox wrote:
> That would get extremely messy when handing interrupts arriving while in
> an APM bios call (which is required on many laptops). I believe the 0x40
> = 0x40 assumption is identical across windows, buggy apm, buggy bios32,
> buggy edd, buggy .. (you get the picture)
ugh, we do Linux interrupts while in the APM BIOS?
in any case, it should be possible to create a 'minimal GDT' for the APM
BIOS [so that Linux interrupt handling is still possible] - to isolate it
from Linux as much as possible. But i agree that this gets messy ...
Ingo
On Mon, 2002-08-12 at 13:17, Ingo Molnar wrote:
> ugh, we do Linux interrupts while in the APM BIOS?
We have to. Most APM bios expects interrupts to be happening. In
pre-emptive mode we may well even be switching to/from APM BIOS code in
2.5 at the moment. I've not looked into that.
On 12 Aug 2002, Alan Cox wrote:
> > ugh, we do Linux interrupts while in the APM BIOS?
>
> We have to. Most APM bios expects interrupts to be happening. In
> pre-emptive mode we may well even be switching to/from APM BIOS code in
> 2.5 at the moment. I've not looked into that.
i think that since we hold the APM spinlock (do we always, when calling
into the APM BIOS?), we should not preempt any APM BIOS code.
Ingo
On Mon, 2002-08-12 at 13:55, Ingo Molnar wrote:
>
> On 12 Aug 2002, Alan Cox wrote:
>
> > > ugh, we do Linux interrupts while in the APM BIOS?
> >
> > We have to. Most APM bios expects interrupts to be happening. In
> > pre-emptive mode we may well even be switching to/from APM BIOS code in
> > 2.5 at the moment. I've not looked into that.
>
> i think that since we hold the APM spinlock (do we always, when calling
> into the APM BIOS?), we should not preempt any APM BIOS code.
Looking at the 2.5.29 tree I have handy here there is no APM spinlock. I
don't have 2.5.30/31 unpacked to check those
> Comments?
Numbers:
unconditional copy of 2 tls descs: 5 cycles
this patch with 1 tls desc: 26 cycles
this patch with 8 tls descs: 52 cycles
lldt: 51 cycles
lgdt: 50 cycles
context switch: 2000 cycles (measured with pipe read/write and vmstat so
it's not very accurate)
So this patch causes a 1% context switch performance drop for
multithreaded applications.
Note: the benchmark doesn't include the initial test for non-zero
nr_tls_bytes and doesn't include setting the LDT descriptor
On 12 Aug 2002, Luca Barbieri wrote:
> Numbers:
> unconditional copy of 2 tls descs: 5 cycles
> this patch with 1 tls desc: 26 cycles
> this patch with 8 tls descs: 52 cycles
[ 0 tls descs: 2 cycles. ]
but yes, this is rougly what i'd say this approach costs.
> lldt: 51 cycles
> lgdt: 50 cycles
> context switch: 2000 cycles (measured with pipe read/write and vmstat so
> it's not very accurate)
> So this patch causes a 1% context switch performance drop for
> multithreaded applications.
how did you calculate this? glibc multithreaded applications can avoid the
lldt via using the TLS, and thus it's a net win.
Ingo
Alan Cox wrote:
>
> For that
> matter on Windows emulation I thought Windows also needed 0x40 to be the
> same offset as the BIOS does so can't we leave it hardwired ?
Does Wine and the BIOS actually want the same? I would believe there
would have to be a small difference. Having Wine and BIOS using the
same memory doesn't sound right to me.
Wine wanting segment 0x40 to point to virtual address 0x400 and BIOS
wanting segment 0x40 to point to physical address 0x400 sounds more
reasonable to me. But physical address 0x400 would be virtual address
0xC0000400 with the default PAGE_OFFSET.
--
Kasper Dupont -- der bruger for meget tid p? usenet.
For sending spam use mailto:[email protected]
or mailto:[email protected]
On 12 Aug 2002, Alan Cox wrote:
> Which does mean you can steal the old TLS value and put it back across
> the calls just by changing the TLS data for that process. [...]
yes - the 0x40 segment can be saved & restored safely. We have per-CPU
GDTs so nobody can modify them while the APM BIOS is executing. (assuming
preemption is disabled.)
> [...] For that matter on Windows emulation I thought Windows also needed
> 0x40 to be the same offset as the BIOS does so can't we leave it
> hardwired ?
another thing: do we want this with descriptor priviledge level 3? Because
the APM 0x40 GDT entry was a ring 0 descriptor, but that would not be
accessible to Wine or DOSEMU.
Ingo
On Mon, 2002-08-12 at 17:12, Ingo Molnar wrote:
>
> On 12 Aug 2002, Luca Barbieri wrote:
>
> > Numbers:
> > unconditional copy of 2 tls descs: 5 cycles
> > this patch with 1 tls desc: 26 cycles
> > this patch with 8 tls descs: 52 cycles
>
> [ 0 tls descs: 2 cycles. ]
Yes but common multithreaded applications will have at least 1 for
pthreads.
> but yes, this is rougly what i'd say this approach costs.
>
> > lldt: 51 cycles
> > lgdt: 50 cycles
> > context switch: 2000 cycles (measured with pipe read/write and vmstat so
> > it's not very accurate)
>
> > So this patch causes a 1% context switch performance drop for
> > multithreaded applications.
>
> how did you calculate this?
((26 - 5) / 2000) * 100 ~= 1
Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
Pentium3 running the rdtsc timed benchmark in a loop 1 million times
with 8 unbenchmarked iterations to warm up caches and with the time to
execute an empty benchmark subtracted.
> glibc multithreaded applications can avoid the
> lldt via using the TLS, and thus it's a net win.
Surely, this patch is better than the old LDT method but much worse than
the 2-TLS one.
So I would use the 2-TLS approach plus my patch plus the syscall and
segment.h improvements of the tls-2.5.31-C3 patch plus support for
setting the 0x40 segment around APM calls.
BTW, are there any programs that would benefit from having more than 2
user-settable GDT entries but that don't need more than about 8?
(assuming we have a fixed flat code and data segment and 0x40 segment)
this is my latest TLS tree, changes relative to the 2.5.31-C3 patch:
- streamlined GDT layout:
* 0 - null
* 1 - TLS segment #1 [ default user CS ]
* 2 - TLS segment #2 [ default user DS ]
* 3 - TLS segment #3 [ glibc's TLS segment ]
* 4 - TLS segment #4 [ Wine's %fs Win32 segment ]
* 5 - TLS segment #5
* 6 - TLS segment #6
* 7 - TLS segment #7
* 8 - APM BIOS support [ segment 0x40 ]
* 9 - APM BIOS support
* 10 - APM BIOS support
* 11 - APM BIOS support
* 12 - kernel code segment <==== new cacheline
* 13 - kernel data segment
* 14 - TSS
* 15 - LDT
* 16 - PNPBIOS support (16->32 gate)
* 17 - PNPBIOS support
* 18 - PNPBIOS support
* 19 - PNPBIOS support
* 20 - PNPBIOS support
- simplified the TLS context-switch code, no more offsets, just a
thread->private_tls flag tells whether the task has a non-default TLS.
these two changes make the copying of the TLS faster as well - exactly 64
bytes need to be copied. Default memcpy() manages it in ~60 cycles, fully
inlined memcpy code does it in ~30 cycles. I'm copying the NULL entry as
well, to make the copy (and copy size) aligned on cacheline boundaries.
The TLS area in the thread structure is not cacheline-aligned yet though.
and the APM code should be back to functioning again. If a common 0x40
segment can be agreed on then the APM entry should be changed and made
available to Wine - although i suspect Wine needs a 16-bit entry, while
the APM one is a 32-bit entry ...
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 15:47:36 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 15:47:36 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S Mon Aug 12 15:47:36 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S Mon Aug 12 15:47:36 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x1b TLS entry 3 */
+ .quad 0x0000000000000000 /* 0x23 TLS entry 4 */
+ .quad 0x0000000000000000 /* 0x2b TLS entry 5 */
+ .quad 0x0000000000000000 /* 0x33 TLS entry 6 */
+ .quad 0x0000000000000000 /* 0x3b TLS entry 7 */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
@@ -431,15 +426,21 @@
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
/* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */
+ .quad 0x00809a0000000000 /* 0x88 16-bit code */
+ .quad 0x0080920000000000 /* 0x90 16-bit data */
+ .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x0080920000000000 /* 0xa0 16-bit data */
+ .quad 0x0000000000000000 /* 0xa8 not used */
+ .quad 0x0000000000000000 /* 0xb0 not used */
+ .quad 0x0000000000000000 /* 0xb8 not used */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c Mon Aug 12 15:47:36 2002
@@ -681,11 +681,9 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ if (prev->private_tls || next->private_tls)
+ load_TLS(prev, next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,142 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx;
+ return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+
+ if (!memcmp(array, default_array, TLS_SIZE))
+ return 0;
+ return 1;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+ int idx;
+
+ for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+ if (!desc_equal(array + idx, default_array + idx))
+ return idx;
+
+ return 0;
+}
+
+#define CHECK_TLS_IDX(idx) \
+do { \
+ if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \
+ BUG(); \
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ t->private_tls = private_tls(t->tls_array);
+ load_TLS(t, t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c Mon Aug 12 15:47:37 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c Mon Aug 12 15:47:37 2002
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S Mon Aug 12 15:47:37 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002
+++ linux/include/linux/apm_bios.h Mon Aug 12 15:47:37 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h Mon Aug 12 15:47:37 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,42 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *prev, struct thread_struct *next, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+ memcpy(cpu_gdt_table[cpu], next->tls_array, TLS_SIZE);
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h Mon Aug 12 15:47:37 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -376,8 +381,16 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+
+ /*
+ * cached TLS descriptors.
+ *
+ * The offset calculation is needed to not copy the whole TLS
+ * into the local GDT all the time.
+ * We count offsets in bytes to reduce context-switch overhead.
+ */
+ int private_tls;
+ struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
};
#define INIT_THREAD { \
@@ -401,7 +414,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002
+++ linux/include/asm-i386/segment.h Mon Aug 12 15:47:37 2002
@@ -1,10 +1,79 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 1 - TLS segment #1 [ default user CS ]
+ * 2 - TLS segment #2 [ default user DS ]
+ * 3 - TLS segment #3 [ glibc's TLS segment ]
+ * 4 - TLS segment #4 [ Wine's %fs Win32 segment ]
+ * 5 - TLS segment #5
+ * 6 - TLS segment #6
+ * 7 - TLS segment #7
+ *
+ * ------- start of kernel segments:
+ *
+ * 8 - APM BIOS support [ segment 0x40 ]
+ * 9 - APM BIOS support
+ * 10 - APM BIOS support
+ * 11 - APM BIOS support
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * 16 - PNPBIOS support (16->32 gate)
+ * 17 - PNPBIOS support
+ * 18 - PNPBIOS support
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - reserved
+ * 22 - reserved
+ * 23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES 7
+#define GDT_ENTRY_TLS_MIN 1
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE 8
+
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h Mon Aug 12 15:47:37 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
On 12 Aug 2002, Luca Barbieri wrote:
> > > Numbers:
> > > unconditional copy of 2 tls descs: 5 cycles
> > > this patch with 1 tls desc: 26 cycles
> > > this patch with 8 tls descs: 52 cycles
> >
> > [ 0 tls descs: 2 cycles. ]
> Yes but common multithreaded applications will have at least 1 for
> pthreads.
i would not say 'common' and 'multithreaded' in the same sentence. It
might be so in the future, but it isnt today.
> > how did you calculate this?
> ((26 - 5) / 2000) * 100 ~= 1
> Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
> Pentium3 running the rdtsc timed benchmark in a loop 1 million times
> with 8 unbenchmarked iterations to warm up caches and with the time to
> execute an empty benchmark subtracted.
old libpthreads or new one?
> > glibc multithreaded applications can avoid the
> > lldt via using the TLS, and thus it's a net win.
> Surely, this patch is better than the old LDT method but much worse than
> the 2-TLS one.
people asked for a 3rd TLS already.
Ingo
the attached patch (against 2.5.31-vanilla) further reduces the number of
TLS entries and optimizes the load_TLS() code, which is now down to 11
cycles. There are 3 more entries left around for cacheline alignment
reasons, so we can use them just in case more TLSs are needed.
this is in essence the '2 free TLS entries' code, with the difference of
more flexibility and the ability to change the default CS and DS segments
as well.
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 16:12:38 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 16:12:38 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S Mon Aug 12 16:12:38 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S Mon Aug 12 16:12:38 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x00cffa000000ffff /* 0x0b user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x13 user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x1b TLS entry 3 */
+ .quad 0x0000000000000000 /* 0x23 TLS entry 4 */
+ .quad 0x0000000000000000 /* 0x2b reserved */
+ .quad 0x0000000000000000 /* 0x33 reserved */
+ .quad 0x0000000000000000 /* 0x3b reserved */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
@@ -431,15 +426,21 @@
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
/* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */
+ .quad 0x00809a0000000000 /* 0x88 16-bit code */
+ .quad 0x0080920000000000 /* 0x90 16-bit data */
+ .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x0080920000000000 /* 0xa0 16-bit data */
+ .quad 0x0000000000000000 /* 0xa8 not used */
+ .quad 0x0000000000000000 /* 0xb0 not used */
+ .quad 0x0000000000000000 /* 0xb8 not used */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c Mon Aug 12 16:12:38 2002
@@ -681,11 +681,9 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ if (prev->private_tls || next->private_tls)
+ load_TLS(next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,142 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = GDT_ENTRY_TLS_MIN; idx <= GDT_ENTRY_TLS_MAX; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx;
+ return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+
+ if (!memcmp(array, default_array, TLS_SIZE))
+ return 0;
+ return 1;
+}
+
+static inline int last_tls(struct desc_struct *array)
+{
+ struct desc_struct *default_array = init_task.thread.tls_array;
+ int idx;
+
+ for (idx = GDT_ENTRY_TLS_MAX; idx >= GDT_ENTRY_TLS_MIN; idx--)
+ if (!desc_equal(array + idx, default_array + idx))
+ return idx;
+
+ return 0;
+}
+
+#define CHECK_TLS_IDX(idx) \
+do { \
+ if ((idx) < GDT_ENTRY_TLS_MIN || (idx) > GDT_ENTRY_TLS_MAX) \
+ BUG(); \
+} while (0)
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ t->private_tls = private_tls(t->tls_array);
+ load_TLS(t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c Mon Aug 12 16:12:38 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c Mon Aug 12 16:12:38 2002
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S Mon Aug 12 16:12:38 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002
+++ linux/include/linux/apm_bios.h Mon Aug 12 16:12:38 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h Mon Aug 12 16:12:38 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+#if TLS_SIZE != 32
+# error update this code.
+#endif
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][i] = t->tls_array[i]
+ C(1); C(2); C(3); C(4); C(5); C(6); C(7); C(8);
+#undef C
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h Mon Aug 12 16:12:38 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -376,8 +381,16 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+
+ /*
+ * cached TLS descriptors.
+ *
+ * The offset calculation is needed to not copy the whole TLS
+ * into the local GDT all the time.
+ * We count offsets in bytes to reduce context-switch overhead.
+ */
+ int private_tls;
+ struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
};
#define INIT_THREAD { \
@@ -401,7 +414,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002
+++ linux/include/asm-i386/segment.h Mon Aug 12 16:12:38 2002
@@ -1,10 +1,79 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 1 - TLS segment #1 [ default user CS ]
+ * 2 - TLS segment #2 [ default user DS ]
+ * 3 - TLS segment #3 [ glibc's TLS segment ]
+ * 4 - TLS segment #4 [ Wine's %fs Win32 segment ]
+ *
+ * ------- start of kernel segments:
+ *
+ * 5 - reserved
+ * 6 - reserved
+ * 7 - reserved
+ * 8 - APM BIOS support [ segment 0x40 ]
+ * 9 - APM BIOS support
+ * 10 - APM BIOS support
+ * 11 - APM BIOS support
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * 16 - PNPBIOS support (16->32 gate)
+ * 17 - PNPBIOS support
+ * 18 - PNPBIOS support
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - reserved
+ * 22 - reserved
+ * 23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES 4
+#define GDT_ENTRY_TLS_MIN 1
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_MAX * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS (GDT_ENTRY_TLS_MIN + 0)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS (GDT_ENTRY_TLS_MIN + 1)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+
+#define GDT_ENTRY_KERNEL_BASE 8
+
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h Mon Aug 12 16:12:38 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
On Mon, 2002-08-12 at 17:57, Ingo Molnar wrote:
>
> On 12 Aug 2002, Luca Barbieri wrote:
>
> > > > Numbers:
> > > > unconditional copy of 2 tls descs: 5 cycles
> > > > this patch with 1 tls desc: 26 cycles
> > > > this patch with 8 tls descs: 52 cycles
> > >
> > > [ 0 tls descs: 2 cycles. ]
> > Yes but common multithreaded applications will have at least 1 for
> > pthreads.
>
> i would not say 'common' and 'multithreaded' in the same sentence. It
> might be so in the future, but it isnt today.
Most modern servers (e.g. Apache2, MySQL) are multithreaded and so are
large desktop applications (e.g. Evolution, Galeon, Nautilus).
> > > how did you calculate this?
> > ((26 - 5) / 2000) * 100 ~= 1
> > Benchmarks done in kernel mode (2.4.18) with interrupts disabled on a
> > Pentium3 running the rdtsc timed benchmark in a loop 1 million times
> > with 8 unbenchmarked iterations to warm up caches and with the time to
> > execute an empty benchmark subtracted.
>
> old libpthreads or new one?
What are you asking about? (benchmarks are in kernel mode and context
switch is from forked processes)
> > > glibc multithreaded applications can avoid the
> > > lldt via using the TLS, and thus it's a net win.
> > Surely, this patch is better than the old LDT method but much worse than
> > the 2-TLS one.
>
> people asked for a 3rd TLS already.
It would be interesting to know what they would use it for.
> the ability to change the default CS and DS segments
> as well.
This does not make any sense.
The user is free to load any selector in %cs/%ds/%es/%ss so the default
flat segments should be left alone so that a process can have the flat
segments _plus_ all the tls entries.
> although i suspect Wine needs a 16-bit entry, while
> the APM one is a 32-bit entry ...
AFAIK this only matters for code and stack segments and anyway the APM
one should be a 16-bit entry since it exists because the BIOS wrongly
assumes that it is a real-mode segment.
Anyway, isn't it better to put the user segments in a cacheline that
doesn't already lose one entry to the null selector? (and leave the
first one either empty or for BIOS/boot selectors)
On Mon, 12 Aug 2002 12:07:19 +0200 (CEST) Ingo Molnar <[email protected]> wrote:
>
> you can save/restore 0x40 in kernel-space if you need to no problem.
How about the following (untested, not even compiled):
--
Cheers,
Stephen Rothwell [email protected]
http://www.canb.auug.org.au/~sfr/
diff -ruN 2.5.31/arch/i386/kernel/apm.c 2.5.31-apm.1/arch/i386/kernel/apm.c
--- 2.5.31/arch/i386/kernel/apm.c 2002-08-02 11:11:34.000000000 +1000
+++ 2.5.31-apm.1/arch/i386/kernel/apm.c 2002-08-13 00:20:56.000000000 +1000
@@ -215,6 +215,7 @@
#include <linux/pm.h>
#include <linux/kernel.h>
#include <linux/smp_lock.h>
+#include <linux/smp.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -419,6 +420,7 @@
static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
static struct apm_user * user_list;
static spinlock_t user_list_lock = SPIN_LOCK_UNLOCKED;
+static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
static char driver_version[] = "1.16"; /* no spaces */
@@ -569,7 +571,12 @@
{
APM_DECL_SEGS
unsigned long flags;
+ int cpu;
+ struct desc_struct save_desc_40;
+ cpu = get_cpu();
+ save_desc_40 = cpu_gdt_table[cpu][0x40 / 8];
+ cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc;
local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
@@ -591,6 +598,8 @@
: "memory", "cc");
APM_DO_RESTORE_SEGS;
local_irq_restore(flags);
+ cpu_gdt_table[cpu][0x40 / 8] = save_desc_40;
+ put_cpu();
return *eax & 0xff;
}
@@ -613,7 +622,12 @@
u8 error;
APM_DECL_SEGS
unsigned long flags;
+ int cpu;
+ struct desc_struct save_desc_40;
+ cpu = get_cpu();
+ save_desc_40 = cpu_gdt_table[cpu][0x40 / 8];
+ cpu_gdt_table[cpu][0x40 / 8] = bad_bios_desc;
local_save_flags(flags);
APM_DO_CLI;
APM_DO_SAVE_SEGS;
@@ -639,6 +653,8 @@
}
APM_DO_RESTORE_SEGS;
local_irq_restore(flags);
+ cpu_gdt_table[smp_processor_id()][0x40 / 8] = save_desc_40;
+ put_cpu();
return error;
}
@@ -1923,17 +1939,14 @@
* that extends up to the end of page zero (that we have reserved).
* This is for buggy BIOS's that refer to (real mode) segment 0x40
* even though they are called in protected mode.
- *
- * NOTE: on SMP we call into the APM BIOS only on CPU#0, so it's
- * enough to modify CPU#0's GDT.
*/
- for (i = 0; i < NR_CPUS; i++) {
- set_base(cpu_gdt_table[i][APM_40 >> 3],
- __va((unsigned long)0x40 << 4));
- _set_limit((char *)&cpu_gdt_table[i][APM_40 >> 3], 4095 - (0x40 << 4));
+ set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
+ _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
+
+ apm_bios_entry.offset = apm_info.bios.offset;
+ apm_bios_entry.segment = APM_CS;
- apm_bios_entry.offset = apm_info.bios.offset;
- apm_bios_entry.segment = APM_CS;
+ for (i = 0; i < NR_CPUS; i++) {
set_base(cpu_gdt_table[i][APM_CS >> 3],
__va((unsigned long)apm_info.bios.cseg << 4));
set_base(cpu_gdt_table[i][APM_CS_16 >> 3],
diff -ruN 2.5.31/arch/i386/kernel/head.S 2.5.31-apm.1/arch/i386/kernel/head.S
--- 2.5.31/arch/i386/kernel/head.S 2002-07-28 21:11:25.000000000 +1000
+++ 2.5.31-apm.1/arch/i386/kernel/head.S 2002-08-13 00:29:38.000000000 +1000
@@ -427,7 +427,10 @@
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
+ .quad 0x0000000000000000 /* 0x40 APM will be used for bad BIOS's
+ * Will be saved and restored
+ * across BIOS calls. MUST NOT BE ONE
+ * OF THE FOLLOWING THREE! */
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
diff -ruN 2.5.31/include/linux/apm_bios.h 2.5.31-apm.1/include/linux/apm_bios.h
--- 2.5.31/include/linux/apm_bios.h 2001-08-14 09:39:28.000000000 +1000
+++ 2.5.31-apm.1/include/linux/apm_bios.h 2002-08-13 00:38:52.000000000 +1000
@@ -21,8 +21,7 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_CS 0x48
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
okay, the attached patch does some more things:
- moves the first two TLS entries and the user CS/DS entries on the same
cacheline.
- excludes CS/DS from the TLS space - Luca is right in that it only slows
things down unnecesserily, and there is nothing that cannot be done by
changing the %ds %cs selectors - and every cycle counts in the
context-switch path.
the only open issues are the number of TLSs supported. I'd vote for making
them 4 and then we can inline the copy and make it unconditional, it will
be 12 cycles to copy them all which alone is better than a branch miss. In
this patch it's 2, thus the copying cost is 6 cycles.
with 4 entries the 0x40 entry would be taken and APM has to move further
up, and has to save/restore the 0x40 entry across BIOS calls.
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:01:11 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:01:11 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:01:11 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S Mon Aug 12 17:01:11 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,17 +407,17 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x0000000000000000 /* 0x0b reserved */
+ .quad 0x0000000000000000 /* 0x13 reserved */
+ .quad 0x0000000000000000 /* 0x1b reserved */
+ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
+ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
@@ -431,15 +426,21 @@
.quad 0x00409a0000000000 /* 0x48 APM CS code */
.quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
.quad 0x0040920000000000 /* 0x58 APM DS data */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
/* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */
+ .quad 0x00809a0000000000 /* 0x88 16-bit code */
+ .quad 0x0080920000000000 /* 0x90 16-bit data */
+ .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x0080920000000000 /* 0xa0 16-bit data */
+ .quad 0x0000000000000000 /* 0xa8 not used */
+ .quad 0x0000000000000000 /* 0xb0 not used */
+ .quad 0x0000000000000000 /* 0xb8 not used */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c Mon Aug 12 17:01:11 2002
@@ -681,11 +681,9 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ if (prev->private_tls || next->private_tls)
+ load_TLS(next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +832,125 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+static inline int private_tls(struct desc_struct *array)
+{
+ int idx;
+
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (!desc_empty(array + idx))
+ return 0;
+ return 1;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ t->private_tls = private_tls(t->tls_array);
+ load_TLS(t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:01:11 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:01:11 2002
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S Mon Aug 12 17:01:11 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002
+++ linux/include/linux/apm_bios.h Mon Aug 12 17:01:11 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h Mon Aug 12 17:01:11 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+#if TLS_SIZE != 16
+# error update this code.
+#endif
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+ C(0); C(1);
+#undef C
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h Mon Aug 12 17:01:11 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -376,8 +381,16 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
+
+ /*
+ * cached TLS descriptors.
+ *
+ * The offset calculation is needed to not copy the whole TLS
+ * into the local GDT all the time.
+ * We count offsets in bytes to reduce context-switch overhead.
+ */
+ int private_tls;
+ struct desc_struct tls_array[GDT_ENTRY_TLS_MAX + 1];
};
#define INIT_THREAD { \
@@ -401,7 +414,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002
+++ linux/include/asm-i386/segment.h Mon Aug 12 17:01:11 2002
@@ -1,10 +1,79 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ * 1 - reserved
+ * 2 - reserved
+ * 3 - reserved
+ *
+ * 4 - default user CS <==== new cacheline
+ * 5 - default user DS
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 6 - TLS segment #1 [ glibc's TLS segment ]
+ * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
+ *
+ * ------- start of kernel segments:
+ *
+ * 8 - APM BIOS support [ segment 0x40 ]
+ * 9 - APM BIOS support
+ * 10 - APM BIOS support
+ * 11 - APM BIOS support
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * 16 - PNPBIOS support (16->32 gate)
+ * 17 - PNPBIOS support
+ * 18 - PNPBIOS support
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - reserved
+ * 22 - reserved
+ * 23 - reserved
+ */
+#define GDT_ENTRY_TLS_ENTRIES 2
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS 4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE 8
+
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 0)
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 4)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 5)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 6)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 7)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 8)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h Mon Aug 12 17:01:11 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
On Mon, Aug 12, 2002 at 07:06:50PM +0200, Ingo Molnar wrote:
>
> okay, the attached patch does some more things:
>
> - moves the first two TLS entries and the user CS/DS entries on the same
> cacheline.
>
> - excludes CS/DS from the TLS space - Luca is right in that it only slows
> things down unnecesserily, and there is nothing that cannot be done by
> changing the %ds %cs selectors - and every cycle counts in the
> context-switch path.
>
> the only open issues are the number of TLSs supported. I'd vote for making
> them 4 and then we can inline the copy and make it unconditional, it will
> be 12 cycles to copy them all which alone is better than a branch miss. In
> this patch it's 2, thus the copying cost is 6 cycles.
>
> with 4 entries the 0x40 entry would be taken and APM has to move further
> up, and has to save/restore the 0x40 entry across BIOS calls.
As each supported TLS entry has its context-switch time cost, I think we
should stay at 2 supported TLS entries.
My understanding was that the GDT patches were written to optimize the
common case (all threaded apps using LDT and with the advent of __thread
support causing every single application to use LDT), with 2 TLS entries
where one is for libc/libpthread and the other one is for application
usage I think it is enough for 99.9% of apps. In the rare
case someone needs more, there is still LDT which offers 8192 entries.
Jakub
> the only open issues are the number of TLSs supported. I'd vote for
> making them 4 and then we can inline the copy and make it unconditional,
> it will be 12 cycles to copy them all which alone is better than a
> branch miss. In this patch it's 2, thus the copying cost is 6 cycles.
>
> with 4 entries the 0x40 entry would be taken and APM has to move further
> up, and has to save/restore the 0x40 entry across BIOS calls.
the attached patch does this:
- there are now 4 freely usable TLS entries, amongst them 0x40 for Wine
- the 3 APM segments fit into the hole at the end of the kernel
descriptor area exactly => no GDT size increase.
- the ->private_tls code is gone - unconditional inline copies are more
robust and faster as well.
Plus the APM code needs Stephen's fix. I think this is the best approach
we had so far. Any objections?
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Sun Aug 11 17:01:17 2002
+++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:21:29 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:21:29 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Sun Aug 11 17:01:07 2002
+++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:21:29 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/head.S Mon Aug 12 17:21:29 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,40 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x0000000000000000 /* 0x0b reserved */
+ .quad 0x0000000000000000 /* 0x13 reserved */
+ .quad 0x0000000000000000 /* 0x1b reserved */
+ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
+ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
+ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
+ .quad 0x0000000000000000 /* 0x4b TLS entry 4 */
+ .quad 0x0000000000000000 /* 0x53 reserved */
+ .quad 0x0000000000000000 /* 0x5b reserved */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
+ /* Segments used for calling PnP BIOS */
+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */
+ .quad 0x00809a0000000000 /* 0x88 16-bit code */
+ .quad 0x0080920000000000 /* 0x90 16-bit data */
+ .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x0080920000000000 /* 0xa0 16-bit data */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
- .quad 0x00409a0000000000 /* 0x48 APM CS code */
- .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
- .quad 0x0040920000000000 /* 0x58 APM DS data */
- /* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00409a0000000000 /* 0xa8 APM CS code */
+ .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */
+ .quad 0x0040920000000000 /* 0xb8 APM DS data */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Sun Aug 11 17:01:08 2002
+++ linux/arch/i386/kernel/process.c Mon Aug 12 17:21:29 2002
@@ -681,11 +681,8 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ load_TLS(next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +831,114 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ load_TLS(t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct modify_ldt_ldt_s *u_info)
+{
+ struct modify_ldt_ldt_s info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Sun Aug 11 17:01:06 2002
+++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:21:29 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Sun Aug 11 17:01:04 2002
+++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:21:29 2002
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Sun Jun 9 07:26:32 2002
+++ linux/arch/i386/boot/setup.S Mon Aug 12 17:21:29 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Sun Jun 9 07:30:24 2002
+++ linux/include/linux/apm_bios.h Mon Aug 12 17:21:29 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/desc.h Mon Aug 12 17:21:29 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+#if TLS_SIZE != 32
+# error update this code.
+#endif
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+ C(0); C(1); C(2); C(3);
+#undef C
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/processor.h Mon Aug 12 17:21:29 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -359,6 +364,8 @@
};
struct thread_struct {
+/* cached TLS descriptors. */
+ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
unsigned long esp0;
unsigned long eip;
unsigned long esp;
@@ -376,11 +383,10 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
};
#define INIT_THREAD { \
+ { { 0, 0 } , }, \
0, \
0, 0, 0, 0, \
{ [0 ... 7] = 0 }, /* debugging registers */ \
@@ -401,7 +407,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Sun Jun 9 07:28:19 2002
+++ linux/include/asm-i386/segment.h Mon Aug 12 17:21:29 2002
@@ -1,10 +1,79 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ * 1 - reserved
+ * 2 - reserved
+ * 3 - reserved
+ *
+ * 4 - default user CS <==== new cacheline
+ * 5 - default user DS
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 6 - TLS segment #1 [ glibc's TLS segment ]
+ * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
+ * 8 - TLS segment #3
+ * 9 - TLS segment #4
+ * 10 - reserved
+ * 11 - reserved
+ *
+ * ------- start of kernel segments:
+ *
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * 16 - PNPBIOS support (16->32 gate)
+ * 17 - PNPBIOS support
+ * 18 - PNPBIOS support
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - APM BIOS support
+ * 22 - APM BIOS support
+ * 23 - APM BIOS support
+ */
+#define GDT_ENTRY_TLS_ENTRIES 4
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS 4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE 12
+
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Sun Aug 11 17:01:07 2002
+++ linux/include/asm-i386/unistd.h Mon Aug 12 17:21:29 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
On Mon, Aug 12, 2002 at 07:24:25PM +0200, Ingo Molnar wrote:
> the attached patch does this:
>
> - there are now 4 freely usable TLS entries, amongst them 0x40 for Wine
>
> - the 3 APM segments fit into the hole at the end of the kernel
> descriptor area exactly => no GDT size increase.
>
> - the ->private_tls code is gone - unconditional inline copies are more
> robust and faster as well.
>
> Plus the APM code needs Stephen's fix. I think this is the best approach
> we had so far. Any objections?
Patch looks good so far, but _please_ rename struct modify_ldt_ldt_s to
something more sensible. (yes, I know it existed before, but with this
patch the name is even more stupid than before)
On Mon, 12 Aug 2002, Jakub Jelinek wrote:
> As each supported TLS entry has its context-switch time cost, I think we
> should stay at 2 supported TLS entries.
4 are almost as good - and they also solve the 0x40 problem.
> My understanding was that the GDT patches were written to optimize the
> common case (all threaded apps using LDT and with the advent of __thread
> support causing every single application to use LDT), with 2 TLS entries
> where one is for libc/libpthread and the other one is for application
> usage I think it is enough for 99.9% of apps. In the rare case someone
> needs more, there is still LDT which offers 8192 entries.
well, i think i have to agree ... if it wasnt for Wine's 0x40 descriptor.
But it certainly does not come free. We could have 3 TLS entries (0x40
will be the last entry), and the copying cost is 9 cycles. (compared to 6
cycles in the 2 entries case.) Good enough?
Ingo
> well, i think i have to agree ... if it wasnt for Wine's 0x40 descriptor.
> But it certainly does not come free. We could have 3 TLS entries (0x40
> will be the last entry), and the copying cost is 9 cycles. (compared to 6
> cycles in the 2 entries case.) Good enough?
Or we could leave 0x40 fixed to 0x400 and use only 2.
This loses flexibility but anyway the only 2 apps that could use it are
dosemu and wine and I think that they already need to have it mapped at
0x400 for vm86 (no one uses 16-bit DLLs anymore).
Of course this is only valid if Win32 doesn't use it because otherwise
we would lose the ability to do null-pointer checking in programs using
Win32 DLLs (e.g. mplayer).
okay, here is YAGL. (Yet Another GDT Layout)
3 TLS entries, 9 cycles copying and no branches in the context-switch
path. The patch also adds Christoph's suggestion and renames
modify_ldt_ldt_s (yuck!) to user_desc.
(all patches i posted were test-compiled and test-booted against
2.5.31-vanilla.)
Ingo
--- linux/drivers/pnp/pnpbios_core.c.orig Mon Aug 12 17:51:27 2002
+++ linux/drivers/pnp/pnpbios_core.c Mon Aug 12 17:56:27 2002
@@ -90,7 +90,8 @@
static union pnp_bios_expansion_header * pnp_bios_hdr = NULL;
/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (0x0060)
+#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
+
#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
--- linux/arch/i386/kernel/cpu/common.c.orig Mon Aug 12 17:56:01 2002
+++ linux/arch/i386/kernel/cpu/common.c Mon Aug 12 17:56:27 2002
@@ -423,6 +423,7 @@
{
int cpu = smp_processor_id();
struct tss_struct * t = init_tss + cpu;
+ struct thread_struct *thread = ¤t->thread;
if (test_and_set_bit(cpu, &cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -447,9 +448,13 @@
*/
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
- cpu_gdt_descr[cpu].size = GDT_SIZE;
+ cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
}
+ /*
+ * Set up the per-thread TLS descriptor cache:
+ */
+ memcpy(thread->tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_MAX * 8);
__asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu]));
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
@@ -468,9 +473,9 @@
BUG();
enter_lazy_tlb(&init_mm, current, cpu);
- t->esp0 = current->thread.esp0;
+ t->esp0 = thread->esp0;
set_tss_desc(cpu,t);
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc();
load_LDT(&init_mm.context);
--- linux/arch/i386/kernel/entry.S.orig Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/entry.S Mon Aug 12 17:56:27 2002
@@ -753,6 +753,7 @@
.long sys_sched_setaffinity
.long sys_sched_getaffinity
.long sys_set_thread_area
+ .long sys_get_thread_area
.rept NR_syscalls-(.-sys_call_table)/4
.long sys_ni_syscall
--- linux/arch/i386/kernel/head.S.orig Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/head.S Mon Aug 12 17:56:27 2002
@@ -239,12 +239,7 @@
movl %eax,%es
movl %eax,%fs
movl %eax,%gs
-#ifdef CONFIG_SMP
- movl $(__KERNEL_DS), %eax
- movl %eax,%ss # Reload the stack pointer (segment only)
-#else
- lss stack_start,%esp # Load processor stack
-#endif
+ movl %eax,%ss
xorl %eax,%eax
lldt %ax
cld # gcc2 wants the direction flag cleared at all times
@@ -412,34 +407,40 @@
ALIGN
/*
- * The Global Descriptor Table contains 20 quadwords, per-CPU.
+ * The Global Descriptor Table contains 28 quadwords, per-CPU.
*/
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x0000000000000000 /* TLS descriptor */
- .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */
- .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */
- .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
- .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
- .quad 0x0000000000000000 /* TSS descriptor */
- .quad 0x0000000000000000 /* LDT descriptor */
+ .quad 0x0000000000000000 /* 0x0b reserved */
+ .quad 0x0000000000000000 /* 0x13 reserved */
+ .quad 0x0000000000000000 /* 0x1b reserved */
+ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */
+ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
+ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
+ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
+ .quad 0x0000000000000000 /* 0x4b reserved */
+ .quad 0x0000000000000000 /* 0x53 reserved */
+ .quad 0x0000000000000000 /* 0x5b reserved */
+
+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */
+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */
+
+ /* Segments used for calling PnP BIOS */
+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */
+ .quad 0x00809a0000000000 /* 0x88 16-bit code */
+ .quad 0x0080920000000000 /* 0x90 16-bit data */
+ .quad 0x0080920000000000 /* 0x98 16-bit data */
+ .quad 0x0080920000000000 /* 0xa0 16-bit data */
/*
* The APM segments have byte granularity and their bases
* and limits are set at run time.
*/
- .quad 0x0040920000000000 /* 0x40 APM set up for bad BIOS's */
- .quad 0x00409a0000000000 /* 0x48 APM CS code */
- .quad 0x00009a0000000000 /* 0x50 APM CS 16 code (16 bit) */
- .quad 0x0040920000000000 /* 0x58 APM DS data */
- /* Segments used for calling PnP BIOS */
- .quad 0x00c09a0000000000 /* 0x60 32-bit code */
- .quad 0x00809a0000000000 /* 0x68 16-bit code */
- .quad 0x0080920000000000 /* 0x70 16-bit data */
- .quad 0x0080920000000000 /* 0x78 16-bit data */
- .quad 0x0080920000000000 /* 0x80 16-bit data */
- .quad 0x0000000000000000 /* 0x88 not used */
- .quad 0x0000000000000000 /* 0x90 not used */
- .quad 0x0000000000000000 /* 0x98 not used */
+ .quad 0x00409a0000000000 /* 0xa8 APM CS code */
+ .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */
+ .quad 0x0040920000000000 /* 0xb8 APM DS data */
#if CONFIG_SMP
.fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */
--- linux/arch/i386/kernel/process.c.orig Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/process.c Mon Aug 12 17:56:27 2002
@@ -681,11 +681,8 @@
/*
* Load the per-thread Thread-Local Storage descriptor.
- *
- * NOTE: it's faster to do the two stores unconditionally
- * than to branch away.
*/
- load_TLS_desc(next, cpu);
+ load_TLS(next, cpu);
/*
* Save away %fs and %gs. No need to save %es and %ds, as
@@ -834,35 +831,114 @@
#undef first_sched
/*
- * Set the Thread-Local Storage area:
+ * sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
-asmlinkage int sys_set_thread_area(unsigned long base, unsigned long flags)
+static int get_free_idx(void)
{
struct thread_struct *t = ¤t->thread;
- int writable = 0;
- int cpu;
+ int idx;
- /* do not allow unused flags */
- if (flags & ~TLS_FLAGS_MASK)
+ for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
+ if (desc_empty(t->tls_array + idx))
+ return idx + GDT_ENTRY_TLS_MIN;
+ return -ESRCH;
+}
+
+/*
+ * Set a given TLS descriptor:
+ */
+asmlinkage int sys_set_thread_area(struct user_desc *u_info)
+{
+ struct thread_struct *t = ¤t->thread;
+ struct user_desc info;
+ struct desc_struct *desc;
+ int cpu, idx;
+
+ if (copy_from_user(&info, u_info, sizeof(info)))
+ return -EFAULT;
+ idx = info.entry_number;
+
+ /*
+ * index -1 means the kernel should try to find and
+ * allocate an empty descriptor:
+ */
+ if (idx == -1) {
+ idx = get_free_idx();
+ if (idx < 0)
+ return idx;
+ if (put_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ }
+
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- if (flags & TLS_FLAG_WRITABLE)
- writable = 1;
+ desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
- t->tls_desc.a = ((base & 0x0000ffff) << 16) | 0xffff;
-
- t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) |
- 0xf0000 | (writable << 9) | (1 << 15) |
- (1 << 22) | (1 << 23) | 0x7000;
+ if (LDT_empty(&info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(&info);
+ desc->b = LDT_entry_b(&info);
+ }
+ load_TLS(t, cpu);
- load_TLS_desc(t, cpu);
put_cpu();
- return TLS_ENTRY*8 + 3;
+ return 0;
+}
+
+/*
+ * Get the current Thread-Local Storage area:
+ */
+
+#define GET_BASE(desc) ( \
+ (((desc)->a >> 16) & 0x0000ffff) | \
+ (((desc)->b << 16) & 0x00ff0000) | \
+ ( (desc)->b & 0xff000000) )
+
+#define GET_LIMIT(desc) ( \
+ ((desc)->a & 0x0ffff) | \
+ ((desc)->b & 0xf0000) )
+
+#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
+#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
+#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
+#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
+#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
+#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+
+asmlinkage int sys_get_thread_area(struct user_desc *u_info)
+{
+ struct user_desc info;
+ struct desc_struct *desc;
+ int idx;
+
+ if (get_user(idx, &u_info->entry_number))
+ return -EFAULT;
+ if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+ return -EINVAL;
+
+ desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(desc);
+ info.limit = GET_LIMIT(desc);
+ info.seg_32bit = GET_32BIT(desc);
+ info.contents = GET_CONTENTS(desc);
+ info.read_exec_only = !GET_WRITABLE(desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(desc);
+ info.seg_not_present = !GET_PRESENT(desc);
+ info.useable = GET_USEABLE(desc);
+
+ if (copy_to_user(u_info, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
}
--- linux/arch/i386/kernel/suspend.c.orig Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/suspend.c Mon Aug 12 17:56:27 2002
@@ -207,7 +207,7 @@
struct tss_struct * t = init_tss + cpu;
set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */
- cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff;
+ cpu_gdt_table[cpu][GDT_ENTRY_TSS].b &= 0xfffffdff;
load_TR_desc(); /* This does ltr */
load_LDT(¤t->mm->context); /* This does lldt */
--- linux/arch/i386/kernel/ldt.c.orig Mon Aug 12 17:56:02 2002
+++ linux/arch/i386/kernel/ldt.c Mon Aug 12 17:56:27 2002
@@ -170,7 +170,7 @@
struct mm_struct * mm = current->mm;
__u32 entry_1, entry_2, *lp;
int error;
- struct modify_ldt_ldt_s ldt_info;
+ struct user_desc ldt_info;
error = -EINVAL;
if (bytecount != sizeof(ldt_info))
@@ -200,32 +200,17 @@
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
- if (oldmode ||
- (ldt_info.contents == 0 &&
- ldt_info.read_exec_only == 1 &&
- ldt_info.seg_32bit == 0 &&
- ldt_info.limit_in_pages == 0 &&
- ldt_info.seg_not_present == 1 &&
- ldt_info.useable == 0 )) {
+ if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
- entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
- (ldt_info.limit & 0x0ffff);
- entry_2 = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000) >> 16) |
- (ldt_info.limit & 0xf0000) |
- ((ldt_info.read_exec_only ^ 1) << 9) |
- (ldt_info.contents << 10) |
- ((ldt_info.seg_not_present ^ 1) << 15) |
- (ldt_info.seg_32bit << 22) |
- (ldt_info.limit_in_pages << 23) |
- 0x7000;
- if (!oldmode)
- entry_2 |= (ldt_info.useable << 20);
+ entry_1 = LDT_entry_a(&ldt_info);
+ entry_2 = LDT_entry_b(&ldt_info);
+ if (oldmode)
+ entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
--- linux/arch/i386/boot/setup.S.orig Mon Aug 12 17:51:32 2002
+++ linux/arch/i386/boot/setup.S Mon Aug 12 17:56:27 2002
@@ -1005,9 +1005,14 @@
ret
# Descriptor tables
+#
+# NOTE: if you think the GDT is large, you can make it smaller by just
+# defining the KERNEL_CS and KERNEL_DS entries and shifting the gdt
+# address down by GDT_ENTRY_KERNEL_CS*8. This puts bogus entries into
+# the GDT, but those wont be used so it's not a problem.
+#
gdt:
- .word 0, 0, 0, 0 # dummy
- .word 0, 0, 0, 0 # unused
+ .fill GDT_ENTRY_KERNEL_CS,8,0
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
--- linux/include/linux/apm_bios.h.orig Mon Aug 12 17:51:39 2002
+++ linux/include/linux/apm_bios.h Mon Aug 12 17:56:27 2002
@@ -21,8 +21,8 @@
#ifdef __KERNEL__
-#define APM_40 0x40
-#define APM_CS (APM_40 + 8)
+#define APM_40 (GDT_ENTRY_APMBIOS_BASE * 8)
+#define APM_CS (APM_BASE + 8)
#define APM_CS_16 (APM_CS + 8)
#define APM_DS (APM_CS_16 + 8)
--- linux/include/asm-i386/desc.h.orig Mon Aug 12 17:56:15 2002
+++ linux/include/asm-i386/desc.h Mon Aug 12 17:56:27 2002
@@ -2,50 +2,12 @@
#define __ARCH_DESC_H
#include <asm/ldt.h>
-
-/*
- * The layout of the per-CPU GDT under Linux:
- *
- * 0 - null
- * 1 - Thread-Local Storage (TLS) segment
- * 2 - kernel code segment
- * 3 - kernel data segment
- * 4 - user code segment <==== new cacheline
- * 5 - user data segment
- * 6 - TSS
- * 7 - LDT
- * 8 - APM BIOS support <==== new cacheline
- * 9 - APM BIOS support
- * 10 - APM BIOS support
- * 11 - APM BIOS support
- * 12 - PNPBIOS support <==== new cacheline
- * 13 - PNPBIOS support
- * 14 - PNPBIOS support
- * 15 - PNPBIOS support
- * 16 - PNPBIOS support <==== new cacheline
- * 17 - not used
- * 18 - not used
- * 19 - not used
- */
-#define TLS_ENTRY 1
-#define TSS_ENTRY 6
-#define LDT_ENTRY 7
-/*
- * The interrupt descriptor table has room for 256 idt's,
- * the global descriptor table is dependent on the number
- * of tasks we can have..
- *
- * We pad the GDT to cacheline boundary.
- */
-#define IDT_ENTRIES 256
-#define GDT_ENTRIES 20
+#include <asm/segment.h>
#ifndef __ASSEMBLY__
#include <asm/mmu.h>
-#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct))
-
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
struct Xgt_desc_struct {
@@ -55,8 +17,8 @@
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
-#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3))
-#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3))
+#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (GDT_ENTRY_TSS*8))
+#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (GDT_ENTRY_LDT*8))
/*
* This is the ldt that every process will get unless we need
@@ -78,21 +40,48 @@
static inline void set_tss_desc(unsigned int cpu, void *addr)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (int)addr, 235, 0x89);
}
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
-#define TLS_FLAGS_MASK 0x00000001
+#define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
-#define TLS_FLAG_WRITABLE 0x00000001
+#define LDT_entry_b(info) \
+ (((info)->base_addr & 0xff000000) | \
+ (((info)->base_addr & 0x00ff0000) >> 16) | \
+ ((info)->limit & 0xf0000) | \
+ (((info)->read_exec_only ^ 1) << 9) | \
+ ((info)->contents << 10) | \
+ (((info)->seg_not_present ^ 1) << 15) | \
+ ((info)->seg_32bit << 22) | \
+ ((info)->limit_in_pages << 23) | \
+ ((info)->useable << 20) | \
+ 0x7000)
+
+#define LDT_empty(info) (\
+ (info)->base_addr == 0 && \
+ (info)->limit == 0 && \
+ (info)->contents == 0 && \
+ (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && \
+ (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+#if TLS_SIZE != 24
+# error update this code.
+#endif
-static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu)
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc;
+#define C(i) cpu_gdt_table[cpu][GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
+ C(0); C(1); C(2);
+#undef C
}
static inline void clear_LDT(void)
--- linux/include/asm-i386/processor.h.orig Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/processor.h Mon Aug 12 17:56:27 2002
@@ -22,6 +22,11 @@
unsigned long a,b;
};
+#define desc_empty(desc) \
+ (!((desc)->a + (desc)->b))
+
+#define desc_equal(desc1, desc2) \
+ (((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -359,6 +364,8 @@
};
struct thread_struct {
+/* cached TLS descriptors. */
+ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
unsigned long esp0;
unsigned long eip;
unsigned long esp;
@@ -376,11 +383,10 @@
unsigned long v86flags, v86mask, v86mode, saved_esp0;
/* IO permissions */
unsigned long *ts_io_bitmap;
-/* TLS cached descriptor */
- struct desc_struct tls_desc;
};
#define INIT_THREAD { \
+ { { 0, 0 } , }, \
0, \
0, 0, 0, 0, \
{ [0 ... 7] = 0 }, /* debugging registers */ \
@@ -401,7 +407,7 @@
0,0,0,0, /* esp,ebp,esi,edi */ \
0,0,0,0,0,0, /* es,cs,ss */ \
0,0,0,0,0,0, /* ds,fs,gs */ \
- LDT_ENTRY,0, /* ldt */ \
+ GDT_ENTRY_LDT,0, /* ldt */ \
0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \
{~0, } /* ioperm */ \
}
--- linux/include/asm-i386/segment.h.orig Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/segment.h Mon Aug 12 17:56:27 2002
@@ -1,10 +1,79 @@
#ifndef _ASM_SEGMENT_H
#define _ASM_SEGMENT_H
-#define __KERNEL_CS 0x10
-#define __KERNEL_DS 0x18
+/*
+ * The layout of the per-CPU GDT under Linux:
+ *
+ * 0 - null
+ * 1 - reserved
+ * 2 - reserved
+ * 3 - reserved
+ *
+ * 4 - default user CS <==== new cacheline
+ * 5 - default user DS
+ *
+ * ------- start of TLS (Thread-Local Storage) segments:
+ *
+ * 6 - TLS segment #1 [ glibc's TLS segment ]
+ * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
+ * 8 - TLS segment #3
+ * 9 - reserved
+ * 10 - reserved
+ * 11 - reserved
+ *
+ * ------- start of kernel segments:
+ *
+ * 12 - kernel code segment <==== new cacheline
+ * 13 - kernel data segment
+ * 14 - TSS
+ * 15 - LDT
+ * 16 - PNPBIOS support (16->32 gate)
+ * 17 - PNPBIOS support
+ * 18 - PNPBIOS support
+ * 19 - PNPBIOS support
+ * 20 - PNPBIOS support
+ * 21 - APM BIOS support
+ * 22 - APM BIOS support
+ * 23 - APM BIOS support
+ */
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
-#define __USER_CS 0x23
-#define __USER_DS 0x2B
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+
+#define GDT_ENTRY_DEFAULT_USER_CS 4
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS * 8 + 3)
+
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS * 8 + 3)
+
+#define GDT_ENTRY_KERNEL_BASE 12
+
+#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+
+#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+
+#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 2)
+#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 3)
+
+#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 4)
+#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE + 9)
+
+/*
+ * The GDT has 21 entries but we pad it to cacheline boundary:
+ */
+#define GDT_ENTRIES 24
+
+#define GDT_SIZE (GDT_ENTRIES * 8)
+
+/*
+ * The interrupt descriptor table has room for 256 idt's,
+ * the global descriptor table is dependent on the number
+ * of tasks we can have..
+ */
+#define IDT_ENTRIES 256
#endif
--- linux/include/asm-i386/unistd.h.orig Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/unistd.h Mon Aug 12 17:56:27 2002
@@ -248,6 +248,7 @@
#define __NR_sched_setaffinity 241
#define __NR_sched_getaffinity 242
#define __NR_set_thread_area 243
+#define __NR_get_thread_area 244
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
--- linux/include/asm-i386/ldt.h.orig Mon Aug 12 17:56:16 2002
+++ linux/include/asm-i386/ldt.h Mon Aug 12 17:56:27 2002
@@ -12,7 +12,7 @@
#define LDT_ENTRY_SIZE 8
#ifndef __ASSEMBLY__
-struct modify_ldt_ldt_s {
+struct user_desc {
unsigned int entry_number;
unsigned long base_addr;
unsigned int limit;
Ingo Molnar <[email protected]> writes:
> well, i think i have to agree ... if it wasnt for Wine's 0x40 descriptor.
> But it certainly does not come free. We could have 3 TLS entries (0x40
> will be the last entry), and the copying cost is 9 cycles. (compared to 6
> cycles in the 2 entries case.) Good enough?
Note that Wine doesn't really require the 0x40 descriptor. As long as
we can trap accesses to it and emulate them like we do now, that's
good enough. Of course having a GDT entry would save a few cycles, but
this only matters for old Win16 apps, so I'm not sure adding even 1
cycle to the task switch time is worth it.
--
Alexandre Julliard
[email protected]