2005-12-08 19:42:45

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: [patch] x86_64: align and pad x86_64 GDT on page boundary

This patch is on the same lines as Zachary Amsden's i386 GDT page alignemnt
patch in -mm, but for x86_64.

Thanks,
Kiran


Patch to align and pad x86_64 GDT on page boundries.

Signed-off-by: Nippun Goel <[email protected]>
Signed-off-by: Ravikiran Thirumalai <[email protected]>
Signed-off-by: Shai Fultheim <[email protected]>


Index: linux-2.6.15-rc3/arch/x86_64/kernel/head.S
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/head.S 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/head.S 2005-12-01 16:19:29.000000000 -0800
@@ -379,7 +379,7 @@
* Also sysret mandates a special GDT layout
*/

-.align L1_CACHE_BYTES
+.align PAGE_SIZE

/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
@@ -401,10 +401,11 @@
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
- /* GDTs of other CPUs: */
- .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
+ /* GDTs of other CPUs are now dynamically allocated */

- .align L1_CACHE_BYTES
+ /* zero the remaining page */
+ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+
ENTRY(idt_table)
.rept 256
.quad 0
Index: linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:29.000000000 -0800
@@ -232,15 +232,15 @@
* and set up the GDT descriptor:
*/
if (cpu) {
- memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
+ memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
}

cpu_gdt_descr[cpu].size = GDT_SIZE;
- cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
+
asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
asm volatile("lidt %0" :: "m" (idt_descr));

- memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
+ memcpy(me->thread.tls_array, get_cpu_gdt_table(cpu), GDT_ENTRY_TLS_ENTRIES * 8);

/*
* Delete NT
Index: linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:29.000000000 -0800
@@ -743,6 +743,13 @@
};
DECLARE_WORK(work, do_fork_idle, &c_idle);

+ /* allocate memory for gdts of secondary cpus. Hotplug is considered */
+ if (!cpu_gdt_descr[cpu].address &&
+ !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
+ printk("Failed to allocate GDT for CPU %d\n", cpu);
+ return 1;
+ }
+
c_idle.idle = get_idle_for_cpu(cpu);

if (c_idle.idle) {
Index: linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/suspend.c 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c 2005-12-01 16:19:29.000000000 -0800
@@ -120,7 +120,7 @@

set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */

- cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9;
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;

syscall_init(); /* This sets MSR_*STAR and related */
load_TR_desc(); /* This does ltr */
Index: linux-2.6.15-rc3/include/asm-x86_64/desc.h
===================================================================
--- linux-2.6.15-rc3.orig/include/asm-x86_64/desc.h 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/include/asm-x86_64/desc.h 2005-12-01 16:19:29.000000000 -0800
@@ -25,7 +25,7 @@
unsigned int a,b;
};

-extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];

enum {
GATE_INTERRUPT = 0xE,
@@ -79,6 +79,9 @@
extern struct gate_struct idt_table[];
extern struct desc_ptr cpu_gdt_descr[];

+/* the cpu gdt accessor */
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
+
static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
{
struct gate_struct s;
@@ -139,20 +142,20 @@
* -1? seg base+limit should be pointing to the address of the
* last valid byte
*/
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS],
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
(unsigned long)addr, DESC_TSS,
IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
}

static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
DESC_LDT, size * 8 - 1);
}

static inline void set_seg_base(unsigned cpu, int entry, void *base)
{
- struct desc_struct *d = &cpu_gdt_table[cpu][entry];
+ struct desc_struct *d = &get_cpu_gdt_table(cpu)[entry];
u32 addr = (u32)(u64)base;
BUG_ON((u64)base >> 32);
d->base0 = addr & 0xffff;
@@ -194,7 +197,7 @@

static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN);
+ u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
gdt[0] = t->tls_array[0];
gdt[1] = t->tls_array[1];
gdt[2] = t->tls_array[2];


2005-12-08 20:15:25

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai wrote:
>
> - .align L1_CACHE_BYTES
> + /* zero the remaining page */
> + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
> +
> ENTRY(idt_table)

Why can't the IDT not be shared with the GDT page? It should be mostly
read only right and putting r-o data on that page should be ok, right?

> @@ -743,6 +743,13 @@
> };
> DECLARE_WORK(work, do_fork_idle, &c_idle);
>
> + /* allocate memory for gdts of secondary cpus. Hotplug is considered */
> + if (!cpu_gdt_descr[cpu].address &&
> + !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
> + printk("Failed to allocate GDT for CPU %d\n", cpu);
> + return 1;

Is this return really correctly handled? Doubtful.

-Andi

2005-12-08 21:55:29

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote:
> On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai wrote:
> >
> > - .align L1_CACHE_BYTES
> > + /* zero the remaining page */
> > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
> > +
> > ENTRY(idt_table)
>
> Why can't the IDT not be shared with the GDT page? It should be mostly
> read only right and putting r-o data on that page should be ok, right?

Yes, you are right. This should not have been a problem.
Some people reported this symbol (cpu_gdt) though. Will have to go back and
check.

Thanks,
Kiran

2005-12-08 23:03:12

by Rohit Seth

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, 2005-12-08 at 13:55 -0800, Ravikiran G Thirumalai wrote:
> On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote:
> > On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai
> wrote:
> > >
> > > - .align L1_CACHE_BYTES
> > > + /* zero the remaining page */
> > > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
> > > +
> > > ENTRY(idt_table)
> >
> > Why can't the IDT not be shared with the GDT page? It should be
> mostly
> > read only right and putting r-o data on that page should be ok,
> right?
>
> Yes, you are right. This should not have been a problem.
> Some people reported this symbol (cpu_gdt) though. Will have to go
> back and
> check.

IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data
structure to each secondary CPU dynamically allocating page for its gdt.
This was mainly to address the excessive cost in virtualized
environments of sharing the gdt page with other RW items in per_cpu_data
structure. I think the padding was because xen does not like sharing
gdt page with anything else...

I agree that if no other constraint is there then IDT could be moved
into the same page.

-rohit

2005-12-08 23:11:47

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote:
> On Thu, 2005-12-08 at 13:55 -0800, Ravikiran G Thirumalai wrote:
> > On Thu, Dec 08, 2005 at 09:15:18PM +0100, Andi Kleen wrote:
> > > On Thu, Dec 08, 2005 at 11:42:32AM -0800, Ravikiran G Thirumalai
> > wrote:
> > > >
> > > > - .align L1_CACHE_BYTES
> > > > + /* zero the remaining page */
> > > > + .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
> > > > +
> > > > ENTRY(idt_table)
> > >
> > > Why can't the IDT not be shared with the GDT page? It should be
> > mostly
> > > read only right and putting r-o data on that page should be ok,
> > right?
> >
> > Yes, you are right. This should not have been a problem.
> > Some people reported this symbol (cpu_gdt) though. Will have to go
> > back and
> > check.
>
> IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data
> structure to each secondary CPU dynamically allocating page for its gdt.

Kiran's patch does this too. Except for the BP GDT, which could
be shared with the single IDT.

-Andi (who actually plans to attack per CPU IDTs at some point
so this could change later)

2005-12-08 23:20:25

by Rohit Seth

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, 2005-12-09 at 00:11 +0100, Andi Kleen wrote:
> On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote:
> >
> > IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data
> > structure to each secondary CPU dynamically allocating page for its gdt.
>
> Kiran's patch does this too. Except for the BP GDT, which could
> be shared with the single IDT.
>

...that padding in BP's GDT (in this and original patches) could be
because of the Xen requirements to have dedicated pages for gdt.

-rohit




2005-12-08 23:26:15

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, Dec 08, 2005 at 03:26:07PM -0800, Rohit Seth wrote:
> On Fri, 2005-12-09 at 00:11 +0100, Andi Kleen wrote:
> > On Thu, Dec 08, 2005 at 03:09:17PM -0800, Rohit Seth wrote:
> > >
> > > IIRC, Zach's patches for gdt alignment, moved the gdts from per_cpu data
> > > structure to each secondary CPU dynamically allocating page for its gdt.
> >
> > Kiran's patch does this too. Except for the BP GDT, which could
> > be shared with the single IDT.
> >
>
> ...that padding in BP's GDT (in this and original patches) could be
> because of the Xen requirements to have dedicated pages for gdt.

Well if the Xen people have such requirements they can submit
separate patches. Currently they don't seem to be interested
at all in submitting patches to mainline, so we must work
with the VM hackers who are interested in this (scalex86, VMware)
And AFAIK they only care about not having false sharing in there.

-Andi

2005-12-08 23:38:47

by Rohit Seth

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote:

> Well if the Xen people have such requirements they can submit
> separate patches. Currently they don't seem to be interested
> at all in submitting patches to mainline, so we must work
> with the VM hackers who are interested in this (scalex86, VMware)
> And AFAIK they only care about not having false sharing in there.
>


Agreed.

Though do we need to have full page allocated for each gdt (256 bytes)
then? ...possibly use kmalloc.

-rohit

2005-12-08 23:43:22

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, Dec 08, 2005 at 03:45:11PM -0800, Rohit Seth wrote:
> On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote:
>
> > Well if the Xen people have such requirements they can submit
> > separate patches. Currently they don't seem to be interested
> > at all in submitting patches to mainline, so we must work
> > with the VM hackers who are interested in this (scalex86, VMware)
> > And AFAIK they only care about not having false sharing in there.
> >
>
>
> Agreed.
>
> Though do we need to have full page allocated for each gdt (256 bytes)
> then? ...possibly use kmalloc.

For scalex I think it needs to be page aligned because that is what
the effective cacheline size for remote nodes is in their setup.
That would be difficult for kmalloc because it cannot guarantee that
alignment nor avoid false sharing. For the BP case it's ok as
long as the beginning is correctly aligned and the rest
is read-only.

-Andi

2005-12-08 23:44:58

by Rohit Seth

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Thu, 2005-12-08 at 15:45 -0800, Rohit Seth wrote:

>
>
> Agreed.
>
> Though do we need to have full page allocated for each gdt (256 bytes)
> then? ...possibly use kmalloc.
>


sorry, forgot about the false sharing part.

-rohit

2005-12-09 22:19:43

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, Dec 09, 2005 at 12:43:20AM +0100, Andi Kleen wrote:
> On Thu, Dec 08, 2005 at 03:45:11PM -0800, Rohit Seth wrote:
> > On Fri, 2005-12-09 at 00:26 +0100, Andi Kleen wrote:
> >
> > > Well if the Xen people have such requirements they can submit
> > > separate patches. Currently they don't seem to be interested
> > > at all in submitting patches to mainline, so we must work
> > > with the VM hackers who are interested in this (scalex86, VMware)
> > > And AFAIK they only care about not having false sharing in there.
> > >
> >
> >
> > Agreed.
> >
> > Though do we need to have full page allocated for each gdt (256 bytes)
> > then? ...possibly use kmalloc.
>
> For scalex I think it needs to be page aligned because that is what
> the effective cacheline size for remote nodes is in their setup.
> That would be difficult for kmalloc because it cannot guarantee that
> alignment nor avoid false sharing.

Exactly.

> For the BP case it's ok as
> long as the beginning is correctly aligned and the rest
> is read-only.

Just that any writes on the bp GDT will invalidate the idt_table cacheline,
which is read mostly (as Nippun pointed out). So could we keep the padding
as it is for the BP too?

Attached is the patch which fixes the retval in do_boot_cpu to return -1.
__cpu_up handles -1.

Hope this patch is OK.

Thanks,
Kiran

---
This patch is on the same lines as Zachary Amsden's i386 GDT page alignemnt
patch in -mm, but for x86_64.

Patch to align and pad x86_64 GDT on page boundries.

Signed-off-by: Nippun Goel <[email protected]>
Signed-off-by: Ravikiran Thirumalai <[email protected]>
Signed-off-by: Shai Fultheim <[email protected]>

Index: linux-2.6.15-rc3/arch/x86_64/kernel/head.S
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/head.S 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/head.S 2005-12-01 16:19:29.000000000 -0800
@@ -379,7 +379,7 @@
* Also sysret mandates a special GDT layout
*/

-.align L1_CACHE_BYTES
+.align PAGE_SIZE

/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
@@ -401,10 +401,11 @@
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
/* This should be a multiple of the cache line size */
- /* GDTs of other CPUs: */
- .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table)
+ /* GDTs of other CPUs are now dynamically allocated */

- .align L1_CACHE_BYTES
+ /* zero the remaining page */
+ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
+
ENTRY(idt_table)
.rept 256
.quad 0
Index: linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/setup64.c 2005-12-01 16:19:29.000000000 -0800
@@ -232,15 +232,15 @@
* and set up the GDT descriptor:
*/
if (cpu) {
- memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
+ memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
}

cpu_gdt_descr[cpu].size = GDT_SIZE;
- cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
+
asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
asm volatile("lidt %0" :: "m" (idt_descr));

- memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8);
+ memcpy(me->thread.tls_array, get_cpu_gdt_table(cpu), GDT_ENTRY_TLS_ENTRIES * 8);

/*
* Delete NT
Index: linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:28.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/smpboot.c 2005-12-01 16:19:29.000000000 -0800
@@ -743,6 +743,13 @@
};
DECLARE_WORK(work, do_fork_idle, &c_idle);

+ /* allocate memory for gdts of secondary cpus. Hotplug is considered */
+ if (!cpu_gdt_descr[cpu].address &&
+ !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
+ printk("Failed to allocate GDT for CPU %d\n", cpu);
+ return -1;
+ }
+
c_idle.idle = get_idle_for_cpu(cpu);

if (c_idle.idle) {
Index: linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c
===================================================================
--- linux-2.6.15-rc3.orig/arch/x86_64/kernel/suspend.c 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/arch/x86_64/kernel/suspend.c 2005-12-01 16:19:29.000000000 -0800
@@ -120,7 +120,7 @@

set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */

- cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9;
+ get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9;

syscall_init(); /* This sets MSR_*STAR and related */
load_TR_desc(); /* This does ltr */
Index: linux-2.6.15-rc3/include/asm-x86_64/desc.h
===================================================================
--- linux-2.6.15-rc3.orig/include/asm-x86_64/desc.h 2005-12-01 15:49:27.000000000 -0800
+++ linux-2.6.15-rc3/include/asm-x86_64/desc.h 2005-12-01 16:19:29.000000000 -0800
@@ -25,7 +25,7 @@
unsigned int a,b;
};

-extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];

enum {
GATE_INTERRUPT = 0xE,
@@ -79,6 +79,9 @@
extern struct gate_struct idt_table[];
extern struct desc_ptr cpu_gdt_descr[];

+/* the cpu gdt accessor */
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
+
static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)
{
struct gate_struct s;
@@ -139,20 +142,20 @@
* -1? seg base+limit should be pointing to the address of the
* last valid byte
*/
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS],
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
(unsigned long)addr, DESC_TSS,
IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
}

static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{
- set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
+ set_tssldt_descriptor(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (unsigned long)addr,
DESC_LDT, size * 8 - 1);
}

static inline void set_seg_base(unsigned cpu, int entry, void *base)
{
- struct desc_struct *d = &cpu_gdt_table[cpu][entry];
+ struct desc_struct *d = &get_cpu_gdt_table(cpu)[entry];
u32 addr = (u32)(u64)base;
BUG_ON((u64)base >> 32);
d->base0 = addr & 0xffff;
@@ -194,7 +197,7 @@

static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
{
- u64 *gdt = (u64 *)(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN);
+ u64 *gdt = (u64 *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN);
gdt[0] = t->tls_array[0];
gdt[1] = t->tls_array[1];
gdt[2] = t->tls_array[2];

2005-12-09 22:55:25

by Rohit Seth

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, 2005-12-09 at 14:19 -0800, Ravikiran G Thirumalai wrote:
> On Fri, Dec 09, 2005 at 12:43:20AM +0100, Andi Kleen wrote:
> >
> > For scalex I think it needs to be page aligned because that is what
> > the effective cacheline size for remote nodes is in their setup.
> > That would be difficult for kmalloc because it cannot guarantee that
> > alignment nor avoid false sharing.
>
> Exactly.
>

Are you saying remote node will cache a whole page for every byte access
on that page.

> > For the BP case it's ok as
> > long as the beginning is correctly aligned and the rest
> > is read-only.
>
> Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> which is read mostly (as Nippun pointed out). So could we keep the padding
> as it is for the BP too?
>

Do you write into GDT often for this to be an issue. The reason I'm
asking this because the per-cpu IDTs that Andi refered in the future.
If we are really not using too many bytes in GDT then rest of the page
can be used for IDT and such mostly RO data.

-rohit

2005-12-09 22:57:12

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

> Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> which is read mostly (as Nippun pointed out). So could we keep the padding
> as it is for the BP too?

Yes, good point.

> Attached is the patch which fixes the retval in do_boot_cpu to return -1.
> __cpu_up handles -1.
>
> Hope this patch is OK.

Looks good.

Thanks,
-Andi

2005-12-09 22:59:23

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, Dec 09, 2005 at 03:01:27PM -0800, Rohit Seth wrote:
> > > For the BP case it's ok as
> > > long as the beginning is correctly aligned and the rest
> > > is read-only.
> >
> > Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> > which is read mostly (as Nippun pointed out). So could we keep the padding
> > as it is for the BP too?
> >
>
> Do you write into GDT often for this to be an issue. The reason I'm

The context switch writes into the GDT to switch around the TLS segments
when they are <4GB. Or in pre NPTL the same would be done for the LDT
also used for TLS.

> asking this because the per-cpu IDTs that Andi refered in the future.
> If we are really not using too many bytes in GDT then rest of the page
> can be used for IDT and such mostly RO data.

Once I implement that it can be shared with that page.

-Andi

2005-12-09 23:46:23

by Ravikiran G Thirumalai

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, Dec 09, 2005 at 11:59:21PM +0100, Andi Kleen wrote:
> On Fri, Dec 09, 2005 at 03:01:27PM -0800, Rohit Seth wrote:
> > > > For the BP case it's ok as
> > > > long as the beginning is correctly aligned and the rest
> > > > is read-only.
> > >
> > > Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> > > which is read mostly (as Nippun pointed out). So could we keep the padding
> > > as it is for the BP too?
> > >
> >
> > Do you write into GDT often for this to be an issue. The reason I'm
>
> The context switch writes into the GDT to switch around the TLS segments

Yup.

>
> > asking this because the per-cpu IDTs that Andi refered in the future.
> > If we are really not using too many bytes in GDT then rest of the page
> > can be used for IDT and such mostly RO data.
>
> Once I implement that it can be shared with that page.

Yes, that will be nice.

Thanks,
Kiran

2005-12-12 02:31:25

by Andi Kleen

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Sun, Dec 11, 2005 at 06:34:16PM -0800, Zwane Mwaikambo wrote:
> On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote:
>
> > > For the BP case it's ok as
> > > long as the beginning is correctly aligned and the rest
> > > is read-only.
> >
> > Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> > which is read mostly (as Nippun pointed out). So could we keep the padding
> > as it is for the BP too?
>
> But how often is this occuring? I presume this is for the virtualisation

GDT writes happen often if you use threads with TLS.

> case only?

In this case for programs running on ScaleX86's hypervisor yes.

-Andi

2005-12-12 02:28:51

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote:

> > For the BP case it's ok as
> > long as the beginning is correctly aligned and the rest
> > is read-only.
>
> Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> which is read mostly (as Nippun pointed out). So could we keep the padding
> as it is for the BP too?

But how often is this occuring? I presume this is for the virtualisation
case only?

Thanks

2005-12-12 02:32:54

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [discuss] [patch] x86_64: align and pad x86_64 GDT on page boundary

On Sun, 11 Dec 2005, Zwane Mwaikambo wrote:

> On Fri, 9 Dec 2005, Ravikiran G Thirumalai wrote:
>
> > > For the BP case it's ok as
> > > long as the beginning is correctly aligned and the rest
> > > is read-only.
> >
> > Just that any writes on the bp GDT will invalidate the idt_table cacheline,
> > which is read mostly (as Nippun pointed out). So could we keep the padding
> > as it is for the BP too?
>
> But how often is this occuring? I presume this is for the virtualisation
> case only?

Ignore this it's a misfire from an old postponed message (i saw the reply
by Andi).