This patch:
[PATCH] x86: GDT alignment fix
completely broke my voyager boot sequence. The reason is that the patch
introduces a subtle dependence on the assumption that the boot CPU is
CPU0, which is untrue for voyager. I think the most correct fix for
this is to use a per_cpu for cpu_gdt_descr and still allocate the actual
descriptors as page allocations. I think it's also much more efficient
to do these allocations in cpu_init() where they'll be automatic for all
subarchitectures, rather than placing them in each do_boot_cpu
implementation. Note, I've also fixed a potentially hard to trace bug
as well: the secondaries in the prior scheme were actually using the
Boot CPU's GDT to come up with ... whatever cpu_init() and subsequent
manipulations alter it to be. This implementation goes back to using
the special (and now not altered) cpu_gdt_table for all booting CPUs.
Signed-off-by: James Bottomley <[email protected]>
---
James
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index cbc3206..d561a56 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -4,6 +4,7 @@
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/percpu.h>
+#include <linux/bootmem.h>
#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/i387.h>
@@ -18,6 +19,9 @@
#include "cpu.h"
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
@@ -559,8 +563,9 @@ void __devinit cpu_init(void)
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = ¤t->thread;
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt;
__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -577,6 +582,22 @@ void __devinit cpu_init(void)
set_in_cr4(X86_CR4_TSD);
}
+ /* This is a horrible hack to allocate the GDT. The problem
+ * is that cpu_init() is called really early for the boot CPU
+ * (and hence needs bootmem) but much later for the secondary
+ * CPUs, when bootmem will have gone away */
+ if (NODE_DATA(0)->bdata->node_bootmem_map) {
+ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+ /* alloc_bootmem_pages panics on failure, so no check */
+ memset(gdt, 0, PAGE_SIZE);
+ } else {
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+ if (unlikely(!gdt)) {
+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+ for (;;) local_irq_enable();
+ }
+ }
+
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
@@ -589,10 +610,10 @@ void __devinit cpu_init(void)
((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
(CPU_16BIT_STACK_SIZE - 1);
- cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
- cpu_gdt_descr[cpu].address = (unsigned long)gdt;
+ cpu_gdt_descr->size = GDT_SIZE - 1;
+ cpu_gdt_descr->address = (unsigned long)gdt;
- load_gdt(&cpu_gdt_descr[cpu]);
+ load_gdt(cpu_gdt_descr);
load_idt(&idt_descr);
/*
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 870f20b..e437fb3 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -525,5 +525,3 @@ ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* 0xf0 - unused */
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
- /* Be sure this is zeroed to avoid false validations in Xen */
- .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b3c2e2c..9ed449a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -903,12 +903,6 @@ static int __devinit do_boot_cpu(int api
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- printk("Failed to allocate GDT for CPU %d\n", cpu);
- return 1;
- }
-
++cpucount;
/*
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h
index 494e73b..89b9c32 100644
--- a/include/asm-i386/desc.h
+++ b/include/asm-i386/desc.h
@@ -25,10 +25,12 @@ struct Xgt_desc_struct {
} __attribute__ ((packed));
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
+DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
- return ((struct desc_struct *)cpu_gdt_descr[cpu].address);
+ return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
}
#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
James Bottomley wrote:
>This patch:
>
>
I'm not adverse to making the gdt descriptors part of the per-cpu data.
But I think your patch is missing some necessary changes to
include/asm-i386/desc.h - what do you plan to do with the following
definition there?
extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
Also, it seems likely you may have broken APM and/or PnP BIOS.
>diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
>index cbc3206..d561a56 100644
>--- a/arch/i386/kernel/cpu/common.c
>+++ b/arch/i386/kernel/cpu/common.c
>@@ -4,6 +4,7 @@
> #include <linux/smp.h>
> #include <linux/module.h>
> #include <linux/percpu.h>
>+#include <linux/bootmem.h>
> #include <asm/semaphore.h>
> #include <asm/processor.h>
> #include <asm/i387.h>
>@@ -18,6 +19,9 @@
>
> #include "cpu.h"
>
>+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
>+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
>+
> DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
> EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
>
>@@ -559,8 +563,9 @@ void __devinit cpu_init(void)
> int cpu = smp_processor_id();
> struct tss_struct * t = &per_cpu(init_tss, cpu);
> struct thread_struct *thread = ¤t->thread;
>- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
>+ struct desc_struct *gdt;
> __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
>+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
>
> if (cpu_test_and_set(cpu, cpu_initialized)) {
> printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
>@@ -577,6 +582,22 @@ void __devinit cpu_init(void)
> set_in_cr4(X86_CR4_TSD);
> }
>
>+ /* This is a horrible hack to allocate the GDT. The problem
>+ * is that cpu_init() is called really early for the boot CPU
>+ * (and hence needs bootmem) but much later for the secondary
>+ * CPUs, when bootmem will have gone away */
>+ if (NODE_DATA(0)->bdata->node_bootmem_map) {
>+ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
>+ /* alloc_bootmem_pages panics on failure, so no check */
>+ memset(gdt, 0, PAGE_SIZE);
>+ } else {
>+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
>+ if (unlikely(!gdt)) {
>+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
>+ for (;;) local_irq_enable();
>+ }
>+ }
>+
> /*
> * Initialize the per-CPU GDT with the boot GDT,
> * and set up the GDT descriptor:
>
>
Can't you get rid of this ugly hack _and_ optimize the code at the same
time? I don't understand the details of voyager bringup, but it seems
clear that the BSP or node 0 is special in both sub-architectures. If
it is special anyway, the conditional logic can be merged, and better
yet, the allocation for the first caller of cpu_init() can skip the
allocation entirely - it can simply use the boot GDT, which is already
page-aligned and ready to go. This gets rid of the
alloc_bootmem_pages() piece of the hack above.
Also, it seems you left the allocation of the GDT in do_boot_cpu(). How
do you plan to make sure that GDT allocation is compatible with hotplug
CPU startup? I was worried about that, which is why I moved the GDT
allocation for secondary processors (originally in
arch/i386/kernel/cpu/common.c) to smpboot.c.
Zach
On Wed, 2006-02-22 at 10:08 -0800, Zachary Amsden wrote:
> I'm not adverse to making the gdt descriptors part of the per-cpu data.
If you can think of another way to fix the bug, I'm open to it.
> But I think your patch is missing some necessary changes to
> include/asm-i386/desc.h - what do you plan to do with the following
> definition there?
>
> extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
Nothing ... it predates your patch ... it's not much use, but just in
case someone wants to get at the boot cpu gdt descriptors.
> Also, it seems likely you may have broken APM and/or PnP BIOS.
I don't think so, how? ... APM alters the GDT for the APM bios long
after cpu_init() is called, as does drivers/pnp/pnpbios.
> Can't you get rid of this ugly hack _and_ optimize the code at the same
> time? I don't understand the details of voyager bringup, but it seems
> clear that the BSP or node 0 is special in both sub-architectures. If
> it is special anyway, the conditional logic can be merged, and better
> yet, the allocation for the first caller of cpu_init() can skip the
> allocation entirely - it can simply use the boot GDT, which is already
> page-aligned and ready to go. This gets rid of the
> alloc_bootmem_pages() piece of the hack above.
Not without repeating your bug. In the original code the gdt runs in
the boot area until cpu_init() where it switches to the original per_cpu
for the descriptor. Your patch moved the boot CPU to using the boot
area after cpu_init(), which means subsequent boot cpu gdt changes alter
that area before the secondaries have come up (also using it).
If we have to move to individual pages for descriptors, then every CPU
needs one.
> Also, it seems you left the allocation of the GDT in do_boot_cpu(). How
> do you plan to make sure that GDT allocation is compatible with hotplug
> CPU startup? I was worried about that, which is why I moved the GDT
> allocation for secondary processors (originally in
> arch/i386/kernel/cpu/common.c) to smpboot.c.
No, this piece of the patch:
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index b3c2e2c..9ed449a 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -903,12 +903,6 @@ static int __devinit do_boot_cpu(int api
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL)))
{
- printk("Failed to allocate GDT for CPU %d\n", cpu);
- return 1;
- }
-
++cpucount;
/*
removes it.
Since the allocation is in cpu_init(), it will be called on hotplug as
well.
James
On Wed, 2006-02-22 at 10:08 -0800, Zachary Amsden wrote:
> I'm not adverse to making the gdt descriptors part of the per-cpu data.
> But I think your patch is missing some necessary changes to
> include/asm-i386/desc.h - what do you plan to do with the following
> definition there?
>
> extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
Actually, I checked ... it looks like the externel definition of
cpu_gdt_descr can be dumped ... nothing's using it (well, at least in a
voyager build), so I updated the patch.
James
---
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index cbc3206..d561a56 100644
Index: BUILD-2.6/arch/i386/kernel/cpu/common.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/cpu/common.c 2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/cpu/common.c 2006-02-22 15:28:37.000000000 -0600
@@ -4,6 +4,7 @@
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/percpu.h>
+#include <linux/bootmem.h>
#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/i387.h>
@@ -18,6 +19,9 @@
#include "cpu.h"
+DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+
DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
@@ -571,8 +575,9 @@
int cpu = smp_processor_id();
struct tss_struct * t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = ¤t->thread;
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt;
__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -589,6 +594,22 @@
set_in_cr4(X86_CR4_TSD);
}
+ /* This is a horrible hack to allocate the GDT. The problem
+ * is that cpu_init() is called really early for the boot CPU
+ * (and hence needs bootmem) but much later for the secondary
+ * CPUs, when bootmem will have gone away */
+ if (NODE_DATA(0)->bdata->node_bootmem_map) {
+ gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
+ /* alloc_bootmem_pages panics on failure, so no check */
+ memset(gdt, 0, PAGE_SIZE);
+ } else {
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+ if (unlikely(!gdt)) {
+ printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
+ for (;;) local_irq_enable();
+ }
+ }
+
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
@@ -601,10 +622,10 @@
((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
(CPU_16BIT_STACK_SIZE - 1);
- cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
- cpu_gdt_descr[cpu].address = (unsigned long)gdt;
+ cpu_gdt_descr->size = GDT_SIZE - 1;
+ cpu_gdt_descr->address = (unsigned long)gdt;
- load_gdt(&cpu_gdt_descr[cpu]);
+ load_gdt(cpu_gdt_descr);
load_idt(&idt_descr);
/*
Index: BUILD-2.6/arch/i386/kernel/head.S
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/head.S 2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/head.S 2006-02-22 15:28:37.000000000 -0600
@@ -534,5 +534,3 @@
.quad 0x0000000000000000 /* 0xf0 - unused */
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
- /* Be sure this is zeroed to avoid false validations in Xen */
- .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0
Index: BUILD-2.6/arch/i386/kernel/smpboot.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/smpboot.c 2006-02-22 13:53:43.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/smpboot.c 2006-02-22 15:28:37.000000000 -0600
@@ -898,12 +898,6 @@
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- printk("Failed to allocate GDT for CPU %d\n", cpu);
- return 1;
- }
-
++cpucount;
/*
Index: BUILD-2.6/include/asm-i386/desc.h
===================================================================
--- BUILD-2.6.orig/include/asm-i386/desc.h 2006-02-22 13:53:47.000000000 -0600
+++ BUILD-2.6/include/asm-i386/desc.h 2006-02-22 15:28:37.000000000 -0600
@@ -24,11 +24,13 @@
unsigned short pad;
} __attribute__ ((packed));
-extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS];
+extern struct Xgt_desc_struct idt_descr;
+DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
+
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
- return ((struct desc_struct *)cpu_gdt_descr[cpu].address);
+ return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
}
#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
Index: BUILD-2.6/arch/i386/kernel/i386_ksyms.c
===================================================================
--- BUILD-2.6.orig/arch/i386/kernel/i386_ksyms.c 2006-02-22 15:26:57.000000000 -0600
+++ BUILD-2.6/arch/i386/kernel/i386_ksyms.c 2006-02-22 15:28:54.000000000 -0600
@@ -3,8 +3,6 @@
#include <asm/checksum.h>
#include <asm/desc.h>
-EXPORT_SYMBOL_GPL(cpu_gdt_descr);
-
EXPORT_SYMBOL(__down_failed);
EXPORT_SYMBOL(__down_failed_interruptible);
EXPORT_SYMBOL(__down_failed_trylock);