As of v2.6.36-rc8-54-gb40827f, initial_page_table needs to be set up to boot.
My initial fix for lguest was to cut & paste in the initialization code, but
as it's simply byte-twiddling (which could be done at compile time if we
were clever enough), it seems better to do it unconditionally.
Jeremy, Ingo does this make sense to you? Given it's rc5, I have an
lguest-specific patch ready as an alternative...
Thanks,
Rusty.
Subject: x86: initialize initial_page_table before paravirt jumps
v2.6.36-rc8-54-gb40827f (x86-32, mm: Add an initial page table for
core bootstrapping) made x86 boot using initial_page_table and broke
lguest.
Rather than cut & paste initialization code into lguest, let's just
do that init before the paravirt jump (and remove the max_pfn_mapped
setting in lguest, since this code does it).
Signed-off-by: Rusty Russell <[email protected]>
---
arch/x86/kernel/head_32.S | 73 +++++++++++++++++++++++-----------------------
arch/x86/lguest/boot.c | 3 -
2 files changed, 37 insertions(+), 39 deletions(-)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -137,39 +137,6 @@ 1:
movl %eax, pa(olpc_ofw_pgd)
#endif
-#ifdef CONFIG_PARAVIRT
- /* This is can only trip for a broken bootloader... */
- cmpw $0x207, pa(boot_params + BP_version)
- jb default_entry
-
- /* Paravirt-compatible boot parameters. Look to see what architecture
- we're booting under. */
- movl pa(boot_params + BP_hardware_subarch), %eax
- cmpl $num_subarch_entries, %eax
- jae bad_subarch
-
- movl pa(subarch_entries)(,%eax,4), %eax
- subl $__PAGE_OFFSET, %eax
- jmp *%eax
-
-bad_subarch:
-WEAK(lguest_entry)
-WEAK(xen_entry)
- /* Unknown implementation; there's really
- nothing we can do at this point. */
- ud2a
-
- __INITDATA
-
-subarch_entries:
- .long default_entry /* normal x86/PC */
- .long lguest_entry /* lguest hypervisor */
- .long xen_entry /* Xen hypervisor */
- .long default_entry /* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#endif /* CONFIG_PARAVIRT */
-
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
@@ -179,7 +146,6 @@ num_subarch_entries = (. - subarch_entri
*
* Note that the stack is not yet set up!
*/
-default_entry:
#ifdef CONFIG_X86_PAE
/*
@@ -259,7 +225,42 @@ 11:
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(initial_page_table+0xffc)
#endif
- jmp 3f
+
+#ifdef CONFIG_PARAVIRT
+ /* This is can only trip for a broken bootloader... */
+ cmpw $0x207, pa(boot_params + BP_version)
+ jb default_entry
+
+ /* Paravirt-compatible boot parameters. Look to see what architecture
+ we're booting under. */
+ movl pa(boot_params + BP_hardware_subarch), %eax
+ cmpl $num_subarch_entries, %eax
+ jae bad_subarch
+
+ movl pa(subarch_entries)(,%eax,4), %eax
+ subl $__PAGE_OFFSET, %eax
+ jmp *%eax
+
+bad_subarch:
+WEAK(lguest_entry)
+WEAK(xen_entry)
+ /* Unknown implementation; there's really
+ nothing we can do at this point. */
+ ud2a
+
+ __INITDATA
+
+subarch_entries:
+ .long default_entry /* normal x86/PC */
+ .long lguest_entry /* lguest hypervisor */
+ .long xen_entry /* Xen hypervisor */
+ .long default_entry /* Moorestown MID */
+num_subarch_entries = (. - subarch_entries) / 4
+.previous
+#else
+ jmp default_entry
+#endif /* CONFIG_PARAVIRT */
+
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
@@ -280,7 +281,7 @@ ENTRY(startup_32_smp)
movl %eax,%fs
movl %eax,%gs
#endif /* CONFIG_SMP */
-3:
+default_entry:
/*
* New page tables may be in 4Mbyte page mode and may
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1349,9 +1349,6 @@ __init void lguest_init(void)
*/
switch_to_new_gdt(0);
- /* We actually boot with all memory mapped, but let's say 128MB. */
- max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
-
/*
* The Host<->Guest Switcher lives at the top of our address space, and
* the Host told us how big it is when we made LGUEST_INIT hypercall:
On 12/08/2010 05:59 PM, Rusty Russell wrote:
> As of v2.6.36-rc8-54-gb40827f, initial_page_table needs to be set up to boot.
> My initial fix for lguest was to cut & paste in the initialization code, but
> as it's simply byte-twiddling (which could be done at compile time if we
> were clever enough), it seems better to do it unconditionally.
>
> Jeremy, Ingo does this make sense to you? Given it's rc5, I have an
> lguest-specific patch ready as an alternative...
We have already committed a Xen-specific fix for this (twice!). Your
patch is a no-op for Xen because we never ended up using that entry path
through head_32.S, so if it fixes things for lguest then go for it.
J
> Thanks,
> Rusty.
>
> Subject: x86: initialize initial_page_table before paravirt jumps
>
> v2.6.36-rc8-54-gb40827f (x86-32, mm: Add an initial page table for
> core bootstrapping) made x86 boot using initial_page_table and broke
> lguest.
>
> Rather than cut & paste initialization code into lguest, let's just
> do that init before the paravirt jump (and remove the max_pfn_mapped
> setting in lguest, since this code does it).
>
> Signed-off-by: Rusty Russell <[email protected]>
> ---
> arch/x86/kernel/head_32.S | 73 +++++++++++++++++++++++-----------------------
> arch/x86/lguest/boot.c | 3 -
> 2 files changed, 37 insertions(+), 39 deletions(-)
>
> diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
> --- a/arch/x86/kernel/head_32.S
> +++ b/arch/x86/kernel/head_32.S
> @@ -137,39 +137,6 @@ 1:
> movl %eax, pa(olpc_ofw_pgd)
> #endif
>
> -#ifdef CONFIG_PARAVIRT
> - /* This is can only trip for a broken bootloader... */
> - cmpw $0x207, pa(boot_params + BP_version)
> - jb default_entry
> -
> - /* Paravirt-compatible boot parameters. Look to see what architecture
> - we're booting under. */
> - movl pa(boot_params + BP_hardware_subarch), %eax
> - cmpl $num_subarch_entries, %eax
> - jae bad_subarch
> -
> - movl pa(subarch_entries)(,%eax,4), %eax
> - subl $__PAGE_OFFSET, %eax
> - jmp *%eax
> -
> -bad_subarch:
> -WEAK(lguest_entry)
> -WEAK(xen_entry)
> - /* Unknown implementation; there's really
> - nothing we can do at this point. */
> - ud2a
> -
> - __INITDATA
> -
> -subarch_entries:
> - .long default_entry /* normal x86/PC */
> - .long lguest_entry /* lguest hypervisor */
> - .long xen_entry /* Xen hypervisor */
> - .long default_entry /* Moorestown MID */
> -num_subarch_entries = (. - subarch_entries) / 4
> -.previous
> -#endif /* CONFIG_PARAVIRT */
> -
> /*
> * Initialize page tables. This creates a PDE and a set of page
> * tables, which are located immediately beyond __brk_base. The variable
> @@ -179,7 +146,6 @@ num_subarch_entries = (. - subarch_entri
> *
> * Note that the stack is not yet set up!
> */
> -default_entry:
> #ifdef CONFIG_X86_PAE
>
> /*
> @@ -259,7 +225,42 @@ 11:
> movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
> movl %eax,pa(initial_page_table+0xffc)
> #endif
> - jmp 3f
> +
> +#ifdef CONFIG_PARAVIRT
> + /* This is can only trip for a broken bootloader... */
> + cmpw $0x207, pa(boot_params + BP_version)
> + jb default_entry
> +
> + /* Paravirt-compatible boot parameters. Look to see what architecture
> + we're booting under. */
> + movl pa(boot_params + BP_hardware_subarch), %eax
> + cmpl $num_subarch_entries, %eax
> + jae bad_subarch
> +
> + movl pa(subarch_entries)(,%eax,4), %eax
> + subl $__PAGE_OFFSET, %eax
> + jmp *%eax
> +
> +bad_subarch:
> +WEAK(lguest_entry)
> +WEAK(xen_entry)
> + /* Unknown implementation; there's really
> + nothing we can do at this point. */
> + ud2a
> +
> + __INITDATA
> +
> +subarch_entries:
> + .long default_entry /* normal x86/PC */
> + .long lguest_entry /* lguest hypervisor */
> + .long xen_entry /* Xen hypervisor */
> + .long default_entry /* Moorestown MID */
> +num_subarch_entries = (. - subarch_entries) / 4
> +.previous
> +#else
> + jmp default_entry
> +#endif /* CONFIG_PARAVIRT */
> +
> /*
> * Non-boot CPU entry point; entered from trampoline.S
> * We can't lgdt here, because lgdt itself uses a data segment, but
> @@ -280,7 +281,7 @@ ENTRY(startup_32_smp)
> movl %eax,%fs
> movl %eax,%gs
> #endif /* CONFIG_SMP */
> -3:
> +default_entry:
>
> /*
> * New page tables may be in 4Mbyte page mode and may
> diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
> --- a/arch/x86/lguest/boot.c
> +++ b/arch/x86/lguest/boot.c
> @@ -1349,9 +1349,6 @@ __init void lguest_init(void)
> */
> switch_to_new_gdt(0);
>
> - /* We actually boot with all memory mapped, but let's say 128MB. */
> - max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
> -
> /*
> * The Host<->Guest Switcher lives at the top of our address space, and
> * the Host told us how big it is when we made LGUEST_INIT hypercall:
>
On Fri, 10 Dec 2010 04:22:24 am Jeremy Fitzhardinge wrote:
> On 12/08/2010 05:59 PM, Rusty Russell wrote:
> > As of v2.6.36-rc8-54-gb40827f, initial_page_table needs to be set up to boot.
> > My initial fix for lguest was to cut & paste in the initialization code, but
> > as it's simply byte-twiddling (which could be done at compile time if we
> > were clever enough), it seems better to do it unconditionally.
> >
> > Jeremy, Ingo does this make sense to you? Given it's rc5, I have an
> > lguest-specific patch ready as an alternative...
>
> We have already committed a Xen-specific fix for this (twice!). Your
> patch is a no-op for Xen because we never ended up using that entry path
> through head_32.S, so if it fixes things for lguest then go for it.
Hmm, I'm going to call that "Acked-by".
But given the late stage in the cycle, I'll put the lguest-only version in
now and submit this for next merge window.
Thanks!
Rusty.