Linus,
please pull the latest x86 fixes tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-fixes-for-linus
NOTE: contains the second, lguest half of the FPU fixlet from Suresh.
Ingo
------------------>
Isaku Yamahata (1):
xen: Use wmb instead of rmb in xen_evtchn_do_upcall().
Jeremy Fitzhardinge (2):
xen: mask unwanted pte bits in __supported_pte_mask
xen: don't drop NX bit
Suresh Siddha (1):
x86: fix NULL pointer deref in __switch_to
arch/x86/xen/enlighten.c | 5 ++++
arch/x86/xen/mmu.c | 56 ++++++++++++++++++++++++---------------------
arch/x86/xen/xen-head.S | 2 +-
drivers/lguest/x86/core.c | 15 +++++++-----
drivers/xen/events.c | 2 +-
5 files changed, 46 insertions(+), 34 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e4..c048de3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1228,6 +1228,11 @@ asmlinkage void __init xen_start_kernel(void)
if (xen_feature(XENFEAT_supervisor_mode_kernel))
pv_info.kernel_rpl = 0;
+ /* Prevent unwanted bits from being set in PTEs. */
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
+ if (!is_initial_xendomain())
+ __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
/* set the limit of our address space */
xen_reserve_top();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef5..265601d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,48 +179,54 @@ out:
preempt_enable();
}
-pteval_t xen_pte_val(pte_t pte)
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
{
- pteval_t ret = pte.pte;
+ if (val & _PAGE_PRESENT) {
+ unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ }
+
+ return val;
+}
+
+static pteval_t pte_pfn_to_mfn(pteval_t val)
+{
+ if (val & _PAGE_PRESENT) {
+ unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ }
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+ return val;
+}
- return ret;
+pteval_t xen_pte_val(pte_t pte)
+{
+ return pte_mfn_to_pfn(pte.pte);
}
pgdval_t xen_pgd_val(pgd_t pgd)
{
- pgdval_t ret = pgd.pgd;
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pgd.pgd);
}
pte_t xen_make_pte(pteval_t pte)
{
- if (pte & _PAGE_PRESENT) {
- pte = phys_to_machine(XPADDR(pte)).maddr;
- pte &= ~(_PAGE_PCD | _PAGE_PWT);
- }
-
- return (pte_t){ .pte = pte };
+ pte = pte_pfn_to_mfn(pte);
+ return native_make_pte(pte);
}
pgd_t xen_make_pgd(pgdval_t pgd)
{
- if (pgd & _PAGE_PRESENT)
- pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
- return (pgd_t){ pgd };
+ pgd = pte_pfn_to_mfn(pgd);
+ return native_make_pgd(pgd);
}
pmdval_t xen_pmd_val(pmd_t pmd)
{
- pmdval_t ret = native_pmd_val(pmd);
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pmd.pmd);
}
#ifdef CONFIG_X86_PAE
void xen_set_pud(pud_t *ptr, pud_t val)
@@ -267,9 +273,7 @@ void xen_pmd_clear(pmd_t *pmdp)
pmd_t xen_make_pmd(pmdval_t pmd)
{
- if (pmd & _PAGE_PRESENT)
- pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+ pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
}
#else /* !PAE */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587..3175e97 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
__FINIT
-.pushsection .bss.page_aligned
+.pushsection .text
.align PAGE_SIZE_asm
ENTRY(hypercall_page)
.skip 0x1000
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d..2e554a4 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* we set it now, so we can trap and pass that trap to the Guest if it
* uses the FPU. */
if (cpu->ts)
- lguest_set_ts();
+ unlazy_fpu(current);
/* SYSENTER is an optimized way of doing system calls. We can't allow
* it because it always jumps to privilege level 0. A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* trap made the switcher code come back, and an error code which some
* traps set. */
+ /* Restore SYSENTER if it's supposed to be on. */
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
/* If the Guest page faulted, then the cr2 register will tell us the
* bad virtual address. We have to grab this now, because once we
* re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
if (cpu->regs->trapnum == 14)
cpu->arch.last_pagefault = read_cr2();
/* Similarly, if we took a trap because the Guest used the FPU,
- * we have to restore the FPU it expects to see. */
+ * we have to restore the FPU it expects to see.
+ * math_state_restore() may sleep and we may even move off to
+ * a different CPU. So all the critical stuff should be done
+ * before this. */
else if (cpu->regs->trapnum == 7)
math_state_restore();
-
- /* Restore SYSENTER if it's supposed to be on. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
}
/*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b..76e5b73 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
/* Clear master flag /before/ clearing selector flag. */
- rmb();
+ wmb();
#endif
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
while (pending_words != 0) {
On Mon, Jun 23, 2008 at 09:40:55PM +0200, Ingo Molnar wrote:
> Linus,
>
> please pull the latest x86 fixes tree from:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-fixes-for-linus
>
> NOTE: contains the second, lguest half of the FPU fixlet from Suresh.
>
> Ingo
>...
> Jeremy Fitzhardinge (2):
>...
> xen: don't drop NX bit
>...
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
>...
> pmdval_t xen_pmd_val(pmd_t pmd)
> {
> - pmdval_t ret = native_pmd_val(pmd);
> - if (ret & _PAGE_PRESENT)
> - ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
> - return ret;
> + return pte_mfn_to_pfn(pmd.pmd);
> }
>...
Another compile error from the x86 tree:
<-- snip -->
...
CC arch/x86/xen/mmu.o
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/x86/xen/mmu.c: In function ‘xen_pmd_val’:
/home/bunk/linux/kernel-2.6/git/linux-2.6/arch/x86/xen/mmu.c:229: error: ‘pmd_t’ has no member named ‘pmd’
make[2]: *** [arch/x86/xen/mmu.o] Error 1
<-- snip -->
The problem seems to be related to CONFIG_X86_PAE=n which results in
PAGETABLE_LEVELS == 2.
Not visible in -next due to "xen: remove support for non-PAE 32-bit",
but that is not in 2.6.26.
.config attached.
cu
Adrian
--
"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed
Adrian Bunk wrote:
> Another compile error from the x86 tree:
>
> <-- snip -->
>
> ...
> CC arch/x86/xen/mmu.o
> /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/x86/xen/mmu.c: In function ‘xen_pmd_val’:
> /home/bunk/linux/kernel-2.6/git/linux-2.6/arch/x86/xen/mmu.c:229: error: ‘pmd_t’ has no member named ‘pmd’
> make[2]: *** [arch/x86/xen/mmu.o] Error 1
>
> <-- snip -->
>
> The problem seems to be related to CONFIG_X86_PAE=n which results in
> PAGETABLE_LEVELS == 2.
>
> Not visible in -next due to "xen: remove support for non-PAE 32-bit",
> but that is not in 2.6.26.
>
Thanks Adrian,
Bringing that patch over into .26 would be the easiest fix. It's
effectively a bugfix because non-PAE Xen is broken and unusable anyway.
Alternatively, I can easily put a little patch together to use the right
accessors that will compile in non-PAE.
J
* Jeremy Fitzhardinge <[email protected]> wrote:
> Bringing that patch over into .26 would be the easiest fix. It's
> effectively a bugfix because non-PAE Xen is broken and unusable
> anyway.
>
> Alternatively, I can easily put a little patch together to use the
> right accessors that will compile in non-PAE.
that patch has been in testing since early May so we can put it into
v2.6.26 - and as you say non-PAE paravirt guest support in Xen is
deprecated and rarely used/tested.
Linus,
Please pull the latest x86 fixes git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-fixes-for-linus
which includes a cherry-pick of this one Xen commit from Jeremy - this
fixes the build error reported by Adrian Bunk.
( alternatively we could do just the arch/x86/xen/Kconfig bit too but i
think we should just take this one as it touches Xen code only. )
Thanks,
Ingo
------------------>
Jeremy Fitzhardinge (1):
xen: remove support for non-PAE 32-bit
arch/x86/xen/Kconfig | 2 +-
arch/x86/xen/enlighten.c | 51 +++++++++++++++----------------------------
arch/x86/xen/mmu.c | 19 +--------------
arch/x86/xen/mmu.h | 24 +++++---------------
arch/x86/xen/xen-head.S | 4 ---
include/asm-x86/xen/page.h | 4 ---
6 files changed, 27 insertions(+), 77 deletions(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be..525b108 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
bool "Xen guest support"
select PARAVIRT
depends on X86_32
- depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
+ depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c048de3..f09c1c6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
static __init void xen_pagetable_setup_start(pgd_t *base)
{
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+ int i;
/* special set_pte for pagetable initialization */
pv_mmu_ops.set_pte = xen_set_pte_init;
init_mm.pgd = base;
/*
- * copy top-level of Xen-supplied pagetable into place. For
- * !PAE we can use this as-is, but for PAE it is a stand-in
- * while we copy the pmd pages.
+ * copy top-level of Xen-supplied pagetable into place. This
+ * is a stand-in while we copy the pmd pages.
*/
memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
- if (PTRS_PER_PMD > 1) {
- int i;
- /*
- * For PAE, need to allocate new pmds, rather than
- * share Xen's, since Xen doesn't like pmd's being
- * shared between address spaces.
- */
- for (i = 0; i < PTRS_PER_PGD; i++) {
- if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
- pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * For PAE, need to allocate new pmds, rather than
+ * share Xen's, since Xen doesn't like pmd's being
+ * shared between address spaces.
+ */
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+ pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
- memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
- PAGE_SIZE);
+ memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+ PAGE_SIZE);
- make_lowmem_page_readonly(pmd);
+ make_lowmem_page_readonly(pmd);
- set_pgd(&base[i], __pgd(1 + __pa(pmd)));
- } else
- pgd_clear(&base[i]);
- }
+ set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+ } else
+ pgd_clear(&base[i]);
}
/* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
- {
- unsigned level;
-
-#ifdef CONFIG_X86_PAE
- level = MMUEXT_PIN_L3_TABLE;
-#else
- level = MMUEXT_PIN_L2_TABLE;
-#endif
-
- pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
- }
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
}
/* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.make_pte = xen_make_pte,
.make_pgd = xen_make_pgd,
-#ifdef CONFIG_X86_PAE
.set_pte_atomic = xen_set_pte_atomic,
.set_pte_present = xen_set_pte_at,
.set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
.make_pmd = xen_make_pmd,
.pmd_val = xen_pmd_val,
-#endif /* PAE */
.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 265601d..df40bf7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -228,7 +228,7 @@ pmdval_t xen_pmd_val(pmd_t pmd)
{
return pte_mfn_to_pfn(pmd.pmd);
}
-#ifdef CONFIG_X86_PAE
+
void xen_set_pud(pud_t *ptr, pud_t val)
{
struct multicall_space mcs;
@@ -276,12 +276,6 @@ pmd_t xen_make_pmd(pmdval_t pmd)
pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
}
-#else /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- *ptep = pte;
-}
-#endif /* CONFIG_X86_PAE */
/*
(Yet another) pagetable walker. This one is intended for pinning a
@@ -434,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level)
read-only, and can be pinned. */
void xen_pgd_pin(pgd_t *pgd)
{
- unsigned level;
-
xen_mc_batch();
if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -445,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd)
xen_mc_batch();
}
-#ifdef CONFIG_X86_PAE
- level = MMUEXT_PIN_L3_TABLE;
-#else
- level = MMUEXT_PIN_L2_TABLE;
-#endif
-
- xen_do_pin(level, PFN_DOWN(__pa(pgd)));
-
+ xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
xen_mc_issue(0);
}
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b..5fe961c 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
void xen_pgd_pin(pgd_t *pgd);
//void xen_pgd_unpin(pgd_t *pgd);
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
+pteval_t xen_pte_val(pte_t);
+pmdval_t xen_pmd_val(pmd_t);
+pgdval_t xen_pgd_val(pgd_t);
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
+pte_t xen_make_pte(pteval_t);
+pmd_t xen_make_pmd(pmdval_t);
+pgd_t xen_make_pgd(pgdval_t);
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void xen_pmd_clear(pmd_t *pmdp);
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 3175e97..6ec3b4f 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
-#endif
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
#endif /*CONFIG_XEN */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4d..e11f240 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
return (pte_t) { .pte = x };
}
-#ifdef CONFIG_X86_PAE
#define pmd_val_ma(v) ((v).pmd)
#define pud_val_ma(v) ((v).pgd.pgd)
#define __pmd_ma(x) ((pmd_t) { (x) } )
-#else /* !X86_PAE */
-#define pmd_val_ma(v) ((v).pud.pgd.pgd)
-#endif /* CONFIG_X86_PAE */
#define pgd_val_ma(x) ((x).pgd)