A set of PVH-related patches.
The first patch is an x86-64 ABI fix for PVH guests. The second is a small
update that removes a redundant memset() (on both the PV and PVH code paths).
The rest of the series enables non-privileged 32-bit PVH guests. This requires
hypervisor patches from
http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg04817.html
Boris Ostrovsky (6):
xen/x86/pvh: Save %rbx in xen_pvh_early_cpu_init()
xen/x86: Remove unnecessary memset() call
xen/x86/pvh: Properly set page tables for 32-bit PVH guests
xen/x86/pvh: Set up descriptors for 32-bit PVH guests
xen/x86/pvh: Add 32-bit PVH initialization code
xen/x86/pvh: Allow building 32-bit PVH guests
arch/x86/xen/Kconfig | 2 +-
arch/x86/xen/enlighten.c | 20 ++++++++++++--------
arch/x86/xen/mmu.c | 22 +++++++++++++---------
arch/x86/xen/smp.c | 21 ++++++++++++---------
arch/x86/xen/smp.h | 4 ++++
arch/x86/xen/xen-head.S | 29 +++++++++++++++++++++--------
6 files changed, 63 insertions(+), 35 deletions(-)
--
1.8.1.4
The x86-64 ABI requires that functions preserve %rbx. When
xen_pvh_early_cpu_init() is executed on the boot CPU it is invoked as a
regular function, and the 'cpuid' instruction clobbers %rbx. (This is
not a concern on secondary processors, where xen_pvh_early_cpu_init()
is the entry point and thus does not need to preserve registers.)
Since we cannot access the stack on secondary processors (the PTE's NX
bit will cause #GP on AMD --- see commit a2ef5dc2c7cb ("x86/xen: Set
EFER.NX and EFER.SCE in PVH guests")) we can't always save %rbx on the
stack. We work around this by creating a new entry point for those
processors --- xen_pvh_early_cpu_init_secondary(). Note that on
secondary CPUs we will load %rbx with garbage, which is OK.
As a side effect of these changes we don't need to save %rsi anymore,
since we can use %rbx instead of %rsi as a temporary register.
(We could save %rbx in another scratch register such as %r8, but since
we will need a new entry point for 32-bit PVH anyway we go with this
approach for symmetry.)
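For illustration only, not part of the patch: the same constraint shows
up when cpuid is issued from C, where listing %rbx as an output forces
the compiler to preserve the callee-saved register around the
instruction. A minimal sketch (the helper name is made up):
	/* Illustrative sketch: leaf 0x80000001, EDX bit 20 = NX. */
	static unsigned int cpuid_80000001_edx(void)
	{
		unsigned int eax = 0x80000001, ebx, ecx, edx;
		asm volatile("cpuid"
			     : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
			     : "0" (eax));
		return edx;
	}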
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/smp.c | 3 ++-
arch/x86/xen/smp.h | 4 ++++
arch/x86/xen/xen-head.S | 14 +++++++-------
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8648438..ca7ee1f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -423,7 +423,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
* bit set. This means before DS/SS is touched, NX in
* EFER must be set. Hence the following assembly glue code.
*/
- ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
+ ctxt->user_regs.eip =
+ (unsigned long)xen_pvh_early_cpu_init_secondary;
ctxt->user_regs.rdi = cpu;
ctxt->user_regs.rsi = true; /* entry == true */
}
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 963d62a..bf2b43c 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -10,10 +10,14 @@ extern void xen_send_IPI_self(int vector);
#ifdef CONFIG_XEN_PVH
extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+extern void xen_pvh_early_cpu_init_secondary(int cpu, bool entry);
#else
static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
{
}
+static inline void xen_pvh_early_cpu_init_secondary(int cpu, bool entry)
+{
+}
#endif
#endif
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 8afdfcc..b1508a8 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -56,28 +56,28 @@ ENTRY(startup_xen)
* @entry: true if this is a secondary vcpu coming up on this entry
* point, false if this is the boot CPU being initialized for
* the first time (%rsi)
- *
- * Note: This is called as a function on the boot CPU, and is the entry point
- * on the secondary CPU.
*/
ENTRY(xen_pvh_early_cpu_init)
- mov %rsi, %r11
+ mov %rbx, -8(%rsp)
+/* Entry point for secondary CPUs */
+ENTRY(xen_pvh_early_cpu_init_secondary)
/* Gather features to see if NX implemented. */
mov $0x80000001, %eax
cpuid
- mov %edx, %esi
+ mov %edx, %ebx
mov $MSR_EFER, %ecx
rdmsr
bts $_EFER_SCE, %eax
- bt $20, %esi
+ bt $20, %ebx
jnc 1f /* No NX, skip setting it */
bts $_EFER_NX, %eax
1: wrmsr
+ mov -8(%rsp), %rbx
#ifdef CONFIG_SMP
- cmp $0, %r11b
+ cmp $0, %esi
jne cpu_bringup_and_idle
#endif
ret
--
1.8.1.4
Since ctxt is kzalloc'd, there is no need to call memset() for
ctxt->fpu_ctxt.
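Illustrative sketch, not part of the patch: kzalloc() returns
zero-filled memory, so every byte of ctxt, including fpu_ctxt, is
already 0 at the point where the removed memset() used to run.
	struct vcpu_guest_context *ctxt;
	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);	/* zero-filled */
	if (ctxt == NULL)
		return -ENOMEM;
	/* ctxt->fpu_ctxt is already all zeroes here, so
	 * memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)) is a no-op. */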
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/smp.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca7ee1f..7cf0765 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -377,7 +377,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.fs = __KERNEL_PERCPU;
ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
- memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
--
1.8.1.4
32-bit PVH guests don't want to write-protect/pin page tables.
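Illustrative sketch, not the patch itself: a PV guest must have Xen
validate and pin its page tables and must load %cr3 via hypercall,
while an auto-translated PVH guest owns its page tables and loads %cr3
natively.
	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		/* PV: page tables are pinned and loaded via Xen */
		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
				  PFN_DOWN(__pa(initial_page_table)));
		xen_write_cr3(__pa(initial_page_table));
	} else {
		/* PVH: ordinary page tables, ordinary cr3 load */
		native_write_cr3(__pa(initial_page_table));
	}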
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/mmu.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dd151b2..b473df8 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1762,8 +1762,9 @@ void __init xen_setup_machphys_mapping(void)
machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
#ifdef CONFIG_X86_32
- WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
- < machine_to_phys_mapping);
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
+ < machine_to_phys_mapping);
#endif
}
@@ -1958,15 +1959,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
initial_page_table[KERNEL_PGD_BOUNDARY] =
__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
- set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
- set_page_prot(initial_page_table, PAGE_KERNEL_RO);
- set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
+ set_page_prot(initial_page_table, PAGE_KERNEL_RO);
+ set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
- PFN_DOWN(__pa(initial_page_table)));
- xen_write_cr3(__pa(initial_page_table));
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
+ PFN_DOWN(__pa(initial_page_table)));
+ xen_write_cr3(__pa(initial_page_table));
+ } else
+ native_write_cr3(__pa(initial_page_table));
memblock_reserve(__pa(xen_start_info->pt_base),
xen_start_info->nr_pt_frames * PAGE_SIZE);
--
1.8.1.4
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/enlighten.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0b95c9b..7953e68 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1362,12 +1362,12 @@ static void __init xen_boot_params_init_edd(void)
static void __ref xen_setup_gdt(int cpu)
{
if (xen_feature(XENFEAT_auto_translated_physmap)) {
-#ifdef CONFIG_X86_64
unsigned long dummy;
- load_percpu_segment(cpu); /* We need to access per-cpu area */
+ setup_stack_canary_segment(cpu);
switch_to_new_gdt(cpu); /* GDT and GS set */
+#ifdef CONFIG_X86_64
/* We are switching of the Xen provided GDT to our HVM mode
* GDT. The new GDT has __KERNEL_CS with CS.L = 1
* and we are jumping to reload it.
@@ -1392,8 +1392,16 @@ static void __ref xen_setup_gdt(int cpu)
loadsegment(ds, 0);
loadsegment(fs, 0);
#else
- /* PVH: TODO Implement. */
- BUG();
+ asm volatile ("ljmp %1,$1f\n"
+ "1:\n"
+ "movl %2,%0\n"
+ "movl %0,%%ss\n"
+ "movl %3,%0\n"
+ "movl %0,%%ds\n"
+ "movl %0,%%es\n"
+ : "=&r" (dummy)
+ : "i" (__KERNEL_CS), "i" (__KERNEL_DS),
+ "i" (__USER_DS));
#endif
return; /* PVH does not need any PV GDT ops. */
}
--
1.8.1.4
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/enlighten.c | 4 ----
arch/x86/xen/smp.c | 17 ++++++++++-------
arch/x86/xen/xen-head.S | 17 +++++++++++++++--
3 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7953e68..807337e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1464,10 +1464,6 @@ static void __init xen_pvh_early_guest_init(void)
xen_pvh_early_cpu_init(0, false);
xen_pvh_set_cr_flags(0);
-
-#ifdef CONFIG_X86_32
- BUG(); /* PVH: Implement proper support. */
-#endif
}
#endif /* CONFIG_XEN_PVH */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7cf0765..99be53b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -372,11 +372,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);
-#ifdef CONFIG_X86_32
- /* Note: PVH is not yet supported on x86_32. */
- ctxt->user_regs.fs = __KERNEL_PERCPU;
- ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
-#endif
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+ ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
@@ -403,6 +400,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->kernel_sp = idle->thread.sp0;
#ifdef CONFIG_X86_32
+ ctxt->user_regs.fs = __KERNEL_PERCPU;
+ ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
ctxt->event_callback_cs = __KERNEL_CS;
ctxt->failsafe_callback_cs = __KERNEL_CS;
#else
@@ -424,12 +423,16 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
*/
ctxt->user_regs.eip =
(unsigned long)xen_pvh_early_cpu_init_secondary;
+#ifdef CONFIG_X86_64
ctxt->user_regs.rdi = cpu;
ctxt->user_regs.rsi = true; /* entry == true */
+#else
+ *((uint32_t *)ctxt->user_regs.esp + 1) = cpu;
+ *((uint32_t *)ctxt->user_regs.esp + 2) = true;
+#endif
}
#endif
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
- ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
BUG();
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b1508a8..12c4e2a 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -51,14 +51,18 @@ ENTRY(startup_xen)
#ifdef CONFIG_XEN_PVH
/*
- * xen_pvh_early_cpu_init() - early PVH VCPU initialization
+ * xen_pvh_early_cpu_init(cpu, entry) - early PVH VCPU initialization
* @cpu: this cpu number (%rdi)
* @entry: true if this is a secondary vcpu coming up on this entry
* point, false if this is the boot CPU being initialized for
* the first time (%rsi)
*/
ENTRY(xen_pvh_early_cpu_init)
+#ifdef CONFIG_X86_64
mov %rbx, -8(%rsp)
+#else
+ mov %ebx, -4(%esp)
+#endif
/* Entry point for secondary CPUs */
ENTRY(xen_pvh_early_cpu_init_secondary)
@@ -69,15 +73,24 @@ ENTRY(xen_pvh_early_cpu_init_secondary)
mov $MSR_EFER, %ecx
rdmsr
+#ifdef CONFIG_X86_64
bts $_EFER_SCE, %eax
+#endif
bt $20, %ebx
jnc 1f /* No NX, skip setting it */
bts $_EFER_NX, %eax
1: wrmsr
+
+#ifdef CONFIG_X86_64
mov -8(%rsp), %rbx
-#ifdef CONFIG_SMP
cmp $0, %esi
+#else
+ mov -4(%esp), %ebx
+ cmp $0, 8(%esp) /* second argument */
+#endif
+
+#ifdef CONFIG_SMP
jne cpu_bringup_and_idle
#endif
ret
--
1.8.1.4
Signed-off-by: Boris Ostrovsky <[email protected]>
---
arch/x86/xen/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index e88fda8..891031e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -48,5 +48,5 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
- depends on X86_64 && XEN && XEN_PVHVM
+ depends on XEN && XEN_PVHVM
def_bool n
--
1.8.1.4
On Mon, Jul 06, 2015 at 11:34:21PM -0400, Boris Ostrovsky wrote:
> Since ctxt is kzalloc'd, there is no need to call memset() for
> ctxt->fpu_ctxt.
>
> Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/xen/smp.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index ca7ee1f..7cf0765 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -377,7 +377,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> ctxt->user_regs.fs = __KERNEL_PERCPU;
> ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
> #endif
> - memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
>
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> --
> 1.8.1.4
>
On Mon, Jul 06, 2015 at 11:34:22PM -0400, Boris Ostrovsky wrote:
> 32-bit PVH guests don't want to write-protect/pin page tables.
>
> Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/xen/mmu.c | 22 +++++++++++++---------
> 1 file changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index dd151b2..b473df8 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -1762,8 +1762,9 @@ void __init xen_setup_machphys_mapping(void)
> machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
> }
> #ifdef CONFIG_X86_32
> - WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
> - < machine_to_phys_mapping);
> + if (!xen_feature(XENFEAT_auto_translated_physmap))
> + WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
> + < machine_to_phys_mapping);
> #endif
> }
>
> @@ -1958,15 +1959,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
> initial_page_table[KERNEL_PGD_BOUNDARY] =
> __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
>
> - set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
> - set_page_prot(initial_page_table, PAGE_KERNEL_RO);
> - set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
> + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> + set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
> + set_page_prot(initial_page_table, PAGE_KERNEL_RO);
> + set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
>
> - pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
> + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
>
> - pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
> - PFN_DOWN(__pa(initial_page_table)));
> - xen_write_cr3(__pa(initial_page_table));
> + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
> + PFN_DOWN(__pa(initial_page_table)));
> + xen_write_cr3(__pa(initial_page_table));
> + } else
> + native_write_cr3(__pa(initial_page_table));
>
> memblock_reserve(__pa(xen_start_info->pt_base),
> xen_start_info->nr_pt_frames * PAGE_SIZE);
> --
> 1.8.1.4
>
On Mon, Jul 06, 2015 at 11:34:23PM -0400, Boris Ostrovsky wrote:
> Signed-off-by: Boris Ostrovsky <[email protected]>
> ---
> arch/x86/xen/enlighten.c | 16 ++++++++++++----
> 1 file changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 0b95c9b..7953e68 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1362,12 +1362,12 @@ static void __init xen_boot_params_init_edd(void)
> static void __ref xen_setup_gdt(int cpu)
> {
> if (xen_feature(XENFEAT_auto_translated_physmap)) {
> -#ifdef CONFIG_X86_64
> unsigned long dummy;
>
> - load_percpu_segment(cpu); /* We need to access per-cpu area */
> + setup_stack_canary_segment(cpu);
> switch_to_new_gdt(cpu); /* GDT and GS set */
>
> +#ifdef CONFIG_X86_64
> /* We are switching of the Xen provided GDT to our HVM mode
> * GDT. The new GDT has __KERNEL_CS with CS.L = 1
> * and we are jumping to reload it.
> @@ -1392,8 +1392,16 @@ static void __ref xen_setup_gdt(int cpu)
> loadsegment(ds, 0);
> loadsegment(fs, 0);
> #else
> - /* PVH: TODO Implement. */
> - BUG();
> + asm volatile ("ljmp %1,$1f\n"
> + "1:\n"
> + "movl %2,%0\n"
> + "movl %0,%%ss\n"
> + "movl %3,%0\n"
> + "movl %0,%%ds\n"
> + "movl %0,%%es\n"
> + : "=&r" (dummy)
> + : "i" (__KERNEL_CS), "i" (__KERNEL_DS),
> + "i" (__USER_DS));
> #endif
Which comes out to:
ljmp $96,$1f #[KERNEL_CS]
1:
movl $104,%eax #, dummy #[KERNEL_DS]
movl %eax,%ss # dummy
movl $123,%eax #, dummy #[USER_DS]
movl %eax,%ds # dummy
movl %eax,%es # dummy
And that looks right
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> return; /* PVH does not need any PV GDT ops. */
> }
> --
> 1.8.1.4
>
On Mon, Jul 06, 2015 at 11:34:24PM -0400, Boris Ostrovsky wrote:
> Signed-off-by: Boris Ostrovsky <[email protected]>
A little bit more description here, I think - as in, the parameter
passing is not a Xen-specific thing but something this code cooked up?
> ---
> arch/x86/xen/enlighten.c | 4 ----
> arch/x86/xen/smp.c | 17 ++++++++++-------
> arch/x86/xen/xen-head.S | 17 +++++++++++++++--
> 3 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 7953e68..807337e 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1464,10 +1464,6 @@ static void __init xen_pvh_early_guest_init(void)
>
> xen_pvh_early_cpu_init(0, false);
> xen_pvh_set_cr_flags(0);
> -
> -#ifdef CONFIG_X86_32
> - BUG(); /* PVH: Implement proper support. */
> -#endif
> }
> #endif /* CONFIG_XEN_PVH */
>
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index 7cf0765..99be53b 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -372,11 +372,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
>
> gdt = get_cpu_gdt_table(cpu);
>
> -#ifdef CONFIG_X86_32
> - /* Note: PVH is not yet supported on x86_32. */
> - ctxt->user_regs.fs = __KERNEL_PERCPU;
> - ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
> -#endif
> + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
>
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> @@ -403,6 +400,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> ctxt->kernel_sp = idle->thread.sp0;
>
> #ifdef CONFIG_X86_32
> + ctxt->user_regs.fs = __KERNEL_PERCPU;
> + ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
> ctxt->event_callback_cs = __KERNEL_CS;
> ctxt->failsafe_callback_cs = __KERNEL_CS;
> #else
> @@ -424,12 +423,16 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> */
> ctxt->user_regs.eip =
> (unsigned long)xen_pvh_early_cpu_init_secondary;
> +#ifdef CONFIG_X86_64
> ctxt->user_regs.rdi = cpu;
> ctxt->user_regs.rsi = true; /* entry == true */
> +#else
> + *((uint32_t *)ctxt->user_regs.esp + 1) = cpu;
> + *((uint32_t *)ctxt->user_regs.esp + 2) = true;
> +#endif
> }
> #endif
> - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> - ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
> +
> if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
> BUG();
>
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index b1508a8..12c4e2a 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -51,14 +51,18 @@ ENTRY(startup_xen)
>
> #ifdef CONFIG_XEN_PVH
> /*
> - * xen_pvh_early_cpu_init() - early PVH VCPU initialization
> + * xen_pvh_early_cpu_init(cpu, entry) - early PVH VCPU initialization
> * @cpu: this cpu number (%rdi)
> * @entry: true if this is a secondary vcpu coming up on this entry
> * point, false if this is the boot CPU being initialized for
> * the first time (%rsi)
Should this be updated to reflect how it is done on 32-bit (as in,
not using %rdi and %rsi)?
> */
> ENTRY(xen_pvh_early_cpu_init)
> +#ifdef CONFIG_X86_64
> mov %rbx, -8(%rsp)
> +#else
> + mov %ebx, -4(%esp)
> +#endif
>
> /* Entry point for secondary CPUs */
> ENTRY(xen_pvh_early_cpu_init_secondary)
> @@ -69,15 +73,24 @@ ENTRY(xen_pvh_early_cpu_init_secondary)
>
> mov $MSR_EFER, %ecx
> rdmsr
> +#ifdef CONFIG_X86_64
> bts $_EFER_SCE, %eax
> +#endif
>
> bt $20, %ebx
> jnc 1f /* No NX, skip setting it */
> bts $_EFER_NX, %eax
> 1: wrmsr
> +
> +#ifdef CONFIG_X86_64
> mov -8(%rsp), %rbx
> -#ifdef CONFIG_SMP
> cmp $0, %esi
> +#else
> + mov -4(%esp), %ebx
> + cmp $0, 8(%esp) /* second argument */
> +#endif
> +
> +#ifdef CONFIG_SMP
> jne cpu_bringup_and_idle
> #endif
> ret
> --
> 1.8.1.4
>
On Mon, Jul 06, 2015 at 11:34:25PM -0400, Boris Ostrovsky wrote:
> Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/xen/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index e88fda8..891031e 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -48,5 +48,5 @@ config XEN_DEBUG_FS
>
> config XEN_PVH
> bool "Support for running as a PVH guest"
> - depends on X86_64 && XEN && XEN_PVHVM
> + depends on XEN && XEN_PVHVM
> def_bool n
> --
> 1.8.1.4
>
On Mon, Jul 06, 2015 at 11:34:20PM -0400, Boris Ostrovsky wrote:
> The x86-64 ABI requires that functions preserve %rbx. When
> xen_pvh_early_cpu_init() is executed on the boot CPU it is invoked as a
> regular function, and the 'cpuid' instruction clobbers %rbx. (This is
> not a concern on secondary processors, where xen_pvh_early_cpu_init()
> is the entry point and thus does not need to preserve registers.)
>
> Since we cannot access the stack on secondary processors (the PTE's NX
> bit will cause #GP on AMD --- see commit a2ef5dc2c7cb ("x86/xen: Set
> EFER.NX and EFER.SCE in PVH guests")) we can't always save %rbx on the
> stack. We work around this by creating a new entry point for those
> processors --- xen_pvh_early_cpu_init_secondary(). Note that on
> secondary CPUs we will load %rbx with garbage, which is OK.
.. as we overwrite it with %edx.
>
> As a side effect of these changes we don't need to save %rsi anymore,
> since we can use %rbx instead of %rsi as a temporary register.
>
> (We could save %rbx in another scratch register such as %r8, but since
> we will need a new entry point for 32-bit PVH anyway we go with this
> approach for symmetry.)
>
> Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/xen/smp.c | 3 ++-
> arch/x86/xen/smp.h | 4 ++++
> arch/x86/xen/xen-head.S | 14 +++++++-------
> 3 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index 8648438..ca7ee1f 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -423,7 +423,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> * bit set. This means before DS/SS is touched, NX in
> * EFER must be set. Hence the following assembly glue code.
> */
> - ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
> + ctxt->user_regs.eip =
> + (unsigned long)xen_pvh_early_cpu_init_secondary;
> ctxt->user_regs.rdi = cpu;
> ctxt->user_regs.rsi = true; /* entry == true */
> }
> diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
> index 963d62a..bf2b43c 100644
> --- a/arch/x86/xen/smp.h
> +++ b/arch/x86/xen/smp.h
> @@ -10,10 +10,14 @@ extern void xen_send_IPI_self(int vector);
>
> #ifdef CONFIG_XEN_PVH
> extern void xen_pvh_early_cpu_init(int cpu, bool entry);
> +extern void xen_pvh_early_cpu_init_secondary(int cpu, bool entry);
> #else
> static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
> {
> }
> +static inline void xen_pvh_early_cpu_init_secondary(int cpu, bool entry)
> +{
> +}
> #endif
>
> #endif
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 8afdfcc..b1508a8 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -56,28 +56,28 @@ ENTRY(startup_xen)
> * @entry: true if this is a secondary vcpu coming up on this entry
> * point, false if this is the boot CPU being initialized for
> * the first time (%rsi)
> - *
> - * Note: This is called as a function on the boot CPU, and is the entry point
> - * on the secondary CPU.
> */
> ENTRY(xen_pvh_early_cpu_init)
> - mov %rsi, %r11
> + mov %rbx, -8(%rsp)
>
> +/* Entry point for secondary CPUs */
> +ENTRY(xen_pvh_early_cpu_init_secondary)
> /* Gather features to see if NX implemented. */
> mov $0x80000001, %eax
> cpuid
> - mov %edx, %esi
> + mov %edx, %ebx
>
> mov $MSR_EFER, %ecx
> rdmsr
> bts $_EFER_SCE, %eax
>
> - bt $20, %esi
> + bt $20, %ebx
> jnc 1f /* No NX, skip setting it */
> bts $_EFER_NX, %eax
> 1: wrmsr
> + mov -8(%rsp), %rbx
> #ifdef CONFIG_SMP
> - cmp $0, %r11b
> + cmp $0, %esi
> jne cpu_bringup_and_idle
> #endif
> ret
> --
> 1.8.1.4
>
> > diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> > index 8afdfcc..b1508a8 100644
> > --- a/arch/x86/xen/xen-head.S
> > +++ b/arch/x86/xen/xen-head.S
> > @@ -56,28 +56,28 @@ ENTRY(startup_xen)
> > * @entry: true if this is a secondary vcpu coming up on this entry
> > * point, false if this is the boot CPU being initialized for
> > * the first time (%rsi)
> > - *
> > - * Note: This is called as a function on the boot CPU, and is the entry point
> > - * on the secondary CPU.
> > */
> > ENTRY(xen_pvh_early_cpu_init)
> > - mov %rsi, %r11
> > + mov %rbx, -8(%rsp)
> >
> > +/* Entry point for secondary CPUs */
Actually, could you do this (since the BSP can access the stack and
can do jumps)? Not compile-tested:
This way you don't even need the second argument - as there are two
entry points now, there is no need for the caller to say whether this
is the BSP or a secondary CPU.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7953e68..c8e0655 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1462,7 +1462,7 @@ static void __init xen_pvh_early_guest_init(void)
xen_have_vector_callback = 1;
- xen_pvh_early_cpu_init(0, false);
+ xen_pvh_early_cpu_init(0);
xen_pvh_set_cr_flags(0);
#ifdef CONFIG_X86_32
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca7ee1f..e53be3b 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -426,7 +426,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->user_regs.eip =
(unsigned long)xen_pvh_early_cpu_init_secondary;
ctxt->user_regs.rdi = cpu;
- ctxt->user_regs.rsi = true; /* entry == true */
}
#endif
ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index bf2b43c..bec860a 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -9,10 +9,10 @@ extern void xen_send_IPI_all(int vector);
extern void xen_send_IPI_self(int vector);
#ifdef CONFIG_XEN_PVH
-extern void xen_pvh_early_cpu_init(int cpu, bool entry);
+extern void xen_pvh_early_cpu_init(int cpu);
extern void xen_pvh_early_cpu_init_secondary(int cpu, bool entry);
#else
-static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
+static inline void xen_pvh_early_cpu_init(int cpu)
{
}
static inline void xen_pvh_early_cpu_init_secondary(int cpu, bool entry)
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b1508a8..1c20669 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -53,15 +53,17 @@ ENTRY(startup_xen)
/*
* xen_pvh_early_cpu_init() - early PVH VCPU initialization
* @cpu: this cpu number (%rdi)
- * @entry: true if this is a secondary vcpu coming up on this entry
- * point, false if this is the boot CPU being initialized for
- * the first time (%rsi)
*/
ENTRY(xen_pvh_early_cpu_init)
mov %rbx, -8(%rsp)
-
-/* Entry point for secondary CPUs */
+ xor %esi, %esi
+ jmp 1f
+/* Entry point for secondary CPUs. Can't touch stack (no jumps!) until NX
+ * is dealt with.
+ */
ENTRY(xen_pvh_early_cpu_init_secondary)
+ mov $1, %esi
+1:
/* Gather features to see if NX implemented. */
mov $0x80000001, %eax
cpuid
@@ -72,9 +74,9 @@ ENTRY(xen_pvh_early_cpu_init_secondary)
bts $_EFER_SCE, %eax
bt $20, %ebx
- jnc 1f /* No NX, skip setting it */
+ jnc 2f /* No NX, skip setting it */
bts $_EFER_NX, %eax
-1: wrmsr
+2: wrmsr
mov -8(%rsp), %rbx
#ifdef CONFIG_SMP
cmp $0, %esi
On Mon, Jul 06, 2015 at 11:34:24PM -0400, Boris Ostrovsky wrote:
> Signed-off-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
.. though we could just skip the whole 'entry' parameter check.
> ---
> arch/x86/xen/enlighten.c | 4 ----
> arch/x86/xen/smp.c | 17 ++++++++++-------
> arch/x86/xen/xen-head.S | 17 +++++++++++++++--
> 3 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 7953e68..807337e 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1464,10 +1464,6 @@ static void __init xen_pvh_early_guest_init(void)
>
> xen_pvh_early_cpu_init(0, false);
> xen_pvh_set_cr_flags(0);
> -
> -#ifdef CONFIG_X86_32
> - BUG(); /* PVH: Implement proper support. */
> -#endif
> }
> #endif /* CONFIG_XEN_PVH */
>
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index 7cf0765..99be53b 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -372,11 +372,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
>
> gdt = get_cpu_gdt_table(cpu);
>
> -#ifdef CONFIG_X86_32
> - /* Note: PVH is not yet supported on x86_32. */
> - ctxt->user_regs.fs = __KERNEL_PERCPU;
> - ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
> -#endif
> + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> + ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
>
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> @@ -403,6 +400,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> ctxt->kernel_sp = idle->thread.sp0;
>
> #ifdef CONFIG_X86_32
> + ctxt->user_regs.fs = __KERNEL_PERCPU;
> + ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
> ctxt->event_callback_cs = __KERNEL_CS;
> ctxt->failsafe_callback_cs = __KERNEL_CS;
> #else
> @@ -424,12 +423,16 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> */
> ctxt->user_regs.eip =
> (unsigned long)xen_pvh_early_cpu_init_secondary;
> +#ifdef CONFIG_X86_64
> ctxt->user_regs.rdi = cpu;
> ctxt->user_regs.rsi = true; /* entry == true */
> +#else
> + *((uint32_t *)ctxt->user_regs.esp + 1) = cpu;
> + *((uint32_t *)ctxt->user_regs.esp + 2) = true;
> +#endif
> }
> #endif
> - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> - ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
> +
> if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
> BUG();
>
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index b1508a8..12c4e2a 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -51,14 +51,18 @@ ENTRY(startup_xen)
>
> #ifdef CONFIG_XEN_PVH
> /*
> - * xen_pvh_early_cpu_init() - early PVH VCPU initialization
> + * xen_pvh_early_cpu_init(cpu, entry) - early PVH VCPU initialization
> * @cpu: this cpu number (%rdi)
> * @entry: true if this is a secondary vcpu coming up on this entry
> * point, false if this is the boot CPU being initialized for
> * the first time (%rsi)
> */
> ENTRY(xen_pvh_early_cpu_init)
> +#ifdef CONFIG_X86_64
> mov %rbx, -8(%rsp)
> +#else
> + mov %ebx, -4(%esp)
> +#endif
>
> /* Entry point for secondary CPUs */
> ENTRY(xen_pvh_early_cpu_init_secondary)
> @@ -69,15 +73,24 @@ ENTRY(xen_pvh_early_cpu_init_secondary)
>
> mov $MSR_EFER, %ecx
> rdmsr
> +#ifdef CONFIG_X86_64
> bts $_EFER_SCE, %eax
> +#endif
>
> bt $20, %ebx
> jnc 1f /* No NX, skip setting it */
> bts $_EFER_NX, %eax
> 1: wrmsr
> +
> +#ifdef CONFIG_X86_64
> mov -8(%rsp), %rbx
> -#ifdef CONFIG_SMP
> cmp $0, %esi
> +#else
> + mov -4(%esp), %ebx
> + cmp $0, 8(%esp) /* second argument */
> +#endif
> +
> +#ifdef CONFIG_SMP
> jne cpu_bringup_and_idle
> #endif
> ret
> --
> 1.8.1.4
>
On 07/07/2015 03:54 PM, Konrad Rzeszutek Wilk wrote:
> On Mon, Jul 06, 2015 at 11:34:23PM -0400, Boris Ostrovsky wrote:
>> Signed-off-by: Boris Ostrovsky <[email protected]>
>> ---
>> arch/x86/xen/enlighten.c | 16 ++++++++++++----
>> 1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
>> index 0b95c9b..7953e68 100644
>> --- a/arch/x86/xen/enlighten.c
>> +++ b/arch/x86/xen/enlighten.c
>> @@ -1362,12 +1362,12 @@ static void __init xen_boot_params_init_edd(void)
>> static void __ref xen_setup_gdt(int cpu)
>> {
>> if (xen_feature(XENFEAT_auto_translated_physmap)) {
>> -#ifdef CONFIG_X86_64
>> unsigned long dummy;
>>
>> - load_percpu_segment(cpu); /* We need to access per-cpu area */
>> + setup_stack_canary_segment(cpu);
>> switch_to_new_gdt(cpu); /* GDT and GS set */
>>
>> +#ifdef CONFIG_X86_64
>> /* We are switching of the Xen provided GDT to our HVM mode
>> * GDT. The new GDT has __KERNEL_CS with CS.L = 1
>> * and we are jumping to reload it.
>> @@ -1392,8 +1392,16 @@ static void __ref xen_setup_gdt(int cpu)
>> loadsegment(ds, 0);
>> loadsegment(fs, 0);
>> #else
>> - /* PVH: TODO Implement. */
>> - BUG();
>> + asm volatile ("ljmp %1,$1f\n"
>> + "1:\n"
>> + "movl %2,%0\n"
>> + "movl %0,%%ss\n"
>> + "movl %3,%0\n"
>> + "movl %0,%%ds\n"
>> + "movl %0,%%es\n"
>> + : "=&r" (dummy)
>> + : "i" (__KERNEL_CS), "i" (__KERNEL_DS),
>> + "i" (__USER_DS));
>> #endif
> Which comes out to:
> ljmp $96,$1f #[KERNEL_CS]
> 1:
> movl $104,%eax #, dummy #[KERNEL_DS]
> movl %eax,%ss # dummy
> movl $123,%eax #, dummy #[USER_DS]
> movl %eax,%ds # dummy
> movl %eax,%es # dummy
>
> And that looks right
Actually, now that I look at this again, I think I should just use
loadsegment(), just like the 64-bit code above.
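A hedged sketch of what that might look like (untested; assumes
loadsegment() can take %ss just as it takes the other data segment
registers, and %cs still needs the far jump):
	asm volatile("ljmp %0, $1f\n"
		     "1:" : : "i" (__KERNEL_CS));
	loadsegment(ss, __KERNEL_DS);
	loadsegment(ds, __USER_DS);
	loadsegment(es, __USER_DS);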
-boris
>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
>
>> return; /* PVH does not need any PV GDT ops. */
>> }
>> --
>> 1.8.1.4
>>
On 07/07/2015 04:16 PM, Konrad Rzeszutek Wilk wrote:
> On Mon, Jul 06, 2015 at 11:34:20PM -0400, Boris Ostrovsky wrote:
>> The x86-64 ABI requires that functions preserve %rbx. When
>> xen_pvh_early_cpu_init() is executed on the boot CPU it is invoked as a
>> regular function, and the 'cpuid' instruction clobbers %rbx. (This is
>> not a concern on secondary processors, where xen_pvh_early_cpu_init()
>> is the entry point and thus does not need to preserve registers.)
>>
>> Since we cannot access the stack on secondary processors (the PTE's NX
>> bit will cause #GP on AMD --- see commit a2ef5dc2c7cb ("x86/xen: Set
>> EFER.NX and EFER.SCE in PVH guests")) we can't always save %rbx on the
>> stack. We work around this by creating a new entry point for those
>> processors --- xen_pvh_early_cpu_init_secondary(). Note that on
>> secondary CPUs we will load %rbx with garbage, which is OK.
> .. as we overwrite it with %edx.
I meant here that we do 'mov -8(%rsp), %rbx' before returning, and on
secondary CPUs we will grab garbage from the stack. But it's OK because
we are in the entry point and cpu_bringup_and_idle (which is where we
will be jumping to) doesn't care what's in %rbx.
Overwriting with %edx happens earlier.
-boris
>
>> As a side effect of these changes we don't need to save %rsi anymore,
>> since we can use %rbx instead of %rsi as a temporary register.
>>
>> (We could store %rbx into another scratch register such as %r8 but since
>> we will need new entry point for 32-bit PVH anyway we go with this
>> approach for symmetry)
>>
>> Signed-off-by: Boris Ostrovsky <[email protected]>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
>
>
>> ---
>> arch/x86/xen/smp.c | 3 ++-
>> arch/x86/xen/smp.h | 4 ++++
>> arch/x86/xen/xen-head.S | 14 +++++++-------
>> 3 files changed, 13 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
>> index 8648438..ca7ee1f 100644
>> --- a/arch/x86/xen/smp.c
>> +++ b/arch/x86/xen/smp.c
>> @@ -423,7 +423,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
>> * bit set. This means before DS/SS is touched, NX in
>> * EFER must be set. Hence the following assembly glue code.
>> */
>> - ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init;
>> + ctxt->user_regs.eip =
>> + (unsigned long)xen_pvh_early_cpu_init_secondary;
>> ctxt->user_regs.rdi = cpu;
>> ctxt->user_regs.rsi = true; /* entry == true */
>> }
>> diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
>> index 963d62a..bf2b43c 100644
>> --- a/arch/x86/xen/smp.h
>> +++ b/arch/x86/xen/smp.h
>> @@ -10,10 +10,14 @@ extern void xen_send_IPI_self(int vector);
>>
>> #ifdef CONFIG_XEN_PVH
>> extern void xen_pvh_early_cpu_init(int cpu, bool entry);
>> +extern void xen_pvh_early_cpu_init_secondary(int cpu, bool entry);
>> #else
>> static inline void xen_pvh_early_cpu_init(int cpu, bool entry)
>> {
>> }
>> +static inline void xen_pvh_early_cpu_init_secondary(int cpu, bool entry)
>> +{
>> +}
>> #endif
>>
>> #endif
>> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
>> index 8afdfcc..b1508a8 100644
>> --- a/arch/x86/xen/xen-head.S
>> +++ b/arch/x86/xen/xen-head.S
>> @@ -56,28 +56,28 @@ ENTRY(startup_xen)
>> * @entry: true if this is a secondary vcpu coming up on this entry
>> * point, false if this is the boot CPU being initialized for
>> * the first time (%rsi)
>> - *
>> - * Note: This is called as a function on the boot CPU, and is the entry point
>> - * on the secondary CPU.
>> */
>> ENTRY(xen_pvh_early_cpu_init)
>> - mov %rsi, %r11
>> + mov %rbx, -8(%rsp)
>>
>> +/* Entry point for secondary CPUs */
>> +ENTRY(xen_pvh_early_cpu_init_secondary)
>> /* Gather features to see if NX implemented. */
>> mov $0x80000001, %eax
>> cpuid
>> - mov %edx, %esi
>> + mov %edx, %ebx
>>
>> mov $MSR_EFER, %ecx
>> rdmsr
>> bts $_EFER_SCE, %eax
>>
>> - bt $20, %esi
>> + bt $20, %ebx
>> jnc 1f /* No NX, skip setting it */
>> bts $_EFER_NX, %eax
>> 1: wrmsr
>> + mov -8(%rsp), %rbx
>> #ifdef CONFIG_SMP
>> - cmp $0, %r11b
>> + cmp $0, %esi
>> jne cpu_bringup_and_idle
>> #endif
>> ret
>> --
>> 1.8.1.4
>>
On 07/07/2015 04:26 PM, Konrad Rzeszutek Wilk wrote:
>>> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
>>> index 8afdfcc..b1508a8 100644
>>> --- a/arch/x86/xen/xen-head.S
>>> +++ b/arch/x86/xen/xen-head.S
>>> @@ -56,28 +56,28 @@ ENTRY(startup_xen)
>>> * @entry: true if this is a secondary vcpu coming up on this entry
>>> * point, false if this is the boot CPU being initialized for
>>> * the first time (%rsi)
>>> - *
>>> - * Note: This is called as a function on the boot CPU, and is the entry point
>>> - * on the secondary CPU.
>>> */
>>> ENTRY(xen_pvh_early_cpu_init)
>>> - mov %rsi, %r11
>>> + mov %rbx, -8(%rsp)
>>>
>>> +/* Entry point for secondary CPUs */
> Actually, could you do this (since the BSP can access the stack and
> it can do jumps) - not compile tested:
Yes, this looks better, I'll try it. Thanks.
-boris
On 07/07/15 04:34, Boris Ostrovsky wrote:
>
> A set of PVH-related patches.
>
> The first patch is an x86-64 ABI fix for PVH guests. The second is a small
> update that removes a redundant memset() (on both the PV and PVH code paths).
>
> The rest of the series enables non-privileged 32-bit PVH guests. This
> requires hypervisor patches from
> http://lists.xenproject.org/archives/html/xen-devel/2015-06/msg04817.html
The more I look at the PVH entry point into Linux, the less I like
it -- it's much too PV-like instead of HVM-like.
However, this 32-bit support doesn't make it much worse, and until
there's a decision on the long-term future of PVH I'm inclined to
accept it (provided the hypervisor side is accepted) to make it easier
to continue working on PVH support.
David