2012-10-20 01:30:20

by Konrad Rzeszutek Wilk

Subject:

Mukesh provided me with his git tree containing some of the changes to
his patches, as well as the modifications done per review. I've made a
few changes to the git descriptions to flesh them out some more.

I'm sending them out now, and I've also stuck the redone patches in my tree:
git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git devel/pvh.v4

Please review. I think the only changes are just in the git commit
descriptions - so please pull out your wordsmith hat and loupe!
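
For those reviewing from git, something like the following should fetch
the branch (assuming an already checked-out Linux tree):

    git remote add konrad git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git
    git fetch konrad
    git checkout -b pvh.v4 konrad/devel/pvh.v4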

arch/x86/include/asm/xen/interface.h | 11 ++-
arch/x86/include/asm/xen/page.h | 3 +
arch/x86/xen/Kconfig | 10 ++
arch/x86/xen/enlighten.c | 77 ++++++++++++----
arch/x86/xen/irq.c | 5 +-
arch/x86/xen/mmu.c | 162 ++++++++++++++++++++++++++++++++--
arch/x86/xen/mmu.h | 2 +
arch/x86/xen/p2m.c | 2 +-
arch/x86/xen/setup.c | 64 +++++++++++---
arch/x86/xen/smp.c | 75 ++++++++++------
arch/x86/xen/xen-head.S | 11 ++-
drivers/xen/balloon.c | 15 ++--
drivers/xen/cpu_hotplug.c | 4 +-
drivers/xen/events.c | 9 ++-
drivers/xen/gntdev.c | 3 +-
drivers/xen/grant-table.c | 26 +++++-
drivers/xen/privcmd.c | 72 ++++++++++++++-
drivers/xen/xenbus/xenbus_client.c | 3 +-
include/xen/interface/memory.h | 24 +++++-
include/xen/interface/physdev.h | 10 ++
include/xen/xen-ops.h | 5 +-
21 files changed, 507 insertions(+), 86 deletions(-)


2012-10-20 01:30:24

by Konrad Rzeszutek Wilk

Subject: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

From: Mukesh Rathor <[email protected]>

PVH allows a PV Linux guest to utilize hardware extended capabilities,
such as running MMU updates in an HVM container.

This patch allows it to be configured and enabled. It also makes basic
header file changes to add new subcalls to the physmap hypercall.

Lastly, mfn_to_local_pfn must return the mfn when paging mode is
translated (since we let the hypervisor - or CPU - do the translation
for us).

Signed-off-by: Mukesh Rathor <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/include/asm/xen/page.h | 3 +++
arch/x86/xen/Kconfig | 10 ++++++++++
arch/x86/xen/xen-head.S | 11 ++++++++++-
include/xen/interface/memory.h | 24 +++++++++++++++++++++++-
include/xen/interface/physdev.h | 10 ++++++++++
5 files changed, 56 insertions(+), 2 deletions(-)
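
As an illustration of the new XENMEM_remove_from_physmap subcall, here
is a minimal caller sketch (patch 3 adds the real user,
pvh_rem_xen_p2m(); pfn is the caller's guest pfn):

    struct xen_remove_from_physmap xrp = {
        .domid = DOMID_SELF,  /* change our own p2m */
        .gpfn  = pfn,         /* gpfn whose mapping goes away */
    };
    int rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
    if (rc)
        pr_warn("Failed to unmap pfn:%lx rc:%d\n", pfn, rc);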

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 472b9b7..6af440d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -159,6 +159,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
{
unsigned long pfn = mfn_to_pfn(mfn);
+
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return mfn;
if (get_phys_to_machine(pfn) != mfn)
return -1; /* force !pfn_valid() */
return pfn;
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index fdce49c..822c5a0 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -50,3 +50,13 @@ config XEN_DEBUG_FS
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.

+config XEN_X86_PVH
+ bool "Support for running as a PVH guest (EXPERIMENTAL)"
+ depends on X86_64 && XEN && EXPERIMENTAL
+ default n
+ help
+ This option enables support for running as a PVH guest (PV guest
+ using hardware extensions) under a suitably capable hypervisor.
+ This option is EXPERIMENTAL because the hypervisor interfaces
+ which it uses are not yet considered stable, therefore backwards and
+ forwards compatibility is not yet guaranteed. If unsure, say N.
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 7faed58..1a6bca1 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -13,6 +13,15 @@
#include <xen/interface/elfnote.h>
#include <asm/xen/interface.h>

+#ifdef CONFIG_XEN_X86_PVH
+#define FEATURES_PVH "|writable_descriptor_tables" \
+ "|auto_translated_physmap" \
+ "|supervisor_mode_kernel" \
+ "|hvm_callback_vector"
+#else
+#define FEATURES_PVH /* Not supported */
+#endif
+
__INIT
ENTRY(startup_xen)
cld
@@ -95,7 +104,7 @@ NEXT_HYPERCALL(arch_6)
#endif
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
- ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
+ ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb"FEATURES_PVH)
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index b66d04c..8beebdb 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -169,7 +169,13 @@ struct xen_add_to_physmap {
/* Source mapping space. */
#define XENMAPSPACE_shared_info 0 /* shared info page */
#define XENMAPSPACE_grant_table 1 /* grant table page */
- unsigned int space;
+#define XENMAPSPACE_gmfn 2 /* GMFN */
+#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
+ uint16_t space;
+ domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
+
+#define XENMAPIDX_grant_table_status 0x80000000

/* Index into source mapping space. */
xen_ulong_t idx;
@@ -237,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
* during a driver critical region.
*/
extern spinlock_t xen_reservation_lock;
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap 15
+struct xen_remove_from_physmap {
+ /* Which domain to change the mapping for. */
+ domid_t domid;
+
+ /* GPFN of the current mapping of the page. */
+ xen_pfn_t gpfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
+
#endif /* __XEN_PUBLIC_MEMORY_H__ */
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 1844d31..83050d3 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -274,6 +274,16 @@ struct physdev_dbgp_op {
} u;
};

+#define PHYSDEVOP_map_iomem 30
+struct physdev_map_iomem {
+ /* IN */
+ uint64_t first_gfn;
+ uint64_t first_mfn;
+ uint32_t nr_mfns;
+ uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
+
+};
+
/*
* Notify that some PIRQ-bound event channels have been unmasked.
* ** This command is obsolete since interface version 0x00030202 and is **
--
1.7.7.6

2012-10-20 01:30:23

by Konrad Rzeszutek Wilk

Subject: [PATCH 5/6] xen/pvh: balloon and grant changes.

From: Mukesh Rathor <[email protected]>

For the balloon changes we skip setting the local P2M as it's updated
in Xen. For grants, the shared grant frame is a pfn and not an mfn,
hence it's mapped via the same code path as HVM.

Signed-off-by: Mukesh Rathor <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/xen/balloon.c | 15 +++++++++------
drivers/xen/gntdev.c | 3 ++-
drivers/xen/grant-table.c | 26 ++++++++++++++++++++++----
3 files changed, 33 insertions(+), 11 deletions(-)
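
To illustrate the grant-table side: for PVH the shared frames are
ordinary kernel pages, so the gpfn handed to XENMEM_add_to_physmap comes
from virt_to_pfn() of the kmalloc'd area rather than from
xen_hvm_resume_frames. A condensed sketch of one iteration of the
mapping loop below (i is the loop index):

    unsigned long start_gpfn = xen_hvm_domain() ?
            xen_hvm_resume_frames >> PAGE_SHIFT :
            virt_to_pfn(gnttab_shared.addr);    /* PVH */
    struct xen_add_to_physmap xatp = {
        .domid = DOMID_SELF,
        .space = XENMAPSPACE_grant_table,
        .idx   = i,
        .gpfn  = start_gpfn + i,
    };
    rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);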

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 31ab82f..c825b63 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -361,7 +361,9 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
set_phys_to_machine(pfn, frame_list[i]);

/* Link back into the page tables if not highmem. */
- if (xen_pv_domain() && !PageHighMem(page)) {
+ if (xen_pv_domain() && !PageHighMem(page) &&
+ !xen_feature(XENFEAT_auto_translated_physmap)) {
+
int ret;
ret = HYPERVISOR_update_va_mapping(
(unsigned long)__va(pfn << PAGE_SHIFT),
@@ -418,12 +420,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
scrub_page(page);

if (xen_pv_domain() && !PageHighMem(page)) {
- ret = HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- __pte_ma(0), 0);
- BUG_ON(ret);
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ __pte_ma(0), 0);
+ BUG_ON(ret);
+ }
}
-
}

/* Ensure that ballooned highmem pages don't have kmaps. */
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 610bfc6..d39d54e 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -803,7 +803,8 @@ static int __init gntdev_init(void)
if (!xen_domain())
return -ENODEV;

- use_ptemod = xen_pv_domain();
+ use_ptemod = xen_pv_domain() &&
+ !xen_feature(XENFEAT_auto_translated_physmap);

err = misc_register(&gntdev_miscdev);
if (err != 0) {
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index b2b0a37..9c0019d 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1029,14 +1029,19 @@ static void gnttab_unmap_frames_v2(void)
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
- unsigned long *frames;
+ unsigned long *frames, start_gpfn;
unsigned int nr_gframes = end_idx + 1;
int rc;

- if (xen_hvm_domain()) {
+ if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
+
+ if (xen_hvm_domain())
+ start_gpfn = xen_hvm_resume_frames >> PAGE_SHIFT;
+ else
+ start_gpfn = virt_to_pfn(gnttab_shared.addr);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
@@ -1045,7 +1050,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
- xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ xatp.gpfn = start_gpfn + i;
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
printk(KERN_WARNING
@@ -1108,7 +1113,7 @@ static void gnttab_request_version(void)
int rc;
struct gnttab_set_version gsv;

- if (xen_hvm_domain())
+ if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap))
gsv.version = 1;
else
gsv.version = 2;
@@ -1136,12 +1141,25 @@ static void gnttab_request_version(void)
int gnttab_resume(void)
{
unsigned int max_nr_gframes;
+ char *kmsg = "Failed to kmalloc pages for pv in hvm grant frames\n";

gnttab_request_version();
max_nr_gframes = gnttab_max_grant_frames();
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;

+ /* PVH note: xen will free existing kmalloc'd mfn in
+ * XENMEM_add_to_physmap. TBD/FIXME: use xen ballooning instead of
+ * kmalloc(). */
+ if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap) &&
+ !gnttab_shared.addr) {
+ gnttab_shared.addr =
+ kmalloc(max_nr_gframes * PAGE_SIZE, GFP_KERNEL);
+ if (!gnttab_shared.addr) {
+ pr_warn("%s", kmsg);
+ return -ENOMEM;
+ }
+ }
if (xen_pv_domain())
return gnttab_map(0, nr_grant_frames - 1);

--
1.7.7.6

2012-10-20 01:30:22

by Konrad Rzeszutek Wilk

Subject: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

From: Mukesh Rathor <[email protected]>

Make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as PVH
only needs to send down gdtaddr and gdtsz.

For interrupts, PVH uses native_irq_ops.
vcpu hotplug is currently not available for PVH.

For events we follow what PVHVM does - we use the callback vector.
Lastly, we also use the HVM path to set up XenBus.

Signed-off-by: Mukesh Rathor <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/include/asm/xen/interface.h | 11 +++++-
arch/x86/xen/irq.c | 5 ++-
arch/x86/xen/p2m.c | 2 +-
arch/x86/xen/smp.c | 75 ++++++++++++++++++++++------------
drivers/xen/cpu_hotplug.c | 4 +-
drivers/xen/events.c | 9 ++++-
drivers/xen/xenbus/xenbus_client.c | 3 +-
7 files changed, 77 insertions(+), 32 deletions(-)
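
The vcpu_guest_context change boils down to this (a condensed sketch;
the full logic is in cpu_initialize_context() below):

    if (xen_feature(XENFEAT_auto_translated_physmap) &&
        xen_feature(XENFEAT_supervisor_mode_kernel)) {
        /* PVH: the GDT is native, pass its linear address and size. */
        ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
        ctxt->u.pvh.gdtsz   = (unsigned long)(GDT_SIZE - 1);
    } else {
        /* PV: pass the machine frames backing the GDT. */
        ctxt->u.pv.gdt_frames[0] = gdt_mfn;
        ctxt->u.pv.gdt_ents      = GDT_ENTRIES;
    }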

diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 6d2f75a..4c08f23 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -144,7 +144,16 @@ struct vcpu_guest_context {
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
- unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ union {
+ struct {
+ /* PV: GDT (machine frames, # ents).*/
+ unsigned long gdt_frames[16], gdt_ents;
+ } pv;
+ struct {
+ /* PVH: GDTR addr and size */
+ unsigned long gdtaddr, gdtsz;
+ } pvh;
+ } u;
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
/* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc0..fcbe56a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/features.h>
#include <xen/events.h>

#include <asm/xen/hypercall.h>
@@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {

void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ /* For PVH we use default pv_irq_ops settings */
+ if (!xen_feature(XENFEAT_hvm_callback_vector))
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa..ea553c8 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;

- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return true;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 353c50f..df400349 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -68,9 +68,11 @@ static void __cpuinit cpu_bringup(void)
touch_softlockup_watchdog();
preempt_disable();

- xen_enable_sysenter();
- xen_enable_syscall();
-
+ /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ xen_enable_sysenter();
+ xen_enable_syscall();
+ }
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
@@ -230,10 +232,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();

- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
-
+ if (!xen_feature(XENFEAT_writable_page_tables)) {
+ /* We've switched to the "real" per-cpu gdt, so make sure the
+ * old memory can be recycled */
+ make_lowmem_page_readwrite(xen_initial_gdt);
+ }
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
@@ -300,8 +303,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);

ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +311,57 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

- xen_copy_trap_info(ctxt->trap_ctxt);
+ /* check for autoxlated to get it right for 32bit kernel */
+ if (xen_feature(XENFEAT_auto_translated_physmap) &&
+ xen_feature(XENFEAT_supervisor_mode_kernel)) {

- ctxt->ldt_ents = 0;
+ ctxt->user_regs.ds = __KERNEL_DS;
+ ctxt->user_regs.es = 0;
+ ctxt->user_regs.gs = 0;

- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
+ ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);

- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+#ifdef CONFIG_X86_64
+ /* Note: PVH is not supported on x86_32. */
+ ctxt->gs_base_user = (unsigned long)
+ per_cpu(irq_stack_union.gs_base, cpu);
+#endif
+ } else {
+ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+ ctxt->user_regs.ds = __USER_DS;
+ ctxt->user_regs.es = __USER_DS;

- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ xen_copy_trap_info(ctxt->trap_ctxt);

- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+ ctxt->ldt_ents = 0;

- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
+ make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+
+ ctxt->u.pv.gdt_frames[0] = gdt_mfn;
+ ctxt->u.pv.gdt_ents = GDT_ENTRIES;
+
+ ctxt->kernel_ss = __KERNEL_DS;
+ ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
+ ctxt->event_callback_cs = __KERNEL_CS;
+ ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
+ ctxt->event_callback_eip =
+ (unsigned long)xen_hypervisor_callback;
+ ctxt->failsafe_callback_eip =
+ (unsigned long)xen_failsafe_callback;
+ }
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 4dcfced..de6bcf9 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -2,6 +2,7 @@

#include <xen/xen.h>
#include <xen/xenbus.h>
+#include <xen/features.h>

#include <asm/xen/hypervisor.h>
#include <asm/cpu.h>
@@ -100,7 +101,8 @@ static int __init setup_vcpu_hotplug_event(void)
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };

- if (!xen_pv_domain())
+ /* PVH TBD/FIXME: future work */
+ if (!xen_pv_domain() || xen_feature(XENFEAT_auto_translated_physmap))
return -ENODEV;

register_xenstore_notifier(&xsn_cpu);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 59e10a1..7131fdd 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1774,7 +1774,7 @@ int xen_set_callback_via(uint64_t via)
}
EXPORT_SYMBOL_GPL(xen_set_callback_via);

-#ifdef CONFIG_XEN_PVHVM
+#ifdef CONFIG_X86
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any
* vcpu and we don't need PCI support or APIC interactions. */
@@ -1835,6 +1835,13 @@ void __init xen_init_IRQ(void)
if (xen_initial_domain())
pci_xen_initial_domain();

+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
+ xen_callback_vector();
+ return;
+ }
+
+ /* PVH: TBD/FIXME: debug and fix eio map to work with pvh */
+
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index bcf3ba4..356461e 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -44,6 +44,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>

#include "xenbus_probe.h"

@@ -741,7 +742,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {

void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (xen_pv_domain() && !xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
--
1.7.7.6

2012-10-20 01:30:52

by Konrad Rzeszutek Wilk

Subject: [PATCH 3/6] xen/pvh: Implements mmu changes for PVH.

From: Mukesh Rathor <[email protected]>

First, the set/clear mmio pte function makes a hypercall to update the
P2M in Xen with a 1:1 mapping. Since PVH uses mostly native mmu ops, we
leave the generic (native_*) ones for the rest.

Two local functions are introduced to add to the xen physmap for the xen
remap interface. A xen unmap interface is introduced so the privcmd pte
entries can be cleared in the Xen p2m table.

Signed-off-by: Mukesh Rathor <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/xen/mmu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++--
arch/x86/xen/mmu.h | 2 +
drivers/xen/privcmd.c | 5 +-
include/xen/xen-ops.h | 5 +-
4 files changed, 165 insertions(+), 9 deletions(-)
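
Note the signature change to xen_remap_domain_mfn_range(): callers now
pass a struct page ** which only the PVH path consumes. A caller sketch
(the privcmd hunks below pass NULL until patch 6 wires up the pages
array; cur_page is the ballooned page backing the mapping there):

    /* Classic PV: the hypervisor rewrites the ptes, no pages needed. */
    rc = xen_remap_domain_mfn_range(vma, addr, mfn, nr,
                                    vma->vm_page_prot, domid, NULL);

    /* PVH: each foreign gmfn is backed by a local ballooned page. */
    rc = xen_remap_domain_mfn_range(vma, addr, gmfn, 1,
                                    vma->vm_page_prot, domid, &cur_page);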

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6226c99..5747a41 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -74,6 +74,7 @@
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
+#include <xen/balloon.h>

#include "multicalls.h"
#include "mmu.h"
@@ -332,6 +333,20 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
__xen_set_pte(ptep, pteval);
}

+void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping)
+{
+ struct physdev_map_iomem iomem;
+
+ iomem.first_gfn = pfn;
+ iomem.first_mfn = mfn;
+ iomem.nr_mfns = nr_mfns;
+ iomem.add_mapping = add_mapping;
+
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_map_iomem, &iomem))
+ BUG();
+}
+
static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval)
{
@@ -1221,6 +1236,8 @@ static void __init xen_pagetable_init(void)
#endif
paging_init();
xen_setup_shared_info();
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
#ifdef CONFIG_X86_64
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long new_mfn_list;
@@ -1528,6 +1545,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
{
struct mmuext_op op;
+
+ if (xen_feature(XENFEAT_writable_page_tables))
+ return;
+
op.cmd = cmd;
op.arg1.mfn = pfn_to_mfn(pfn);
if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
@@ -1725,6 +1746,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);

+ /* recall for PVH, page tables are native. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
@@ -1802,6 +1827,9 @@ static void convert_pfn_mfn(void *v)
pte_t *pte = v;
int i;

+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
/* All levels are converted the same way, so just treat them
as ptes. */
for (i = 0; i < PTRS_PER_PTE; i++)
@@ -1821,6 +1849,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
(*pt_end)--;
}
}
+
/*
* Set up the initial kernel pagetable.
*
@@ -1831,6 +1860,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
* but that's enough to get __va working. We need to fill in the rest
* of the physical mapping once some sort of allocator has been set
* up.
+ * NOTE: for PVH, the page tables are native.
*/
void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
{
@@ -1908,10 +1938,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
* structure to attach it to, so make sure we just set kernel
* pgd.
*/
- xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
- xen_mc_issue(PARAVIRT_LAZY_CPU);
-
+ if (xen_feature(XENFEAT_writable_page_tables)) {
+ native_write_cr3(__pa(init_level4_pgt));
+ } else {
+ xen_mc_batch();
+ __xen_write_cr3(true, __pa(init_level4_pgt));
+ xen_mc_issue(PARAVIRT_LAZY_CPU);
+ }
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
* set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
* the initial domain. For guests using the toolstack, they are in:
@@ -2178,8 +2211,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {

void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_init = xen_pagetable_init;
+
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
+ return;
+ }
+ x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
pv_mmu_ops = xen_mmu_ops;

memset(dummy_mapping, 0xff, PAGE_SIZE);
@@ -2455,6 +2493,89 @@ void __init xen_hvm_init_mmu_ops(void)
}
#endif

+/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This is for user space
+ * on PVH dom0 creating a new guest, which needs to map domU pages.
+ */
+static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
+ unsigned int domid)
+{
+ int rc;
+ struct xen_add_to_physmap xatp = { .foreign_domid = domid };
+
+ xatp.gpfn = lpfn;
+ xatp.idx = fgmfn;
+ xatp.domid = DOMID_SELF;
+ xatp.space = XENMAPSPACE_gmfn_foreign;
+ rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+ if (rc)
+ pr_warn("d0: Failed to map pfn (0x%lx) to mfn (0x%lx) rc:%d\n",
+ lpfn, fgmfn, rc);
+ return rc;
+}
+
+static int pvh_rem_xen_p2m(unsigned long spfn, int count)
+{
+ struct xen_remove_from_physmap xrp;
+ int i, rc;
+
+ for (i = 0; i < count; i++) {
+ xrp.domid = DOMID_SELF;
+ xrp.gpfn = spfn+i;
+ rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
+ if (rc) {
+ pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
+ spfn+i, rc, i);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+struct pvh_remap_data {
+ unsigned long fgmfn; /* foreign domain's gmfn */
+ pgprot_t prot;
+ domid_t domid;
+ int index;
+ struct page **pages;
+};
+
+static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
+ void *data)
+{
+ int rc;
+ struct pvh_remap_data *remap = data;
+ unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
+ pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
+
+ rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid);
+ if (rc)
+ return rc;
+ native_set_pte(ptep, pteval);
+
+ return 0;
+}
+
+static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+{
+ int err;
+ struct pvh_remap_data pvhdata;
+
+ BUG_ON(!pages);
+
+ pvhdata.fgmfn = mfn;
+ pvhdata.prot = prot;
+ pvhdata.domid = domid;
+ pvhdata.index = 0;
+ pvhdata.pages = pages;
+ err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
+ pvh_map_pte_fn, &pvhdata);
+ flush_tlb_all();
+ return err;
+}
+
#define REMAP_BATCH_SIZE 16

struct remap_data {
@@ -2479,7 +2600,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid)
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+
{
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
@@ -2494,6 +2617,10 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,

BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));

+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* We need to update the local page tables and the xen HAP */
+ return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, pages);
+ }
rmd.mfn = mfn;
rmd.prot = prot;

@@ -2523,3 +2650,26 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+/* Returns: 0 success */
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages)
+{
+ if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ while (numpgs--) {
+
+ /* the mmu has already cleaned up the process mmu resources at
+ * this point (lookup_address will return NULL). */
+ unsigned long pfn = page_to_pfn(pages[numpgs]);
+
+ pvh_rem_xen_p2m(pfn, 1);
+ }
+ /* We don't need to flush tlbs because as part of pvh_rem_xen_p2m(),
+ * the hypervisor will do tlb flushes after removing the p2m entries
+ * from the EPT/NPT */
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 73809bb..6d0bb56 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);

extern void xen_init_mmu_ops(void);
extern void xen_hvm_init_mmu_ops(void);
+extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
+ int nr_mfns, int add_mapping);
#endif /* _XEN_MMU_H */
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 8adb9cc..b612267 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -178,7 +178,7 @@ static int mmap_mfn_range(void *data, void *state)
msg->va & PAGE_MASK,
msg->mfn, msg->npages,
vma->vm_page_prot,
- st->domain);
+ st->domain, NULL);
if (rc < 0)
return rc;

@@ -267,7 +267,8 @@ static int mmap_batch_fn(void *data, void *state)
int ret;

ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain);
+ st->vma->vm_page_prot, st->domain,
+ NULL);

/* Store error code for second pass. */
*(st->err++) = ret;
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 6a198e4..990b43e 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -27,6 +27,9 @@ struct vm_area_struct;
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid);
+ pgprot_t prot, unsigned domid,
+ struct page **pages);
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages);

#endif /* INCLUDE_XEN_OPS_H */
--
1.7.7.6

2012-10-20 01:31:19

by Konrad Rzeszutek Wilk

Subject: [PATCH 4/6] xen/pvh: bootup and setup related changes.

From: Mukesh Rathor <[email protected]>

In enlighten.c for PVH we can trap cpuid via vmexit, so we don't
need to use the emulated prefix call. We also check for the vector
callback early on, as it is a required feature. PVH also runs at the
default kernel IOPL.

In setup.c, in xen_add_extra_mem() we can skip updating the P2M as it's
managed by Xen. PVH maps the entire IO space, but only RAM pages need to
be repopulated.

Finally, pure PV settings are moved to a separate function that is only
called for pure PV, i.e., PV with pvmmu.

Signed-off-by: Mukesh Rathor <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/xen/enlighten.c | 77 ++++++++++++++++++++++++++++++++++-----------
arch/x86/xen/setup.c | 64 +++++++++++++++++++++++++++++++-------
2 files changed, 110 insertions(+), 31 deletions(-)
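
The cpuid change condenses to the following (a sketch; the mask fixups
around it are unchanged):

    if (xen_pvh_domain())
        /* PVH: cpuid traps to the hypervisor via VMEXIT. */
        native_cpuid(ax, bx, cx, dx);
    else
        /* Classic PV: rely on the emulated prefix. */
        asm(XEN_EMULATE_PREFIX "cpuid"
            : "=a" (*ax), "=b" (*bx), "=c" (*cx), "=d" (*dx)
            : "0" (*ax), "2" (*cx));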

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3497f2..53db37f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -45,6 +45,7 @@
#include <xen/hvm.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>
+#include <xen/features.h>

#include <asm/paravirt.h>
#include <asm/apic.h>
@@ -107,6 +108,9 @@ RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
__read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);

+#define xen_pvh_domain() (xen_pv_domain() && \
+ xen_feature(XENFEAT_auto_translated_physmap) && \
+ xen_have_vector_callback)
/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
@@ -219,8 +223,9 @@ static void __init xen_banner(void)
struct xen_extraversion extra;
HYPERVISOR_xen_version(XENVER_extraversion, &extra);

- printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
- pv_info.name);
+ pr_info("Booting paravirtualized kernel %son %s\n",
+ xen_feature(XENFEAT_auto_translated_physmap) ?
+ "with PVH extensions " : "", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
@@ -273,12 +278,15 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
break;
}

- asm(XEN_EMULATE_PREFIX "cpuid"
- : "=a" (*ax),
- "=b" (*bx),
- "=c" (*cx),
- "=d" (*dx)
- : "0" (*ax), "2" (*cx));
+ if (xen_pvh_domain())
+ native_cpuid(ax, bx, cx, dx);
+ else
+ asm(XEN_EMULATE_PREFIX "cpuid"
+ : "=a" (*ax),
+ "=b" (*bx),
+ "=c" (*cx),
+ "=d" (*dx)
+ : "0" (*ax), "2" (*cx));

*bx &= maskebx;
*cx &= maskecx;
@@ -1055,6 +1063,10 @@ void xen_setup_shared_info(void)
HYPERVISOR_shared_info =
(struct shared_info *)__va(xen_start_info->shared_info);

+ /* PVH TBD/FIXME: vcpu info placement in phase 2 */
+ if (xen_pvh_domain())
+ return;
+
#ifndef CONFIG_SMP
/* In UP this is as good a place as any to set up shared info */
xen_setup_vcpu_info_placement();
@@ -1292,6 +1304,11 @@ static const struct machine_ops xen_machine_ops __initconst = {
*/
static void __init xen_setup_stackprotector(void)
{
+ /* PVH TBD/FIXME: investigate setup_stack_canary_segment */
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ switch_to_new_gdt(0);
+ return;
+ }
pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
pv_cpu_ops.load_gdt = xen_load_gdt_boot;

@@ -1302,6 +1319,19 @@ static void __init xen_setup_stackprotector(void)
pv_cpu_ops.load_gdt = xen_load_gdt;
}

+static void __init xen_pvh_early_guest_init(void)
+{
+ if (xen_feature(XENFEAT_hvm_callback_vector))
+ xen_have_vector_callback = 1;
+
+#ifdef CONFIG_X86_32
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ xen_raw_printk("ERROR: 32bit PVH guests are not supported\n");
+ BUG();
+ }
+#endif
+}
+
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@@ -1313,13 +1343,18 @@ asmlinkage void __init xen_start_kernel(void)

xen_domain_type = XEN_PV_DOMAIN;

+ xen_setup_features();
+ xen_pvh_early_guest_init();
xen_setup_machphys_mapping();

/* Install Xen paravirt ops */
pv_info = xen_info;
pv_init_ops = xen_init_ops;
- pv_cpu_ops = xen_cpu_ops;
pv_apic_ops = xen_apic_ops;
+ if (xen_pvh_domain())
+ pv_cpu_ops.cpuid = xen_cpuid;
+ else
+ pv_cpu_ops = xen_cpu_ops;

x86_init.resources.memory_setup = xen_memory_setup;
x86_init.oem.arch_setup = xen_arch_setup;
@@ -1351,8 +1386,6 @@ asmlinkage void __init xen_start_kernel(void)
/* Work out if we support NX */
x86_configure_nx();

- xen_setup_features();
-
/* Get mfn list */
if (!xen_feature(XENFEAT_auto_translated_physmap))
xen_build_dynamic_phys_to_machine();
@@ -1423,14 +1456,18 @@ asmlinkage void __init xen_start_kernel(void)
/* set the limit of our address space */
xen_reserve_top();

- /* We used to do this in xen_arch_setup, but that is too late on AMD
- * were early_cpu_init (run before ->arch_setup()) calls early_amd_init
- * which pokes 0xcf8 port.
- */
- set_iopl.iopl = 1;
- rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
- if (rc != 0)
- xen_raw_printk("physdev_op failed %d\n", rc);
+ /* PVH: runs at default kernel iopl of 0 */
+ if (!xen_pvh_domain()) {
+ /*
+ * We used to do this in xen_arch_setup, but that is too late
+ * on AMD were early_cpu_init (run before ->arch_setup()) calls
+ * early_amd_init which pokes 0xcf8 port.
+ */
+ set_iopl.iopl = 1;
+ rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
+ if (rc != 0)
+ xen_raw_printk("physdev_op failed %d\n", rc);
+ }

#ifdef CONFIG_X86_32
/* set up basic CPUID stuff */
@@ -1497,6 +1534,8 @@ asmlinkage void __init xen_start_kernel(void)
#endif
}

+/* Use a pfn in RAM, may move to MMIO before kexec.
+ * This function is also called for PVH dom0 */
void __ref xen_hvm_init_shared_info(void)
{
int cpu;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26..8cce47b 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -27,6 +27,7 @@
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
+#include "mmu.h"
#include "xen-ops.h"
#include "vdso.h"

@@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)

memblock_reserve(start, size);

+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return;
+
xen_max_p2m_pfn = PFN_DOWN(start + size);
for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
.domid = DOMID_SELF
};
unsigned long len = 0;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
unsigned long pfn;
int ret;

@@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
continue;
frame = mfn;
} else {
- if (mfn != INVALID_P2M_ENTRY)
+ if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
frame = pfn;
}
@@ -230,6 +235,27 @@ static void __init xen_set_identity_and_release_chunk(
*identity += set_phys_range_identity(start_pfn, end_pfn);
}

+/* For PVH, the pfns [0..MAX] are mapped to mfn's in the EPT/NPT. The mfns
+ * are released as part of this 1:1 mapping hypercall back to the dom heap.
+ * Also, we map the entire IO space, ie, beyond max_pfn_mapped.
+ */
+static void __init xen_pvh_identity_map_chunk(unsigned long start_pfn,
+ unsigned long end_pfn, unsigned long *released,
+ unsigned long *identity, unsigned long max_pfn)
+{
+ unsigned long pfn;
+ int numpfns = 1, add_mapping = 1;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++)
+ xen_set_clr_mmio_pvh_pte(pfn, pfn, numpfns, add_mapping);
+
+ if (start_pfn <= max_pfn) {
+ unsigned long end = min(max_pfn_mapped, end_pfn);
+ *released += end - start_pfn;
+ }
+ *identity += end_pfn - start_pfn;
+}
+
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -238,6 +264,7 @@ static unsigned long __init xen_set_identity_and_release(
unsigned long identity = 0;
const struct e820entry *entry;
int i;
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);

/*
* Combine non-RAM regions and gaps until a RAM region (or the
@@ -259,11 +286,17 @@ static unsigned long __init xen_set_identity_and_release(
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);

- if (start_pfn < end_pfn)
- xen_set_identity_and_release_chunk(
- start_pfn, end_pfn, nr_pages,
- &released, &identity);
-
+ if (start_pfn < end_pfn) {
+ if (xlated_phys) {
+ xen_pvh_identity_map_chunk(start_pfn,
+ end_pfn, &released, &identity,
+ nr_pages);
+ } else {
+ xen_set_identity_and_release_chunk(
+ start_pfn, end_pfn, nr_pages,
+ &released, &identity);
+ }
+ }
start = end;
}
}
@@ -526,16 +559,14 @@ void __cpuinit xen_enable_syscall(void)
#endif /* CONFIG_X86_64 */
}

-void __init xen_arch_setup(void)
+/* Non auto translated PV domain, ie, it's not PVH. */
+static __init void xen_pvmmu_arch_setup(void)
{
- xen_panic_handler_init();
-
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_pae_extended_cr3);

if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -543,6 +574,15 @@ void __init xen_arch_setup(void)

xen_enable_sysenter();
xen_enable_syscall();
+}
+
+/* This function not called for HVM domain */
+void __init xen_arch_setup(void)
+{
+ xen_panic_handler_init();
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
--
1.7.7.6

2012-10-20 01:31:41

by Konrad Rzeszutek Wilk

Subject: [PATCH 6/6] xen/pvh: /dev/xen/privcmd changes.

From: Mukesh Rathor <[email protected]>

PVH only supports the batch interface. To map a foreign page to a process,
the PFN must be allocated, and the PVH path uses ballooning for that
purpose.

The returned PFN is then mapped to the foreign page.
xen_unmap_domain_mfn_range() is introduced to unmap these pages via the
privcmd close call.

Signed-off-by: Mukesh Rathor <[email protected]>
[v1: Fix up privcmd_close]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
drivers/xen/privcmd.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 67 insertions(+), 2 deletions(-)
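
The lifecycle of a PVH foreign mapping, condensed from the hunks below
(a sketch, error handling omitted):

    /* mmap_batch: back the VMA with ballooned pfns ... */
    alloc_xenballooned_pages(numpgs, pages, 0);
    /* ... map each foreign gmfn onto one of those pfns ... */
    xen_remap_domain_mfn_range(vma, va, gmfn, 1,
                               vma->vm_page_prot, domid, &cur_page);
    /* ... and tear it all down when the VMA is closed. */
    xen_unmap_domain_mfn_range(vma, numpgs, pages);
    free_xenballooned_pages(numpgs, pages);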

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index b612267..b9d0898 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -33,11 +33,14 @@
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>
+#include <xen/balloon.h>

#include "privcmd.h"

MODULE_LICENSE("GPL");

+#define PRIV_VMA_LOCKED ((void *)1)
+
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
@@ -199,6 +202,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
if (!xen_initial_domain())
return -EPERM;

+ /* We only support privcmd_ioctl_mmap_batch for auto translated. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
return -EFAULT;

@@ -246,6 +253,7 @@ struct mmap_batch_state {
domid_t domain;
unsigned long va;
struct vm_area_struct *vma;
+ int index;
/* A tristate:
* 0 for no errors
* 1 if at least one error has happened (and no
@@ -260,15 +268,24 @@ struct mmap_batch_state {
xen_pfn_t __user *user_mfn;
};

+/* auto translated dom0 note: if domU being created is PV, then mfn is
+ * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
+ */
static int mmap_batch_fn(void *data, void *state)
{
xen_pfn_t *mfnp = data;
struct mmap_batch_state *st = state;
+ struct vm_area_struct *vma = st->vma;
+ struct page **pages = vma->vm_private_data;
+ struct page *cur_page = NULL;
int ret;

+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ cur_page = pages[st->index++];
+
ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
st->vma->vm_page_prot, st->domain,
- NULL);
+ &cur_page);

/* Store error code for second pass. */
*(st->err++) = ret;
@@ -304,6 +321,32 @@ static int mmap_return_errors_v1(void *data, void *state)
return __put_user(*mfnp, st->user_mfn++);
}

+/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
+ * the vma with the page info to use later.
+ * Returns: 0 if success, otherwise -errno
+ */
+static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
+{
+ int rc;
+ struct page **pages;
+
+ pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
+ if (pages == NULL)
+ return -ENOMEM;
+
+ rc = alloc_xenballooned_pages(numpgs, pages, 0);
+ if (rc != 0) {
+ pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
+ numpgs, rc);
+ kfree(pages);
+ return -ENOMEM;
+ }
+ BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
+ vma->vm_private_data = pages;
+
+ return 0;
+}
+
static struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
@@ -371,10 +414,18 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
up_write(&mm->mmap_sem);
goto out;
}
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
+ ret = alloc_empty_pages(vma, m.num);
+ if (ret < 0) {
+ up_write(&mm->mmap_sem);
+ goto out;
+ }
+ }

state.domain = m.dom;
state.vma = vma;
state.va = m.addr;
+ state.index = 0;
state.global_error = 0;
state.err = err_array;

@@ -439,6 +490,19 @@ static long privcmd_ioctl(struct file *file,
return ret;
}

+static void privcmd_close(struct vm_area_struct *vma)
+{
+ struct page **pages = vma->vm_private_data;
+ int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
+ return;
+
+ xen_unmap_domain_mfn_range(vma, numpgs, pages);
+ free_xenballooned_pages(numpgs, pages);
+ kfree(pages);
+}
+
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -449,6 +513,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}

static struct vm_operations_struct privcmd_vm_ops = {
+ .close = privcmd_close,
.fault = privcmd_fault
};

@@ -466,7 +531,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)

static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
- return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+ return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
}

const struct file_operations xen_privcmd_fops = {
--
1.7.7.6

2012-10-22 13:23:08

by Stefano Stabellini

Subject: Re: [PATCH 6/6] xen/pvh: /dev/xen/privcmd changes.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> PVH only supports the batch interface. To map a foreign page to a process,
> the PFN must be allocated, and the PVH path uses ballooning for that
> purpose.
>
> The returned PFN is then mapped to the foreign page.
> xen_unmap_domain_mfn_range() is introduced to unmap these pages via the
> privcmd close call.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> [v1: Fix up privcmd_close]
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>


Acked-by: Stefano Stabellini <[email protected]>

> drivers/xen/privcmd.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 files changed, 67 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index b612267..b9d0898 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -33,11 +33,14 @@
> #include <xen/features.h>
> #include <xen/page.h>
> #include <xen/xen-ops.h>
> +#include <xen/balloon.h>
>
> #include "privcmd.h"
>
> MODULE_LICENSE("GPL");
>
> +#define PRIV_VMA_LOCKED ((void *)1)
> +
> #ifndef HAVE_ARCH_PRIVCMD_MMAP
> static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
> #endif
> @@ -199,6 +202,10 @@ static long privcmd_ioctl_mmap(void __user *udata)
> if (!xen_initial_domain())
> return -EPERM;
>
> + /* We only support privcmd_ioctl_mmap_batch for auto translated. */
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return -ENOSYS;
> +
> if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
> return -EFAULT;
>
> @@ -246,6 +253,7 @@ struct mmap_batch_state {
> domid_t domain;
> unsigned long va;
> struct vm_area_struct *vma;
> + int index;
> /* A tristate:
> * 0 for no errors
> * 1 if at least one error has happened (and no
> @@ -260,15 +268,24 @@ struct mmap_batch_state {
> xen_pfn_t __user *user_mfn;
> };
>
> +/* auto translated dom0 note: if domU being created is PV, then mfn is
> + * mfn(addr on bus). If it's auto xlated, then mfn is pfn (input to HAP).
> + */
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + struct vm_area_struct *vma = st->vma;
> + struct page **pages = vma->vm_private_data;
> + struct page *cur_page = NULL;
> int ret;
>
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + cur_page = pages[st->index++];
> +
> ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> st->vma->vm_page_prot, st->domain,
> - NULL);
> + &cur_page);
>
> /* Store error code for second pass. */
> *(st->err++) = ret;
> @@ -304,6 +321,32 @@ static int mmap_return_errors_v1(void *data, void *state)
> return __put_user(*mfnp, st->user_mfn++);
> }
>
> +/* Allocate pfns that are then mapped with gmfns from foreign domid. Update
> + * the vma with the page info to use later.
> + * Returns: 0 if success, otherwise -errno
> + */
> +static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
> +{
> + int rc;
> + struct page **pages;
> +
> + pages = kcalloc(numpgs, sizeof(pages[0]), GFP_KERNEL);
> + if (pages == NULL)
> + return -ENOMEM;
> +
> + rc = alloc_xenballooned_pages(numpgs, pages, 0);
> + if (rc != 0) {
> + pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__,
> + numpgs, rc);
> + kfree(pages);
> + return -ENOMEM;
> + }
> + BUG_ON(vma->vm_private_data != PRIV_VMA_LOCKED);
> + vma->vm_private_data = pages;
> +
> + return 0;
> +}
> +
> static struct vm_operations_struct privcmd_vm_ops;
>
> static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> @@ -371,10 +414,18 @@ static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> up_write(&mm->mmap_sem);
> goto out;
> }
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> + ret = alloc_empty_pages(vma, m.num);
> + if (ret < 0) {
> + up_write(&mm->mmap_sem);
> + goto out;
> + }
> + }
>
> state.domain = m.dom;
> state.vma = vma;
> state.va = m.addr;
> + state.index = 0;
> state.global_error = 0;
> state.err = err_array;
>
> @@ -439,6 +490,19 @@ static long privcmd_ioctl(struct file *file,
> return ret;
> }
>
> +static void privcmd_close(struct vm_area_struct *vma)
> +{
> + struct page **pages = vma->vm_private_data;
> + int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> +
> + if (!xen_feature(XENFEAT_auto_translated_physmap) || !numpgs || !pages)
> + return;
> +
> + xen_unmap_domain_mfn_range(vma, numpgs, pages);
> + free_xenballooned_pages(numpgs, pages);
> + kfree(pages);
> +}
> +
> static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
> @@ -449,6 +513,7 @@ static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> static struct vm_operations_struct privcmd_vm_ops = {
> + .close = privcmd_close,
> .fault = privcmd_fault
> };
>
> @@ -466,7 +531,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
>
> static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
> {
> - return (xchg(&vma->vm_private_data, (void *)1) == NULL);
> + return !cmpxchg(&vma->vm_private_data, NULL, PRIV_VMA_LOCKED);
> }
>
> const struct file_operations xen_privcmd_fops = {
> --
> 1.7.7.6
>

2012-10-22 13:26:30

by Stefano Stabellini

Subject: Re: [PATCH 5/6] xen/pvh: balloon and grant changes.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> For the balloon changes we skip setting the local P2M as it's updated
> in Xen. For grants, the shared grant frame is a pfn and not an mfn,
> hence it's mapped via the same code path as HVM.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> drivers/xen/balloon.c | 15 +++++++++------
> drivers/xen/gntdev.c | 3 ++-
> drivers/xen/grant-table.c | 26 ++++++++++++++++++++++----
> 3 files changed, 33 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> index 31ab82f..c825b63 100644
> --- a/drivers/xen/balloon.c
> +++ b/drivers/xen/balloon.c
> @@ -361,7 +361,9 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
> set_phys_to_machine(pfn, frame_list[i]);
>
> /* Link back into the page tables if not highmem. */
> - if (xen_pv_domain() && !PageHighMem(page)) {
> + if (xen_pv_domain() && !PageHighMem(page) &&
> + !xen_feature(XENFEAT_auto_translated_physmap)) {
> +
> int ret;
> ret = HYPERVISOR_update_va_mapping(
> (unsigned long)__va(pfn << PAGE_SHIFT),
> @@ -418,12 +420,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
> scrub_page(page);
>
> if (xen_pv_domain() && !PageHighMem(page)) {
> - ret = HYPERVISOR_update_va_mapping(
> - (unsigned long)__va(pfn << PAGE_SHIFT),
> - __pte_ma(0), 0);
> - BUG_ON(ret);
> + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> + ret = HYPERVISOR_update_va_mapping(
> + (unsigned long)__va(pfn << PAGE_SHIFT),
> + __pte_ma(0), 0);
> + BUG_ON(ret);
> + }
> }

This change, unlike the one before, actually makes things different for
traditional pv domains in the PageHighMem(page) case.

The rest looks good.

> }
>
> /* Ensure that ballooned highmem pages don't have kmaps. */
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 610bfc6..d39d54e 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -803,7 +803,8 @@ static int __init gntdev_init(void)
> if (!xen_domain())
> return -ENODEV;
>
> - use_ptemod = xen_pv_domain();
> + use_ptemod = xen_pv_domain() &&
> + !xen_feature(XENFEAT_auto_translated_physmap);
>
> err = misc_register(&gntdev_miscdev);
> if (err != 0) {
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index b2b0a37..9c0019d 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -1029,14 +1029,19 @@ static void gnttab_unmap_frames_v2(void)
> static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
> {
> struct gnttab_setup_table setup;
> - unsigned long *frames;
> + unsigned long *frames, start_gpfn;
> unsigned int nr_gframes = end_idx + 1;
> int rc;
>
> - if (xen_hvm_domain()) {
> + if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap)) {
> struct xen_add_to_physmap xatp;
> unsigned int i = end_idx;
> rc = 0;
> +
> + if (xen_hvm_domain())
> + start_gpfn = xen_hvm_resume_frames >> PAGE_SHIFT;
> + else
> + start_gpfn = virt_to_pfn(gnttab_shared.addr);
> /*
> * Loop backwards, so that the first hypercall has the largest
> * index, ensuring that the table will grow only once.
> @@ -1045,7 +1050,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
> xatp.domid = DOMID_SELF;
> xatp.idx = i;
> xatp.space = XENMAPSPACE_grant_table;
> - xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
> + xatp.gpfn = start_gpfn + i;
> rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
> if (rc != 0) {
> printk(KERN_WARNING
> @@ -1108,7 +1113,7 @@ static void gnttab_request_version(void)
> int rc;
> struct gnttab_set_version gsv;
>
> - if (xen_hvm_domain())
> + if (xen_hvm_domain() || xen_feature(XENFEAT_auto_translated_physmap))
> gsv.version = 1;
> else
> gsv.version = 2;
> @@ -1136,12 +1141,25 @@ static void gnttab_request_version(void)
> int gnttab_resume(void)
> {
> unsigned int max_nr_gframes;
> + char *kmsg = "Failed to kmalloc pages for pv in hvm grant frames\n";
>
> gnttab_request_version();
> max_nr_gframes = gnttab_max_grant_frames();
> if (max_nr_gframes < nr_grant_frames)
> return -ENOSYS;
>
> + /* PVH note: xen will free existing kmalloc'd mfn in
> + * XENMEM_add_to_physmap. TBD/FIXME: use xen ballooning instead of
> + * kmalloc(). */
> + if (xen_pv_domain() && xen_feature(XENFEAT_auto_translated_physmap) &&
> + !gnttab_shared.addr) {
> + gnttab_shared.addr =
> + kmalloc(max_nr_gframes * PAGE_SIZE, GFP_KERNEL);
> + if (!gnttab_shared.addr) {
> + pr_warn("%s", kmsg);
> + return -ENOMEM;
> + }
> + }
> if (xen_pv_domain())
> return gnttab_map(0, nr_grant_frames - 1);
>
> --
> 1.7.7.6
>

2012-10-22 13:35:26

by Stefano Stabellini

Subject: Re: [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> In enlighten.c for PVH we can trap cpuid via vmexit, so we don't
> need to use the emulated prefix call. We also check for the vector
> callback early on, as it is a required feature. PVH also runs at the
> default kernel IOPL.
>
> In setup.c, in xen_add_extra_mem() we can skip updating the P2M as it's
> managed by Xen. PVH maps the entire IO space, but only RAM pages need to
> be repopulated.
>
> Finally, pure PV settings are moved to a separate function that is only
> called for pure PV, i.e., PV with pvmmu.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
>
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 8971a26..8cce47b 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -27,6 +27,7 @@
> #include <xen/interface/memory.h>
> #include <xen/interface/physdev.h>
> #include <xen/features.h>
> +#include "mmu.h"
> #include "xen-ops.h"
> #include "vdso.h"
>
> @@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
>
> memblock_reserve(start, size);
>
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return;
> +
> xen_max_p2m_pfn = PFN_DOWN(start + size);
> for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> unsigned long mfn = pfn_to_mfn(pfn);
> @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> .domid = DOMID_SELF
> };
> unsigned long len = 0;
> + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> unsigned long pfn;
> int ret;
>
> @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> continue;
> frame = mfn;
> } else {
> - if (mfn != INVALID_P2M_ENTRY)
> + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> continue;
> frame = pfn;
> }

Shouldn't we add a "!xlated_phys &&" to the other case as well?

2012-10-22 13:38:30

by Stefano Stabellini

Subject: Re: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> PVH allows a PV Linux guest to utilize hardware extended capabilities,
> such as running MMU updates in an HVM container.
>
> This patch allows it to be configured and enabled. It also makes basic
> header file changes to add new subcalls to the physmap hypercall.
>
> Lastly, mfn_to_local_pfn must return the mfn when paging mode is
> translated (since we let the hypervisor - or CPU - do the translation
> for us).
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>


Acked-by: Stefano Stabellini <[email protected]>

> arch/x86/include/asm/xen/page.h | 3 +++
> arch/x86/xen/Kconfig | 10 ++++++++++
> arch/x86/xen/xen-head.S | 11 ++++++++++-
> include/xen/interface/memory.h | 24 +++++++++++++++++++++++-
> include/xen/interface/physdev.h | 10 ++++++++++
> 5 files changed, 56 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
> index 472b9b7..6af440d 100644
> --- a/arch/x86/include/asm/xen/page.h
> +++ b/arch/x86/include/asm/xen/page.h
> @@ -159,6 +159,9 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine)
> static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
> {
> unsigned long pfn = mfn_to_pfn(mfn);
> +
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return mfn;
> if (get_phys_to_machine(pfn) != mfn)
> return -1; /* force !pfn_valid() */
> return pfn;
> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
> index fdce49c..822c5a0 100644
> --- a/arch/x86/xen/Kconfig
> +++ b/arch/x86/xen/Kconfig
> @@ -50,3 +50,13 @@ config XEN_DEBUG_FS
> Enable statistics output and various tuning options in debugfs.
> Enabling this option may incur a significant performance overhead.
>
> +config XEN_X86_PVH
> + bool "Support for running as a PVH guest (EXPERIMENTAL)"
> + depends on X86_64 && XEN && EXPERIMENTAL
> + default n
> + help
> + This option enables support for running as a PVH guest (PV guest
> + using hardware extensions) under a suitably capable hypervisor.
> + This option is EXPERIMENTAL because the hypervisor interfaces
> + which it uses are not yet considered stable therefore backwards and
> + forwards compatibility is not yet guaranteed. If unsure, say N.
> diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
> index 7faed58..1a6bca1 100644
> --- a/arch/x86/xen/xen-head.S
> +++ b/arch/x86/xen/xen-head.S
> @@ -13,6 +13,15 @@
> #include <xen/interface/elfnote.h>
> #include <asm/xen/interface.h>
>
> +#ifdef CONFIG_XEN_X86_PVH
> +#define FEATURES_PVH "|writable_descriptor_tables" \
> + "|auto_translated_physmap" \
> + "|supervisor_mode_kernel" \
> + "|hvm_callback_vector"
> +#else
> +#define FEATURES_PVH /* Not supported */
> +#endif
> +
> __INIT
> ENTRY(startup_xen)
> cld
> @@ -95,7 +104,7 @@ NEXT_HYPERCALL(arch_6)
> #endif
> ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen)
> ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
> - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
> + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb"FEATURES_PVH)
> ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
> ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
> ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index b66d04c..8beebdb 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -169,7 +169,13 @@ struct xen_add_to_physmap {
> /* Source mapping space. */
> #define XENMAPSPACE_shared_info 0 /* shared info page */
> #define XENMAPSPACE_grant_table 1 /* grant table page */
> - unsigned int space;
> +#define XENMAPSPACE_gmfn 2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> + uint16_t space;
> + domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000
>
> /* Index into source mapping space. */
> xen_ulong_t idx;
> @@ -237,4 +243,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
> * during a driver critical region.
> */
> extern spinlock_t xen_reservation_lock;
> +
> +/*
> + * Unmaps the page appearing at a particular GPFN from the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_remove_from_physmap_t.
> + */
> +#define XENMEM_remove_from_physmap 15
> +struct xen_remove_from_physmap {
> + /* Which domain to change the mapping for. */
> + domid_t domid;
> +
> + /* GPFN of the current mapping of the page. */
> + xen_pfn_t gpfn;
> +};
> +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> +
> #endif /* __XEN_PUBLIC_MEMORY_H__ */
> diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
> index 1844d31..83050d3 100644
> --- a/include/xen/interface/physdev.h
> +++ b/include/xen/interface/physdev.h
> @@ -274,6 +274,16 @@ struct physdev_dbgp_op {
> } u;
> };
>
> +#define PHYSDEVOP_map_iomem 30
> +struct physdev_map_iomem {
> + /* IN */
> + uint64_t first_gfn;
> + uint64_t first_mfn;
> + uint32_t nr_mfns;
> + uint32_t add_mapping; /* 1 == add mapping; 0 == unmap */
> +
> +};
> +
> /*
> * Notify that some PIRQ-bound event channels have been unmasked.
> * ** This command is obsolete since interface version 0x00030202 and is **
> --
> 1.7.7.6
>

2012-10-22 13:45:27

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as PVH
> only needs to send down gdtaddr and gdtsz.
>
> For interrupts, PVH uses native_irq_ops.
> vcpu hotplug is currently not available for PVH.
>
> For events we follow what PVHVM does - to use callback vector.
> Lastly, also use HVM path to setup XenBus.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/include/asm/xen/interface.h | 11 +++++-
> arch/x86/xen/irq.c | 5 ++-
> arch/x86/xen/p2m.c | 2 +-
> arch/x86/xen/smp.c | 75 ++++++++++++++++++++++------------
> drivers/xen/cpu_hotplug.c | 4 +-
> drivers/xen/events.c | 9 ++++-
> drivers/xen/xenbus/xenbus_client.c | 3 +-
> 7 files changed, 77 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
> index 6d2f75a..4c08f23 100644
> --- a/arch/x86/include/asm/xen/interface.h
> +++ b/arch/x86/include/asm/xen/interface.h
> @@ -144,7 +144,16 @@ struct vcpu_guest_context {
> struct cpu_user_regs user_regs; /* User-level CPU registers */
> struct trap_info trap_ctxt[256]; /* Virtual IDT */
> unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
> - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
> + union {
> + struct {
> + /* PV: GDT (machine frames, # ents).*/
> + unsigned long gdt_frames[16], gdt_ents;
> + } pv;
> + struct {
> + /* PVH: GDTR addr and size */
> + unsigned long gdtaddr, gdtsz;
> + } pvh;
> + } u;
> unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
> /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
> unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> index 01a4dc0..fcbe56a 100644
> --- a/arch/x86/xen/irq.c
> +++ b/arch/x86/xen/irq.c
> @@ -5,6 +5,7 @@
> #include <xen/interface/xen.h>
> #include <xen/interface/sched.h>
> #include <xen/interface/vcpu.h>
> +#include <xen/features.h>
> #include <xen/events.h>
>
> #include <asm/xen/hypercall.h>
> @@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
>
> void __init xen_init_irq_ops(void)
> {
> - pv_irq_ops = xen_irq_ops;
> + /* For PVH we use default pv_irq_ops settings */
> + if (!xen_feature(XENFEAT_hvm_callback_vector))
> + pv_irq_ops = xen_irq_ops;
> x86_init.irqs.intr_init = xen_init_IRQ;
> }
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index 95fb2aa..ea553c8 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> {
> unsigned topidx, mididx, idx;
>
> - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
> return true;
> }
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index 353c50f..df400349 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -68,9 +68,11 @@ static void __cpuinit cpu_bringup(void)
> touch_softlockup_watchdog();
> preempt_disable();
>
> - xen_enable_sysenter();
> - xen_enable_syscall();
> -
> + /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
> + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
> + xen_enable_sysenter();
> + xen_enable_syscall();
> + }
> cpu = smp_processor_id();
> smp_store_cpu_info(cpu);
> cpu_data(cpu).x86_max_cores = 1;
> @@ -230,10 +232,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
> BUG_ON(smp_processor_id() != 0);
> native_smp_prepare_boot_cpu();
>
> - /* We've switched to the "real" per-cpu gdt, so make sure the
> - old memory can be recycled */
> - make_lowmem_page_readwrite(xen_initial_gdt);
> -
> + if (!xen_feature(XENFEAT_writable_page_tables)) {
> + /* We've switched to the "real" per-cpu gdt, so make sure the
> + * old memory can be recycled */
> + make_lowmem_page_readwrite(xen_initial_gdt);
> + }
> xen_filter_cpu_maps();
> xen_setup_vcpu_info_placement();
> }
> @@ -300,8 +303,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> gdt = get_cpu_gdt_table(cpu);
>
> ctxt->flags = VGCF_IN_KERNEL;
> - ctxt->user_regs.ds = __USER_DS;
> - ctxt->user_regs.es = __USER_DS;
> ctxt->user_regs.ss = __KERNEL_DS;
> #ifdef CONFIG_X86_32
> ctxt->user_regs.fs = __KERNEL_PERCPU;
> @@ -310,35 +311,57 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> ctxt->gs_base_kernel = per_cpu_offset(cpu);
> #endif
> ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
>
> memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
>
> - xen_copy_trap_info(ctxt->trap_ctxt);
> + /* check for autoxlated to get it right for 32bit kernel */

I am not sure what this comment means, considering that in another
comment below you say that we don't support 32bit PVH kernels.


> + if (xen_feature(XENFEAT_auto_translated_physmap) &&
> + xen_feature(XENFEAT_supervisor_mode_kernel)) {
>
> - ctxt->ldt_ents = 0;
> + ctxt->user_regs.ds = __KERNEL_DS;
> + ctxt->user_regs.es = 0;
> + ctxt->user_regs.gs = 0;
>
> - BUG_ON((unsigned long)gdt & ~PAGE_MASK);
> + ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
> + ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
>
> - gdt_mfn = arbitrary_virt_to_mfn(gdt);
> - make_lowmem_page_readonly(gdt);
> - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
> +#ifdef CONFIG_X86_64
> + /* Note: PVH is not supported on x86_32. */
> + ctxt->gs_base_user = (unsigned long)
> + per_cpu(irq_stack_union.gs_base, cpu);
> +#endif
> + } else {
> + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
> + ctxt->user_regs.ds = __USER_DS;
> + ctxt->user_regs.es = __USER_DS;
>
> - ctxt->gdt_frames[0] = gdt_mfn;
> - ctxt->gdt_ents = GDT_ENTRIES;
> + xen_copy_trap_info(ctxt->trap_ctxt);
>
> - ctxt->user_regs.cs = __KERNEL_CS;
> - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> + ctxt->ldt_ents = 0;
>
> - ctxt->kernel_ss = __KERNEL_DS;
> - ctxt->kernel_sp = idle->thread.sp0;
> + BUG_ON((unsigned long)gdt & ~PAGE_MASK);
> +
> + gdt_mfn = arbitrary_virt_to_mfn(gdt);
> + make_lowmem_page_readonly(gdt);
> + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
> +
> + ctxt->u.pv.gdt_frames[0] = gdt_mfn;
> + ctxt->u.pv.gdt_ents = GDT_ENTRIES;
> +
> + ctxt->kernel_ss = __KERNEL_DS;
> + ctxt->kernel_sp = idle->thread.sp0;
>
> #ifdef CONFIG_X86_32
> - ctxt->event_callback_cs = __KERNEL_CS;
> - ctxt->failsafe_callback_cs = __KERNEL_CS;
> + ctxt->event_callback_cs = __KERNEL_CS;
> + ctxt->failsafe_callback_cs = __KERNEL_CS;
> #endif
> - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
> - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
> + ctxt->event_callback_eip =
> + (unsigned long)xen_hypervisor_callback;
> + ctxt->failsafe_callback_eip =
> + (unsigned long)xen_failsafe_callback;
> + }
> + ctxt->user_regs.cs = __KERNEL_CS;
> + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
>
> per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

The traditional path looks the same as before, however it is hard to tell
whether the PVH path is correct without the Xen side. For example, what
is gdtsz?

2012-10-22 13:46:57

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH 3/6] xen/pvh: Implements mmu changes for PVH.

On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> From: Mukesh Rathor <[email protected]>
>
> First the set/clear mmio pte function makes a hypercall to update the
> P2M in Xen with 1:1 mapping. Since PVH uses mostly native mmu ops, we
> leave the generic (native_*) for the rest.
>
> Two local functions are introduced to add to xen physmap for xen remap
> interface. Xen unmap interface is introduced so the privcmd pte entries
> can be cleared in Xen p2m table.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>

this patch looks all right, but I would like to read Ian's feedback too


> arch/x86/xen/mmu.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++--
> arch/x86/xen/mmu.h | 2 +
> drivers/xen/privcmd.c | 5 +-
> include/xen/xen-ops.h | 5 +-
> 4 files changed, 165 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
> index 6226c99..5747a41 100644
> --- a/arch/x86/xen/mmu.c
> +++ b/arch/x86/xen/mmu.c
> @@ -74,6 +74,7 @@
> #include <xen/interface/version.h>
> #include <xen/interface/memory.h>
> #include <xen/hvc-console.h>
> +#include <xen/balloon.h>
>
> #include "multicalls.h"
> #include "mmu.h"
> @@ -332,6 +333,20 @@ static void xen_set_pte(pte_t *ptep, pte_t pteval)
> __xen_set_pte(ptep, pteval);
> }
>
> +void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> + int nr_mfns, int add_mapping)
> +{
> + struct physdev_map_iomem iomem;
> +
> + iomem.first_gfn = pfn;
> + iomem.first_mfn = mfn;
> + iomem.nr_mfns = nr_mfns;
> + iomem.add_mapping = add_mapping;
> +
> + if (HYPERVISOR_physdev_op(PHYSDEVOP_map_iomem, &iomem))
> + BUG();
> +}
> +
> static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
> pte_t *ptep, pte_t pteval)
> {
> @@ -1221,6 +1236,8 @@ static void __init xen_pagetable_init(void)
> #endif
> paging_init();
> xen_setup_shared_info();
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return;
> #ifdef CONFIG_X86_64
> if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> unsigned long new_mfn_list;
> @@ -1528,6 +1545,10 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
> static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
> {
> struct mmuext_op op;
> +
> + if (xen_feature(XENFEAT_writable_page_tables))
> + return;
> +
> op.cmd = cmd;
> op.arg1.mfn = pfn_to_mfn(pfn);
> if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
> @@ -1725,6 +1746,10 @@ static void set_page_prot(void *addr, pgprot_t prot)
> unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
> pte_t pte = pfn_pte(pfn, prot);
>
> + /* recall for PVH, page tables are native. */
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return;
> +
> if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
> BUG();
> }
> @@ -1802,6 +1827,9 @@ static void convert_pfn_mfn(void *v)
> pte_t *pte = v;
> int i;
>
> + if (xen_feature(XENFEAT_auto_translated_physmap))
> + return;
> +
> /* All levels are converted the same way, so just treat them
> as ptes. */
> for (i = 0; i < PTRS_PER_PTE; i++)
> @@ -1821,6 +1849,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
> (*pt_end)--;
> }
> }
> +
> /*
> * Set up the initial kernel pagetable.
> *
> @@ -1831,6 +1860,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
> * but that's enough to get __va working. We need to fill in the rest
> * of the physical mapping once some sort of allocator has been set
> * up.
> + * NOTE: for PVH, the page tables are native.
> */
> void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
> {
> @@ -1908,10 +1938,13 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
> * structure to attach it to, so make sure we just set kernel
> * pgd.
> */
> - xen_mc_batch();
> - __xen_write_cr3(true, __pa(init_level4_pgt));
> - xen_mc_issue(PARAVIRT_LAZY_CPU);
> -
> + if (xen_feature(XENFEAT_writable_page_tables)) {
> + native_write_cr3(__pa(init_level4_pgt));
> + } else {
> + xen_mc_batch();
> + __xen_write_cr3(true, __pa(init_level4_pgt));
> + xen_mc_issue(PARAVIRT_LAZY_CPU);
> + }
> /* We can't that easily rip out L3 and L2, as the Xen pagetables are
> * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for
> * the initial domain. For guests using the toolstack, they are in:
> @@ -2178,8 +2211,13 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
>
> void __init xen_init_mmu_ops(void)
> {
> - x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
> x86_init.paging.pagetable_init = xen_pagetable_init;
> +
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> + pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others;
> + return;
> + }
> + x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
> pv_mmu_ops = xen_mmu_ops;
>
> memset(dummy_mapping, 0xff, PAGE_SIZE);
> @@ -2455,6 +2493,89 @@ void __init xen_hvm_init_mmu_ops(void)
> }
> #endif
>
> +/* Map foreign gmfn, fgmfn, to local pfn, lpfn. This for the user space
> + * creating new guest on PVH dom0 and needs to map domU pages.
> + */
> +static int pvh_add_to_xen_p2m(unsigned long lpfn, unsigned long fgmfn,
> + unsigned int domid)
> +{
> + int rc;
> + struct xen_add_to_physmap xatp = { .foreign_domid = domid };
> +
> + xatp.gpfn = lpfn;
> + xatp.idx = fgmfn;
> + xatp.domid = DOMID_SELF;
> + xatp.space = XENMAPSPACE_gmfn_foreign;
> + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
> + if (rc)
> + pr_warn("d0: Failed to map pfn (0x%lx) to mfn (0x%lx) rc:%d\n",
> + lpfn, fgmfn, rc);
> + return rc;
> +}
> +
> +static int pvh_rem_xen_p2m(unsigned long spfn, int count)
> +{
> + struct xen_remove_from_physmap xrp;
> + int i, rc;
> +
> + for (i = 0; i < count; i++) {
> + xrp.domid = DOMID_SELF;
> + xrp.gpfn = spfn+i;
> + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp);
> + if (rc) {
> + pr_warn("Failed to unmap pfn:%lx rc:%d done:%d\n",
> + spfn+i, rc, i);
> + return 1;
> + }
> + }
> + return 0;
> +}
> +
> +struct pvh_remap_data {
> + unsigned long fgmfn; /* foreign domain's gmfn */
> + pgprot_t prot;
> + domid_t domid;
> + int index;
> + struct page **pages;
> +};
> +
> +static int pvh_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
> + void *data)
> +{
> + int rc;
> + struct pvh_remap_data *remap = data;
> + unsigned long pfn = page_to_pfn(remap->pages[remap->index++]);
> + pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot));
> +
> + rc = pvh_add_to_xen_p2m(pfn, remap->fgmfn, remap->domid);
> + if (rc)
> + return rc;
> + native_set_pte(ptep, pteval);
> +
> + return 0;
> +}
> +
> +static int pvh_remap_gmfn_range(struct vm_area_struct *vma,
> + unsigned long addr, unsigned long mfn, int nr,
> + pgprot_t prot, unsigned domid,
> + struct page **pages)
> +{
> + int err;
> + struct pvh_remap_data pvhdata;
> +
> + BUG_ON(!pages);
> +
> + pvhdata.fgmfn = mfn;
> + pvhdata.prot = prot;
> + pvhdata.domid = domid;
> + pvhdata.index = 0;
> + pvhdata.pages = pages;
> + err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT,
> + pvh_map_pte_fn, &pvhdata);
> + flush_tlb_all();
> + return err;
> +}
> +
> #define REMAP_BATCH_SIZE 16
>
> struct remap_data {
> @@ -2479,7 +2600,9 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
> int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> unsigned long addr,
> unsigned long mfn, int nr,
> - pgprot_t prot, unsigned domid)
> + pgprot_t prot, unsigned domid,
> + struct page **pages)
> +
> {
> struct remap_data rmd;
> struct mmu_update mmu_update[REMAP_BATCH_SIZE];
> @@ -2494,6 +2617,10 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
>
> BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
>
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> + /* We need to update the local page tables and the xen HAP */
> + return pvh_remap_gmfn_range(vma, addr, mfn, nr, prot, domid, pages);
> + }
> rmd.mfn = mfn;
> rmd.prot = prot;
>
> @@ -2523,3 +2650,26 @@ out:
> return err;
> }
> EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
> +
> +/* Returns: 0 success */
> +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> + int numpgs, struct page **pages)
> +{
> + if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
> + return 0;
> +
> + while (numpgs--) {
> +
> + /* the mmu has already cleaned up the process mmu resources at
> + * this point (lookup_address will return NULL). */
> + unsigned long pfn = page_to_pfn(pages[numpgs]);
> +
> + pvh_rem_xen_p2m(pfn, 1);
> + }
> + /* We don't need to flush tlbs because as part of pvh_rem_xen_p2m(),
> + * the hypervisor will do tlb flushes after removing the p2m entries
> + * from the EPT/NPT */
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
> diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
> index 73809bb..6d0bb56 100644
> --- a/arch/x86/xen/mmu.h
> +++ b/arch/x86/xen/mmu.h
> @@ -23,4 +23,6 @@ unsigned long xen_read_cr2_direct(void);
>
> extern void xen_init_mmu_ops(void);
> extern void xen_hvm_init_mmu_ops(void);
> +extern void xen_set_clr_mmio_pvh_pte(unsigned long pfn, unsigned long mfn,
> + int nr_mfns, int add_mapping);
> #endif /* _XEN_MMU_H */
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 8adb9cc..b612267 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -178,7 +178,7 @@ static int mmap_mfn_range(void *data, void *state)
> msg->va & PAGE_MASK,
> msg->mfn, msg->npages,
> vma->vm_page_prot,
> - st->domain);
> + st->domain, NULL);
> if (rc < 0)
> return rc;
>
> @@ -267,7 +267,8 @@ static int mmap_batch_fn(void *data, void *state)
> int ret;
>
> ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain);
> + st->vma->vm_page_prot, st->domain,
> + NULL);
>
> /* Store error code for second pass. */
> *(st->err++) = ret;
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 6a198e4..990b43e 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -27,6 +27,9 @@ struct vm_area_struct;
> int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
> unsigned long addr,
> unsigned long mfn, int nr,
> - pgprot_t prot, unsigned domid);
> + pgprot_t prot, unsigned domid,
> + struct page **pages);
> +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
> + int numpgs, struct page **pages);
>
> #endif /* INCLUDE_XEN_OPS_H */
> --
> 1.7.7.6
>

2012-10-22 16:09:36

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Mon, Oct 22, 2012 at 02:34:44PM +0100, Stefano Stabellini wrote:
> On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > From: Mukesh Rathor <[email protected]>
> >
> > In enlighten.c for PVH we can trap cpuid via vmexit, so don't
> > need to use emulated prefix call. We also check for vector callback
> > early on, as it is a required feature. PVH also runs at default kernel
> > IOPL.
> >
> > In setup.c, in xen_add_extra_mem() we can skip updating P2M as it's managed
> > by Xen. PVH maps the entire IO space, but only RAM pages need to be repopulated.
> >
> > Finally, pure PV settings are moved to a separate function that is only called
> > for pure PV, ie, pv with pvmmu.
> >
> > Signed-off-by: Mukesh Rathor <[email protected]>
> > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> >
> > diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> > index 8971a26..8cce47b 100644
> > --- a/arch/x86/xen/setup.c
> > +++ b/arch/x86/xen/setup.c
> > @@ -27,6 +27,7 @@
> > #include <xen/interface/memory.h>
> > #include <xen/interface/physdev.h>
> > #include <xen/features.h>
> > +#include "mmu.h"
> > #include "xen-ops.h"
> > #include "vdso.h"
> >
> > @@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
> >
> > memblock_reserve(start, size);
> >
> > + if (xen_feature(XENFEAT_auto_translated_physmap))
> > + return;
> > +
> > xen_max_p2m_pfn = PFN_DOWN(start + size);
> > for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> > unsigned long mfn = pfn_to_mfn(pfn);
> > @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > .domid = DOMID_SELF
> > };
> > unsigned long len = 0;
> > + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> > unsigned long pfn;
> > int ret;
> >
> > @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > continue;
> > frame = mfn;
> > } else {
> > - if (mfn != INVALID_P2M_ENTRY)
> > + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> > continue;
> > frame = pfn;
> > }
>
> Shouldn't we add a "!xlated_phys &&" to the other case as well?

No. That is handled in xen_pvh_identity_map_chunk which
just does a xen_set_clr_mmio_pvh_pte call for the "released"
pages. But that is a bit of ... well, extra logic. I think
if we did this it would work and look much nicer:


diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8cce47b..b451a77 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -114,9 +114,15 @@ static unsigned long __init xen_do_chunk(unsigned long start,

if (release) {
/* Make sure pfn exists to start with */
- if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
+ if (mfn == INVALID_P2M_ENTRY || (!xlated_phys && (mfn_to_pfn(mfn) != pfn)))
continue;
frame = mfn;
+ /* The hypercall PHYSDEVOP_map_iomem to release memory has already
+ * happened, so we just do a nop here. */
+ if (xlated_phys) {
+ len++;
+ continue;
+ }
} else {
if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
continue;
@@ -219,15 +225,24 @@ static void __init xen_set_identity_and_release_chunk(
{
unsigned long pfn;

+ /* For PVH, the pfns [0..MAX] are mapped to mfn's in the EPT/NPT. The mfns
+ * are released as part of this 1:1 mapping hypercall back to the dom heap.
+ * Also, we map the entire IO space, ie, beyond max_pfn_mapped.
+ */
+ int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
+
/*
* If the PFNs are currently mapped, the VA mapping also needs
* to be updated to be 1:1.
*/
- for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
- (void)HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(pfn, PAGE_KERNEL_IO), 0);
-
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+ if (xlated_phys)
+ xen_set_clr_mmio_pvh_pte(pfn, pfn, 1 /* one pfn */, 1 /* add mapping */);
+ else
+ (void)HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+ }
if (start_pfn < nr_pages)
*released += xen_release_chunk(
start_pfn, min(end_pfn, nr_pages));
@@ -235,27 +250,6 @@ static void __init xen_set_identity_and_release_chunk(
*identity += set_phys_range_identity(start_pfn, end_pfn);
}

-/* For PVH, the pfns [0..MAX] are mapped to mfn's in the EPT/NPT. The mfns
- * are released as part of this 1:1 mapping hypercall back to the dom heap.
- * Also, we map the entire IO space, ie, beyond max_pfn_mapped.
- */
-static void __init xen_pvh_identity_map_chunk(unsigned long start_pfn,
- unsigned long end_pfn, unsigned long *released,
- unsigned long *identity, unsigned long max_pfn)
-{
- unsigned long pfn;
- int numpfns = 1, add_mapping = 1;
-
- for (pfn = start_pfn; pfn < end_pfn; pfn++)
- xen_set_clr_mmio_pvh_pte(pfn, pfn, numpfns, add_mapping);
-
- if (start_pfn <= max_pfn) {
- unsigned long end = min(max_pfn_mapped, end_pfn);
- *released += end - start_pfn;
- }
- *identity += end_pfn - start_pfn;
-}
-
static unsigned long __init xen_set_identity_and_release(
const struct e820entry *list, size_t map_size, unsigned long nr_pages)
{
@@ -286,17 +280,10 @@ static unsigned long __init xen_set_identity_and_release(
if (entry->type == E820_RAM)
end_pfn = PFN_UP(entry->addr);

- if (start_pfn < end_pfn) {
- if (xlated_phys) {
- xen_pvh_identity_map_chunk(start_pfn,
- end_pfn, &released, &identity,
- nr_pages);
- } else {
- xen_set_identity_and_release_chunk(
+ if (start_pfn < end_pfn)
+ xen_set_identity_and_release_chunk(
start_pfn, end_pfn, nr_pages,
&released, &identity);
- }
- }
start = end;
}
}

2012-10-22 16:17:13

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 5/6] xen/pvh: balloon and grant changes.

On Mon, Oct 22, 2012 at 02:25:43PM +0100, Stefano Stabellini wrote:
> On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > From: Mukesh Rathor <[email protected]>
> >
> > For balloon changes we skip setting of local P2M as it's updated
> > in Xen. For grant, the shared grant frame is the pfn and not mfn,
> > hence its mapped via the same code path as HVM.
> >
> > Signed-off-by: Mukesh Rathor <[email protected]>
> > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > ---
> > drivers/xen/balloon.c | 15 +++++++++------
> > drivers/xen/gntdev.c | 3 ++-
> > drivers/xen/grant-table.c | 26 ++++++++++++++++++++++----
> > 3 files changed, 33 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
> > index 31ab82f..c825b63 100644
> > --- a/drivers/xen/balloon.c
> > +++ b/drivers/xen/balloon.c
> > @@ -361,7 +361,9 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
> > set_phys_to_machine(pfn, frame_list[i]);
> >
> > /* Link back into the page tables if not highmem. */
> > - if (xen_pv_domain() && !PageHighMem(page)) {
> > + if (xen_pv_domain() && !PageHighMem(page) &&
> > + !xen_feature(XENFEAT_auto_translated_physmap)) {

This could be done as:

if (xen_pv_domain() && !PageHighMem(page) && !xen_feature(XENFEAT_auto_translated_physmap))

Just to make it easier to read.

> > +
> > int ret;
> > ret = HYPERVISOR_update_va_mapping(
> > (unsigned long)__va(pfn << PAGE_SHIFT),
> > @@ -418,12 +420,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
> > scrub_page(page);
> >
> > if (xen_pv_domain() && !PageHighMem(page)) {
> > - ret = HYPERVISOR_update_va_mapping(
> > - (unsigned long)__va(pfn << PAGE_SHIFT),
> > - __pte_ma(0), 0);
> > - BUG_ON(ret);
> > + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> > + ret = HYPERVISOR_update_va_mapping(
> > + (unsigned long)__va(pfn << PAGE_SHIFT),
> > + __pte_ma(0), 0);
> > + BUG_ON(ret);
> > + }
> > }
>
> this change, unlike the one before, actually makes things different for
> traditional pv domains in case PageHighMem(page).

How? He is not altering the !PageHighMem check. Just adding a check
before the hypercall to render it a nop on PVH.
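
The effective logic in decrease_reservation() after his patch is just
(sketch):

	if (xen_pv_domain() && !PageHighMem(page)) {
		/* outer check unchanged */
		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
			/* pv-mmu only; PVH skips the hypercall */
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				__pte_ma(0), 0);
			BUG_ON(ret);
		}
	}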

2012-10-22 16:40:48

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH 5/6] xen/pvh: balloon and grant changes.

On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > +
> > > int ret;
> > > ret = HYPERVISOR_update_va_mapping(
> > > (unsigned long)__va(pfn << PAGE_SHIFT),
> > > @@ -418,12 +420,13 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
> > > scrub_page(page);
> > >
> > > if (xen_pv_domain() && !PageHighMem(page)) {
> > > - ret = HYPERVISOR_update_va_mapping(
> > > - (unsigned long)__va(pfn << PAGE_SHIFT),
> > > - __pte_ma(0), 0);
> > > - BUG_ON(ret);
> > > + if (!xen_feature(XENFEAT_auto_translated_physmap)) {
> > > + ret = HYPERVISOR_update_va_mapping(
> > > + (unsigned long)__va(pfn << PAGE_SHIFT),
> > > + __pte_ma(0), 0);
> > > + BUG_ON(ret);
> > > + }
> > > }
> >
> > this change, unlike the one before, actually makes things different for
> > traditional pv domains in case PageHighMem(page).
>
> How? He is not altering the !PageHighMem check. Just adding a check
> before the hypercall to render it a nop on PVH.

sorry, you are right, I need new glasses

2012-10-22 17:07:52

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Mon, Oct 22, 2012 at 02:44:40PM +0100, Stefano Stabellini wrote:
> On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > From: Mukesh Rathor <[email protected]>
> >
> > make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as PVH
> > only needs to send down gdtaddr and gdtsz.
> >
> > For interrupts, PVH uses native_irq_ops.
> > vcpu hotplug is currently not available for PVH.
> >
> > For events we follow what PVHVM does - to use callback vector.
> > Lastly, also use HVM path to setup XenBus.
> >
> > Signed-off-by: Mukesh Rathor <[email protected]>
> > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > ---
> > arch/x86/include/asm/xen/interface.h | 11 +++++-
> > arch/x86/xen/irq.c | 5 ++-
> > arch/x86/xen/p2m.c | 2 +-
> > arch/x86/xen/smp.c | 75 ++++++++++++++++++++++------------
> > drivers/xen/cpu_hotplug.c | 4 +-
> > drivers/xen/events.c | 9 ++++-
> > drivers/xen/xenbus/xenbus_client.c | 3 +-
> > 7 files changed, 77 insertions(+), 32 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
> > index 6d2f75a..4c08f23 100644
> > --- a/arch/x86/include/asm/xen/interface.h
> > +++ b/arch/x86/include/asm/xen/interface.h
> > @@ -144,7 +144,16 @@ struct vcpu_guest_context {
> > struct cpu_user_regs user_regs; /* User-level CPU registers */
> > struct trap_info trap_ctxt[256]; /* Virtual IDT */
> > unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
> > - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
> > + union {
> > + struct {
> > + /* PV: GDT (machine frames, # ents).*/
> > + unsigned long gdt_frames[16], gdt_ents;
> > + } pv;
> > + struct {
> > + /* PVH: GDTR addr and size */
> > + unsigned long gdtaddr, gdtsz;
> > + } pvh;
> > + } u;
> > unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
> > /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
> > unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
> > diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> > index 01a4dc0..fcbe56a 100644
> > --- a/arch/x86/xen/irq.c
> > +++ b/arch/x86/xen/irq.c
> > @@ -5,6 +5,7 @@
> > #include <xen/interface/xen.h>
> > #include <xen/interface/sched.h>
> > #include <xen/interface/vcpu.h>
> > +#include <xen/features.h>
> > #include <xen/events.h>
> >
> > #include <asm/xen/hypercall.h>
> > @@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
> >
> > void __init xen_init_irq_ops(void)
> > {
> > - pv_irq_ops = xen_irq_ops;
> > + /* For PVH we use default pv_irq_ops settings */
> > + if (!xen_feature(XENFEAT_hvm_callback_vector))
> > + pv_irq_ops = xen_irq_ops;
> > x86_init.irqs.intr_init = xen_init_IRQ;
> > }
> > diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> > index 95fb2aa..ea553c8 100644
> > --- a/arch/x86/xen/p2m.c
> > +++ b/arch/x86/xen/p2m.c
> > @@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> > {
> > unsigned topidx, mididx, idx;
> >
> > - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
> > + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> > BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
> > return true;
> > }
> > diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> > index 353c50f..df400349 100644
> > --- a/arch/x86/xen/smp.c
> > +++ b/arch/x86/xen/smp.c
> > @@ -68,9 +68,11 @@ static void __cpuinit cpu_bringup(void)
> > touch_softlockup_watchdog();
> > preempt_disable();
> >
> > - xen_enable_sysenter();
> > - xen_enable_syscall();
> > -
> > + /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
> > + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
> > + xen_enable_sysenter();
> > + xen_enable_syscall();
> > + }
> > cpu = smp_processor_id();
> > smp_store_cpu_info(cpu);
> > cpu_data(cpu).x86_max_cores = 1;
> > @@ -230,10 +232,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
> > BUG_ON(smp_processor_id() != 0);
> > native_smp_prepare_boot_cpu();
> >
> > - /* We've switched to the "real" per-cpu gdt, so make sure the
> > - old memory can be recycled */
> > - make_lowmem_page_readwrite(xen_initial_gdt);
> > -
> > + if (!xen_feature(XENFEAT_writable_page_tables)) {
> > + /* We've switched to the "real" per-cpu gdt, so make sure the
> > + * old memory can be recycled */
> > + make_lowmem_page_readwrite(xen_initial_gdt);
> > + }
> > xen_filter_cpu_maps();
> > xen_setup_vcpu_info_placement();
> > }
> > @@ -300,8 +303,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> > gdt = get_cpu_gdt_table(cpu);
> >
> > ctxt->flags = VGCF_IN_KERNEL;
> > - ctxt->user_regs.ds = __USER_DS;
> > - ctxt->user_regs.es = __USER_DS;
> > ctxt->user_regs.ss = __KERNEL_DS;
> > #ifdef CONFIG_X86_32
> > ctxt->user_regs.fs = __KERNEL_PERCPU;
> > @@ -310,35 +311,57 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> > ctxt->gs_base_kernel = per_cpu_offset(cpu);
> > #endif
> > ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> > - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
> >
> > memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
> >
> > - xen_copy_trap_info(ctxt->trap_ctxt);
> > + /* check for autoxlated to get it right for 32bit kernel */
>
> I am not sure what this comment means, considering that in another
> comment below you say that we don't support 32bit PVH kernels.

Hm, even the V1 had this. I think he meant something else.

>
>
> > + if (xen_feature(XENFEAT_auto_translated_physmap) &&
> > + xen_feature(XENFEAT_supervisor_mode_kernel)) {
> >
> > - ctxt->ldt_ents = 0;
> > + ctxt->user_regs.ds = __KERNEL_DS;
> > + ctxt->user_regs.es = 0;
> > + ctxt->user_regs.gs = 0;
> >
> > - BUG_ON((unsigned long)gdt & ~PAGE_MASK);
> > + ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
> > + ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
> >
> > - gdt_mfn = arbitrary_virt_to_mfn(gdt);
> > - make_lowmem_page_readonly(gdt);
> > - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
> > +#ifdef CONFIG_X86_64
> > + /* Note: PVH is not supported on x86_32. */
> > + ctxt->gs_base_user = (unsigned long)
> > + per_cpu(irq_stack_union.gs_base, cpu);
> > +#endif
> > + } else {
> > + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
> > + ctxt->user_regs.ds = __USER_DS;
> > + ctxt->user_regs.es = __USER_DS;
> >
> > - ctxt->gdt_frames[0] = gdt_mfn;
> > - ctxt->gdt_ents = GDT_ENTRIES;
> > + xen_copy_trap_info(ctxt->trap_ctxt);
> >
> > - ctxt->user_regs.cs = __KERNEL_CS;
> > - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> > + ctxt->ldt_ents = 0;
> >
> > - ctxt->kernel_ss = __KERNEL_DS;
> > - ctxt->kernel_sp = idle->thread.sp0;
> > + BUG_ON((unsigned long)gdt & ~PAGE_MASK);
> > +
> > + gdt_mfn = arbitrary_virt_to_mfn(gdt);
> > + make_lowmem_page_readonly(gdt);
> > + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
> > +
> > + ctxt->u.pv.gdt_frames[0] = gdt_mfn;
> > + ctxt->u.pv.gdt_ents = GDT_ENTRIES;
> > +
> > + ctxt->kernel_ss = __KERNEL_DS;
> > + ctxt->kernel_sp = idle->thread.sp0;
> >
> > #ifdef CONFIG_X86_32
> > - ctxt->event_callback_cs = __KERNEL_CS;
> > - ctxt->failsafe_callback_cs = __KERNEL_CS;
> > + ctxt->event_callback_cs = __KERNEL_CS;
> > + ctxt->failsafe_callback_cs = __KERNEL_CS;
> > #endif
> > - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
> > - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
> > + ctxt->event_callback_eip =
> > + (unsigned long)xen_hypervisor_callback;
> > + ctxt->failsafe_callback_eip =
> > + (unsigned long)xen_failsafe_callback;
> > + }
> > + ctxt->user_regs.cs = __KERNEL_CS;
> > + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> >
> > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> > ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
>
> The traditional path looks the same as before, however it is hard to tell
> whether the PVH path is correct without the Xen side. For example, what
> is gdtsz?

2012-10-22 18:32:06

by Mukesh Rathor

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Mon, 22 Oct 2012 14:44:40 +0100
Stefano Stabellini <[email protected]> wrote:

> On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > From: Mukesh Rathor <[email protected]>
> >
> > make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as
> > PVH only needs to send down gdtaddr and gdtsz.
> >
> > For interrupts, PVH uses native_irq_ops.
> > vcpu hotplug is currently not available for PVH.
> >
> > For events we follow what PVHVM does - to use callback vector.
> > Lastly, also use HVM path to setup XenBus.
> >
> > Signed-off-by: Mukesh Rathor <[email protected]>
> > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > ---
> > return true;
> > }
> > - xen_copy_trap_info(ctxt->trap_ctxt);
> > + /* check for autoxlated to get it right for 32bit kernel */
>
> I am not sure what this comment means, considering that in another
> comment below you say that we don't support 32bit PVH kernels.

Function is common to both 32bit and 64bit kernels. We need to check
for auto xlated also in the if statement in addition to supervisor
mode kernel, so 32 bit doesn't go down the wrong path.

PVH is not supported for 32bit kernels, and gs_base_user doesn't exist
in the structure for 32bit so it needs to be ifdef'd 64bit which is
ok because PVH is not supported on 32bit kernel.
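
So the shape I'm after is roughly (a sketch, not the final patch):

	if (xen_feature(XENFEAT_auto_translated_physmap) &&
	    xen_feature(XENFEAT_supervisor_mode_kernel)) {
		/* PVH: checking both features keeps a 32bit kernel
		 * on the PV path below */
#ifdef CONFIG_X86_64
		/* gs_base_user only exists in the 64bit
		 * vcpu_guest_context */
		ctxt->gs_base_user = (unsigned long)
			per_cpu(irq_stack_union.gs_base, cpu);
#endif
	} else {
		/* pure PV path, shared by 32bit and 64bit */
	}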

> > + (unsigned long)xen_hypervisor_callback;
> > + ctxt->failsafe_callback_eip =
> > + (unsigned long)xen_failsafe_callback;
> > + }
> > + ctxt->user_regs.cs = __KERNEL_CS;
> > + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> > ctxt->ctrlreg[3] =
> > xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
>
> The traditional path looks the same as before, however it is hard to
> tell whether the PVH path is correct without the Xen side. For
> example, what is gdtsz?

gdtsz is GUEST_GDTR_LIMIT and gdtaddr is GUEST_GDTR_BASE in the vmcs.
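
On the Xen side that boils down to something like this (illustrative
only -- the actual hypervisor code is not part of this series):

	/* vcpu init, hypervisor side (sketch) */
	__vmwrite(GUEST_GDTR_BASE,  ctxt->u.pvh.gdtaddr);
	__vmwrite(GUEST_GDTR_LIMIT, ctxt->u.pvh.gdtsz); /* GDT_SIZE - 1 */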

thanks,
Mukesh

2012-10-22 20:27:13

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Mon, Oct 22, 2012 at 11:31:54AM -0700, Mukesh Rathor wrote:
> On Mon, 22 Oct 2012 14:44:40 +0100
> Stefano Stabellini <[email protected]> wrote:
>
> > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > From: Mukesh Rathor <[email protected]>
> > >
> > > make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as
> > > PVH only needs to send down gdtaddr and gdtsz.
> > >
> > > For interrupts, PVH uses native_irq_ops.
> > > vcpu hotplug is currently not available for PVH.
> > >
> > > For events we follow what PVHVM does - to use callback vector.
> > > Lastly, also use HVM path to setup XenBus.
> > >
> > > Signed-off-by: Mukesh Rathor <[email protected]>
> > > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > > ---
> > > return true;
> > > }
> > > - xen_copy_trap_info(ctxt->trap_ctxt);
> > > + /* check for autoxlated to get it right for 32bit kernel */
> >
> > I am not sure what this comment means, considering that in another
> > comment below you say that we don't support 32bit PVH kernels.
>
> Function is common to both 32bit and 64bit kernels. We need to check
> for auto xlated also in the if statement in addition to supervisor
> mode kernel, so 32 bit doesn't go down the wrong path.

Can one just make the whole thing #ifdef CONFIG_X86_64?
Either way you hit a 'BUG' during bootup when booting as 32-bit?


>
> PVH is not supported for 32bit kernels, and gs_base_user doesn't exist
> > in the structure for 32bit so it needs to be ifdef'd 64bit which is
> > ok because PVH is not supported on 32bit kernel.
>
> > > + (unsigned long)xen_hypervisor_callback;
> > > + ctxt->failsafe_callback_eip =
> > > + (unsigned long)xen_failsafe_callback;
> > > + }
> > > + ctxt->user_regs.cs = __KERNEL_CS;
> > > + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> > > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> > > ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
> >
> > The traditional path looks the same as before, however it is hard to
> > tell whether the PVH path is correct without the Xen side. For
> > example, what is gdtsz?
>
> gdtsz is GUEST_GDTR_LIMIT and gdtaddr is GUEST_GDTR_BASE in the vmcs.

looking at this I figured it could be a bit neater. So I split it into
two patches, which should make it easier to read the PVH one.


>From f9455e293169d73e5698df62801bcd5fd64a5259 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <[email protected]>
Date: Mon, 22 Oct 2012 11:35:16 -0400
Subject: [PATCH 1/2] xen/smp: Move the common CPU init code a bit to prep for
PVH patch.

The PV and PVH CPU init code share some functionality. The
PVH code ("xen/pvh: Extend vcpu_guest_context, p2m, event, and XenBus")
sets some of these up, but not all. To make it easier to read, this
patch moves the PV-specific pieces out of the generic path.

No functional change, just code move.

Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/xen/smp.c | 42 +++++++++++++++++++++++-------------------
1 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 353c50f..ba49a3a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
gdt = get_cpu_gdt_table(cpu);

ctxt->flags = VGCF_IN_KERNEL;
- ctxt->user_regs.ds = __USER_DS;
- ctxt->user_regs.es = __USER_DS;
ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;
@@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
- ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

- xen_copy_trap_info(ctxt->trap_ctxt);
+ {
+ ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
+ ctxt->user_regs.ds = __USER_DS;
+ ctxt->user_regs.es = __USER_DS;

- ctxt->ldt_ents = 0;
+ xen_copy_trap_info(ctxt->trap_ctxt);

- BUG_ON((unsigned long)gdt & ~PAGE_MASK);
+ ctxt->ldt_ents = 0;

- gdt_mfn = arbitrary_virt_to_mfn(gdt);
- make_lowmem_page_readonly(gdt);
- make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
+ BUG_ON((unsigned long)gdt & ~PAGE_MASK);

- ctxt->gdt_frames[0] = gdt_mfn;
- ctxt->gdt_ents = GDT_ENTRIES;
+ gdt_mfn = arbitrary_virt_to_mfn(gdt);
+ make_lowmem_page_readonly(gdt);
+ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

- ctxt->user_regs.cs = __KERNEL_CS;
- ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
+ ctxt->u.pv.gdt_frames[0] = gdt_mfn;
+ ctxt->u.pv.gdt_ents = GDT_ENTRIES;

- ctxt->kernel_ss = __KERNEL_DS;
- ctxt->kernel_sp = idle->thread.sp0;
+ ctxt->kernel_ss = __KERNEL_DS;
+ ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
- ctxt->event_callback_cs = __KERNEL_CS;
- ctxt->failsafe_callback_cs = __KERNEL_CS;
+ ctxt->event_callback_cs = __KERNEL_CS;
+ ctxt->failsafe_callback_cs = __KERNEL_CS;
#endif
- ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
- ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
+ ctxt->event_callback_eip =
+ (unsigned long)xen_hypervisor_callback;
+ ctxt->failsafe_callback_eip =
+ (unsigned long)xen_failsafe_callback;
+ }
+ ctxt->user_regs.cs = __KERNEL_CS;
+ ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
--
1.7.7.6




>From 2c4dd7f567b229451f3dc1ae00d784da8b4a5072 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <[email protected]>
Date: Mon, 22 Oct 2012 11:37:57 -0400
Subject: [PATCH 2/2] xen/pvh: Extend vcpu_guest_context, p2m, event, and
XenBus.

Make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz},
as PVH only needs to send down gdtaddr and gdtsz in the
vcpu_guest_context structure.

For interrupts, PVH uses native_irq_ops so we can skip most of the
PV ones. In the future we can support the pirq_eoi_map.
Also VCPU hotplug is currently not available for PVH.

For events (and IRQs) we follow what PVHVM does - so use callback
vector. Lastly, for XenBus we use the same logic that is used in
the PVHVM case.

Signed-off-by: Mukesh Rathor <[email protected]>
[v2: Rebased it]
[v3: Move 64-bit ifdef and, per Stefano's feedback, add extra comments.]
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
---
arch/x86/include/asm/xen/interface.h | 11 +++++++++-
arch/x86/xen/irq.c | 5 +++-
arch/x86/xen/p2m.c | 2 +-
arch/x86/xen/smp.c | 36 ++++++++++++++++++++++++++-------
drivers/xen/cpu_hotplug.c | 4 ++-
drivers/xen/events.c | 9 +++++++-
drivers/xen/xenbus/xenbus_client.c | 3 +-
7 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 6d2f75a..4c08f23 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -144,7 +144,16 @@ struct vcpu_guest_context {
struct cpu_user_regs user_regs; /* User-level CPU registers */
struct trap_info trap_ctxt[256]; /* Virtual IDT */
unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
- unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+ union {
+ struct {
+ /* PV: GDT (machine frames, # ents).*/
+ unsigned long gdt_frames[16], gdt_ents;
+ } pv;
+ struct {
+ /* PVH: GDTR addr and size */
+ unsigned long gdtaddr, gdtsz;
+ } pvh;
+ } u;
unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
/* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 01a4dc0..fcbe56a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/features.h>
#include <xen/events.h>

#include <asm/xen/hypercall.h>
@@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {

void __init xen_init_irq_ops(void)
{
- pv_irq_ops = xen_irq_ops;
+ /* For PVH we use default pv_irq_ops settings */
+ if (!xen_feature(XENFEAT_hvm_callback_vector))
+ pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 95fb2aa..ea553c8 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
unsigned topidx, mididx, idx;

- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
return true;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ba49a3a..6f831a1 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -68,9 +68,11 @@ static void __cpuinit cpu_bringup(void)
touch_softlockup_watchdog();
preempt_disable();

- xen_enable_sysenter();
- xen_enable_syscall();
-
+ /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
+ if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
+ xen_enable_sysenter();
+ xen_enable_syscall();
+ }
cpu = smp_processor_id();
smp_store_cpu_info(cpu);
cpu_data(cpu).x86_max_cores = 1;
@@ -230,10 +232,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
BUG_ON(smp_processor_id() != 0);
native_smp_prepare_boot_cpu();

- /* We've switched to the "real" per-cpu gdt, so make sure the
- old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
-
+ if (!xen_feature(XENFEAT_writable_page_tables)) {
+ /* We've switched to the "real" per-cpu gdt, so make sure the
+ * old memory can be recycled */
+ make_lowmem_page_readwrite(xen_initial_gdt);
+ }
xen_filter_cpu_maps();
xen_setup_vcpu_info_placement();
}
@@ -311,7 +314,24 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)

memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

- {
+ /* check for autoxlated to get it right for 32bit kernel */
+ if (xen_feature(XENFEAT_auto_translated_physmap) &&
+ xen_feature(XENFEAT_supervisor_mode_kernel)) {
+#ifdef CONFIG_X86_64
+ ctxt->user_regs.ds = __KERNEL_DS;
+ ctxt->user_regs.es = 0;
+ ctxt->user_regs.gs = 0;
+
+ /* GUEST_GDTR_BASE and */
+ ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
+ /* GUEST_GDTR_LIMIT in the VMCS. */
+ ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
+
+ /* Note: PVH is not supported on x86_32. */
+ ctxt->gs_base_user = (unsigned long)
+ per_cpu(irq_stack_union.gs_base, cpu);
+#endif
+ } else {
ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
ctxt->user_regs.ds = __USER_DS;
ctxt->user_regs.es = __USER_DS;
diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
index 4dcfced..de6bcf9 100644
--- a/drivers/xen/cpu_hotplug.c
+++ b/drivers/xen/cpu_hotplug.c
@@ -2,6 +2,7 @@

#include <xen/xen.h>
#include <xen/xenbus.h>
+#include <xen/features.h>

#include <asm/xen/hypervisor.h>
#include <asm/cpu.h>
@@ -100,7 +101,8 @@ static int __init setup_vcpu_hotplug_event(void)
static struct notifier_block xsn_cpu = {
.notifier_call = setup_cpu_watcher };

- if (!xen_pv_domain())
+ /* PVH TBD/FIXME: future work */
+ if (!xen_pv_domain() || xen_feature(XENFEAT_auto_translated_physmap))
return -ENODEV;

register_xenstore_notifier(&xsn_cpu);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 59e10a1..7131fdd 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1774,7 +1774,7 @@ int xen_set_callback_via(uint64_t via)
}
EXPORT_SYMBOL_GPL(xen_set_callback_via);

-#ifdef CONFIG_XEN_PVHVM
+#ifdef CONFIG_X86
/* Vector callbacks are better than PCI interrupts to receive event
* channel notifications because we can receive vector callbacks on any
* vcpu and we don't need PCI support or APIC interactions. */
@@ -1835,6 +1835,13 @@ void __init xen_init_IRQ(void)
if (xen_initial_domain())
pci_xen_initial_domain();

+ if (xen_feature(XENFEAT_hvm_callback_vector)) {
+ xen_callback_vector();
+ return;
+ }
+
+ /* PVH: TBD/FIXME: debug and fix eoi map to work with pvh */
+
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index bcf3ba4..356461e 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -44,6 +44,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>

#include "xenbus_probe.h"

@@ -741,7 +742,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {

void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (xen_pv_domain() && !xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
--
1.7.7.6

2012-10-22 20:39:43

by Mukesh Rathor

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Mon, 22 Oct 2012 16:14:51 -0400
Konrad Rzeszutek Wilk <[email protected]> wrote:

> On Mon, Oct 22, 2012 at 11:31:54AM -0700, Mukesh Rathor wrote:
> > On Mon, 22 Oct 2012 14:44:40 +0100
> > Stefano Stabellini <[email protected]> wrote:
> >
> > > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > From: Mukesh Rathor <[email protected]>
> > > >
> > > > make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz},
> > > > as PVH only needs to send down gdtaddr and gdtsz.
> > > >
> > > > For interrupts, PVH uses native_irq_ops.
> > > > vcpu hotplug is currently not available for PVH.
> > > >
> > > > For events we follow what PVHVM does - to use callback vector.
> > > > Lastly, also use HVM path to setup XenBus.
> > > >
> > > > Signed-off-by: Mukesh Rathor <[email protected]>
> > > > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > > > ---
> > > > return true;
> > > > }
> > > > - xen_copy_trap_info(ctxt->trap_ctxt);
> > > > + /* check for autoxlated to get it right for 32bit kernel */
> > >
> > > I am not sure what this comment means, considering that in another
> > > comment below you say that we don't support 32bit PVH kernels.
> >
> > Function is common to both 32bit and 64bit kernels. We need to
> > check for auto xlated also in the if statement in addition to
> > supervisor mode kernel, so 32 bit doesn't go down the wrong path.
>
> Can one just make it #ifdef CONFIG_X86_64 for the whole thing?
> You are either way during bootup doing a 'BUG' when booting as 32-bit?

32bit goes down the pure pv, ie, pv mmu, path. The BUG() is for 32bit PVH.
Sure, we could ifdef the whole thing, but then we'd have to add more ifdefs
around the else, the closing brace, etc. I'm ok with whatever works for you.

thanks
mukesh


2012-10-23 12:26:46

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH 2/6] xen/pvh: Extend vcpu_guest_context, p2m, event, and xenbus to support PVH.

On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> On Mon, Oct 22, 2012 at 11:31:54AM -0700, Mukesh Rathor wrote:
> > On Mon, 22 Oct 2012 14:44:40 +0100
> > Stefano Stabellini <[email protected]> wrote:
> >
> > > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > From: Mukesh Rathor <[email protected]>
> > > >
> > > > make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz}, as
> > > > PVH only needs to send down gdtaddr and gdtsz.
> > > >
> > > > For interrupts, PVH uses native_irq_ops.
> > > > vcpu hotplug is currently not available for PVH.
> > > >
> > > > For events we follow what PVHVM does - to use callback vector.
> > > > Lastly, also use HVM path to setup XenBus.
> > > >
> > > > Signed-off-by: Mukesh Rathor <[email protected]>
> > > > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > > > ---
> > > > return true;
> > > > }
> > > > - xen_copy_trap_info(ctxt->trap_ctxt);
> > > > + /* check for autoxlated to get it right for 32bit kernel */
> > >
> > > I am not sure what this comment means, considering that in another
> > > comment below you say that we don't support 32bit PVH kernels.
> >
> > Function is common to both 32bit and 64bit kernels. We need to check
> > for auto xlated also in the if statement in addition to supervisor
> > mode kernel, so 32 bit doesn't go down the wrong path.
>
> Can one just make it #ifdef CONFIG_X86_64 for the whole thing?
> You are either way during bootup doing a 'BUG' when booting as 32-bit?
>
>
> >
> > PVH is not supported for 32bit kernels, and gs_base_user doesn't exist
> > in the structure for 32bit so it needs to be if'def'd 64bit which is
> > ok because PVH is not supprted on 32bit kernel.
> >
> > > > + ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
> > > > + ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
> > > > + }
> > > > + ctxt->user_regs.cs = __KERNEL_CS;
> > > > + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> > > > per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> > > > ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
> > >
> > > The traditional path looks the same as before, however it is hard to
> > > tell whether the PVH path is correct without the Xen side. For
> > > example, what is gdtsz?
> >
> > gdtsz is GUEST_GDTR_LIMIT and gdtaddr is GUEST_GDTR_BASE in the vmcs.
>
> looking at this I figured it could be a bit neater. So I split it into
> two patches, which should make it easier to read the PVH one.

It is much more readable now, thanks!

You can have my ack on both of them.



> From f9455e293169d73e5698df62801bcd5fd64a5259 Mon Sep 17 00:00:00 2001
> From: Konrad Rzeszutek Wilk <[email protected]>
> Date: Mon, 22 Oct 2012 11:35:16 -0400
> Subject: [PATCH 1/2] xen/smp: Move the common CPU init code a bit to prep for
> PVH patch.
>
> The PV and PVH CPU init code share some functionality. The
> PVH patch ("xen/pvh: Extend vcpu_guest_context, p2m, event, and XenBus")
> sets some of this up, but not all. To make it easier to read, this
> patch moves the PV specific code out of the generic path.
>
> No functional change, just code move.
>
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/xen/smp.c | 42 +++++++++++++++++++++++-------------------
> 1 files changed, 23 insertions(+), 19 deletions(-)
>
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index 353c50f..ba49a3a 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> gdt = get_cpu_gdt_table(cpu);
>
> ctxt->flags = VGCF_IN_KERNEL;
> - ctxt->user_regs.ds = __USER_DS;
> - ctxt->user_regs.es = __USER_DS;
> ctxt->user_regs.ss = __KERNEL_DS;
> #ifdef CONFIG_X86_32
> ctxt->user_regs.fs = __KERNEL_PERCPU;
> @@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
> ctxt->gs_base_kernel = per_cpu_offset(cpu);
> #endif
> ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
> - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
>
> memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
>
> - xen_copy_trap_info(ctxt->trap_ctxt);
> + {
> + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
> + ctxt->user_regs.ds = __USER_DS;
> + ctxt->user_regs.es = __USER_DS;
>
> - ctxt->ldt_ents = 0;
> + xen_copy_trap_info(ctxt->trap_ctxt);
>
> - BUG_ON((unsigned long)gdt & ~PAGE_MASK);
> + ctxt->ldt_ents = 0;
>
> - gdt_mfn = arbitrary_virt_to_mfn(gdt);
> - make_lowmem_page_readonly(gdt);
> - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
> + BUG_ON((unsigned long)gdt & ~PAGE_MASK);
>
> - ctxt->gdt_frames[0] = gdt_mfn;
> - ctxt->gdt_ents = GDT_ENTRIES;
> + gdt_mfn = arbitrary_virt_to_mfn(gdt);
> + make_lowmem_page_readonly(gdt);
> + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));
>
> - ctxt->user_regs.cs = __KERNEL_CS;
> - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
> + ctxt->u.pv.gdt_frames[0] = gdt_mfn;
> + ctxt->u.pv.gdt_ents = GDT_ENTRIES;
>
> - ctxt->kernel_ss = __KERNEL_DS;
> - ctxt->kernel_sp = idle->thread.sp0;
> + ctxt->kernel_ss = __KERNEL_DS;
> + ctxt->kernel_sp = idle->thread.sp0;
>
> #ifdef CONFIG_X86_32
> - ctxt->event_callback_cs = __KERNEL_CS;
> - ctxt->failsafe_callback_cs = __KERNEL_CS;
> + ctxt->event_callback_cs = __KERNEL_CS;
> + ctxt->failsafe_callback_cs = __KERNEL_CS;
> #endif
> - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback;
> - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;
> + ctxt->event_callback_eip =
> + (unsigned long)xen_hypervisor_callback;
> + ctxt->failsafe_callback_eip =
> + (unsigned long)xen_failsafe_callback;
> + }
> + ctxt->user_regs.cs = __KERNEL_CS;
> + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
>
> per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
> ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
> --
> 1.7.7.6
>
>
>
>
> From 2c4dd7f567b229451f3dc1ae00d784da8b4a5072 Mon Sep 17 00:00:00 2001
> From: Konrad Rzeszutek Wilk <[email protected]>
> Date: Mon, 22 Oct 2012 11:37:57 -0400
> Subject: [PATCH 2/2] xen/pvh: Extend vcpu_guest_context, p2m, event, and
> XenBus.
>
> Make gdt_frames[]/gdt_ents into a union with {gdtaddr, gdtsz},
> as PVH only needs to send down gdtaddr and gdtsz in the
> vcpu_guest_context structure.
>
> For interrupts, PVH uses native_irq_ops so we can skip most of the
> PV ones. In the future we can support the pirq_eoi_map.
> Also VCPU hotplug is currently not available for PVH.
>
> For events (and IRQs) we follow what PVHVM does - so use callback
> vector. Lastly, for XenBus we use the same logic that is used in
> the PVHVM case.
>
> Signed-off-by: Mukesh Rathor <[email protected]>
> [v2: Rebased it]
> [v3: Move 64-bit ifdef and, based on Stefano's feedback, add extra comments.]
> Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> ---
> arch/x86/include/asm/xen/interface.h | 11 +++++++++-
> arch/x86/xen/irq.c | 5 +++-
> arch/x86/xen/p2m.c | 2 +-
> arch/x86/xen/smp.c | 36 ++++++++++++++++++++++++++-------
> drivers/xen/cpu_hotplug.c | 4 ++-
> drivers/xen/events.c | 9 +++++++-
> drivers/xen/xenbus/xenbus_client.c | 3 +-
> 7 files changed, 56 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
> index 6d2f75a..4c08f23 100644
> --- a/arch/x86/include/asm/xen/interface.h
> +++ b/arch/x86/include/asm/xen/interface.h
> @@ -144,7 +144,16 @@ struct vcpu_guest_context {
> struct cpu_user_regs user_regs; /* User-level CPU registers */
> struct trap_info trap_ctxt[256]; /* Virtual IDT */
> unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */
> - unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
> + union {
> + struct {
> + /* PV: GDT (machine frames, # ents).*/
> + unsigned long gdt_frames[16], gdt_ents;
> + } pv;
> + struct {
> + /* PVH: GDTR addr and size */
> + unsigned long gdtaddr, gdtsz;
> + } pvh;
> + } u;
> unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */
> /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
> unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */
> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> index 01a4dc0..fcbe56a 100644
> --- a/arch/x86/xen/irq.c
> +++ b/arch/x86/xen/irq.c
> @@ -5,6 +5,7 @@
> #include <xen/interface/xen.h>
> #include <xen/interface/sched.h>
> #include <xen/interface/vcpu.h>
> +#include <xen/features.h>
> #include <xen/events.h>
>
> #include <asm/xen/hypercall.h>
> @@ -129,6 +130,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
>
> void __init xen_init_irq_ops(void)
> {
> - pv_irq_ops = xen_irq_ops;
> + /* For PVH we use default pv_irq_ops settings */
> + if (!xen_feature(XENFEAT_hvm_callback_vector))
> + pv_irq_ops = xen_irq_ops;
> x86_init.irqs.intr_init = xen_init_IRQ;
> }
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index 95fb2aa..ea553c8 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -798,7 +798,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
> {
> unsigned topidx, mididx, idx;
>
> - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
> + if (xen_feature(XENFEAT_auto_translated_physmap)) {
> BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
> return true;
> }
> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
> index ba49a3a..6f831a1 100644
> --- a/arch/x86/xen/smp.c
> +++ b/arch/x86/xen/smp.c
> @@ -68,9 +68,11 @@ static void __cpuinit cpu_bringup(void)
> touch_softlockup_watchdog();
> preempt_disable();
>
> - xen_enable_sysenter();
> - xen_enable_syscall();
> -
> + /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
> + if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
> + xen_enable_sysenter();
> + xen_enable_syscall();
> + }
> cpu = smp_processor_id();
> smp_store_cpu_info(cpu);
> cpu_data(cpu).x86_max_cores = 1;
> @@ -230,10 +232,11 @@ static void __init xen_smp_prepare_boot_cpu(void)
> BUG_ON(smp_processor_id() != 0);
> native_smp_prepare_boot_cpu();
>
> - /* We've switched to the "real" per-cpu gdt, so make sure the
> - old memory can be recycled */
> - make_lowmem_page_readwrite(xen_initial_gdt);
> -
> + if (!xen_feature(XENFEAT_writable_page_tables)) {
> + /* We've switched to the "real" per-cpu gdt, so make sure the
> + * old memory can be recycled */
> + make_lowmem_page_readwrite(xen_initial_gdt);
> + }
> xen_filter_cpu_maps();
> xen_setup_vcpu_info_placement();
> }
> @@ -311,7 +314,24 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
>
> memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
>
> - {
> + /* check for autoxlated to get it right for 32bit kernel */
> + if (xen_feature(XENFEAT_auto_translated_physmap) &&
> + xen_feature(XENFEAT_supervisor_mode_kernel)) {
> +#ifdef CONFIG_X86_64
> + ctxt->user_regs.ds = __KERNEL_DS;
> + ctxt->user_regs.es = 0;
> + ctxt->user_regs.gs = 0;
> +
> + /* GUEST_GDTR_BASE and */
> + ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
> + /* GUEST_GDTR_LIMIT in the VMCS. */
> + ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);
> +
> + /* Note: PVH is not supported on x86_32. */
> + ctxt->gs_base_user = (unsigned long)
> + per_cpu(irq_stack_union.gs_base, cpu);
> +#endif
> + } else {
> ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
> ctxt->user_regs.ds = __USER_DS;
> ctxt->user_regs.es = __USER_DS;
> diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c
> index 4dcfced..de6bcf9 100644
> --- a/drivers/xen/cpu_hotplug.c
> +++ b/drivers/xen/cpu_hotplug.c
> @@ -2,6 +2,7 @@
>
> #include <xen/xen.h>
> #include <xen/xenbus.h>
> +#include <xen/features.h>
>
> #include <asm/xen/hypervisor.h>
> #include <asm/cpu.h>
> @@ -100,7 +101,8 @@ static int __init setup_vcpu_hotplug_event(void)
> static struct notifier_block xsn_cpu = {
> .notifier_call = setup_cpu_watcher };
>
> - if (!xen_pv_domain())
> + /* PVH TBD/FIXME: future work */
> + if (!xen_pv_domain() || xen_feature(XENFEAT_auto_translated_physmap))
> return -ENODEV;
>
> register_xenstore_notifier(&xsn_cpu);
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 59e10a1..7131fdd 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1774,7 +1774,7 @@ int xen_set_callback_via(uint64_t via)
> }
> EXPORT_SYMBOL_GPL(xen_set_callback_via);
>
> -#ifdef CONFIG_XEN_PVHVM
> +#ifdef CONFIG_X86
> /* Vector callbacks are better than PCI interrupts to receive event
> * channel notifications because we can receive vector callbacks on any
> * vcpu and we don't need PCI support or APIC interactions. */
> @@ -1835,6 +1835,13 @@ void __init xen_init_IRQ(void)
> if (xen_initial_domain())
> pci_xen_initial_domain();
>
> + if (xen_feature(XENFEAT_hvm_callback_vector)) {
> + xen_callback_vector();
> + return;
> + }
> +
> + /* PVH: TBD/FIXME: debug and fix eoi map to work with pvh */
> +
> pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
> eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
> rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
> diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
> index bcf3ba4..356461e 100644
> --- a/drivers/xen/xenbus/xenbus_client.c
> +++ b/drivers/xen/xenbus/xenbus_client.c
> @@ -44,6 +44,7 @@
> #include <xen/grant_table.h>
> #include <xen/xenbus.h>
> #include <xen/xen.h>
> +#include <xen/features.h>
>
> #include "xenbus_probe.h"
>
> @@ -741,7 +742,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
>
> void __init xenbus_ring_ops_init(void)
> {
> - if (xen_pv_domain())
> + if (xen_pv_domain() && !xen_feature(XENFEAT_auto_translated_physmap))
> ring_ops = &ring_ops_pv;
> else
> ring_ops = &ring_ops_hvm;
> --
> 1.7.7.6
>

2012-10-23 13:07:37

by Stefano Stabellini

[permalink] [raw]
Subject: Re: [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> On Mon, Oct 22, 2012 at 02:34:44PM +0100, Stefano Stabellini wrote:
> > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > From: Mukesh Rathor <[email protected]>
> > >
> > > In enlighten.c for PVH we can trap cpuid via vmexit, so don't
> > > need to use emulated prefix call. We also check for vector callback
> > > early on, as it is a required feature. PVH also runs at default kernel
> > > IOPL.
> > >
> > > In setup.c, in xen_add_extra_mem() we can skip updating P2M as it's managed
> > > by Xen. PVH maps the entire IO space, but only RAM pages need to be repopulated.
> > >
> > > Finally, pure PV settings are moved to a separate function that is only called
> > > for pure PV, ie, pv with pvmmu.
> > >
> > > Signed-off-by: Mukesh Rathor <[email protected]>
> > > Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>
> > >
> > > diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> > > index 8971a26..8cce47b 100644
> > > --- a/arch/x86/xen/setup.c
> > > +++ b/arch/x86/xen/setup.c
> > > @@ -27,6 +27,7 @@
> > > #include <xen/interface/memory.h>
> > > #include <xen/interface/physdev.h>
> > > #include <xen/features.h>
> > > +#include "mmu.h"
> > > #include "xen-ops.h"
> > > #include "vdso.h"
> > >
> > > @@ -78,6 +79,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
> > >
> > > memblock_reserve(start, size);
> > >
> > > + if (xen_feature(XENFEAT_auto_translated_physmap))
> > > + return;
> > > +
> > > xen_max_p2m_pfn = PFN_DOWN(start + size);
> > > for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> > > unsigned long mfn = pfn_to_mfn(pfn);
> > > @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > .domid = DOMID_SELF
> > > };
> > > unsigned long len = 0;
> > > + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> > > unsigned long pfn;
> > > int ret;
> > >
> > > @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > continue;
> > > frame = mfn;
> > > } else {
> > > - if (mfn != INVALID_P2M_ENTRY)
> > > + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> > > continue;
> > > frame = pfn;
> > > }
> >
> > Shouldn't we add a "!xlated_phys &&" to the other case as well?
>
> No. That is handled in xen_pvh_identity_map_chunk which
> just does a xen_set_clr_mmio_pvh_pte call for the "released"
> pages. But that is a bit of ... well, extra logic. I think
> if we did this it would work and look much nicer:

Yes, I think that this version looks better

>
> diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
> index 8cce47b..b451a77 100644
> --- a/arch/x86/xen/setup.c
> +++ b/arch/x86/xen/setup.c
> @@ -114,9 +114,15 @@ static unsigned long __init xen_do_chunk(unsigned long start,
>
> if (release) {
> /* Make sure pfn exists to start with */
> - if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
> + if (mfn == INVALID_P2M_ENTRY || (!xlated_phys && (mfn_to_pfn(mfn) != pfn)))
> continue;
> frame = mfn;
> + /* The hypercall PHYSDEVOP_map_iomem to release memory has already
> + * happened, so we just do a nop here. */
> + if (xlated_phys) {
> + len++;
> + continue;
> + }
> } else {
> if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> continue;
> @@ -219,15 +225,24 @@ static void __init xen_set_identity_and_release_chunk(
> {
> unsigned long pfn;
>
> + /* For PVH, the pfns [0..MAX] are mapped to mfn's in the EPT/NPT. The mfns
> + * are released as part of this 1:1 mapping hypercall back to the dom heap.
> + * Also, we map the entire IO space, ie, beyond max_pfn_mapped.
> + */
> + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> +
> /*
> * If the PFNs are currently mapped, the VA mapping also needs
> * to be updated to be 1:1.
> */
> - for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
> - (void)HYPERVISOR_update_va_mapping(
> - (unsigned long)__va(pfn << PAGE_SHIFT),
> - mfn_pte(pfn, PAGE_KERNEL_IO), 0);
> -
> + for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
> + if (xlated_phys)
> + xen_set_clr_mmio_pvh_pte(pfn, pfn, 1 /* one pfn */, 1 /* add mapping */);
> + else
> + (void)HYPERVISOR_update_va_mapping(
> + (unsigned long)__va(pfn << PAGE_SHIFT),
> + mfn_pte(pfn, PAGE_KERNEL_IO), 0);
> + }
> if (start_pfn < nr_pages)
> *released += xen_release_chunk(
> start_pfn, min(end_pfn, nr_pages));
> @@ -235,27 +250,6 @@ static void __init xen_set_identity_and_release_chunk(
> *identity += set_phys_range_identity(start_pfn, end_pfn);
> }
>
> -/* For PVH, the pfns [0..MAX] are mapped to mfn's in the EPT/NPT. The mfns
> - * are released as part of this 1:1 mapping hypercall back to the dom heap.
> - * Also, we map the entire IO space, ie, beyond max_pfn_mapped.
> - */
> -static void __init xen_pvh_identity_map_chunk(unsigned long start_pfn,
> - unsigned long end_pfn, unsigned long *released,
> - unsigned long *identity, unsigned long max_pfn)
> -{
> - unsigned long pfn;
> - int numpfns = 1, add_mapping = 1;
> -
> - for (pfn = start_pfn; pfn < end_pfn; pfn++)
> - xen_set_clr_mmio_pvh_pte(pfn, pfn, numpfns, add_mapping);
> -
> - if (start_pfn <= max_pfn) {
> - unsigned long end = min(max_pfn_mapped, end_pfn);
> - *released += end - start_pfn;
> - }
> - *identity += end_pfn - start_pfn;
> -}
> -
> static unsigned long __init xen_set_identity_and_release(
> const struct e820entry *list, size_t map_size, unsigned long nr_pages)
> {
> @@ -286,17 +280,10 @@ static unsigned long __init xen_set_identity_and_release(
> if (entry->type == E820_RAM)
> end_pfn = PFN_UP(entry->addr);
>
> - if (start_pfn < end_pfn) {
> - if (xlated_phys) {
> - xen_pvh_identity_map_chunk(start_pfn,
> - end_pfn, &released, &identity,
> - nr_pages);
> - } else {
> - xen_set_identity_and_release_chunk(
> + if (start_pfn < end_pfn)
> + xen_set_identity_and_release_chunk(
> start_pfn, end_pfn, nr_pages,
> &released, &identity);
> - }
> - }
> start = end;
> }
> }
>

2012-10-23 14:20:13

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

> +/*
> + * Unmaps the page appearing at a particular GPFN from the specified guest's
> + * pseudophysical address space.
> + * arg == addr of xen_remove_from_physmap_t.
> + */
> +#define XENMEM_remove_from_physmap 15
> +struct xen_remove_from_physmap {
> + /* Which domain to change the mapping for. */
> + domid_t domid;
> +
> + /* GPFN of the current mapping of the page. */
> + xen_pfn_t gpfn;
> +};

I just realized that this is a bit unfortunate. On 64-bit we end
up with this layout:

{
0->1 [domid]
2->7 [nothing]
8->15 [gpfn]
}

which if one were to do a 32-bit build you would get:
{
0->1 [domid]
2->3 [nothing]
4->7 [gpfn]
}
which means another compat layer in Xen.

So I think it makes sense to modify this to be 32 and 64-bit
clean, something like this:

{
domid_t domid;
u8 pad[6];
xen_pfn_t gpfn;
/* xen_pfn_t under 32-bit x86 is 4 bytes, so extend it */
#ifdef CONFIG_X86_32
__u8 pad1[4];
#endif
}

that way the structure is exactly the same size and the offsets
align.

Mukesh, you OK with that?
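
As a sanity check, here is a standalone userspace sketch (the typedefs
are stand-ins, not the real headers) that can be compiled with both
gcc -m32 and gcc -m64 to confirm the padded layout keeps gpfn at
offset 8 and the total size at 16 bytes on both:

/* Sketch only: verify that the explicit padding gives identical
 * field offsets whether xen_pfn_t is 4 or 8 bytes wide. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint16_t domid_t;            /* stand-in */
typedef unsigned long xen_pfn_t;     /* 4 bytes on x86_32, 8 on x86_64 */

struct xen_remove_from_physmap {
        domid_t domid;
        uint8_t pad[6];              /* push gpfn to offset 8 */
        xen_pfn_t gpfn;
#if defined(__i386__)
        uint8_t pad1[4];             /* pad the 4-byte gpfn out to 8 */
#endif
};

int main(void)
{
        /* both the -m32 and -m64 builds should print: gpfn at 8, size 16 */
        printf("gpfn at %zu, size %zu\n",
               offsetof(struct xen_remove_from_physmap, gpfn),
               sizeof(struct xen_remove_from_physmap));
        return 0;
}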

2012-10-23 14:58:32

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

> diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> index b66d04c..8beebdb 100644
> --- a/include/xen/interface/memory.h
> +++ b/include/xen/interface/memory.h
> @@ -169,7 +169,13 @@ struct xen_add_to_physmap {
> /* Source mapping space. */
> #define XENMAPSPACE_shared_info 0 /* shared info page */
> #define XENMAPSPACE_grant_table 1 /* grant table page */
> - unsigned int space;
> +#define XENMAPSPACE_gmfn 2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
> +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> + uint16_t space;
> + domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> +
> +#define XENMAPIDX_grant_table_status 0x80000000
>
> /* Index into source mapping space. */
> xen_ulong_t idx;

And this breaks 32-bit PVHVM :-(

The reason being that in xen_hvm_init_shared_info we allocate the
structure on the stack, and do not clear the foreign_domid to zero.

This means that for the space argument we can (and do) get something
like this:
xatp.space == 0xffff0001;
instead of
xatp.space = 0x1;

This fixes it:

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e3497f2..b679f86 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1509,6 +1509,7 @@ void __ref xen_hvm_init_shared_info(void)
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
+ xatp.foreign_domid = 0;
xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index b2b0a37..cbfd929 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1044,6 +1044,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
do {
xatp.domid = DOMID_SELF;
xatp.idx = i;
+ xatp.foreign_domid = 0;
xatp.space = XENMAPSPACE_grant_table;
xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
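
An alternative to sprinkling explicit foreign_domid = 0 assignments
would be to zero-initialize the whole on-stack structure with a
designated initializer; C99 guarantees that members not named in the
initializer are cleared. Just a sketch of the xen_hvm_init_shared_info
case, not what the fix above does:

	struct xen_add_to_physmap xatp = {
		.domid = DOMID_SELF,
		.idx   = 0,
		.space = XENMAPSPACE_shared_info,
		.gpfn  = __pa(shared_info_page) >> PAGE_SHIFT,
		/* .foreign_domid (and any future members) implicitly zeroed */
	};

	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
		BUG();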

2012-10-24 00:03:22

by Mukesh Rathor

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Tue, 23 Oct 2012 16:47:29 -0700
Mukesh Rathor <[email protected]> wrote:

> On Tue, 23 Oct 2012 14:07:06 +0100
> Stefano Stabellini <[email protected]> wrote:
>
> > On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > On Mon, Oct 22, 2012 at 02:34:44PM +0100, Stefano Stabellini
> > > wrote:
> > > > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > > From: Mukesh Rathor <[email protected]>
> > > > >
> > > > > for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> > > > > unsigned long mfn = pfn_to_mfn(pfn);
> > > > > @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > > .domid = DOMID_SELF
> > > > > };
> > > > > unsigned long len = 0;
> > > > > + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> > > > > unsigned long pfn;
> > > > > int ret;
> > > > >
> > > > > @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > > continue;
> > > > > frame = mfn;
> > > > > } else {
> > > > > - if (mfn != INVALID_P2M_ENTRY)
> > > > > + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> > > > > continue;
> > > > > frame = pfn;
> > > > > }
> > > >
> > > > Shouldn't we add a "!xlated_phys &&" to the other case as well?
> > >
> > > No. That is handled in xen_pvh_identity_map_chunk which
> > > just does a xen_set_clr_mmio_pvh_pte call for the "released"
> > > pages. But that is a bit of ... well, extra logic. I think
> > > if we did this it would work and look much nicer:
> >
> > Yes, I think that this version looks better
>
> But doesn't boot:
>
> (XEN) vmx_hybrid.c:710:d0 Dom:0 EPT violation 0x181 (r--/---), gpa 0x000000bf421e1c, mfn 0xffffffffffffffff, type 4.
> (XEN) p2m-ept.c:642:d0 Walking EPT tables for domain 0 gfn bf421
> (XEN) p2m-ept.c:648:d0 gfn exceeds max_mapped_pfn 4b062
> (XEN) vmx_hybrid.c:717:d0 --- GLA 0xffffffffff477e1c
>
>
> I'll have to debug it, or we can go back to the prev version, which
> I don't think is that un-pretty :).
>

The reason being:
xen_set_identity_and_release_chunk():
NEW: for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {

xen_pvh_identity_map_chunk():
OLD: for (pfn = start_pfn; pfn < end_pfn; pfn++)

IOW, for PVH we need to avoid testing for max_pfn_mapped, as we are
mapping the entire IO space.
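
Something like this is what the merged body would have to look like
(untested sketch; only the pure PV side keeps the max_pfn_mapped clamp,
while PVH walks all the way to end_pfn):

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		if (xlated_phys) {
			/* PVH: map the whole IO space, no clamp */
			xen_set_clr_mmio_pvh_pte(pfn, pfn, 1, 1);
		} else if (pfn <= max_pfn_mapped) {
			/* pure PV: only touch VAs that are actually mapped */
			(void)HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				mfn_pte(pfn, PAGE_KERNEL_IO), 0);
		}
	}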

thanks
mukesh

2012-10-24 00:11:04

by Mukesh Rathor

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Tue, 23 Oct 2012 17:03:10 -0700
Mukesh Rathor <[email protected]> wrote:

> On Tue, 23 Oct 2012 16:47:29 -0700
> Mukesh Rathor <[email protected]> wrote:
>
> > On Tue, 23 Oct 2012 14:07:06 +0100
> > Stefano Stabellini <[email protected]> wrote:
> >
> > > On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > On Mon, Oct 22, 2012 at 02:34:44PM +0100, Stefano Stabellini
> > > > wrote:
> > > > > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > > > From: Mukesh Rathor <[email protected]>
> > > > > >
> > > > > > for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> > > > > > unsigned long mfn = pfn_to_mfn(pfn);
> > > > > > @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > > > .domid = DOMID_SELF
> > > > > > };
> > > > > > unsigned long len = 0;
> > > > > > + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> > > > > > unsigned long pfn;
> > > > > > int ret;
> > > > > >
> > > > > > @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > > > continue;
> > > > > > frame = mfn;
> > > > > > } else {
> > > > > > - if (mfn != INVALID_P2M_ENTRY)
> > > > > > + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> > > > > > continue;
> > > > > > frame = pfn;
> > > > > > }
> > > > >
> > > > > Shouldn't we add a "!xlated_phys &&" to the other case as
> > > > > well?
> > > >
> > > > No. That is handled in xen_pvh_identity_map_chunk which
> > > > just does a xen_set_clr_mmio_pvh_pte call for the "released"
> > > > pages. But that is a bit of ... well, extra logic. I think
> > > > if we did this it would work and look much nicer:
> > >
> > > Yes, I think that this version looks better
> >
> > But doesn't boot:
> >
> > (XEN) vmx_hybrid.c:710:d0 Dom:0 EPT violation 0x181 (r--/---), gpa 0x000000bf421e1c, mfn 0xffffffffffffffff, type 4.
> > (XEN) p2m-ept.c:642:d0 Walking EPT tables for domain 0 gfn bf421
> > (XEN) p2m-ept.c:648:d0 gfn exceeds max_mapped_pfn 4b062
> > (XEN) vmx_hybrid.c:717:d0 --- GLA 0xffffffffff477e1c
> >
> >
> > I'll have to debug it, or we can go back to the prev version, which
> > I don't think is that un-pretty :).
> >
>
> The reason being:
> xen_set_identity_and_release_chunk():
> NEW: for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
>
> xen_pvh_identity_map_chunk():
> OLD: for (pfn = start_pfn; pfn < end_pfn; pfn++)
>
> IOW, for PVH we need to avoid testing for max_pfn_mapped, as we are
> mapping the entire IO space.

Also, now your counts for released and identity are off. Can we please
go back to the way it was?

thanks
Mukesh

2012-10-24 01:37:03

by Mukesh Rathor

[permalink] [raw]
Subject: Re: [PATCH 4/6] xen/pvh: bootup and setup related changes.

On Tue, 23 Oct 2012 14:07:06 +0100
Stefano Stabellini <[email protected]> wrote:

> On Mon, 22 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > On Mon, Oct 22, 2012 at 02:34:44PM +0100, Stefano Stabellini wrote:
> > > On Sat, 20 Oct 2012, Konrad Rzeszutek Wilk wrote:
> > > > From: Mukesh Rathor <[email protected]>
> > > >
> > > > for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
> > > > unsigned long mfn = pfn_to_mfn(pfn);
> > > > @@ -100,6 +104,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > .domid = DOMID_SELF
> > > > };
> > > > unsigned long len = 0;
> > > > + int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap);
> > > > unsigned long pfn;
> > > > int ret;
> > > >
> > > > @@ -113,7 +118,7 @@ static unsigned long __init xen_do_chunk(unsigned long start,
> > > > continue;
> > > > frame = mfn;
> > > > } else {
> > > > - if (mfn != INVALID_P2M_ENTRY)
> > > > + if (!xlated_phys && mfn != INVALID_P2M_ENTRY)
> > > > continue;
> > > > frame = pfn;
> > > > }
> > >
> > > Shouldn't we add a "!xlated_phys &&" to the other case as well?
> >
> > No. That is handled in xen_pvh_identity_map_chunk which
> > just does a xen_set_clr_mmio_pvh_pte call for the "released"
> > pages. But that is a bit of ... well, extra logic. I think
> > if we did this it would work and look much nicer:
>
> Yes, I think that this version looks better

But doesn't boot:

(XEN) vmx_hybrid.c:710:d0 Dom:0 EPT violation 0x181 (r--/---), gpa 0x000000bf421e1c, mfn 0xffffffffffffffff, type 4.
(XEN) p2m-ept.c:642:d0 Walking EPT tables for domain 0 gfn bf421
(XEN) p2m-ept.c:648:d0 gfn exceeds max_mapped_pfn 4b062
(XEN) vmx_hybrid.c:717:d0 --- GLA 0xffffffffff477e1c


I'll have to debug it, or we can go back to the prev version, which
I don't think is that un-pretty :).

Mukesh

2012-10-24 09:27:57

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

On Tue, 2012-10-23 at 15:46 +0100, Konrad Rzeszutek Wilk wrote:
> > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> > index b66d04c..8beebdb 100644
> > --- a/include/xen/interface/memory.h
> > +++ b/include/xen/interface/memory.h
> > @@ -169,7 +169,13 @@ struct xen_add_to_physmap {
> > /* Source mapping space. */
> > #define XENMAPSPACE_shared_info 0 /* shared info page */
> > #define XENMAPSPACE_grant_table 1 /* grant table page */
> > - unsigned int space;
> > +#define XENMAPSPACE_gmfn 2 /* GMFN */
> > +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */
> > +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another guest */
> > + uint16_t space;
> > + domid_t foreign_domid; /* IFF XENMAPSPACE_gmfn_foreign */
> > +
> > +#define XENMAPIDX_grant_table_status 0x80000000
> >
> > /* Index into source mapping space. */
> > xen_ulong_t idx;
>
> And this breaks 32-bit PVHVM :-(
>
> The reason being that in xen_hvm_init_shared_info we allocate the
> structure on the stack, and do not clear the foreign_domid to zero.
>
> This means that for the space argument we can (and do) get something
> like this:
> xatp.space == 0xffff0001;
> instead of
> xatp.space = 0x1;

We should be reverting this patch and using XENMEM_add_to_physmap_range
instead, see my "xen: x86 pvh: use XENMEM_add_to_physmap_range for
foreign gmfn mappings" from last week.

Mukesh wanted to go with this original version in the interim so
probably fixing it as you have makes sense anyhow.

>
> This fixes it:
>
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index e3497f2..b679f86 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -1509,6 +1509,7 @@ void __ref xen_hvm_init_shared_info(void)
> xatp.domid = DOMID_SELF;
> xatp.idx = 0;
> xatp.space = XENMAPSPACE_shared_info;
> + xatp.foreign_domid = 0;
> xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
> if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
> BUG();
> diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
> index b2b0a37..cbfd929 100644
> --- a/drivers/xen/grant-table.c
> +++ b/drivers/xen/grant-table.c
> @@ -1044,6 +1044,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
> do {
> xatp.domid = DOMID_SELF;
> xatp.idx = i;
> + xatp.foreign_domid = 0;
> xatp.space = XENMAPSPACE_grant_table;
> xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
> rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);

2012-10-24 09:29:48

by Ian Campbell

[permalink] [raw]
Subject: Re: [PATCH 1/6] xen/pvh: Support ParaVirtualized Hardware extensions.

On Tue, 2012-10-23 at 15:07 +0100, Konrad Rzeszutek Wilk wrote:
> > +/*
> > + * Unmaps the page appearing at a particular GPFN from the specified guest's
> > + * pseudophysical address space.
> > + * arg == addr of xen_remove_from_physmap_t.
> > + */
> > +#define XENMEM_remove_from_physmap 15
> > +struct xen_remove_from_physmap {
> > + /* Which domain to change the mapping for. */
> > + domid_t domid;
> > +
> > + /* GPFN of the current mapping of the page. */
> > + xen_pfn_t gpfn;
> > +};
>
> I just realized that this is a bit unfortunate. On 64-bit we end
> up with this layout:
>
> {
> 0->1 [domid]
> 2->7 [nothing]
> 8->15 [gpfn]
>
> }
>
> which if one were to do a 32-bit build you would get:
> {
> 0->1 [domid]
> 2->3 [nothing]
> 4->7 [gpfn]
> }
> which means another compat layer in Xen.

A relatively simple one, but yes.

> So I think it makes sense to modify this to be 32 and 64-bit
> clean, something like this:

That works.
We could just swap the domid and gpfn members around, although that
ordering reads a bit unnaturally.

>
> {
> domid_t domid;
> u8 pad[6];
> xen_pfn_t gpfn;
> /* xen_pfn_t under 32-bit x86 is 4 bytes, so extend it */
> #ifdef CONFIG_X86_32
> __u8 pad1[4];
> #endif
> }
>
> that way the structure is exactly the same size and the offsets
> align.
>
> Mukesh, you OK with that?

2012-10-25 12:39:06

by Konrad Rzeszutek Wilk

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH 4/6] xen/pvh: bootup and setup related changes.

> > > > Yes, I think that this version looks better
> > >
> > > But doesn't boot:
> > >
> > > (XEN) vmx_hybrid.c:710:d0 Dom:0 EPT violation 0x181 (r--/---), gpa 0x000000bf421e1c, mfn 0xffffffffffffffff, type 4.
> > > (XEN) p2m-ept.c:642:d0 Walking EPT tables for domain 0 gfn bf421
> > > (XEN) p2m-ept.c:648:d0 gfn exceeds max_mapped_pfn 4b062
> > > (XEN) vmx_hybrid.c:717:d0 --- GLA 0xffffffffff477e1c
> > >
> > >
> > > I'll have to debug it, or we can go back to the prev version, which
> > > I don't think is that un-pretty :).
> > >
> >
> > The reason being:
> > xen_set_identity_and_release_chunk():
> > NEW: for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
> >
> > xen_pvh_identity_map_chunk():
> > OLD: for (pfn = start_pfn; pfn < end_pfn; pfn++)
> >
> > IOW, for PVH we need to avoid testing for max_pfn_mapped, as we are
> > mapping the entire IO space.
>
> Also, now your counts for released and identity are off. Can we please
> go back to the way it was?

Let's drop this patch for right now. Can you post an alpha-RFC of your
Xen hypervisor patches so that I can wrangle the PV and PVH E820
code to use the same paths as much as possible?