Hi, Keir & Jeremy
This patchset enables Xen Hybrid extension support.
As we know, PV guests have a performance problem on x86_64: the guest kernel
and guest userspace reside in the same ring, so the TLB flushes required when
switching between guest userspace and guest kernel cause overhead, and much
more syscall overhead is introduced as well. The Hybrid Extension eliminates
this overhead by putting the guest kernel back in (non-root) ring 0, and so
achieves better performance than a PV guest.
The Hybrid Extension guest starts from real mode like an HVM guest, but also
offers component-based PV feature selection (e.g. PV halt, PV timer, event
channel, then PV drivers). So a guest with the Hybrid extension feature can
take advantage of both H/W virtualization and para-virtualization.
The first two patches of the series import several header files from Jeremy's
tree and the Xen tree.
The whole patchset is based on Linux 2.6.30 and Xen 3.4.1.
Please give comments. Thanks!
--
regards
Yang, Sheng
From: Jeremy Fitzhardinge <[email protected]>
Add support for hvm_op hypercall.
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/include/asm/xen/hypercall.h | 6 ++
include/xen/hvm.h | 23 +++++++
include/xen/interface/hvm/hvm_op.h | 72 ++++++++++++++++++++++
include/xen/interface/hvm/params.h | 111 ++++++++++++++++++++++++++++++++++
4 files changed, 212 insertions(+), 0 deletions(-)
create mode 100644 include/xen/hvm.h
create mode 100644 include/xen/interface/hvm/hvm_op.h
create mode 100644 include/xen/interface/hvm/params.h
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9c371e4..47c2ebb 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
return _hypercall2(int, nmi_op, op, arg);
}
+static inline unsigned long __must_check
+HYPERVISOR_hvm_op(int op, void *arg)
+{
+ return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
new file mode 100644
index 0000000..4ea8887
--- /dev/null
+++ b/include/xen/hvm.h
@@ -0,0 +1,23 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+
+static inline unsigned long hvm_get_parameter(int idx)
+{
+ struct xen_hvm_param xhv;
+ int r;
+
+ xhv.domid = DOMID_SELF;
+ xhv.index = idx;
+ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+ if (r < 0) {
+ printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
+ idx, r);
+ return 0;
+ }
+ return xhv.value;
+}
+
+#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 0000000..7c74ba4
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,72 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
+ domid_t domid; /* IN */
+ uint32_t index; /* IN */
+ uint64_t value; /* IN/OUT */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level 2
+struct xen_hvm_set_pci_intx_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+ uint8_t domain, bus, device, intx;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_intx_level);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level 3
+struct xen_hvm_set_isa_irq_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* ISA device identification, by ISA IRQ (0-15). */
+ uint8_t isa_irq;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_isa_irq_level);
+
+#define HVMOP_set_pci_link_route 4
+struct xen_hvm_set_pci_link_route {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI link identifier (0-3). */
+ uint8_t link;
+ /* ISA IRQ (1-15), or 0 (disable link). */
+ uint8_t isa_irq;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs 5
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 0000000..15d828f
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,111 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ * Domain = val[47:32], Bus = val[31:16],
+ * DevFn = val[15: 8], IntX = val[ 1: 0]
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED 4
+
+#define HVM_PARAM_IOREQ_PFN 5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+#ifdef __ia64__
+
+#define HVM_PARAM_NVRAM_FD 7
+#define HVM_PARAM_VHPT_SIZE 8
+#define HVM_PARAM_BUFPIOREQ_PFN 9
+
+#elif defined(__i386__) || defined(__x86_64__)
+
+/* Expose Viridian interfaces to this HVM guest? */
+#define HVM_PARAM_VIRIDIAN 9
+
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
+ * Do not advance a vcpu's time beyond the correct delivery time for
+ * interrupts that have been missed due to preemption. Deliver missed
+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ * time stepwise for each one.
+ * no_delay_for_missed_ticks:
+ * As above, missed interrupts are delivered, but guest time always tracks
+ * wallclock (i.e., real) time while doing so.
+ * no_missed_ticks_pending:
+ * No missed interrupts are held pending. Instead, to ensure ticks are
+ * delivered at some non-zero rate, if we detect missed ticks then the
+ * internal tick alarm is not disabled if the VCPU is preempted during the
+ * next tick period.
+ * one_missed_tick_pending:
+ * Missed interrupts are collapsed together and delivered as one 'late tick'.
+ * Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE 10
+#define HVMPTM_delay_for_missed_ticks 0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending 2
+#define HVMPTM_one_missed_tick_pending 3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+#define HVM_NR_PARAMS 17
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
--
1.5.4.5
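As an aside, here is a minimal sketch (not part of the patch) of how the new
hvm_op wrapper and hvm_get_parameter() might be used by later patches, for
example to read the xenstore parameters defined in params.h; the helper names
below are hypothetical:

#include <linux/kernel.h>
#include <xen/interface/xen.h>          /* DOMID_SELF */
#include <xen/interface/hvm/hvm_op.h>   /* HVMOP_set_param, struct xen_hvm_param */
#include <asm/xen/hypercall.h>          /* HYPERVISOR_hvm_op() */
#include <xen/hvm.h>                    /* hvm_get_parameter() */

/* Sketch: read the xenstore event channel and pfn parameters. */
static void xen_hvm_print_store_info(void)
{
        unsigned long evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
        unsigned long pfn    = hvm_get_parameter(HVM_PARAM_STORE_PFN);

        printk(KERN_INFO "xenstore: event channel %lu, pfn %lu\n",
               evtchn, pfn);
}

/* Sketch: the matching "set" direction goes through HYPERVISOR_hvm_op. */
static int xen_hvm_set_parameter(int idx, u64 value)
{
        struct xen_hvm_param xhv = {
                .domid = DOMID_SELF,
                .index = idx,
                .value = value,
        };

        return HYPERVISOR_hvm_op(HVMOP_set_param, &xhv);
}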
These definitions will be used by CPUID detection later.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/include/asm/xen/cpuid.h | 68 ++++++++++++++++++++++++++++++++++++++
1 files changed, 68 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/include/asm/xen/cpuid.h
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
new file mode 100644
index 0000000..8787f03
--- /dev/null
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * arch/include/asm/xen/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <[email protected]>
+ */
+
+#ifndef __ASM_X86_XEN_CPUID_H__
+#define __ASM_X86_XEN_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to an including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--
1.5.4.5
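For illustration, a minimal sketch (not part of the patch) of how a guest might
use these constants to identify a Xen host and read the version and feature
words documented above; the function name and messages are hypothetical, and
the next patch does the real detection in xen_para_available():

#include <linux/kernel.h>
#include <asm/processor.h>      /* cpuid() */
#include <asm/xen/cpuid.h>

/* Sketch: probe the Xen CPUID leaves.  Returns 1 if the
 * "XenVMMXenVMM" signature is found at leaf 0x40000000. */
static int xen_cpuid_probe(void)
{
        uint32_t eax, ebx, ecx, edx;

        cpuid(XEN_CPUID_LEAF(0), &eax, &ebx, &ecx, &edx);
        if (ebx != XEN_CPUID_SIGNATURE_EBX ||
            ecx != XEN_CPUID_SIGNATURE_ECX ||
            edx != XEN_CPUID_SIGNATURE_EDX)
                return 0;

        /* Leaf 2 (0x40000001): EAX[31:16] = major, EAX[15:0] = minor. */
        cpuid(XEN_CPUID_LEAF(1), &eax, &ebx, &ecx, &edx);
        printk(KERN_INFO "Detected Xen %u.%u via CPUID\n",
               eax >> 16, eax & 0xffff);

        /* Leaf 3 (0x40000002): ECX = Features 1, EDX = Features 2. */
        cpuid(XEN_CPUID_LEAF(2), &eax, &ebx, &ecx, &edx);
        if (ecx & XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD)
                printk(KERN_INFO "MMU_PT_UPDATE_PRESERVE_AD supported\n");

        return 1;
}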
As we know, PV guests have a performance problem on x86_64: the guest kernel
and guest userspace reside in the same ring, so the TLB flushes required when
switching between guest userspace and guest kernel cause overhead, and much
more syscall overhead is introduced as well. The Hybrid Extension eliminates
this overhead by putting the guest kernel back in (non-root) ring 0, and so
achieves better performance than a PV guest.
The Hybrid Extension guest starts from real mode like an HVM guest, but also
offers component-based PV feature selection (e.g. PV halt, PV timer, event
channel, then PV drivers). So a guest with the Hybrid extension feature can
take advantage of both H/W virtualization and para-virtualization.
This patch introduces Hybrid Extension guest initialization.
The guest detects the Hybrid capability using CPUID 0x40000002.edx, then calls
the HVMOP_enable_hybrid hypercall to enable hybrid support in the hypervisor.
Signed-off-by: Sheng Yang <[email protected]>
Signed-off-by: Yaozu (Eddie) Dong <[email protected]>
---
arch/x86/include/asm/xen/cpuid.h | 7 +++
arch/x86/include/asm/xen/hypervisor.h | 12 +++++
arch/x86/kernel/paravirt.c | 13 +++++-
arch/x86/xen/enlighten.c | 72 +++++++++++++++++++++++++++++++++
include/xen/interface/hvm/hvm_op.h | 8 ++++
5 files changed, 111 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 8787f03..6fa82c0 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -65,4 +65,11 @@
#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID 0
+#define XEN_CPUID_FEAT2_HYBRID (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID_TIMER 1
+#define XEN_CPUID_FEAT2_HYBRID_TIMER (1u<<1)
+#define _XEN_CPUID_FEAT2_HYBRID_EVTCHN 2
+#define XEN_CPUID_FEAT2_HYBRID_EVTCHN (1u<<2)
+
#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d5b7e90..7eee836 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -45,6 +45,7 @@ enum xen_domain_type {
#ifdef CONFIG_XEN
extern enum xen_domain_type xen_domain_type;
+extern void xen_start_hybrid(void);
#else
#define xen_domain_type XEN_NATIVE
#endif
@@ -55,6 +56,17 @@ extern enum xen_domain_type xen_domain_type;
#define xen_hvm_domain() (xen_domain() && \
xen_domain_type == XEN_HVM_DOMAIN)
+#define XEN_HYBRID_ENABLED (1u << 0)
+#define XEN_HYBRID_TIMER_ENABLED (1u << 1)
+#define XEN_HYBRID_EVTCHN_ENABLED (1u << 2)
+extern u32 xen_hybrid_status;
+
+#define xen_hybrid_enabled() (xen_hybrid_status & XEN_HYBRID_ENABLED)
+#define xen_hybrid_timer_enabled() \
+ (xen_hybrid_status & XEN_HYBRID_TIMER_ENABLED)
+#define xen_hybrid_evtchn_enabled() \
+ (xen_hybrid_status & XEN_HYBRID_EVTCHN_ENABLED)
+
#ifdef CONFIG_XEN_DOM0
#include <xen/interface/xen.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 9faf43b..323c98a 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -38,6 +38,10 @@
#include <asm/tlbflush.h>
#include <asm/timer.h>
+#ifdef CONFIG_XEN
+#include <asm/xen/hypervisor.h>
+#endif
+
/* nop stub */
void _paravirt_nop(void)
{
@@ -313,6 +317,13 @@ void arch_flush_lazy_cpu_mode(void)
preempt_enable();
}
+void hybrid_arch_setup(void)
+{
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN)
+ xen_start_hybrid();
+#endif
+}
+
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -323,7 +334,7 @@ struct pv_info pv_info = {
struct pv_init_ops pv_init_ops = {
.patch = native_patch,
.banner = default_banner,
- .arch_setup = paravirt_nop,
+ .arch_setup = hybrid_arch_setup,
.memory_setup = machine_specific_memory_setup,
};
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f09e8c3..b93604e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/hvm/hvm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
@@ -40,6 +41,7 @@
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -1037,3 +1039,73 @@ asmlinkage void __init xen_start_kernel(void)
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
+
+static int xen_para_available(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ cpuid(XEN_CPUID_LEAF(0), &eax, &ebx, &ecx, &edx);
+
+ if (ebx == XEN_CPUID_SIGNATURE_EBX &&
+ ecx == XEN_CPUID_SIGNATURE_ECX &&
+ edx == XEN_CPUID_SIGNATURE_EDX &&
+ ((eax - XEN_CPUID_LEAF(0)) >= 2))
+ return 1;
+
+ return 0;
+}
+
+u32 xen_hybrid_status;
+EXPORT_SYMBOL_GPL(xen_hybrid_status);
+
+static int enable_hybrid(u64 flags)
+{
+ struct xen_hvm_hybrid_type a;
+
+ a.domid = DOMID_SELF;
+ a.flags = flags;
+ return HYPERVISOR_hvm_op(HVMOP_enable_hybrid, &a);
+}
+
+static int init_hybrid_info(void)
+{
+ uint32_t ecx, edx, pages, msr;
+ u64 pfn, flags = 0;
+
+ if (!xen_para_available())
+ return -EINVAL;
+
+ cpuid(XEN_CPUID_LEAF(2), &pages, &msr, &ecx, &edx);
+
+ /* Check if hybrid mode is supported */
+ if (!(edx & XEN_CPUID_FEAT2_HYBRID))
+ return -ENODEV;
+
+ xen_hybrid_status = XEN_HYBRID_ENABLED;
+
+ /* We only support 1 page of hypercall for now */
+ if (pages != 1)
+ return -ENOMEM;
+
+ pfn = __pa(hypercall_page);
+ wrmsrl(msr, pfn);
+
+ xen_setup_features();
+
+ if (enable_hybrid(flags))
+ return -EINVAL;
+
+ return 0;
+}
+
+void __init xen_start_hybrid(void)
+{
+ int r;
+
+ if (!xen_para_available())
+ return;
+
+ r = init_hybrid_info();
+ if (r < 0)
+ return;
+}
+
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index 7c74ba4..936a391 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -69,4 +69,12 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
/* Flushes all VCPU TLBs: @arg must be NULL. */
#define HVMOP_flush_tlbs 5
+#define HVMOP_enable_hybrid 9
+struct xen_hvm_hybrid_type {
+ domid_t domid;
+ uint64_t flags;
+#define HVM_HYBRID_TIMER (1ull<<1)
+#define HVM_HYBRID_EVTCHN (1ull<<2)
+};
+
#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
--
1.5.4.5
Include a Hybrid-specific boot banner.
Signed-off-by: Sheng Yang <[email protected]>
Signed-off-by: Yaozu (Eddie) Dong <[email protected]>
---
arch/x86/xen/enlighten.c | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b93604e..b290d65 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1040,6 +1040,17 @@ asmlinkage void __init xen_start_kernel(void)
#endif
}
+static void __init xen_hybrid_banner(void)
+{
+ unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+ struct xen_extraversion extra;
+ HYPERVISOR_xen_version(XENVER_extraversion, &extra);
+
+ printk(KERN_INFO "Booting hybrid kernel on %s\n", pv_info.name);
+ printk(KERN_INFO "Xen version: %d.%d%s\n",
+ version >> 16, version & 0xffff, extra.extraversion);
+}
+
static int xen_para_available(void)
{
uint32_t eax, ebx, ecx, edx;
@@ -1094,6 +1105,10 @@ static int init_hybrid_info(void)
if (enable_hybrid(flags))
return -EINVAL;
+ pv_init_ops.banner = xen_hybrid_banner;
+ pv_info = xen_info;
+ pv_info.kernel_rpl = 0;
+
return 0;
}
--
1.5.4.5
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/xen/enlighten.c | 2 ++
arch/x86/xen/irq.c | 10 ++++++++++
arch/x86/xen/xen-ops.h | 1 +
3 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b290d65..b95c696 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1122,5 +1122,7 @@ void __init xen_start_hybrid(void)
r = init_hybrid_info();
if (r < 0)
return;
+
+ xen_hybrid_init_irq_ops();
}
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index cfd1779..52885c1 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -98,6 +98,10 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable);
static void xen_safe_halt(void)
{
+ /* Do local_irq_enable() explicitly in hybrid guest */
+ if (xen_hybrid_enabled())
+ local_irq_enable();
+
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
BUG();
@@ -130,3 +134,9 @@ void __init xen_init_irq_ops()
{
pv_irq_ops = xen_irq_ops;
}
+
+void __init xen_hybrid_init_irq_ops(void)
+{
+ pv_irq_ops.safe_halt = xen_safe_halt;
+ pv_irq_ops.halt = xen_halt;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index ca6596b..9bb90d5 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -43,6 +43,7 @@ void xen_vcpu_restore(void);
void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
+void xen_hybrid_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_teardown_timer(int cpu);
cycle_t xen_clocksource_read(void);
--
1.5.4.5
Reserve a shared_info page in xen-head.S, in the same way as the hypercall
page. Note that we will modify the shared_info page, so it is placed in the
data section.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/xen/enlighten.c | 25 +++++++++++++++++++++++++
arch/x86/xen/xen-head.S | 6 ++++++
2 files changed, 31 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b95c696..b6751ed 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/memory.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/features.h>
#include <xen/page.h>
@@ -1112,6 +1113,28 @@ static int init_hybrid_info(void)
return 0;
}
+extern struct shared_info shared_info_page;
+
+static int __init init_shared_info(void)
+{
+ struct xen_add_to_physmap xatp;
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = __pa(&shared_info_page) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+
+ HYPERVISOR_shared_info = (struct shared_info *)&shared_info_page;
+
+ /* Don't do the full vcpu_info placement stuff until we have a
+ possible map and a non-dummy shared_info. */
+ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+
+ return 0;
+}
+
void __init xen_start_hybrid(void)
{
int r;
@@ -1124,5 +1147,7 @@ void __init xen_start_hybrid(void)
return;
xen_hybrid_init_irq_ops();
+
+ init_shared_info();
}
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24..26041ce 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -33,6 +33,12 @@ ENTRY(hypercall_page)
.skip PAGE_SIZE_asm
.popsection
+.pushsection .data
+ .align PAGE_SIZE_asm
+ENTRY(shared_info_page)
+ .skip PAGE_SIZE_asm
+.popsection
+
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
--
1.5.4.5
If we have the event channel, we can use VIRQ_TIMER to deliver the timer
interrupt; otherwise we reuse IRQ0.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/xen/enlighten.c | 14 ++++++++++++++
arch/x86/xen/time.c | 18 +++++++++++++++---
2 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index b6751ed..18aba22 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -53,6 +53,8 @@
#include <asm/tlbflush.h>
#include <asm/reboot.h>
+#include <xen/hvm.h>
+
#include "xen-ops.h"
#include "mmu.h"
#include "multicalls.h"
@@ -1050,6 +1052,9 @@ static void __init xen_hybrid_banner(void)
printk(KERN_INFO "Booting hybrid kernel on %s\n", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s\n",
version >> 16, version & 0xffff, extra.extraversion);
+
+ if (xen_hybrid_timer_enabled())
+ printk(KERN_INFO "Hybrid feature: PV Timer enabled\n");
}
static int xen_para_available(void)
@@ -1093,6 +1098,10 @@ static int init_hybrid_info(void)
return -ENODEV;
xen_hybrid_status = XEN_HYBRID_ENABLED;
+ if (edx & XEN_CPUID_FEAT2_HYBRID_TIMER) {
+ xen_hybrid_status |= XEN_HYBRID_TIMER_ENABLED;
+ flags |= HVM_HYBRID_TIMER;
+ }
/* We only support 1 page of hypercall for now */
if (pages != 1)
@@ -1149,5 +1158,10 @@ void __init xen_start_hybrid(void)
xen_hybrid_init_irq_ops();
init_shared_info();
+
+ if (xen_hybrid_timer_enabled()) {
+ pv_time_ops = xen_time_ops;
+ pv_apic_ops = xen_apic_ops;
+ }
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0a5aa44..381f135 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -421,6 +421,13 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
return ret;
}
+static struct irqaction xen_timer_irq0 = {
+ .handler = xen_timer_interrupt,
+ .flags = IRQF_DISABLED | IRQF_IRQPOLL |
+ IRQF_NOBALANCING | IRQF_TIMER,
+ .name = "timer"
+};
+
void xen_setup_timer(int cpu)
{
const char *name;
@@ -433,9 +440,14 @@ void xen_setup_timer(int cpu)
if (!name)
name = "<timer kasprintf failed>";
- irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
- IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
- name, NULL);
+ if (xen_hybrid_timer_enabled() && !xen_hybrid_evtchn_enabled()) {
+ irq = 0;
+ setup_irq(0, &xen_timer_irq0);
+ } else
+ irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu,
+ xen_timer_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ name, NULL);
evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
--
1.5.4.5
A Xen hybrid guest doesn't use the LAPIC, but smp_generic_interrupt() assumes
all its callers use the LAPIC.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/kernel/irq.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c3fe010..a887d25 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -236,7 +236,8 @@ void smp_generic_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- ack_APIC_irq();
+ if (!disable_apic)
+ ack_APIC_irq();
exit_idle();
--
1.5.4.5
We map each IOAPIC pin to a VIRQ, so that we can deliver interrupts through
these VIRQs.
We also use GENERIC_INTERRUPT_VECTOR as the notification vector for the
hypervisor to notify the guest about events.
With this, we don't need the IOAPIC/LAPIC any more...
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/kernel/smpboot.c | 14 ++++++++++++
arch/x86/xen/enlighten.c | 49 +++++++++++++++++++++++++++++++++++++++++++
arch/x86/xen/irq.c | 15 +++++++++++-
drivers/xen/events.c | 47 +++++++++++++++++++++++++++++++++++++++++
include/xen/events.h | 1 +
include/xen/hvm.h | 5 ++++
include/xen/interface/xen.h | 6 ++++-
7 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58d24ef..39c1890 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -67,6 +67,10 @@
#include <asm/smpboot_hooks.h>
+#ifdef CONFIG_XEN
+#include <asm/xen/hypervisor.h>
+#endif
+
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
static int low_mappings;
@@ -1062,6 +1066,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);
+#ifdef CONFIG_XEN
+ if (xen_hybrid_evtchn_enabled())
+ goto out;
+#endif
+
enable_IR_x2apic();
#ifdef CONFIG_X86_64
default_setup_apic_routing();
@@ -1131,6 +1140,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
{
pr_debug("Boot done.\n");
+#ifdef CONFIG_XEN
+ if (xen_hybrid_evtchn_enabled())
+ return;
+#endif
+
impress_friends();
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 18aba22..f515584 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -54,6 +54,10 @@
#include <asm/reboot.h>
#include <xen/hvm.h>
+#include <xen/events.h>
+#include <asm/acpi.h>
+#include <asm/irq_vectors.h>
+#include <asm/irq.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -1055,6 +1059,8 @@ static void __init xen_hybrid_banner(void)
if (xen_hybrid_timer_enabled())
printk(KERN_INFO "Hybrid feature: PV Timer enabled\n");
+ if (xen_hybrid_evtchn_enabled())
+ printk(KERN_INFO "Hybrid feature: Event channel enabled\n");
}
static int xen_para_available(void)
@@ -1102,6 +1108,10 @@ static int init_hybrid_info(void)
xen_hybrid_status |= XEN_HYBRID_TIMER_ENABLED;
flags |= HVM_HYBRID_TIMER;
}
+ if (edx & XEN_CPUID_FEAT2_HYBRID_EVTCHN) {
+ xen_hybrid_status |= XEN_HYBRID_EVTCHN_ENABLED;
+ flags |= HVM_HYBRID_EVTCHN;
+ }
/* We only support 1 page of hypercall for now */
if (pages != 1)
@@ -1144,9 +1154,27 @@ static int __init init_shared_info(void)
return 0;
}
+static int set_callback_via(uint64_t via)
+{
+ struct xen_hvm_param a;
+
+ a.domid = DOMID_SELF;
+ a.index = HVM_PARAM_CALLBACK_IRQ;
+ a.value = via;
+ return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+}
+
+void do_hybrid_intr(void)
+{
+ per_cpu(irq_count, smp_processor_id())++;
+ xen_evtchn_do_upcall(get_irq_regs());
+ per_cpu(irq_count, smp_processor_id())--;
+}
+
void __init xen_start_hybrid(void)
{
int r;
+ uint64_t callback_via;
if (!xen_para_available())
return;
@@ -1163,5 +1191,26 @@ void __init xen_start_hybrid(void)
pv_time_ops = xen_time_ops;
pv_apic_ops = xen_apic_ops;
}
+
+ if (xen_hybrid_evtchn_enabled()) {
+ pv_apic_ops = xen_apic_ops;
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * set up the basic apic ops.
+ */
+ set_xen_basic_apic_ops();
+#endif
+
+ callback_via = HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR);
+ set_callback_via(callback_via);
+
+ generic_interrupt_extension = do_hybrid_intr;
+
+ disable_acpi();
+ disable_apic = 1;
+
+ machine_ops = xen_machine_ops;
+ smp_ops.smp_send_stop = paravirt_nop;
+ }
}
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 52885c1..edca1c4 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -66,6 +66,9 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
static void xen_irq_disable(void)
{
+ if (xen_hybrid_evtchn_enabled())
+ asm volatile("cli" : : : "memory");
+
/* There's a one instruction preempt window here. We need to
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
@@ -79,6 +82,9 @@ static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
+ if (xen_hybrid_evtchn_enabled())
+ asm volatile("sti" : : : "memory");
+
/* We don't need to worry about being preempted here, since
either a) interrupts are disabled, so no preemption, or b)
the caller is confused and is trying to re-enable interrupts
@@ -137,6 +143,11 @@ void __init xen_init_irq_ops()
void __init xen_hybrid_init_irq_ops(void)
{
- pv_irq_ops.safe_halt = xen_safe_halt;
- pv_irq_ops.halt = xen_halt;
+ if (!xen_hybrid_evtchn_enabled()) {
+ pv_irq_ops.safe_halt = xen_safe_halt;
+ pv_irq_ops.halt = xen_halt;
+ } else {
+ pv_irq_ops = xen_irq_ops;
+ pv_irq_ops.adjust_exception_frame = paravirt_nop;
+ }
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 30963af..4973b70 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -40,6 +40,8 @@
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <asm/desc.h>
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -931,4 +933,49 @@ void __init xen_init_IRQ(void)
mask_evtchn(i);
irq_ctx_init(smp_processor_id());
+
+ if (!xen_hybrid_evtchn_enabled())
+ return;
+
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
+ struct evtchn_bind_virq bind_virq;
+ struct irq_desc *desc = irq_to_desc(i);
+ int virq, evtchn;
+
+ virq = i + VIRQ_EMUL_PIN_START;
+ bind_virq.virq = virq;
+ bind_virq.vcpu = 0;
+
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+
+ evtchn = bind_virq.port;
+ evtchn_to_irq[evtchn] = i;
+ irq_info[i] = mk_virq_info(evtchn, virq);
+
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ set_irq_chip_and_handler_name(i, &xen_dynamic_chip,
+ handle_level_irq, "event");
+ }
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != IA32_SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ /* generic IPI for platform specific use, now used for hybrid */
+ alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
}
diff --git a/include/xen/events.h b/include/xen/events.h
index 0d5f1ad..a21c68f 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -53,4 +53,5 @@ bool xen_test_irq_pending(int irq);
irq will be disabled so it won't deliver an interrupt. */
void xen_poll_irq(int irq);
+void xen_evtchn_do_upcall(struct pt_regs *regs);
#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
index 4ea8887..c66d788 100644
--- a/include/xen/hvm.h
+++ b/include/xen/hvm.h
@@ -20,4 +20,9 @@ static inline unsigned long hvm_get_parameter(int idx)
return xhv.value;
}
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..9282ff7 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -90,7 +90,11 @@
#define VIRQ_ARCH_6 22
#define VIRQ_ARCH_7 23
-#define NR_VIRQS 24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_NUM 16
+
+#define NR_VIRQS 40
+
/*
* MMU-UPDATE REQUESTS
*
--
1.5.4.5
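For reference, a minimal sketch (not part of the patch) of how the callback
'via' value installed by set_callback_via() above is composed: the top byte of
the HVM_PARAM_CALLBACK_IRQ value selects the delivery type, and the vector type
(2) added to include/xen/hvm.h carries the interrupt vector in the low bits.
The helper name below is hypothetical:

#include <xen/interface/xen.h>          /* DOMID_SELF */
#include <xen/interface/hvm/hvm_op.h>   /* HVMOP_set_param, struct xen_hvm_param */
#include <xen/interface/hvm/params.h>   /* HVM_PARAM_CALLBACK_IRQ */
#include <asm/xen/hypercall.h>          /* HYPERVISOR_hvm_op() */
#include <xen/hvm.h>                    /* HVM_CALLBACK_VECTOR() */

/* Sketch: install a vector-type event-channel callback, mirroring
 * set_callback_via(HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR)) in
 * the patch.  val[63:56] = HVM_CALLBACK_VIA_TYPE_VECTOR (2), and the
 * low bits hold the vector number the hypervisor will inject. */
static int hybrid_install_callback_vector(int vector)
{
        struct xen_hvm_param a = {
                .domid = DOMID_SELF,
                .index = HVM_PARAM_CALLBACK_IRQ,
                .value = HVM_CALLBACK_VECTOR(vector),
        };

        return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
}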
Now the vnif and vbd drivers can work.
Note that one memory region (0xfbfe0000ul - 0xfc000000ul) is reserved in the
BIOS E820 table. This memory region is used for the grant table.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/include/asm/xen/hypervisor.h | 3 +
arch/x86/xen/enlighten.c | 1 +
drivers/block/xen-blkfront.c | 3 +
drivers/input/xen-kbdfront.c | 4 ++
drivers/net/xen-netfront.c | 3 +
drivers/video/xen-fbfront.c | 4 ++
drivers/xen/grant-table.c | 67 ++++++++++++++++++++++++++++++++-
drivers/xen/xenbus/xenbus_probe.c | 23 ++++++++++-
8 files changed, 103 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 7eee836..267fa43 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -41,6 +41,7 @@ enum xen_domain_type {
XEN_NATIVE, /* running on bare hardware */
XEN_PV_DOMAIN, /* running in a PV domain */
XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
+ XEN_HYBRID_DOMAIN,
};
#ifdef CONFIG_XEN
@@ -55,6 +56,8 @@ extern void xen_start_hybrid(void);
xen_domain_type == XEN_PV_DOMAIN)
#define xen_hvm_domain() (xen_domain() && \
xen_domain_type == XEN_HVM_DOMAIN)
+#define xen_hybrid_domain() (xen_domain() && \
+ xen_domain_type == XEN_HYBRID_DOMAIN)
#define XEN_HYBRID_ENABLED (1u << 0)
#define XEN_HYBRID_TIMER_ENABLED (1u << 1)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index f515584..6c4f0c1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1129,6 +1129,7 @@ static int init_hybrid_info(void)
pv_info = xen_info;
pv_info.kernel_rpl = 0;
+ xen_domain_type = XEN_HYBRID_DOMAIN;
return 0;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a6cbf7b..14188ed 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1072,6 +1072,9 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
XENVBD_MAJOR, DEV_NAME);
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index 928d2ed..a82b6aa 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -338,6 +338,10 @@ static int __init xenkbd_init(void)
if (!xen_domain())
return -ENODEV;
+ /* Xen Hybrid domain don't need vkbd */
+ if (xen_hybrid_domain())
+ return -ENODEV;
+
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index f673253..3ad2c45 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1806,6 +1806,9 @@ static int __init netif_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
if (xen_initial_domain())
return 0;
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 2493f05..b423248 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -683,6 +683,10 @@ static int __init xenfb_init(void)
if (!xen_domain())
return -ENODEV;
+ /* Don't enable vfb in Xen hybrid domain */
+ if (xen_hybrid_domain())
+ return -ENODEV;
+
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7d8f531..3e4ac4a 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -45,6 +45,9 @@
#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
+#include <xen/interface/memory.h>
+#include <linux/io.h>
+#include <asm/e820.h>
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
@@ -440,12 +443,33 @@ static inline unsigned int max_nr_grant_frames(void)
return xen_max;
}
+static unsigned long hybrid_resume_frames;
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
unsigned long *frames;
unsigned int nr_gframes = end_idx + 1;
int rc;
+ struct xen_add_to_physmap xatp;
+ unsigned int i = end_idx;
+
+ if (xen_hybrid_domain() && xen_hybrid_evtchn_enabled()) {
+ /*
+ * Loop backwards, so that the first hypercall has the largest
+ * index, ensuring that the table will grow only once.
+ */
+ do {
+ xatp.domid = DOMID_SELF;
+ xatp.idx = i;
+ xatp.space = XENMAPSPACE_grant_table;
+ xatp.gpfn = (hybrid_resume_frames >> PAGE_SHIFT) + i;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+ } while (i-- > start_idx);
+
+ return 0;
+ }
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
@@ -472,11 +496,47 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return 0;
}
+#define GNTTAB_START 0xfbfe0000ul
+#define GNTTAB_SIZE 0x20000ul
+
int gnttab_resume(void)
{
- if (max_nr_grant_frames() < nr_grant_frames)
+ unsigned int max_nr_gframes;
+
+ max_nr_gframes = max_nr_grant_frames();
+ if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- return gnttab_map(0, nr_grant_frames - 1);
+
+ if (!(xen_hybrid_domain() && xen_hybrid_evtchn_enabled()))
+ return gnttab_map(0, nr_grant_frames - 1);
+
+ if (!hybrid_resume_frames) {
+ /* Check if e820 reserved the related region */
+ if (!e820_all_mapped(GNTTAB_START,
+ GNTTAB_START + GNTTAB_SIZE, 2)) {
+ printk(KERN_WARNING
+ "Fail to found grant table region in e820!\n");
+ return -ENODEV;
+ }
+ if (PAGE_SIZE * max_nr_gframes > GNTTAB_SIZE) {
+ printk(KERN_WARNING
+ "Grant table size exceed the limit!\n");
+ return -EINVAL;
+ }
+
+ hybrid_resume_frames = GNTTAB_START;
+ shared = ioremap(hybrid_resume_frames,
+ PAGE_SIZE * max_nr_gframes);
+ if (shared == NULL) {
+ printk(KERN_WARNING
+ "Fail to ioremap gnttab share frames\n");
+ return -ENOMEM;
+ }
+ }
+
+ gnttab_map(0, nr_grant_frames - 1);
+
+ return 0;
}
int gnttab_suspend(void)
@@ -512,6 +572,9 @@ static int __devinit gnttab_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 773d1cf..4b87605 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -53,6 +53,8 @@
#include <xen/events.h>
#include <xen/page.h>
+#include <xen/hvm.h>
+
#include "xenbus_comms.h"
#include "xenbus_probe.h"
@@ -828,6 +830,9 @@ static int __init xenbus_probe_init(void)
if (!xen_domain())
goto out_error;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ goto out_error;
+
/* Register ourselves with the kernel bus subsystem */
err = bus_register(&xenbus_frontend.bus);
if (err)
@@ -844,10 +849,19 @@ static int __init xenbus_probe_init(void)
/* dom0 not yet supported */
} else {
xenstored_ready = 1;
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_hybrid_domain()) {
+ xen_store_evtchn =
+ hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+ xen_store_mfn =
+ hvm_get_parameter(HVM_PARAM_STORE_PFN);
+ xen_store_interface =
+ ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ } else {
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+ }
}
- xen_store_interface = mfn_to_virt(xen_store_mfn);
/* Initialize the interface to xenstore. */
err = xs_init();
@@ -959,6 +973,9 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
if (!ready_to_wait_for_devices || !xen_domain())
return;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return;
+
while (exists_disconnected_device(drv)) {
if (time_after(jiffies, timeout))
break;
--
1.5.4.5
[Sheng Yang - Wed, Sep 16, 2009 at 04:42:29PM +0800]
| Xen hybrid guest don't use lapic, but smp_generic_interrupt() assume all it's
| caller using lapic.
|
| Signed-off-by: Sheng Yang <[email protected]>
| ---
| arch/x86/kernel/irq.c | 3 ++-
| 1 files changed, 2 insertions(+), 1 deletions(-)
|
| diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
| index c3fe010..a887d25 100644
| --- a/arch/x86/kernel/irq.c
| +++ b/arch/x86/kernel/irq.c
| @@ -236,7 +236,8 @@ void smp_generic_interrupt(struct pt_regs *regs)
| {
| struct pt_regs *old_regs = set_irq_regs(regs);
|
| - ack_APIC_irq();
| + if (!disable_apic)
| + ack_APIC_irq();
|
| exit_idle();
Hi Sheng,
is there was some problem with it? I'm asking you
because if disable_apic=1 then any apic write/read
operations become NOPs. So I don't see how it may
hurt. But I could be missing something.
-- Cyrill
[Cyrill Gorcunov - Wed, Sep 16, 2009 at 12:58:35PM +0400]
...
|
| Hi Sheng,
|
| is there was some problem with it? I'm asking you
| because if disable_apic=1 then any apic write/read
| operations become NOPs. So I don't see how it may
| hurt. But I could be missing something.
|
| -- Cyrill
Ah, I see -- it's due to your other patch...
Hmm this makes all "disable apic" idea less
general. And safety of ack_APIC_irq is now
under suspicious.
-- Cyrill
[Cyrill Gorcunov - Wed, Sep 16, 2009 at 01:03:06PM +0400]
| [Cyrill Gorcunov - Wed, Sep 16, 2009 at 12:58:35PM +0400]
| ...
| |
| | Hi Sheng,
| |
| | is there was some problem with it? I'm asking you
| | because if disable_apic=1 then any apic write/read
| | operations become NOPs. So I don't see how it may
| | hurt. But I could be missing something.
| |
| | -- Cyrill
|
| Ah, I see -- it's due to your other patch...
| Hmm this makes all "disable apic" idea less
| general. And safety of ack_APIC_irq is now
| under suspicious.
|
| -- Cyrill
And how msi_compose_msg would work then?
Don't get me wrong please, I'm just trying to understand.
Perhaps Xen specifics will handle it (I didn't read Xen
internals) by substituting all this with own handler.
Since comments are requested I thought I may ask? :)
-- Cyrill
On Wed, Sep 16, 2009 at 04:42:21PM +0800, Sheng Yang wrote:
> Hi, Keir & Jeremy
>
> This patchset enabled Xen Hybrid extension support.
>
> As we know that PV guest have performance issue with x86_64 that guest kernel
> and userspace resistent in the same ring, then the necessary TLB flushes when
> switch between guest userspace and guest kernel cause overhead, and much more
> syscall overhead is also introduced. The Hybrid Extension estimated these
> overhead by putting guest kernel back in (non-root) ring0 then achieve the better
> performance than PV guest.
What was the overhead? Is there a step-by-step list of operations you did
to figure out the performance numbers?
I am asking this b/c at some point I would like to compare the pv-ops vs native
and I am not entirely sure what is the best way to do this.
On 09/16/09 01:42, Sheng Yang wrote:
> As we know that PV guest have performance issue with x86_64 that guest kernel
> and userspace resistent in the same ring, then the necessary TLB flushes when
> switch between guest userspace and guest kernel cause overhead, and much more
> syscall overhead is also introduced. The Hybrid Extension estimated these
> overhead by putting guest kernel back in (non-root) ring0 then achieve the
> better performance than PV guest.
>
> The Hybrid Extension is started from real mode like HVM guest, but also with a
> component based PV feature selection(e.g. PV halt, PV timer, event channel,
> then PV drivers). So guest with Hybrid extension feature can takes the
> advantages of both H/W virtualization and Para-Virtualization.
>
> This patch introduced the Hybrid Extension guest initialization.
>
> Guest would detect Hybrid capability using CPUID 0x40000002.edx, then call
> HVMOP_enable_hybrid hypercall to enable hybrid support in hypervisor.
>
I think having an option to put PV guests into an HVM container is a
good one, but as I mentioned in the other mail, I don't think this is
the right approach.
It would be much better to make it so that an unmodified guest works in
such a mode; even with no specific optimisations the guest would get
benefit from faster kernel<->usermode switches.
Then we can add specific optimisations to take advantage of, say,
running in ring 0 (=fast syscalls) and having access to HAP hardware
(=direct pagetable updates, no pinning).
J
On 09/16/09 01:42, Sheng Yang wrote:
> If we have event channel, we can use VIRQ_TIMER to deliver timer interrupt,
> otherwise we would reuse IRQ0.
>
What's the purpose of this? Why can't we get a timer virq via the event
channel as usual? I'm very dubious about having any platform
interrupts/apic interaction in this code.
J
On 09/16/09 01:42, Sheng Yang wrote:
> We mapped each IOAPIC pin to a VIRQ, so that we can deliver interrupt through
> these VIRQs.
>
> We also use GENERIC_INTERRUPT_VECTOR as the noficiation vector for hypervisor
> to notify guest about the event.
>
> Then we don't need IOAPIC/LAPIC now...
>
I commented a bit more below, but this patch is pretty unpleasant. It
certainly can't be used in this form.
> Signed-off-by: Sheng Yang <[email protected]>
> ---
> arch/x86/kernel/smpboot.c | 14 ++++++++++++
> arch/x86/xen/enlighten.c | 49 +++++++++++++++++++++++++++++++++++++++++++
> arch/x86/xen/irq.c | 15 +++++++++++-
> drivers/xen/events.c | 47 +++++++++++++++++++++++++++++++++++++++++
> include/xen/events.h | 1 +
> include/xen/hvm.h | 5 ++++
> include/xen/interface/xen.h | 6 ++++-
> 7 files changed, 134 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 58d24ef..39c1890 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -67,6 +67,10 @@
>
> #include <asm/smpboot_hooks.h>
>
> +#ifdef CONFIG_XEN
> +#include <asm/xen/hypervisor.h>
> +#endif
> +
> #ifdef CONFIG_X86_32
> u8 apicid_2_node[MAX_APICID];
> static int low_mappings;
> @@ -1062,6 +1066,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
> }
> set_cpu_sibling_map(0);
>
> +#ifdef CONFIG_XEN
> + if (xen_hybrid_evtchn_enabled())
> + goto out;
> +#endif
> +
> enable_IR_x2apic();
> #ifdef CONFIG_X86_64
> default_setup_apic_routing();
> @@ -1131,6 +1140,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
> {
> pr_debug("Boot done.\n");
>
> +#ifdef CONFIG_XEN
> + if (xen_hybrid_evtchn_enabled())
> + return;
> +#endif
>
These changes will never fly. I'm aggressively moving away from making
any Xen-specific changes in core files for dom0; I don't want to add any
more for a hybrid mode. (I'd really prefer not to have a hybrid mode at
all.)
> +
> impress_friends();
> #ifdef CONFIG_X86_IO_APIC
> setup_ioapic_dest();
> diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> index 18aba22..f515584 100644
> --- a/arch/x86/xen/enlighten.c
> +++ b/arch/x86/xen/enlighten.c
> @@ -54,6 +54,10 @@
> #include <asm/reboot.h>
>
> #include <xen/hvm.h>
> +#include <xen/events.h>
> +#include <asm/acpi.h>
> +#include <asm/irq_vectors.h>
> +#include <asm/irq.h>
>
> #include "xen-ops.h"
> #include "mmu.h"
> @@ -1055,6 +1059,8 @@ static void __init xen_hybrid_banner(void)
>
> if (xen_hybrid_timer_enabled())
> printk(KERN_INFO "Hybrid feature: PV Timer enabled\n");
> + if (xen_hybrid_evtchn_enabled())
> + printk(KERN_INFO "Hybrid feature: Event channel enabled\n");
> }
>
> static int xen_para_available(void)
> @@ -1102,6 +1108,10 @@ static int init_hybrid_info(void)
> xen_hybrid_status |= XEN_HYBRID_TIMER_ENABLED;
> flags |= HVM_HYBRID_TIMER;
> }
> + if (edx & XEN_CPUID_FEAT2_HYBRID_EVTCHN) {
> + xen_hybrid_status |= XEN_HYBRID_EVTCHN_ENABLED;
> + flags |= HVM_HYBRID_EVTCHN;
> + }
>
> /* We only support 1 page of hypercall for now */
> if (pages != 1)
> @@ -1144,9 +1154,27 @@ static int __init init_shared_info(void)
> return 0;
> }
>
> +static int set_callback_via(uint64_t via)
> +{
> + struct xen_hvm_param a;
> +
> + a.domid = DOMID_SELF;
> + a.index = HVM_PARAM_CALLBACK_IRQ;
> + a.value = via;
> + return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
> +}
> +
> +void do_hybrid_intr(void)
> +{
> + per_cpu(irq_count, smp_processor_id())++;
> + xen_evtchn_do_upcall(get_irq_regs());
> + per_cpu(irq_count, smp_processor_id())--;
> +}
> +
> void __init xen_start_hybrid(void)
> {
> int r;
> + uint64_t callback_via;
>
> if (!xen_para_available())
> return;
> @@ -1163,5 +1191,26 @@ void __init xen_start_hybrid(void)
> pv_time_ops = xen_time_ops;
> pv_apic_ops = xen_apic_ops;
> }
> +
> + if (xen_hybrid_evtchn_enabled()) {
> + pv_apic_ops = xen_apic_ops;
> +#ifdef CONFIG_X86_LOCAL_APIC
> + /*
> + * set up the basic apic ops.
> + */
> + set_xen_basic_apic_ops();
> +#endif
> +
> + callback_via = HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR);
> + set_callback_via(callback_via);
> +
> + generic_interrupt_extension = do_hybrid_intr;
> +
> + disable_acpi();
> + disable_apic = 1;
> +
> + machine_ops = xen_machine_ops;
> + smp_ops.smp_send_stop = paravirt_nop;
> + }
> }
>
> diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> index 52885c1..edca1c4 100644
> --- a/arch/x86/xen/irq.c
> +++ b/arch/x86/xen/irq.c
> @@ -66,6 +66,9 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
>
> static void xen_irq_disable(void)
> {
> + if (xen_hybrid_evtchn_enabled())
> + asm volatile("cli" : : : "memory");
>
!!! We have pvops for a reason. If you want to override irq_disable,
define a new pvop function.
J
On Wednesday 16 September 2009 17:37:31 Cyrill Gorcunov wrote:
> [Cyrill Gorcunov - Wed, Sep 16, 2009 at 01:03:06PM +0400]
>
> | [Cyrill Gorcunov - Wed, Sep 16, 2009 at 12:58:35PM +0400]
> | ...
> |
> | | Hi Sheng,
> | |
> | | is there was some problem with it? I'm asking you
> | | because if disable_apic=1 then any apic write/read
> | | operations become NOPs. So I don't see how it may
> | | hurt. But I could be missing something.
> | |
> | | -- Cyrill
> |
> | Ah, I see -- it's due to your other patch...
> | Hmm this makes all "disable apic" idea less
> | general. And safety of ack_APIC_irq is now
> | under suspicious.
Um, probably. I've seen an ack_APIC_irq() in do_IRQ when handle_irq() fails.
It seems the assumption that ack_APIC_irq() is always safe is there. I will check
if I can make it more elegant - maybe disable the warning in the Xen code...
> |
> | -- Cyrill
>
> And how msi_compose_msg would work then?
As you guessed, Xen also uses the event channel to handle it for the guest (for
what we call "passthrough devices"): the real interrupt is delivered to Xen, then
forwarded through the event channel to the guest.
>
> Don't get me wrong please, I'm just trying to understand.
> Perhaps Xen specifics will handle it (I didn't read Xen
> internals) by substituting all this with own handler.
>
> Since comments are requested I thought I may ask? :)
Oh, never mind. Glad to see your comments. :)
--
regards
Yang, Sheng
On Thursday 17 September 2009 04:25:30 Jeremy Fitzhardinge wrote:
> On 09/16/09 01:42, Sheng Yang wrote:
> > If we have event channel, we can use VIRQ_TIMER to deliver timer
> > interrupt, otherwise we would reuse IRQ0.
>
> What's the purpose of this? Why can we get a timer virq via event
> channel as usual? I'm very dubious about having any platform
> interrupts/apic interaction in this code.
>
> J
Before this patch, we didn't enable the event channel for the hybrid guest, but
I think it's reasonable to enable it. I will try separating the event channel
patch into two: one for generic usage, and another one for the QEmu devices.
--
regards
Yang, Sheng
On Thursday 17 September 2009 04:35:06 Jeremy Fitzhardinge wrote:
> On 09/16/09 01:42, Sheng Yang wrote:
> > We mapped each IOAPIC pin to a VIRQ, so that we can deliver interrupt
> > through these VIRQs.
> >
> > We also use GENERIC_INTERRUPT_VECTOR as the noficiation vector for
> > hypervisor to notify guest about the event.
> >
> > Then we don't need IOAPIC/LAPIC now...
>
> I commented a bit more below, but this patch is pretty unpleasant. It
> certainly can't be used in this form.
Thanks for the comments. I will try to limit the modifications to Xen-specific
files as much as possible.
> > Signed-off-by: Sheng Yang <[email protected]>
> > ---
> > arch/x86/kernel/smpboot.c | 14 ++++++++++++
> > arch/x86/xen/enlighten.c | 49
> > +++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/irq.c |
> > 15 +++++++++++-
> > drivers/xen/events.c | 47
> > +++++++++++++++++++++++++++++++++++++++++ include/xen/events.h |
> > 1 +
> > include/xen/hvm.h | 5 ++++
> > include/xen/interface/xen.h | 6 ++++-
> > 7 files changed, 134 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> > index 58d24ef..39c1890 100644
> > --- a/arch/x86/kernel/smpboot.c
> > +++ b/arch/x86/kernel/smpboot.c
> > @@ -67,6 +67,10 @@
> >
> > #include <asm/smpboot_hooks.h>
> >
> > +#ifdef CONFIG_XEN
> > +#include <asm/xen/hypervisor.h>
> > +#endif
> > +
> > #ifdef CONFIG_X86_32
> > u8 apicid_2_node[MAX_APICID];
> > static int low_mappings;
> > @@ -1062,6 +1066,11 @@ void __init native_smp_prepare_cpus(unsigned int
> > max_cpus) }
> > set_cpu_sibling_map(0);
> >
> > +#ifdef CONFIG_XEN
> > + if (xen_hybrid_evtchn_enabled())
> > + goto out;
> > +#endif
> > +
> > enable_IR_x2apic();
> > #ifdef CONFIG_X86_64
> > default_setup_apic_routing();
> > @@ -1131,6 +1140,11 @@ void __init native_smp_cpus_done(unsigned int
> > max_cpus) {
> > pr_debug("Boot done.\n");
> >
> > +#ifdef CONFIG_XEN
> > + if (xen_hybrid_evtchn_enabled())
> > + return;
> > +#endif
>
> These changes will never fly. I'm aggressively moving away from making
> any Xen-specific changes in core files for dom0; I don't want to add any
> more for a hybrid mode. (I'd really prefer not to have a hybrid mode at
> all.)
Yes... I will add a pv_ops function for this, though it would duplicate the code.
>
> > +
> > impress_friends();
> > #ifdef CONFIG_X86_IO_APIC
> > setup_ioapic_dest();
> > diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
> > index 18aba22..f515584 100644
> > --- a/arch/x86/xen/enlighten.c
> > +++ b/arch/x86/xen/enlighten.c
> > @@ -54,6 +54,10 @@
> > #include <asm/reboot.h>
> >
> > #include <xen/hvm.h>
> > +#include <xen/events.h>
> > +#include <asm/acpi.h>
> > +#include <asm/irq_vectors.h>
> > +#include <asm/irq.h>
> >
> > #include "xen-ops.h"
> > #include "mmu.h"
> > @@ -1055,6 +1059,8 @@ static void __init xen_hybrid_banner(void)
> >
> > if (xen_hybrid_timer_enabled())
> > printk(KERN_INFO "Hybrid feature: PV Timer enabled\n");
> > + if (xen_hybrid_evtchn_enabled())
> > + printk(KERN_INFO "Hybrid feature: Event channel enabled\n");
> > }
> >
> > static int xen_para_available(void)
> > @@ -1102,6 +1108,10 @@ static int init_hybrid_info(void)
> > xen_hybrid_status |= XEN_HYBRID_TIMER_ENABLED;
> > flags |= HVM_HYBRID_TIMER;
> > }
> > + if (edx & XEN_CPUID_FEAT2_HYBRID_EVTCHN) {
> > + xen_hybrid_status |= XEN_HYBRID_EVTCHN_ENABLED;
> > + flags |= HVM_HYBRID_EVTCHN;
> > + }
> >
> > /* We only support 1 page of hypercall for now */
> > if (pages != 1)
> > @@ -1144,9 +1154,27 @@ static int __init init_shared_info(void)
> > return 0;
> > }
> >
> > +static int set_callback_via(uint64_t via)
> > +{
> > + struct xen_hvm_param a;
> > +
> > + a.domid = DOMID_SELF;
> > + a.index = HVM_PARAM_CALLBACK_IRQ;
> > + a.value = via;
> > + return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
> > +}
> > +
> > +void do_hybrid_intr(void)
> > +{
> > + per_cpu(irq_count, smp_processor_id())++;
> > + xen_evtchn_do_upcall(get_irq_regs());
> > + per_cpu(irq_count, smp_processor_id())--;
> > +}
> > +
> > void __init xen_start_hybrid(void)
> > {
> > int r;
> > + uint64_t callback_via;
> >
> > if (!xen_para_available())
> > return;
> > @@ -1163,5 +1191,26 @@ void __init xen_start_hybrid(void)
> > pv_time_ops = xen_time_ops;
> > pv_apic_ops = xen_apic_ops;
> > }
> > +
> > + if (xen_hybrid_evtchn_enabled()) {
> > + pv_apic_ops = xen_apic_ops;
> > +#ifdef CONFIG_X86_LOCAL_APIC
> > + /*
> > + * set up the basic apic ops.
> > + */
> > + set_xen_basic_apic_ops();
> > +#endif
> > +
> > + callback_via = HVM_CALLBACK_VECTOR(GENERIC_INTERRUPT_VECTOR);
> > + set_callback_via(callback_via);
> > +
> > + generic_interrupt_extension = do_hybrid_intr;
> > +
> > + disable_acpi();
> > + disable_apic = 1;
> > +
> > + machine_ops = xen_machine_ops;
> > + smp_ops.smp_send_stop = paravirt_nop;
> > + }
> > }
> >
> > diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
> > index 52885c1..edca1c4 100644
> > --- a/arch/x86/xen/irq.c
> > +++ b/arch/x86/xen/irq.c
> > @@ -66,6 +66,9 @@ PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
> >
> > static void xen_irq_disable(void)
> > {
> > + if (xen_hybrid_evtchn_enabled())
> > + asm volatile("cli" : : : "memory");
>
> !!! We have pvops for a reason. If you want to override irq_disable,
> define a new pvop function.
OK. (I just think it's inside a Xen file, and the code is somewhat duplicated...)
--
regards
Yang, Sheng
>
> J
On 16/09/2009 21:24, "Jeremy Fitzhardinge" <[email protected]> wrote:
>> Guest would detect Hybrid capability using CPUID 0x40000002.edx, then call
>> HVMOP_enable_hybrid hypercall to enable hybrid support in hypervisor.
>>
>
> I think having an option to put PV guests into an HVM container is a
> good one, but as I mentioned in the other mail, I don't think this is
> the right approach.
>
> It would be much better to make it so that an unmodified guest works in
> such a mode; even with no specific optimisations the guest would get
> benefit from faster kernel<->usermode switches.
By unmodified you mean ordinary PV guest? It's an interesting comparison --
PVing an HVM guest, versus HVMing (to some extent) a PV guest.
-- Keir
> Then we can add specific optimisations to take advantage of, say,
> running in ring 0 (=fast syscalls) and having access to HAP hardware
> (=direct pagetable updates, no pinning).
On Wednesday 16 September 2009 21:31:04 Konrad Rzeszutek Wilk wrote:
> On Wed, Sep 16, 2009 at 04:42:21PM +0800, Sheng Yang wrote:
> > Hi, Keir & Jeremy
> >
> > This patchset enabled Xen Hybrid extension support.
> >
> > As we know that PV guest have performance issue with x86_64 that guest
> > kernel and userspace resistent in the same ring, then the necessary TLB
> > flushes when switch between guest userspace and guest kernel cause
> > overhead, and much more syscall overhead is also introduced. The Hybrid
> > Extension estimated these overhead by putting guest kernel back in
> > (non-root) ring0 then achieve the better performance than PV guest.
>
> What was the overhead? Is there a step-by-step list of operations you did
> to figure out the performance numbers?
The overhead I mentioned is that, in an x86_64 PV guest, every syscall goes to
the hypervisor first, then the hypervisor forwards it to the guest kernel, and
finally the guest kernel returns to guest userspace. Because the hypervisor is
involved, there is certainly overhead, and every transition results in a TLB
flush. In a 32-bit PV guest, the guest uses #int82 to emulate syscalls, which
can specify the privilege level, so the hypervisor doesn't need to be involved.
And sorry, I don't have a step-by-step list for the performance tuning. All of
the above is a known issue with x86_64 PV guests.
>
> I am asking this b/c at some point I would like to compare the pv-ops vs
> native and I am not entirely sure what is the best way to do this.
Sorry, I don't have much advice on this. If you mean tuning, what I can propose
is just running some microbenchmarks (lmbench is a favourite of mine), collecting
(guest) hot functions with xenoprofile, and comparing the results of native and
pv-ops to figure out the gap...
--
regards
Yang, Sheng
On 09/16/09 23:22, Keir Fraser wrote:
>> I think having an option to put PV guests into an HVM container is a
>> good one, but as I mentioned in the other mail, I don't think this is
>> the right approach.
>>
>> It would be much better to make it so that an unmodified guest works in
>> such a mode; even with no specific optimisations the guest would get
>> benefit from faster kernel<->usermode switches.
>>
> By unmodified you mean ordinary PV guest?
Right.
> It's an interesting comparison --
> PVing an HVM guest, versus HVMing (to some extent) a PV guest.
>
KVM is basically using the model of starting with a fully emulated hvm
domain, then adding paravirtualizations as incremental extensions to
that. If you want to go that route, then we may as well just adopt
their interfaces and use the existing kernel support as-is (though their
most useful paravirtualization - time - is adopted from Xen's ABI).
If we want to get a PV kernel which makes use of hvm features, then we
should do the analogous thing in the other direction: use the current PV
ABI as baseline, then add small optional extensions to take advantage of
the HVM container's features.
J