Hi, Jeremy & Keir
Here is the second version of patchset to enable Xen Hybrid extension support
in Linux kernel.
The Hybrid Extension guest is started from real mode like an HVM guest, but also
has a range of PV features (e.g. PV halt, PV timer, event channel, as well as PV
drivers). So a guest with the Hybrid extension feature can take advantage of
both H/W virtualization and Para-Virtualization.
The first two patches of the patchset import several header files from Jeremy's
tree and the Xen tree, with respect to Jeremy's and Keir's work.
The whole patchset is based on Linux upstream.
Currently the patchset supports x86_64 only.
The major change from v1:
1. SMP support.
2. Modify the entry point to avoid most of the generic kernel modifications.
3. Binding PV timer with event channel mechanism.
You need a line like:
cpuid = [ '0x40000002:edx=0x3' ]
in the HVM configuration file to expose the hybrid feature to the guest, and
CONFIG_XEN
in the guest kernel configuration file to enable hybrid support.
The compiled image can then be used as a native/pv domU/hvm guest/hybrid kernel.
Comments are welcome!
BTW: For the MSI/MSI-X support, pv_ops dom0 can share the solution with hybrid.
We will try to figure out an elegant way to deal with it later.
--
regards
Yang, Sheng
--
arch/x86/include/asm/xen/cpuid.h | 73 +++++++++++++
arch/x86/include/asm/xen/hypercall.h | 6 +
arch/x86/kernel/setup.c | 8 ++
arch/x86/xen/enlighten.c | 192 ++++++++++++++++++++++++++++++++++
arch/x86/xen/irq.c | 54 ++++++++++
arch/x86/xen/smp.c | 144 +++++++++++++++++++++++++-
arch/x86/xen/xen-head.S | 6 +
arch/x86/xen/xen-ops.h | 4 +
drivers/block/xen-blkfront.c | 3 +
drivers/input/xen-kbdfront.c | 4 +
drivers/net/xen-netfront.c | 3 +
drivers/video/xen-fbfront.c | 4 +
drivers/xen/events.c | 66 +++++++++++-
drivers/xen/grant-table.c | 67 ++++++++++++-
drivers/xen/xenbus/xenbus_probe.c | 23 ++++-
include/xen/events.h | 1 +
include/xen/hvm.h | 28 +++++
include/xen/interface/hvm/hvm_op.h | 79 ++++++++++++++
include/xen/interface/hvm/params.h | 111 ++++++++++++++++++++
include/xen/interface/xen.h | 6 +-
include/xen/xen.h | 12 ++
21 files changed, 883 insertions(+), 11 deletions(-)
xen_hybrid_init() sets up the Hybrid environment.
Cc: Ingo Molnar <[email protected]>
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/kernel/setup.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f7b8b98..db82295 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -113,6 +113,10 @@
#endif
#include <asm/mce.h>
+#ifdef CONFIG_XEN
+#include <xen/xen.h>
+#endif
+
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -732,6 +736,10 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
+#ifdef CONFIG_XEN
+ xen_hybrid_init();
+#endif
+
setup_memory_map();
parse_setup_data();
/* update the e820_saved too */
--
1.5.4.5
The Hybrid Extension guest is started from real mode like an HVM guest, but also
with a component-based PV feature selection (e.g. PV halt, PV timer, event
channel, then PV drivers). So a guest with the Hybrid extension feature can take
advantage of both H/W virtualization and Para-Virtualization.
This patch introduced the Hybrid Extension guest initialization.
Guest would detect Hybrid capability using CPUID 0x40000002.edx, then call
HVMOP_enable_hybrid hypercall to enable hybrid support in hypervisor.
Signed-off-by: Sheng Yang <[email protected]>
Signed-off-by: Yaozu (Eddie) Dong <[email protected]>
---
arch/x86/include/asm/xen/cpuid.h | 5 ++
arch/x86/xen/enlighten.c | 118 ++++++++++++++++++++++++++++++++++++
arch/x86/xen/irq.c | 21 ++++++
arch/x86/xen/xen-head.S | 6 ++
arch/x86/xen/xen-ops.h | 1 +
include/xen/interface/hvm/hvm_op.h | 7 ++
include/xen/xen.h | 9 +++
7 files changed, 167 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 8787f03..b66fee2 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -65,4 +65,9 @@
#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID 0
+#define XEN_CPUID_FEAT2_HYBRID (1u<<0)
+#define _XEN_CPUID_FEAT2_HYBRID_EVTCHN 1
+#define XEN_CPUID_FEAT2_HYBRID_EVTCHN (1u<<1)
+
#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2b26dd5..2f1a3df 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -34,6 +34,8 @@
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/hvm/hvm_op.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
@@ -43,6 +45,7 @@
#include <asm/page.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -1194,3 +1197,118 @@ asmlinkage void __init xen_start_kernel(void)
x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}
+
+static void __init xen_hybrid_banner(void)
+{
+ unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
+ struct xen_extraversion extra;
+ HYPERVISOR_xen_version(XENVER_extraversion, &extra);
+
+ printk(KERN_INFO "Booting hybrid kernel on %s\n", pv_info.name);
+ printk(KERN_INFO "Xen version: %d.%d%s\n",
+ version >> 16, version & 0xffff, extra.extraversion);
+}
+
+static int xen_para_available(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ cpuid(XEN_CPUID_LEAF(0), &eax, &ebx, &ecx, &edx);
+
+ if (ebx == XEN_CPUID_SIGNATURE_EBX &&
+ ecx == XEN_CPUID_SIGNATURE_ECX &&
+ edx == XEN_CPUID_SIGNATURE_EDX &&
+ ((eax - XEN_CPUID_LEAF(0)) >= 2))
+ return 1;
+
+ return 0;
+}
+
+u32 xen_hybrid_status;
+EXPORT_SYMBOL_GPL(xen_hybrid_status);
+
+static int enable_hybrid(u64 flags)
+{
+ struct xen_hvm_hybrid_type a;
+
+ a.domid = DOMID_SELF;
+ a.flags = flags;
+ return HYPERVISOR_hvm_op(HVMOP_enable_hybrid, &a);
+}
+
+static int init_hybrid_info(void)
+{
+ uint32_t ecx, edx, pages, msr;
+ u64 pfn;
+ u32 flags = 0;
+
+ if (!xen_para_available())
+ return -EINVAL;
+
+ cpuid(XEN_CPUID_LEAF(2), &pages, &msr, &ecx, &edx);
+
+ /* Check if hybrid mode is supported */
+ if (!(edx & XEN_CPUID_FEAT2_HYBRID))
+ return -ENODEV;
+
+ xen_hybrid_status = XEN_HYBRID_ENABLED;
+
+ /* We only support 1 page of hypercall for now */
+ if (pages != 1)
+ return -ENOMEM;
+
+ pfn = __pa(hypercall_page);
+ wrmsrl(msr, pfn);
+
+ xen_setup_features();
+
+ if (enable_hybrid(flags))
+ return -EINVAL;
+
+ x86_init.oem.banner = xen_hybrid_banner;
+ pv_info = xen_info;
+ pv_info.kernel_rpl = 0;
+
+ return 0;
+}
+
+extern struct shared_info shared_info_page;
+
+static void __init init_shared_info(void)
+{
+ struct xen_add_to_physmap xatp;
+
+ xatp.domid = DOMID_SELF;
+ xatp.idx = 0;
+ xatp.space = XENMAPSPACE_shared_info;
+ xatp.gpfn = __pa(&shared_info_page) >> PAGE_SHIFT;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+
+ HYPERVISOR_shared_info = (struct shared_info *)&shared_info_page;
+
+ /* Don't do the full vcpu_info placement stuff until we have a
+ possible map and a non-dummy shared_info. */
+ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+}
+
+void __init xen_hybrid_init(void)
+{
+#ifdef CONFIG_X86_32
+ return;
+#else
+ int r;
+
+ /* Ensure the we won't confused with PV */
+ if (xen_domain_type == XEN_PV_DOMAIN)
+ return;
+
+ r = init_hybrid_info();
+ if (r < 0)
+ return;
+
+ init_shared_info();
+
+ xen_hybrid_init_irq_ops();
+#endif
+}
+
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 9d30105..da4faf4 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -131,3 +131,24 @@ void __init xen_init_irq_ops()
pv_irq_ops = xen_irq_ops;
x86_init.irqs.intr_init = xen_init_IRQ;
}
+
+static void xen_hybrid_safe_halt(void)
+{
+ /* Do local_irq_enable() explicitly in hybrid guest */
+ local_irq_enable();
+ xen_safe_halt();
+}
+
+static void xen_hybrid_halt(void)
+{
+ if (irqs_disabled())
+ HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ else
+ xen_hybrid_safe_halt();
+}
+
+void __init xen_hybrid_init_irq_ops(void)
+{
+ pv_irq_ops.safe_halt = xen_hybrid_safe_halt;
+ pv_irq_ops.halt = xen_hybrid_halt;
+}
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 1a5ff24..26041ce 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -33,6 +33,12 @@ ENTRY(hypercall_page)
.skip PAGE_SIZE_asm
.popsection
+.pushsection .data
+ .align PAGE_SIZE_asm
+ENTRY(shared_info_page)
+ .skip PAGE_SIZE_asm
+.popsection
+
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6")
ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0")
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index f9153a3..89e38ba 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -41,6 +41,7 @@ void xen_vcpu_restore(void);
void __init xen_build_dynamic_phys_to_machine(void);
void xen_init_irq_ops(void);
+void xen_hybrid_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index 7c74ba4..fac6e82 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -69,4 +69,11 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
/* Flushes all VCPU TLBs: @arg must be NULL. */
#define HVMOP_flush_tlbs 5
+#define HVMOP_enable_hybrid 9
+struct xen_hvm_hybrid_type {
+ domid_t domid;
+ uint32_t flags;
+#define HVM_HYBRID_EVTCHN (1ull<<1)
+};
+
#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/xen.h b/include/xen/xen.h
index a164024..aace9cc 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -9,6 +9,7 @@ enum xen_domain_type {
#ifdef CONFIG_XEN
extern enum xen_domain_type xen_domain_type;
+extern void xen_hybrid_init(void);
#else
#define xen_domain_type XEN_NATIVE
#endif
@@ -19,6 +20,14 @@ extern enum xen_domain_type xen_domain_type;
#define xen_hvm_domain() (xen_domain() && \
xen_domain_type == XEN_HVM_DOMAIN)
+#define XEN_HYBRID_ENABLED (1u << 0)
+#define XEN_HYBRID_EVTCHN_ENABLED (1u << 1)
+extern u32 xen_hybrid_status;
+
+#define xen_hybrid_enabled() (xen_hybrid_status & XEN_HYBRID_ENABLED)
+#define xen_hybrid_evtchn_enabled() \
+ (xen_hybrid_status & XEN_HYBRID_EVTCHN_ENABLED)
+
#ifdef CONFIG_XEN_DOM0
#include <xen/interface/xen.h>
#include <asm/xen/hypervisor.h>
--
1.5.4.5
From: Jeremy Fitzhardinge <[email protected]>
Add support for hvm_op hypercall.
Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/include/asm/xen/hypercall.h | 6 ++
include/xen/hvm.h | 23 +++++++
include/xen/interface/hvm/hvm_op.h | 72 ++++++++++++++++++++++
include/xen/interface/hvm/params.h | 111 ++++++++++++++++++++++++++++++++++
4 files changed, 212 insertions(+), 0 deletions(-)
create mode 100644 include/xen/hvm.h
create mode 100644 include/xen/interface/hvm/hvm_op.h
create mode 100644 include/xen/interface/hvm/params.h
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9c371e4..47c2ebb 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -417,6 +417,12 @@ HYPERVISOR_nmi_op(unsigned long op, unsigned long arg)
return _hypercall2(int, nmi_op, op, arg);
}
+static inline unsigned long __must_check
+HYPERVISOR_hvm_op(int op, void *arg)
+{
+ return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+
static inline void
MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set)
{
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
new file mode 100644
index 0000000..4ea8887
--- /dev/null
+++ b/include/xen/hvm.h
@@ -0,0 +1,23 @@
+/* Simple wrappers around HVM functions */
+#ifndef XEN_HVM_H__
+#define XEN_HVM_H__
+
+#include <xen/interface/hvm/params.h>
+
+static inline unsigned long hvm_get_parameter(int idx)
+{
+ struct xen_hvm_param xhv;
+ int r;
+
+ xhv.domid = DOMID_SELF;
+ xhv.index = idx;
+ r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
+ if (r < 0) {
+ printk(KERN_ERR "cannot get hvm parameter %d: %d.\n",
+ idx, r);
+ return 0;
+ }
+ return xhv.value;
+}
+
+#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 0000000..7c74ba4
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,72 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
+ domid_t domid; /* IN */
+ uint32_t index; /* IN */
+ uint64_t value; /* IN/OUT */
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level 2
+struct xen_hvm_set_pci_intx_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+ uint8_t domain, bus, device, intx;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_intx_level);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level 3
+struct xen_hvm_set_isa_irq_level {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* ISA device identification, by ISA IRQ (0-15). */
+ uint8_t isa_irq;
+ /* Assertion level (0 = unasserted, 1 = asserted). */
+ uint8_t level;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_isa_irq_level);
+
+#define HVMOP_set_pci_link_route 4
+struct xen_hvm_set_pci_link_route {
+ /* Domain to be updated. */
+ domid_t domid;
+ /* PCI link identifier (0-3). */
+ uint8_t link;
+ /* ISA IRQ (1-15), or 0 (disable link). */
+ uint8_t isa_irq;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_set_pci_link_route);
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs 5
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 0000000..15d828f
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,111 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include "hvm_op.h"
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ * Domain = val[47:32], Bus = val[31:16],
+ * DevFn = val[15: 8], IntX = val[ 1: 0]
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED 4
+
+#define HVM_PARAM_IOREQ_PFN 5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+#ifdef __ia64__
+
+#define HVM_PARAM_NVRAM_FD 7
+#define HVM_PARAM_VHPT_SIZE 8
+#define HVM_PARAM_BUFPIOREQ_PFN 9
+
+#elif defined(__i386__) || defined(__x86_64__)
+
+/* Expose Viridian interfaces to this HVM guest? */
+#define HVM_PARAM_VIRIDIAN 9
+
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
+ * Do not advance a vcpu's time beyond the correct delivery time for
+ * interrupts that have been missed due to preemption. Deliver missed
+ * interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ * time stepwise for each one.
+ * no_delay_for_missed_ticks:
+ * As above, missed interrupts are delivered, but guest time always tracks
+ * wallclock (i.e., real) time while doing so.
+ * no_missed_ticks_pending:
+ * No missed interrupts are held pending. Instead, to ensure ticks are
+ * delivered at some non-zero rate, if we detect missed ticks then the
+ * internal tick alarm is not disabled if the VCPU is preempted during the
+ * next tick period.
+ * one_missed_tick_pending:
+ * Missed interrupts are collapsed together and delivered as one 'late tick'.
+ * Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE 10
+#define HVMPTM_delay_for_missed_ticks 0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending 2
+#define HVMPTM_one_missed_tick_pending 3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+#define HVM_NR_PARAMS 17
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
--
1.5.4.5
Now the vnif and vbd drivers can work.
Note that one memory region (0xfbfe0000ul - 0xfc000000ul) is expected to be
reserved in the BIOS E820 table. This memory region is used as the grant table.
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/xen/enlighten.c | 1 +
drivers/block/xen-blkfront.c | 3 ++
drivers/input/xen-kbdfront.c | 4 ++
drivers/net/xen-netfront.c | 3 ++
drivers/video/xen-fbfront.c | 4 ++
drivers/xen/grant-table.c | 67 +++++++++++++++++++++++++++++++++++-
drivers/xen/xenbus/xenbus_probe.c | 23 +++++++++++--
include/xen/xen.h | 3 ++
8 files changed, 103 insertions(+), 5 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 369b250..5a2a73b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1278,6 +1278,7 @@ static int init_hybrid_info(void)
pv_info = xen_info;
pv_info.kernel_rpl = 0;
+ xen_domain_type = XEN_HYBRID_DOMAIN;
return 0;
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 05a31e5..d7dfba9 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1071,6 +1071,9 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
XENVBD_MAJOR, DEV_NAME);
diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
index c721c0a..74cbb25 100644
--- a/drivers/input/xen-kbdfront.c
+++ b/drivers/input/xen-kbdfront.c
@@ -341,6 +341,10 @@ static int __init xenkbd_init(void)
if (!xen_domain())
return -ENODEV;
+ /* Xen Hybrid domain doesn't need vkbd */
+ if (xen_hybrid_domain())
+ return -ENODEV;
+
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index a869b45..568324a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1807,6 +1807,9 @@ static int __init netif_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
if (xen_initial_domain())
return 0;
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index 603598f..2ba569a 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -686,6 +686,10 @@ static int __init xenfb_init(void)
if (!xen_domain())
return -ENODEV;
+ /* Don't enable vfb in Xen hybrid domain */
+ if (xen_hybrid_domain())
+ return -ENODEV;
+
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4c6c0bd..29f2155 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -46,6 +46,9 @@
#include <asm/pgtable.h>
#include <asm/sync_bitops.h>
+#include <xen/interface/memory.h>
+#include <linux/io.h>
+#include <asm/e820.h>
/* External tools reserve first few grant table entries. */
#define NR_RESERVED_ENTRIES 8
@@ -441,12 +444,33 @@ static inline unsigned int max_nr_grant_frames(void)
return xen_max;
}
+static unsigned long hybrid_resume_frames;
+
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
struct gnttab_setup_table setup;
unsigned long *frames;
unsigned int nr_gframes = end_idx + 1;
int rc;
+ struct xen_add_to_physmap xatp;
+ unsigned int i = end_idx;
+
+ if (xen_hybrid_domain() && xen_hybrid_evtchn_enabled()) {
+ /*
+ * Loop backwards, so that the first hypercall has the largest
+ * index, ensuring that the table will grow only once.
+ */
+ do {
+ xatp.domid = DOMID_SELF;
+ xatp.idx = i;
+ xatp.space = XENMAPSPACE_grant_table;
+ xatp.gpfn = (hybrid_resume_frames >> PAGE_SHIFT) + i;
+ if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+ BUG();
+ } while (i-- > start_idx);
+
+ return 0;
+ }
frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
if (!frames)
@@ -473,11 +497,47 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return 0;
}
+#define GNTTAB_START 0xfbfe0000ul
+#define GNTTAB_SIZE 0x20000ul
+
int gnttab_resume(void)
{
- if (max_nr_grant_frames() < nr_grant_frames)
+ unsigned int max_nr_gframes;
+
+ max_nr_gframes = max_nr_grant_frames();
+ if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- return gnttab_map(0, nr_grant_frames - 1);
+
+ if (!(xen_hybrid_domain() && xen_hybrid_evtchn_enabled()))
+ return gnttab_map(0, nr_grant_frames - 1);
+
+ if (!hybrid_resume_frames) {
+ /* Check if e820 reserved the related region */
+ if (!e820_all_mapped(GNTTAB_START,
+ GNTTAB_START + GNTTAB_SIZE, 2)) {
+ printk(KERN_WARNING
+ "Fail to found grant table region in e820!\n");
+ return -ENODEV;
+ }
+ if (PAGE_SIZE * max_nr_gframes > GNTTAB_SIZE) {
+ printk(KERN_WARNING
+ "Grant table size exceed the limit!\n");
+ return -EINVAL;
+ }
+
+ hybrid_resume_frames = GNTTAB_START;
+ shared = ioremap(hybrid_resume_frames,
+ PAGE_SIZE * max_nr_gframes);
+ if (shared == NULL) {
+ printk(KERN_WARNING
+ "Fail to ioremap gnttab share frames\n");
+ return -ENOMEM;
+ }
+ }
+
+ gnttab_map(0, nr_grant_frames - 1);
+
+ return 0;
}
int gnttab_suspend(void)
@@ -513,6 +573,9 @@ static int __devinit gnttab_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return -ENODEV;
+
nr_grant_frames = 1;
boot_max_nr_grant_frames = __max_nr_grant_frames();
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 2f7aaa9..83964b9 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -55,6 +55,8 @@
#include <xen/events.h>
#include <xen/page.h>
+#include <xen/hvm.h>
+
#include "xenbus_comms.h"
#include "xenbus_probe.h"
@@ -789,6 +791,9 @@ static int __init xenbus_probe_init(void)
if (!xen_domain())
goto out_error;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ goto out_error;
+
/* Register ourselves with the kernel bus subsystem */
err = bus_register(&xenbus_frontend.bus);
if (err)
@@ -805,10 +810,19 @@ static int __init xenbus_probe_init(void)
/* dom0 not yet supported */
} else {
xenstored_ready = 1;
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
+ if (xen_hybrid_domain()) {
+ xen_store_evtchn =
+ hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+ xen_store_mfn =
+ hvm_get_parameter(HVM_PARAM_STORE_PFN);
+ xen_store_interface =
+ ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
+ } else {
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+ }
}
- xen_store_interface = mfn_to_virt(xen_store_mfn);
/* Initialize the interface to xenstore. */
err = xs_init();
@@ -925,6 +939,9 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
if (!ready_to_wait_for_devices || !xen_domain())
return;
+ if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
+ return;
+
while (exists_connecting_device(drv)) {
if (time_after(jiffies, start + (seconds_waited+5)*HZ)) {
if (!seconds_waited)
diff --git a/include/xen/xen.h b/include/xen/xen.h
index aace9cc..632e76f 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -5,6 +5,7 @@ enum xen_domain_type {
XEN_NATIVE, /* running on bare hardware */
XEN_PV_DOMAIN, /* running in a PV domain */
XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
+ XEN_HYBRID_DOMAIN, /* running in a Xen hybrid hvm domain */
};
#ifdef CONFIG_XEN
@@ -19,6 +20,8 @@ extern void xen_hybrid_init(void);
xen_domain_type == XEN_PV_DOMAIN)
#define xen_hvm_domain() (xen_domain() && \
xen_domain_type == XEN_HVM_DOMAIN)
+#define xen_hybrid_domain() (xen_domain() && \
+ xen_domain_type == XEN_HYBRID_DOMAIN)
#define XEN_HYBRID_ENABLED (1u << 0)
#define XEN_HYBRID_EVTCHN_ENABLED (1u << 1)
--
1.5.4.5
From: Keir Fraser <[email protected]>
This header will be used by CPUID detection later.
Signed-off-by: Keir Fraser <[email protected]>
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/include/asm/xen/cpuid.h | 68 ++++++++++++++++++++++++++++++++++++++
1 files changed, 68 insertions(+), 0 deletions(-)
create mode 100644 arch/x86/include/asm/xen/cpuid.h
diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
new file mode 100644
index 0000000..8787f03
--- /dev/null
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * arch/x86/include/asm/xen/cpuid.h
+ *
+ * CPUID interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <[email protected]>
+ */
+
+#ifndef __ASM_X86_XEN_CPUID_H__
+#define __ASM_X86_XEN_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ * are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ * of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ * to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */
--
1.5.4.5
We map each IOAPIC pin to a VIRQ, so that we can deliver interrupts through
these VIRQs.
We use X86_PLATFORM_IPI_VECTOR as the notification vector for the hypervisor
to notify the guest about an event.
The Xen PV timer is used to provide the guest with a reliable timer.
The patch also enables SMP support, so we can support IPIs through evtchn as well.
Then we don't need IOAPIC/LAPIC...
Signed-off-by: Sheng Yang <[email protected]>
---
arch/x86/xen/enlighten.c | 73 ++++++++++++++++++++++
arch/x86/xen/irq.c | 37 ++++++++++-
arch/x86/xen/smp.c | 144 ++++++++++++++++++++++++++++++++++++++++++-
arch/x86/xen/xen-ops.h | 3 +
drivers/xen/events.c | 66 ++++++++++++++++++-
include/xen/events.h | 1 +
include/xen/hvm.h | 5 ++
include/xen/interface/xen.h | 6 ++-
8 files changed, 327 insertions(+), 8 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2f1a3df..369b250 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -58,6 +58,9 @@
#include <asm/reboot.h>
#include <asm/stackprotector.h>
+#include <xen/hvm.h>
+#include <xen/events.h>
+
#include "xen-ops.h"
#include "mmu.h"
#include "multicalls.h"
@@ -1207,6 +1210,8 @@ static void __init xen_hybrid_banner(void)
printk(KERN_INFO "Booting hybrid kernel on %s\n", pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s\n",
version >> 16, version & 0xffff, extra.extraversion);
+ if (xen_hybrid_evtchn_enabled())
+ printk(KERN_INFO "Hybrid feature: Event channel enabled\n");
}
static int xen_para_available(void)
@@ -1252,6 +1257,11 @@ static int init_hybrid_info(void)
xen_hybrid_status = XEN_HYBRID_ENABLED;
+ if (edx & XEN_CPUID_FEAT2_HYBRID_EVTCHN) {
+ xen_hybrid_status |= XEN_HYBRID_EVTCHN_ENABLED;
+ flags |= HVM_HYBRID_EVTCHN;
+ }
+
/* We only support 1 page of hypercall for now */
if (pages != 1)
return -ENOMEM;
@@ -1291,12 +1301,42 @@ static void __init init_shared_info(void)
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
}
+static int set_callback_via(uint64_t via)
+{
+ struct xen_hvm_param a;
+
+ a.domid = DOMID_SELF;
+ a.index = HVM_PARAM_CALLBACK_IRQ;
+ a.value = via;
+ return HYPERVISOR_hvm_op(HVMOP_set_param, &a);
+}
+
+void do_hybrid_intr(void)
+{
+#ifdef CONFIG_X86_64
+ per_cpu(irq_count, smp_processor_id())++;
+#endif
+ xen_evtchn_do_upcall(get_irq_regs());
+#ifdef CONFIG_X86_64
+ per_cpu(irq_count, smp_processor_id())--;
+#endif
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static void xen_hybrid_apic_write(u32 reg, u32 val)
+{
+ /* The only one reached here should be EOI */
+ WARN_ON(reg != APIC_EOI);
+}
+#endif
+
void __init xen_hybrid_init(void)
{
#ifdef CONFIG_X86_32
return;
#else
int r;
+ uint64_t callback_via;
/* Ensure the we won't confused with PV */
if (xen_domain_type == XEN_PV_DOMAIN)
@@ -1309,6 +1349,39 @@ void __init xen_hybrid_init(void)
init_shared_info();
xen_hybrid_init_irq_ops();
+
+ init_shared_info();
+
+ if (xen_hybrid_evtchn_enabled()) {
+ pv_time_ops = xen_time_ops;
+
+ x86_init.timers.timer_init = xen_time_init;
+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
+ x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+
+ x86_platform.calibrate_tsc = xen_tsc_khz;
+ x86_platform.get_wallclock = xen_get_wallclock;
+ x86_platform.set_wallclock = xen_set_wallclock;
+
+ pv_apic_ops = xen_apic_ops;
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * set up the basic apic ops.
+ */
+ set_xen_basic_apic_ops();
+ apic->write = xen_hybrid_apic_write;
+#endif
+
+ callback_via = HVM_CALLBACK_VECTOR(X86_PLATFORM_IPI_VECTOR);
+ set_callback_via(callback_via);
+
+ x86_platform_ipi_callback = do_hybrid_intr;
+
+ disable_acpi();
+
+ xen_hybrid_smp_init();
+ machine_ops = xen_machine_ops;
+ }
#endif
}
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index da4faf4..5a449df 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/xen.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
@@ -132,6 +133,20 @@ void __init xen_init_irq_ops()
x86_init.irqs.intr_init = xen_init_IRQ;
}
+static void xen_hybrid_irq_disable(void)
+{
+ native_irq_disable();
+ xen_irq_disable();
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_hybrid_irq_disable);
+
+static void xen_hybrid_irq_enable(void)
+{
+ native_irq_enable();
+ xen_irq_enable();
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_hybrid_irq_enable);
+
static void xen_hybrid_safe_halt(void)
{
/* Do local_irq_enable() explicitly in hybrid guest */
@@ -147,8 +162,26 @@ static void xen_hybrid_halt(void)
xen_hybrid_safe_halt();
}
+static const struct pv_irq_ops xen_hybrid_irq_ops __initdata = {
+ .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+ .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
+ .irq_disable = PV_CALLEE_SAVE(xen_hybrid_irq_disable),
+ .irq_enable = PV_CALLEE_SAVE(xen_hybrid_irq_enable),
+
+ .safe_halt = xen_hybrid_safe_halt,
+ .halt = xen_hybrid_halt,
+#ifdef CONFIG_X86_64
+ .adjust_exception_frame = paravirt_nop,
+#endif
+};
+
void __init xen_hybrid_init_irq_ops(void)
{
- pv_irq_ops.safe_halt = xen_hybrid_safe_halt;
- pv_irq_ops.halt = xen_hybrid_halt;
+ if (xen_hybrid_evtchn_enabled()) {
+ pv_irq_ops = xen_hybrid_irq_ops;
+ x86_init.irqs.intr_init = xen_hybrid_init_IRQ;
+ } else {
+ pv_irq_ops.safe_halt = xen_hybrid_safe_halt;
+ pv_irq_ops.halt = xen_hybrid_halt;
+ }
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 563d205..0087bd2 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,20 +15,26 @@
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/smp.h>
+#include <linux/nmi.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>
+#include <asm/trampoline.h>
+#include <asm/tlbflush.h>
+#include <asm/mtrr.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
#include <xen/page.h>
#include <xen/events.h>
+#include <xen/xen.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -171,7 +177,8 @@ static void __init xen_smp_prepare_boot_cpu(void)
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
+ if (xen_pv_domain())
+ make_lowmem_page_readwrite(xen_initial_gdt);
xen_setup_vcpu_info_placement();
}
@@ -480,3 +487,138 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static __cpuinit void xen_hybrid_start_secondary(void)
+{
+ int cpu = smp_processor_id();
+
+ cpu_init();
+ touch_nmi_watchdog();
+ preempt_disable();
+
+ /* otherwise gcc will move up smp_processor_id before the cpu_init */
+ barrier();
+ /*
+ * Check TSC synchronization with the BSP:
+ */
+ check_tsc_sync_target();
+
+ /* Done in smp_callin(), move it here */
+ set_mtrr_aps_delayed_init();
+ smp_store_cpu_info(cpu);
+
+ /* This must be done before setting cpu_online_mask */
+ set_cpu_sibling_map(cpu);
+ wmb();
+
+ set_cpu_online(smp_processor_id(), true);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+
+ /* enable local interrupts */
+ local_irq_enable();
+
+ xen_setup_cpu_clockevents();
+
+ wmb();
+ cpu_idle();
+}
+
+static __cpuinit int
+hybrid_cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
+{
+ struct vcpu_guest_context *ctxt;
+ unsigned long start_ip;
+
+ if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (ctxt == NULL)
+ return -ENOMEM;
+
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ initial_code = (unsigned long)xen_hybrid_start_secondary;
+ stack_start.sp = (void *) idle->thread.sp;
+
+ /* start_ip had better be page-aligned! */
+ start_ip = setup_trampoline();
+
+ /* only start_ip is what we want */
+ ctxt->flags = VGCF_HVM_GUEST;
+ ctxt->user_regs.eip = start_ip;
+
+ printk(KERN_INFO "Booting processor %d ip 0x%lx\n", cpu, start_ip);
+
+ if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
+ BUG();
+
+ kfree(ctxt);
+ return 0;
+}
+
+static int __init xen_hybrid_cpu_up(unsigned int cpu)
+{
+ struct task_struct *idle = idle_task(cpu);
+ int rc;
+ unsigned long flags;
+
+ per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+ irq_ctx_init(cpu);
+#else
+ clear_tsk_thread_flag(idle, TIF_FORK);
+ initial_gs = per_cpu_offset(cpu);
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(idle) -
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
+#endif
+
+ xen_setup_timer(cpu);
+ xen_init_lock_cpu(cpu);
+
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+
+ rc = hybrid_cpu_initialize_context(cpu, idle);
+ if (rc)
+ return rc;
+
+ if (num_online_cpus() == 1)
+ alternatives_smp_switch(1);
+
+ rc = xen_smp_intr_init(cpu);
+ if (rc)
+ return rc;
+
+ rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+ BUG_ON(rc);
+
+ /*
+ * Check TSC synchronization with the AP (keep irqs disabled
+ * while doing so):
+ */
+ local_irq_save(flags);
+ check_tsc_sync_source(cpu);
+ local_irq_restore(flags);
+
+ while (!cpu_online(cpu)) {
+ cpu_relax();
+ touch_nmi_watchdog();
+ }
+
+ return 0;
+}
+
+static void xen_hybrid_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va)
+{
+ /* TODO Make it more specific */
+ flush_tlb_all();
+}
+
+void __init xen_hybrid_smp_init(void)
+{
+ smp_ops = xen_smp_ops;
+ smp_ops.cpu_up = xen_hybrid_cpu_up;
+ pv_mmu_ops.flush_tlb_others = xen_hybrid_flush_tlb_others;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 89e38ba..1eeb769 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -34,6 +34,7 @@ void xen_reserve_top(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
+void __init xen_hybrid_init_IRQ(void);
void xen_enable_sysenter(void);
void xen_enable_syscall(void);
void xen_vcpu_restore(void);
@@ -61,10 +62,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void xen_hybrid_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hybrid_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ce602dd..3325f9e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -37,9 +37,12 @@
#include <xen/xen-ops.h>
#include <xen/events.h>
+#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
+#include <asm/desc.h>
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -624,8 +627,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
unsigned count;
- exit_idle();
- irq_enter();
+ /*
+ * If in hybrid mode, smp_x86_platform_ipi() have already done these
+ */
+ if (!xen_hybrid_evtchn_enabled()) {
+ exit_idle();
+ irq_enter();
+ }
do {
unsigned long pending_words;
@@ -662,8 +670,10 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
} while(count != 1);
out:
- irq_exit();
- set_irq_regs(old_regs);
+ if (!xen_hybrid_evtchn_enabled()) {
+ irq_exit();
+ set_irq_regs(old_regs);
+ }
put_cpu();
}
@@ -944,3 +954,51 @@ void __init xen_init_IRQ(void)
irq_ctx_init(smp_processor_id());
}
+
+void __init xen_hybrid_init_IRQ(void)
+{
+ int i;
+
+ xen_init_IRQ();
+ for (i = 0; i < NR_IRQS_LEGACY; i++) {
+ struct evtchn_bind_virq bind_virq;
+ struct irq_desc *desc = irq_to_desc(i);
+ int virq, evtchn;
+
+ virq = i + VIRQ_EMUL_PIN_START;
+ bind_virq.virq = virq;
+ bind_virq.vcpu = 0;
+
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+
+ evtchn = bind_virq.port;
+ evtchn_to_irq[evtchn] = i;
+ irq_info[i] = mk_virq_info(evtchn, virq);
+
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ set_irq_chip_and_handler_name(i, &xen_dynamic_chip,
+ handle_level_irq, "event");
+ }
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != IA32_SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ /* generic IPI for platform specific use, now used for hybrid */
+ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+}
diff --git a/include/xen/events.h b/include/xen/events.h
index e68d59a..91755db 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -56,4 +56,5 @@ void xen_poll_irq(int irq);
/* Determine the IRQ which is bound to an event channel */
unsigned irq_from_evtchn(unsigned int evtchn);
+void xen_evtchn_do_upcall(struct pt_regs *regs);
#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
index 4ea8887..c66d788 100644
--- a/include/xen/hvm.h
+++ b/include/xen/hvm.h
@@ -20,4 +20,9 @@ static inline unsigned long hvm_get_parameter(int idx)
return xhv.value;
}
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..9282ff7 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -90,7 +90,11 @@
#define VIRQ_ARCH_6 22
#define VIRQ_ARCH_7 23
-#define NR_VIRQS 24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_NUM 16
+
+#define NR_VIRQS 40
+
/*
* MMU-UPDATE REQUESTS
*
--
1.5.4.5
Oops... I missed the subject field when using git-send-email...
--
regards
Yang, Sheng
On Tuesday 02 February 2010 16:19:01 Sheng Yang wrote:
> Hi, Jeremy & Keir
>
> Here is the second version of patchset to enable Xen Hybrid extension
> support in Linux kernel.
>
> The Hybrid Extension is started from real mode like HVM guest, but also
> with a a range of PV features(e.g. PV halt, PV timer, event channel, as
> well as PV drivers). So guest with Hybrid extension feature can takes the
> advantages of both H/W virtualization and Para-Virtualization.
>
> The first two of the patchset imported several header file from Jeremy's
> tree and Xen tree, respect to Jeremy and Keir's works.
>
> The whole patchset based on Linux upstream.
>
> Current the patchset support x86_64 only.
>
> The major change from v1:
> 1. SMP support.
> 2. Modify the entrance point to avoid most of genernic kernel modification.
> 3. Binding PV timer with event channel mechanism.
>
> You need a line like:
>
> cpuid = [ '0x40000002:edx=0x3' ]
>
> in HVM configuration file to expose hybrid feature to guest, and
>
> CONFIG_XEN
>
> in the guest kernel configuration file to enable the hybrid support.
>
> And the compiled image can be used as native/pv domU/hvm guest/hybrid
> kernel.
>
> Comments are welcome!
>
> BTW: For the MSI/MSI-X support, pv_ops dom0 can share the solution with
> hybrid. We would try to figure out a elegant way to deal with it later.
>
> --
> regards
> Yang, Sheng
>
> --
> arch/x86/include/asm/xen/cpuid.h | 73 +++++++++++++
> arch/x86/include/asm/xen/hypercall.h | 6 +
> arch/x86/kernel/setup.c | 8 ++
> arch/x86/xen/enlighten.c | 192
> ++++++++++++++++++++++++++++++++++ arch/x86/xen/irq.c |
> 54 ++++++++++
> arch/x86/xen/smp.c | 144 +++++++++++++++++++++++++-
> arch/x86/xen/xen-head.S | 6 +
> arch/x86/xen/xen-ops.h | 4 +
> drivers/block/xen-blkfront.c | 3 +
> drivers/input/xen-kbdfront.c | 4 +
> drivers/net/xen-netfront.c | 3 +
> drivers/video/xen-fbfront.c | 4 +
> drivers/xen/events.c | 66 +++++++++++-
> drivers/xen/grant-table.c | 67 ++++++++++++-
> drivers/xen/xenbus/xenbus_probe.c | 23 ++++-
> include/xen/events.h | 1 +
> include/xen/hvm.h | 28 +++++
> include/xen/interface/hvm/hvm_op.h | 79 ++++++++++++++
> include/xen/interface/hvm/params.h | 111 ++++++++++++++++++++
> include/xen/interface/xen.h | 6 +-
> include/xen/xen.h | 12 ++
> 21 files changed, 883 insertions(+), 11 deletions(-)
>
>
> _______________________________________________
> Xen-devel mailing list
> [email protected]
> http://lists.xensource.com/xen-devel
>
On Tue, 2010-02-02 at 08:19 +0000, Sheng Yang wrote:
> Notice one memory region(0xfbfe0000ul - 0xfc000000ul) would be reserved in the
> bios E820 table. This memory region would be used as grant table.
I queried this requirement in a reply to the hypervisor patch.
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 05a31e5..d7dfba9 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -1071,6 +1071,9 @@ static int __init xlblk_init(void)
> if (!xen_domain())
> return -ENODEV;
>
> + if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
> + return -ENODEV;
> +
This seems ugly. At the very least the check should be something like
xen_evtchn_enabled(), but it would be preferable to hook up evtchns by
demuxing the PCI device IRQ (the existing PVonHVM drivers' mechanism) in
the case where hybrid evtchns are not available, and to encapsulate the
differences inside the evtchn code. There should be no need to scatter
these sorts of checks throughout every driver.
If you don't want to demux the PCI device IRQ for the non-hybrid case,
another option might be to simply return failure from the evtchn operations
if hybrid evtchns are not available, and to ensure that the drivers
handle that sort of error gracefully (which they should in any case). At
least the difference in mode would be encapsulated that way.
(same for netfront).
> if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
> printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
> XENVBD_MAJOR, DEV_NAME);
> diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
> index c721c0a..74cbb25 100644
> --- a/drivers/input/xen-kbdfront.c
> +++ b/drivers/input/xen-kbdfront.c
> @@ -341,6 +341,10 @@ static int __init xenkbd_init(void)
> if (!xen_domain())
> return -ENODEV;
>
> + /* Xen Hybrid domain don't need vkbd */
> + if (xen_hybrid_domain())
> + return -ENODEV;
> +
Why disallow it if the platform has specified it (same for fbfront)?
> diff --git a/include/xen/xen.h b/include/xen/xen.h
> index aace9cc..632e76f 100644
> --- a/include/xen/xen.h
> +++ b/include/xen/xen.h
> @@ -5,6 +5,7 @@ enum xen_domain_type {
> XEN_NATIVE, /* running on bare hardware */
> XEN_PV_DOMAIN, /* running in a PV domain */
> XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
> + XEN_HYBRID_DOMAIN, /* running in a Xen hybrid hvm domain */
> };
I don't think you should need to distinguish HYBRID from HVM mode...
Ian.
On Tuesday 02 February 2010 19:33:10 Ian Campbell wrote:
> On Tue, 2010-02-02 at 08:19 +0000, Sheng Yang wrote:
> > Notice one memory region(0xfbfe0000ul - 0xfc000000ul) would be reserved
> > in the bios E820 table. This memory region would be used as grant table.
>
> I queried this requirement in a reply to the hypervisor patch.
>
> > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> > index 05a31e5..d7dfba9 100644
> > --- a/drivers/block/xen-blkfront.c
> > +++ b/drivers/block/xen-blkfront.c
> > @@ -1071,6 +1071,9 @@ static int __init xlblk_init(void)
> > if (!xen_domain())
> > return -ENODEV;
> >
> > + if (xen_hybrid_domain() && !xen_hybrid_evtchn_enabled())
> > + return -ENODEV;
> > +
>
> This seems ugly, at the very least the check should be something like
> xen_evtchn_enabled()
Yeah, seems indeed ugly...
> but preferable would be to hook up evtchn's by
> demuxing the PCI device IRQ (the exiting PVonHVM drivers mechanism) in
> the case where hybrid evtchn's are not available and encapsulating the
> differences inside the evtchn code, there should be no need to scatter
> these sorts of checks throughout every driver.
>
> If you don't want to demux the PCI device IRQ for the non-hybrid case
> another option might be simply return failure from the evtchn operations
> if hybrid evtchns are not available and to ensure that the drivers
> handle that sort of error gracefully (which they should in any case). At
> least the difference in mode would be encapsulated that way.
>
> (same for netfront).
I am not sure if I understand you right, but I think the issue is that there
are no PVonHVM drivers in Linux upstream. The drivers are currently maintained
by OSVs, and the ones in the Xen upstream code only support 2.6.18. So I didn't
take them into consideration at the time.
I think "xen_evtchn_enable()" looks much better. I will replace these ugly
lines in the next version.
>
> > if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
> > printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
> > XENVBD_MAJOR, DEV_NAME);
> > diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c
> > index c721c0a..74cbb25 100644
> > --- a/drivers/input/xen-kbdfront.c
> > +++ b/drivers/input/xen-kbdfront.c
> > @@ -341,6 +341,10 @@ static int __init xenkbd_init(void)
> > if (!xen_domain())
> > return -ENODEV;
> >
> > + /* Xen Hybrid domain don't need vkbd */
> > + if (xen_hybrid_domain())
> > + return -ENODEV;
> > +
>
> Why disallow it if the platform has specified it (same for fbfront)?
Well... The direct reason is that I didn't test them and don't know what would
happen... I will give it a try later.
>
> > diff --git a/include/xen/xen.h b/include/xen/xen.h
> > index aace9cc..632e76f 100644
> > --- a/include/xen/xen.h
> > +++ b/include/xen/xen.h
> > @@ -5,6 +5,7 @@ enum xen_domain_type {
> > XEN_NATIVE, /* running on bare hardware */
> > XEN_PV_DOMAIN, /* running in a PV domain */
> > XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
> > + XEN_HYBRID_DOMAIN, /* running in a Xen hybrid hvm domain */
> > };
>
> I don't think you should need to distinguish HYBRID from HVM mode...
The only purpose is for the hybrid event channel... Yes, I think I can find
another way to indicate the availability of the event channel. :)
--
regards
Yang, Sheng
On Tue, 2010-02-02 at 13:24 +0000, Sheng Yang wrote:
>
> I am not sure if I understand you right, but I think the issue is,
> there is no PVonHVM drivers in Linux upstream. The drivers are
> currently maintained by OSVs, and the one in Xen upstream code only
> support 2.6.18. So I didn't take them into consideration at the time.
True, but this is something which should be taken care of by the core
Xen-aware code not something which should be pushed down into each
driver.
Someone who wants to add PVonHVM functionality shouldn't have to go and
remove a bunch of conditionals from each driver (or worse add
alternative clauses to each check!).
> I think the "xen_evtchn_enable()" looks much better. Would replace
> these ugly lines in the next version.
I think it would be cleaner to encapsulate this in the evtchn code
rather than leaking platform knowledge into each driver. IOW the evtchn
functions should return failure if event channels are not enabled and
the driver should cope with this gracefully.
Or perhaps at the xenbus driver level we should be deciding whether or
not we have enough paravirtualisation to be worth probing the drivers at
all?
Ian.
On Wednesday 03 February 2010 00:24:04 Ian Campbell wrote:
> On Tue, 2010-02-02 at 13:24 +0000, Sheng Yang wrote:
> > I am not sure if I understand you right, but I think the issue is,
> > there is no PVonHVM drivers in Linux upstream. The drivers are
> > currently maintained by OSVs, and the one in Xen upstream code only
> > support 2.6.18. So I didn't take them into consideration at the time.
>
> True, but this is something which should be taken care of by the core
> Xen-aware code not something which should be pushed down into each
> driver.
>
> Someone who wants to add PVonHVM functionality shouldn't have to go and
> remove a bunch of conditionals from each driver (or worse add
> alternative clauses to each check!).
>
> > I think the "xen_evtchn_enable()" looks much better. Would replace
> > these ugly lines in the next version.
>
> I think it would be cleaner to encapsulate this in the evtchn code
> rather than leaking platform knowledge into each driver. IOW the evtchn
> functions should return failure if event channels are not enabled and
> the driver should cope with this gracefully.
Agreed. That is what I intend to do. The only thing the drivers should need to
know is whether the event channel is enabled.
> Or perhaps at the xenbus driver level we should be deciding whether or
> not we have enough paravirtualisation to be worth probing the drivers at
> all?
I think the current scheme is direct enough for now. We can improve it later.
--
regards
Yang, Sheng
> +#define GNTTAB_START 0xfbfe0000ul
> +#define GNTTAB_SIZE 0x20000ul
Is it possible that there would be a PCI device that would be
passed in the guest that would conflict with the above mentioned
E820 region?
On Wednesday 03 February 2010 01:03:06 Konrad Rzeszutek Wilk wrote:
> > +#define GNTTAB_START 0xfbfe0000ul
> > +#define GNTTAB_SIZE 0x20000ul
>
> Is it possible that there would be a PCI device that would be
> passed in the guest that would conflict with the above mentioned
> E820 region?
>
I will change them to a dedicated PCI MMIO address in the next version.
Thanks.
--
regards
Yang, Sheng
On Tue, 2010-02-02 at 16:46 +0000, Sheng Yang wrote:
> On Wednesday 03 February 2010 00:24:04 Ian Campbell wrote:
> > On Tue, 2010-02-02 at 13:24 +0000, Sheng Yang wrote:
> > > I am not sure if I understand you right, but I think the issue is,
> > > there is no PVonHVM drivers in Linux upstream. The drivers are
> > > currently maintained by OSVs, and the one in Xen upstream code only
> > > support 2.6.18. So I didn't take them into consideration at the time.
> >
> > True, but this is something which should be taken care of by the core
> > Xen-aware code not something which should be pushed down into each
> > driver.
> >
> > Someone who wants to add PVonHVM functionality shouldn't have to go and
> > remove a bunch of conditionals from each driver (or worse add
> > alternative clauses to each check!).
> >
> > > I think the "xen_evtchn_enable()" looks much better. Would replace
> > > these ugly lines in the next version.
> >
> > I think it would be cleaner to encapsulate this in the evtchn code
> > rather than leaking platform knowledge into each driver. IOW the evtchn
> > functions should return failure if event channels are not enabled and
> > the driver should cope with this gracefully.
>
> Agree. That what I suppose to do. What the drivers should only know is, if
> event channel is enabled.
>
> > Or perhaps at the xenbus driver level we should be deciding whether or
> > not we have enough paravirtualisation to be worth probing the drivers at
> > all?
>
> I think current scheme is direct enough for now. We can improve it later.
Taking a step back I don't think any of these checks are necessary at
all -- in order to get as far as actually probing the devices xenbus
needs to be up and running, which implies event channels, as well as
everything else required for PV drivers to function, are working.
Drivers for PCI devices don't all start with "if (pci_bus_available())",
they just register the driver and let the kernel's driver core take care
of things. If someone is worried about the overhead of an extra driver
being registered, well that is what modular kernels are for.
I think that even the existing "if (xen_domain())" check is unnecessary,
at least in the frontend drivers.
Ian.
On Wednesday 03 February 2010 02:09:03 Ian Campbell wrote:
> On Tue, 2010-02-02 at 16:46 +0000, Sheng Yang wrote:
> > On Wednesday 03 February 2010 00:24:04 Ian Campbell wrote:
> > > On Tue, 2010-02-02 at 13:24 +0000, Sheng Yang wrote:
> > > > I am not sure if I understand you right, but I think the issue is,
> > > > there is no PVonHVM drivers in Linux upstream. The drivers are
> > > > currently maintained by OSVs, and the one in Xen upstream code only
> > > > support 2.6.18. So I didn't take them into consideration at the time.
> > >
> > > True, but this is something which should be taken care of by the core
> > > Xen-aware code not something which should be pushed down into each
> > > driver.
> > >
> > > Someone who wants to add PVonHVM functionality shouldn't have to go and
> > > remove a bunch of conditionals from each driver (or worse add
> > > alternative clauses to each check!).
> > >
> > > > I think the "xen_evtchn_enable()" looks much better. Would replace
> > > > these ugly lines in the next version.
> > >
> > > I think it would be cleaner to encapsulate this in the evtchn code
> > > rather than leaking platform knowledge into each driver. IOW the evtchn
> > > functions should return failure if event channels are not enabled and
> > > the driver should cope with this gracefully.
> >
> > Agree. That what I suppose to do. What the drivers should only know is,
> > if event channel is enabled.
> >
> > > Or perhaps at the xenbus driver level we should be deciding whether or
> > > not we have enough paravirtualisation to be worth probing the drivers
> > > at all?
> >
> > I think current scheme is direct enough for now. We can improve it later.
>
> Taking a step back I don't think any of these checks are necessary at
> all -- in order to get as far as actually probing the devices xenbus
> needs to be up and running, which implies event channels, as well as
> everything else required for PV drivers to function, are working.
>
> Drivers for PCI devices don't all start with "if (pci_bus_available())",
> they just register the driver and let the kernel's driver core take care
> of things. If someone is worried about the overhead of an extra driver
> being registered, well that is what modular kernels are for.
>
> I think that even the existing "if (xen_domain())" check is unnecessary,
> at least in the frontend drivers.
Um, very reasonable. I will provide another patch for this.
--
regards
Yang, Sheng