From: Jinank Jain <[email protected]>
This patch series plans to add support for running nested Microsoft
Hypervisor. In case of nested Microsoft Hypervisor there are few
privileged hypercalls which need to go L0 Hypervisor instead of L1
Hypervisor. This patches series basically identifies such hypercalls and
replace them with nested hypercalls.
Jinank Jain (6):
mshv: Add support for detecting nested hypervisor
hv: Setup synic registers in case of nested root partition
hv: Set the correct EOM register in case of nested hypervisor
hv: Add an interface to do nested hypercalls
hv: Enable vmbus driver for nested root partition
hv, mshv : Change interrupt vector for nested root partition
arch/x86/include/asm/hyperv-tlfs.h | 17 ++++++++-
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++
arch/x86/include/asm/mshyperv.h | 42 ++++++++++++++++++++---
arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++++
arch/x86/kernel/idt.c | 9 +++++
drivers/hv/hv.c | 55 ++++++++++++++++++------------
drivers/hv/vmbus_drv.c | 5 +--
include/asm-generic/hyperv-tlfs.h | 1 +
include/asm-generic/mshyperv.h | 7 +++-
10 files changed, 137 insertions(+), 29 deletions(-)
--
2.25.1
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt. But this does not work in case of nested
hypervisor. Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus and thus we have to use one of the vectors from
that range and setup the IDT accordingly.
Signed-off-by: Jinank Jain <[email protected]>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 9 +++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this, once Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for nested root partition.
+ * Or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 2555535f5237..83aab88bf298 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -61,6 +61,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..ace648856a0b 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack because we cannot install this interrupt handler via alloc_intr_gate
+ * as it does not allow interrupt vector less than FIRST_SYSTEM_VECTORS. And hyperv
+ * does not want anything other than 0x31-0x34 as the interrupt vector for vmbus
+ * interrupt in case of nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2f0cf75e811b..e6fb77fb44b9 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2746,7 +2746,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
Currently we are using the default EOM register value. But this needs to
changes when running under nested MSHV setup.
Signed-off-by: Jinank Jain <[email protected]>
---
include/asm-generic/mshyperv.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 49d2e9274379..7256e2cb7b67 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -117,6 +117,8 @@ static inline u64 hv_generate_guest_id(u64 kernel_version)
extern bool hv_nested;
+#define REG_EOM (hv_nested ? HV_REGISTER_NESTED_EOM : HV_REGISTER_EOM)
+
/* Free the message slot and signal end-of-message if required */
static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
{
@@ -148,7 +150,7 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
* possibly deliver another msg from the
* hypervisor
*/
- hv_set_register(HV_REGISTER_EOM, 0);
+ hv_set_register(REG_EOM, 0);
}
}
--
2.25.1
When Linux runs as a root partition for Microsoft Hypervisor. It is
possible to detect if it is running as nested hypervisor using
hints exposed by mshv. While at it expose a new variable called
hv_nested which can be used later for making decisions specific to
nested use case.
Signed-off-by: Jinank Jain <[email protected]>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
include/asm-generic/mshyperv.h | 2 ++
3 files changed, 12 insertions(+)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 3089ec352743..d9a611565859 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..2555535f5237 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on nested Microsoft Hypervisor */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: Linux running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..49d2e9274379 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -115,6 +115,8 @@ static inline u64 hv_generate_guest_id(u64 kernel_version)
return guest_id;
}
+extern bool hv_nested;
+
/* Free the message slot and signal end-of-message if required */
static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
{
--
2.25.1
Currently VMBus driver is not initialized for root partition but we need
to enable the VMBus driver for nested root partition. This is required
to expose VMBus devices to the L2 guest in the nested setup.
Signed-off-by: Jinank Jain <[email protected]>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 8b2e413bf19c..2f0cf75e811b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2723,7 +2723,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
Child partitions are free to allocate SynIC message and event page but in
case of root partition it must use the pages allocated by Microsoft
Hypervisor (MSHV). Base address for these pages can be found using
synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
for nested vs non-nested root partition.
Signed-off-by: Jinank Jain <[email protected]>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 ++++++
drivers/hv/hv.c | 55 ++++++++++++++++++------------
2 files changed, 45 insertions(+), 21 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index d9a611565859..0319091e2019 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..92ee910561c4 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -25,6 +25,11 @@
/* The one and only */
struct hv_context hv_context;
+#define REG_SIMP (hv_nested ? HV_REGISTER_NESTED_SIMP : HV_REGISTER_SIMP)
+#define REG_SIEFP (hv_nested ? HV_REGISTER_NESTED_SIEFP : HV_REGISTER_SIEFP)
+#define REG_SCTRL (hv_nested ? HV_REGISTER_NESTED_SCONTROL : HV_REGISTER_SCONTROL)
+#define REG_SINT0 (hv_nested ? HV_REGISTER_NESTED_SINT0 : HV_REGISTER_SINT0)
+
/*
* hv_init - Main initialization routine.
*
@@ -147,7 +152,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +193,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -213,10 +226,10 @@ void hv_synic_enable_regs(unsigned int cpu)
union hv_synic_scontrol sctrl;
/* Setup the Synic's message page */
- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_get_register(REG_SIMP);
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -227,13 +240,13 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}
- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ hv_set_register(REG_SIMP, simp.as_uint64);
/* Setup the Synic's event page */
- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_get_register(REG_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -245,12 +258,12 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}
- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ hv_set_register(REG_SIEFP, siefp.as_uint64);
/* Setup the shared SINT. */
if (vmbus_irq != -1)
enable_percpu_irq(vmbus_irq, 0);
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
+ shared_sint.as_uint64 = hv_get_register(REG_SINT0 +
VMBUS_MESSAGE_SINT);
shared_sint.vector = vmbus_interrupt;
@@ -266,14 +279,14 @@ void hv_synic_enable_regs(unsigned int cpu)
#else
shared_sint.auto_eoi = 0;
#endif
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);
/* Enable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_get_register(REG_SCTRL);
sctrl.enable = 1;
- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ hv_set_register(REG_SCTRL, sctrl.as_uint64);
}
int hv_synic_init(unsigned int cpu)
@@ -297,17 +310,17 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
+ shared_sint.as_uint64 = hv_get_register(REG_SINT0 +
VMBUS_MESSAGE_SINT);
shared_sint.masked = 1;
/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);
- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_get_register(REG_SIMP);
/*
* In Isolation VM, sim and sief pages are allocated by
* paravisor. These pages also will be used by kdump
@@ -320,9 +333,9 @@ void hv_synic_disable_regs(unsigned int cpu)
else
simp.base_simp_gpa = 0;
- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ hv_set_register(REG_SIMP, simp.as_uint64);
- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_get_register(REG_SIEFP);
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp())
@@ -330,12 +343,12 @@ void hv_synic_disable_regs(unsigned int cpu)
else
siefp.base_siefp_gpa = 0;
- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ hv_set_register(REG_SIEFP, siefp.as_uint64);
/* Disable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_get_register(REG_SCTRL);
sctrl.enable = 0;
- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ hv_set_register(REG_SCTRL, sctrl.as_uint64);
if (vmbus_irq != -1)
disable_percpu_irq(vmbus_irq);
--
2.25.1
Jinank Jain <[email protected]> writes:
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <[email protected]>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++++
> drivers/hv/hv.c | 55 ++++++++++++++++++------------
> 2 files changed, 45 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index d9a611565859..0319091e2019 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..92ee910561c4 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -25,6 +25,11 @@
> /* The one and only */
> struct hv_context hv_context;
>
> +#define REG_SIMP (hv_nested ? HV_REGISTER_NESTED_SIMP : HV_REGISTER_SIMP)
> +#define REG_SIEFP (hv_nested ? HV_REGISTER_NESTED_SIEFP : HV_REGISTER_SIEFP)
> +#define REG_SCTRL (hv_nested ? HV_REGISTER_NESTED_SCONTROL : HV_REGISTER_SCONTROL)
> +#define REG_SINT0 (hv_nested ? HV_REGISTER_NESTED_SINT0 : HV_REGISTER_SINT0)
> +
> /*
> * hv_init - Main initialization routine.
> *
> @@ -147,7 +152,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +193,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -213,10 +226,10 @@ void hv_synic_enable_regs(unsigned int cpu)
> union hv_synic_scontrol sctrl;
>
> /* Setup the Synic's message page */
> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> + simp.as_uint64 = hv_get_register(REG_SIMP);
To avoid all this code churn (here and in the next patch dealing with
EOM), would it make sense to move the logic picking nested/non-nested
register into hv_{get,set}_register() instead?
E.g. something like (untested, incomplete):
static inline u32 hv_get_nested_reg(u32 reg) {
switch (reg) {
HV_REGISTER_SIMP: return HV_REGISTER_NESTED_SIMP;
HV_REGISTER_NESTED_SVERSION: return HV_REGISTER_NESTED_SVERSION;
...
default: return reg;
}
}
static inline u64 hv_get_register(unsigned int reg)
{
u64 value;
if (hv_nested)
reg = hv_get_nested_reg(reg);
if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
hv_ghcb_msr_read(reg, &value);
else
rdmsrl(reg, value);
return value;
}
static inline void hv_set_register(unsigned int reg, u64 value)
{
if (hv_nested)
reg = hv_get_nested_reg(reg);
if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
hv_ghcb_msr_write(reg, value);
/* Write proxy bit via wrmsl instruction */
if (reg >= HV_REGISTER_SINT0 &&
reg <= HV_REGISTER_SINT15)
wrmsrl(reg, value | 1 << 20);
} else {
wrmsrl(reg, value);
}
}
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -227,13 +240,13 @@ void hv_synic_enable_regs(unsigned int cpu)
> >> HV_HYP_PAGE_SHIFT;
> }
>
> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> + hv_set_register(REG_SIMP, simp.as_uint64);
>
> /* Setup the Synic's event page */
> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> + siefp.as_uint64 = hv_get_register(REG_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -245,12 +258,12 @@ void hv_synic_enable_regs(unsigned int cpu)
> >> HV_HYP_PAGE_SHIFT;
> }
>
> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + hv_set_register(REG_SIEFP, siefp.as_uint64);
>
> /* Setup the shared SINT. */
> if (vmbus_irq != -1)
> enable_percpu_irq(vmbus_irq, 0);
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> + shared_sint.as_uint64 = hv_get_register(REG_SINT0 +
> VMBUS_MESSAGE_SINT);
>
> shared_sint.vector = vmbus_interrupt;
> @@ -266,14 +279,14 @@ void hv_synic_enable_regs(unsigned int cpu)
> #else
> shared_sint.auto_eoi = 0;
> #endif
> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> + hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT,
> shared_sint.as_uint64);
>
> /* Enable the global synic bit */
> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> + sctrl.as_uint64 = hv_get_register(REG_SCTRL);
> sctrl.enable = 1;
>
> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> + hv_set_register(REG_SCTRL, sctrl.as_uint64);
> }
>
> int hv_synic_init(unsigned int cpu)
> @@ -297,17 +310,17 @@ void hv_synic_disable_regs(unsigned int cpu)
> union hv_synic_siefp siefp;
> union hv_synic_scontrol sctrl;
>
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> + shared_sint.as_uint64 = hv_get_register(REG_SINT0 +
> VMBUS_MESSAGE_SINT);
>
> shared_sint.masked = 1;
>
> /* Need to correctly cleanup in the case of SMP!!! */
> /* Disable the interrupt */
> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> + hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT,
> shared_sint.as_uint64);
>
> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> + simp.as_uint64 = hv_get_register(REG_SIMP);
> /*
> * In Isolation VM, sim and sief pages are allocated by
> * paravisor. These pages also will be used by kdump
> @@ -320,9 +333,9 @@ void hv_synic_disable_regs(unsigned int cpu)
> else
> simp.base_simp_gpa = 0;
>
> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> + hv_set_register(REG_SIMP, simp.as_uint64);
>
> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> + siefp.as_uint64 = hv_get_register(REG_SIEFP);
> siefp.siefp_enabled = 0;
>
> if (hv_isolation_type_snp())
> @@ -330,12 +343,12 @@ void hv_synic_disable_regs(unsigned int cpu)
> else
> siefp.base_siefp_gpa = 0;
>
> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + hv_set_register(REG_SIEFP, siefp.as_uint64);
>
> /* Disable the global synic bit */
> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> + sctrl.as_uint64 = hv_get_register(REG_SCTRL);
> sctrl.enable = 0;
> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> + hv_set_register(REG_SCTRL, sctrl.as_uint64);
>
> if (vmbus_irq != -1)
> disable_percpu_irq(vmbus_irq);
--
Vitaly
On Wed, Nov 02, 2022 at 02:00:17PM +0000, Jinank Jain wrote:
> Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
> the VMBus interrupt. But this does not work in case of nested
> hypervisor. Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
> vector range for VMBus and thus we have to use one of the vectors from
> that range and setup the IDT accordingly.
>
> Signed-off-by: Jinank Jain <[email protected]>
> ---
> arch/x86/include/asm/idtentry.h | 2 ++
> arch/x86/include/asm/irq_vectors.h | 6 ++++++
[...]
> #if IS_ENABLED(CONFIG_ACRN_GUEST)
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 43dcb9284208..729d19eab7f5 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -102,6 +102,12 @@
> #if IS_ENABLED(CONFIG_HYPERV)
> #define HYPERV_REENLIGHTENMENT_VECTOR 0xee
> #define HYPERV_STIMER0_VECTOR 0xed
> +/*
> + * FIXME: Change this, once Microsoft Hypervisor changes its assumption
> + * around VMBus interrupt vector allocation for nested root partition.
> + * Or provides a better interface to detect this instead of hardcoding.
> + */
> +#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
I would like to hear x86 maintainers opinion on this.
Thanks,
Wei.
This patch series plans to add support for running nested Microsoft
Hypervisor. In case of nested Microsoft Hypervisor there are few
privileged hypercalls which need to go L0 Hypervisor instead of L1
Hypervisor. This patches series basically identifies such hypercalls and
replace them with nested hypercalls.
Jinank Jain (5):
x86/hyperv: Add support for detecting nested hypervisor
Drivers: hv: Setup synic registers in case of nested root partition
x86/hyperv: Add an interface to do nested hypercalls
Drivers: hv: Enable vmbus driver for nested root partition
x86/hyperv: Change interrupt vector for nested root partition
[v4]
- Fix ARM64 compilation
[v5]
- Fix comments from Michael Kelly
arch/arm64/hyperv/mshyperv.c | 6 +++
arch/x86/include/asm/hyperv-tlfs.h | 17 ++++++-
arch/x86/include/asm/idtentry.h | 2 +
arch/x86/include/asm/irq_vectors.h | 6 +++
arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++------------
arch/x86/kernel/cpu/mshyperv.c | 71 ++++++++++++++++++++++++++++++
arch/x86/kernel/idt.c | 9 ++++
drivers/hv/hv.c | 18 +++++---
drivers/hv/hv_common.c | 7 ++-
drivers/hv/vmbus_drv.c | 5 ++-
include/asm-generic/hyperv-tlfs.h | 1 +
include/asm-generic/mshyperv.h | 1 +
12 files changed, 173 insertions(+), 38 deletions(-)
--
2.25.1
Please ignore the v5 patch series I posted the wrong set of patches.
Regards,
Jinank
On 11/24/2022 11:23 AM, Jinank Jain wrote:
> This patch series plans to add support for running nested Microsoft
> Hypervisor. In case of nested Microsoft Hypervisor there are few
> privileged hypercalls which need to go L0 Hypervisor instead of L1
> Hypervisor. This patches series basically identifies such hypercalls and
> replace them with nested hypercalls.
>
> Jinank Jain (5):
> x86/hyperv: Add support for detecting nested hypervisor
> Drivers: hv: Setup synic registers in case of nested root partition
> x86/hyperv: Add an interface to do nested hypercalls
> Drivers: hv: Enable vmbus driver for nested root partition
> x86/hyperv: Change interrupt vector for nested root partition
>
> [v4]
> - Fix ARM64 compilation
>
> [v5]
> - Fix comments from Michael Kelly
>
> arch/arm64/hyperv/mshyperv.c | 6 +++
> arch/x86/include/asm/hyperv-tlfs.h | 17 ++++++-
> arch/x86/include/asm/idtentry.h | 2 +
> arch/x86/include/asm/irq_vectors.h | 6 +++
> arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++------------
> arch/x86/kernel/cpu/mshyperv.c | 71 ++++++++++++++++++++++++++++++
> arch/x86/kernel/idt.c | 9 ++++
> drivers/hv/hv.c | 18 +++++---
> drivers/hv/hv_common.c | 7 ++-
> drivers/hv/vmbus_drv.c | 5 ++-
> include/asm-generic/hyperv-tlfs.h | 1 +
> include/asm-generic/mshyperv.h | 1 +
> 12 files changed, 173 insertions(+), 38 deletions(-)
>
Currently VMBus driver is not initialized for root partition but we need
to enable the VMBus driver for nested root partition. This is required,
so that L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <[email protected]>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index db00d20c726d..0937877eade9 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2744,7 +2744,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1