Child partitions are free to allocate SynIC message and event page but in
case of root partition it must use the pages allocated by Microsoft
Hypervisor (MSHV). Base address for these pages can be found using
synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
for nested vs non-nested root partition.
Signed-off-by: Jinank Jain <[email protected]>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 +++++
arch/x86/include/asm/mshyperv.h | 30 +++-----------
arch/x86/kernel/cpu/mshyperv.c | 65 ++++++++++++++++++++++++++++++
drivers/hv/hv.c | 19 ++++++---
4 files changed, 96 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..c38e4c66a3ac 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_non_nested_register(unsigned int reg);
+void hv_set_non_nested_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f9b78d4829e3..938fc82edf05 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,7 +41,72 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SVERSION:
+ return HV_REGISTER_NESTED_SVERSION;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg;
+ }
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
+u64 hv_get_non_nested_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
+
+void hv_set_non_nested_register(unsigned int reg, u64 value)
+{
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
+
+u64 hv_get_register(unsigned int reg)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ return hv_get_non_nested_register(reg);
+}
+EXPORT_SYMBOL_GPL(hv_get_register);
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ hv_set_non_nested_register(reg, value);
+}
+EXPORT_SYMBOL_GPL(hv_set_register);
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..986814a903ee 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Setup the Synic's message page */
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
--
2.25.1
From: Jinank Jain <[email protected]> Sent: Tuesday, December 13, 2022 10:33 PM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <[email protected]>
You have addressed all my previous comments and those areas
look good. But I see a new issue that I had not noticed before.
See comment below.
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 +++++
> arch/x86/include/asm/mshyperv.h | 30 +++-----------
> arch/x86/kernel/cpu/mshyperv.c | 65 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 19 ++++++---
> 4 files changed, 96 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..c38e4c66a3ac 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
> +u64 hv_get_non_nested_register(unsigned int reg);
> +void hv_set_non_nested_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> }
> static inline void hv_set_register(unsigned int reg, u64 value) { }
> static inline u64 hv_get_register(unsigned int reg) { return 0; }
> +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
> +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> bool visible)
> {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f9b78d4829e3..938fc82edf05 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,7 +41,72 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_SIEFP:
> + return HV_REGISTER_NESTED_SIEFP;
> + case HV_REGISTER_SVERSION:
> + return HV_REGISTER_NESTED_SVERSION;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> +u64 hv_get_non_nested_register(unsigned int reg)
> +{
> + u64 value;
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
> +
> +void hv_set_non_nested_register(unsigned int reg, u64 value)
> +{
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + return hv_get_non_nested_register(reg);
> +}
> +EXPORT_SYMBOL_GPL(hv_get_register);
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + hv_set_non_nested_register(reg, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_set_register);
> +
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> static void (*hv_kexec_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..986814a903ee 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
These memunmap() calls seem to be done in the wrong place. There are two
pairs of functions that should be symmetrical unless there's a really good
reason otherwise:
1. hv_synic_alloc() and hv_synic_free()
2. hv_synic_enable_regs() and hv_synic_disable_regs()
The functions in #1 handle the allocation and freeing of these three
pages: synic_event_page, synic_message_page, and post_msg_page.
If the synic_event_page and synic_message_page don't need to be
allocated because they are provided by the hypervisor or paravisor,
then the allocation is skipped in hv_synic_alloc(), and the free_page
operations in hv_synic_free() are no-ops if the corresponding pointer
is NULL.
The functions in #2 should handle the mapping and unmapping in the
case of pages provided by the hypervisor or paravisor. It appears
that the hv_isolation_type_snp() case does this (mostly) correctly.
But your code does the unmap in hv_synic_free(), which isn't
symmetrical. Unless there's something unique about the situation
when running in the root partition, the unmap should be done in
hv_synic_disable_regs() like it is for the SNP case.
My "mostly" correctly comment above is because the current code
in hv_synic_disable_regs() should be setting hv_cpu->synic_message_page
and hv_cpu->synic_event_page to NULL after the unmap is done.
Those pointers must be reverted to NULL so that if hv_synic_free() is
then run, it won't try to free pages that were provided by the hypervisor
or paravisor.
Michael
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
>
> /* Setup the Synic's message page */
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> --
> 2.25.1