2021-02-28 15:10:41

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 00/12] x86/Hyper-V: Add Hyper-V Isolation VM support

From: Tianyu Lan <[email protected]>

Hyper-V provides two kinds of Isolation VMs: VBS (Virtualization-based
security) and AMD SEV-SNP unenlightened Isolation VMs. This patchset
adds support for these Isolation VMs in Linux.

The memory of these VMs is encrypted and the host can't access guest
memory directly. Hyper-V provides a new host visibility hvcall, and
the guest needs to call this hvcall to mark memory visible to the host
before sharing memory with the host. For security, network/storage
stack memory should not be shared with the host, and so bounce
buffers are required.

The vmbus channel ring buffer already plays the bounce buffer role
because all data from/to the host needs to be copied between the ring
buffer and IO stack memory. So mark the vmbus channel ring buffer
visible.

There are two exceptions - packets sent by
vmbus_sendpacket_pagebuffer() and vmbus_sendpacket_mpb_desc(). These
packets contain IO stack memory addresses and the host will access
this memory. So add bounce buffer allocation support in vmbus for
these packets.

For an SNP Isolation VM, the guest needs to access the shared memory
via an extra address space which is specified by the Hyper-V CPUID
leaf HYPERV_CPUID_ISOLATION_CONFIG. The physical address used to
access the shared memory should be the bounce buffer memory GPA plus
the shared_gpa_boundary reported by CPUID.

Tianyu Lan (12):
x86/Hyper-V: Add visibility parameter for vmbus_establish_gpadl()
x86/Hyper-V: Add new hvcall guest address host visibility support
x86/HV: Initialize GHCB page and shared memory boundary
HV: Add Write/Read MSR registers via ghcb
HV: Add ghcb hvcall support for SNP VM
HV/Vmbus: Add SNP support for VMbus channel initiate message
hv/vmbus: Initialize VMbus ring buffer for Isolation VM
x86/Hyper-V: Initialize bounce buffer page cache and list
x86/Hyper-V: Add new parameter for
vmbus_sendpacket_pagebuffer()/mpb_desc()
HV: Add bounce buffer support for Isolation VM
HV/Netvsc: Add Isolation VM support for netvsc driver
HV/Storvsc: Add bounce buffer support for Storvsc

arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/hv_init.c | 70 +++-
arch/x86/hyperv/ivm.c | 257 ++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 22 +
arch/x86/include/asm/mshyperv.h | 26 +-
arch/x86/kernel/cpu/mshyperv.c | 2 +
drivers/hv/Makefile | 2 +-
drivers/hv/channel.c | 103 ++++-
drivers/hv/channel_mgmt.c | 30 +-
drivers/hv/connection.c | 68 +++-
drivers/hv/hv.c | 196 ++++++---
drivers/hv/hv_bounce.c | 619 +++++++++++++++++++++++++++++
drivers/hv/hyperv_vmbus.h | 42 ++
drivers/hv/ring_buffer.c | 83 +++-
drivers/net/hyperv/hyperv_net.h | 5 +
drivers/net/hyperv/netvsc.c | 111 +++++-
drivers/scsi/storvsc_drv.c | 46 ++-
drivers/uio/uio_hv_generic.c | 13 +-
include/asm-generic/hyperv-tlfs.h | 1 +
include/asm-generic/mshyperv.h | 24 +-
include/linux/hyperv.h | 46 ++-
mm/ioremap.c | 1 +
mm/vmalloc.c | 1 +
23 files changed, 1614 insertions(+), 156 deletions(-)
create mode 100644 arch/x86/hyperv/ivm.c
create mode 100644 drivers/hv/hv_bounce.c

--
2.25.1


2021-02-28 15:11:29

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 4/12] HV: Add Write/Read MSR registers via ghcb

From: Tianyu Lan <[email protected]>

Hyper-V provides a GHCB protocol to write Synthetic Interrupt
Controller MSR registers; these registers are emulated by the
hypervisor rather than the paravisor.

Hyper-V requires the SINTx MSR registers to be written twice (once via
GHCB and once via the wrmsr instruction including the proxy bit 21).
The Guest OS ID MSR also needs to be set via GHCB.

Signed-off-by: Tianyu Lan <[email protected]>
---
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/hv_init.c | 18 +--
arch/x86/hyperv/ivm.c | 178 ++++++++++++++++++++++++++++++
arch/x86/include/asm/mshyperv.h | 21 +++-
arch/x86/kernel/cpu/mshyperv.c | 46 --------
drivers/hv/channel.c | 2 +-
drivers/hv/hv.c | 188 ++++++++++++++++++++++----------
include/asm-generic/mshyperv.h | 10 +-
8 files changed, 343 insertions(+), 122 deletions(-)
create mode 100644 arch/x86/hyperv/ivm.c

diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 48e2c51464e8..5d2de10809ae 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y := hv_init.o mmu.o nested.o irqdomain.o
+obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o

ifdef CONFIG_X86_64
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 90e65fbf4c58..87b1dd9c84d6 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -475,6 +475,9 @@ void __init hyperv_init(void)

ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
*ghcb_base = ghcb_va;
+
+ /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
+ hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
}

rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -561,6 +564,7 @@ void hyperv_cleanup(void)

/* Reset our OS id */
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
+ hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);

/*
* Reset hypercall page reference before reset the page,
@@ -668,17 +672,3 @@ bool hv_is_hibernation_supported(void)
return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
}
EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
-
-enum hv_isolation_type hv_get_isolation_type(void)
-{
- if (!(ms_hyperv.features_b & HV_ISOLATION))
- return HV_ISOLATION_TYPE_NONE;
- return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
-}
-EXPORT_SYMBOL_GPL(hv_get_isolation_type);
-
-bool hv_is_isolation_supported(void)
-{
- return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
-}
-EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
new file mode 100644
index 000000000000..4332bf7aaf9b
--- /dev/null
+++ b/arch/x86/hyperv/ivm.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V Isolation VM interface with paravisor and hypervisor
+ *
+ * Author:
+ * Tianyu Lan <[email protected]>
+ */
+#include <linux/types.h>
+#include <linux/bitfield.h>
+#include <asm/io.h>
+#include <asm/svm.h>
+#include <asm/sev-es.h>
+#include <asm/mshyperv.h>
+
+union hv_ghcb {
+ struct ghcb ghcb;
+} __packed __aligned(PAGE_SIZE);
+
+void hv_ghcb_msr_write(u64 msr, u64 value)
+{
+ union hv_ghcb *hv_ghcb;
+ void **ghcb_base;
+ unsigned long flags;
+
+ if (!ms_hyperv.ghcb_base)
+ return;
+
+ local_irq_save(flags);
+ ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+ hv_ghcb = (union hv_ghcb *)*ghcb_base;
+ if (!hv_ghcb) {
+ local_irq_restore(flags);
+ return;
+ }
+
+ memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
+
+ hv_ghcb->ghcb.protocol_version = 1;
+ hv_ghcb->ghcb.ghcb_usage = 0;
+
+ ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
+ ghcb_set_rcx(&hv_ghcb->ghcb, msr);
+ ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
+ ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
+ ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
+ ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
+
+ VMGEXIT();
+
+ if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
+ pr_warn("Fail to write msr via ghcb.\n.");
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
+
+void hv_ghcb_msr_read(u64 msr, u64 *value)
+{
+ union hv_ghcb *hv_ghcb;
+ void **ghcb_base;
+ unsigned long flags;
+
+ if (!ms_hyperv.ghcb_base)
+ return;
+
+ local_irq_save(flags);
+ ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+ hv_ghcb = (union hv_ghcb *)*ghcb_base;
+ if (!hv_ghcb) {
+ local_irq_restore(flags);
+ return;
+ }
+
+ memset(hv_ghcb, 0x00, PAGE_SIZE);
+ hv_ghcb->ghcb.protocol_version = 1;
+ hv_ghcb->ghcb.ghcb_usage = 0;
+
+ ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
+ ghcb_set_rcx(&hv_ghcb->ghcb, msr);
+ ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 0);
+ ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
+
+ VMGEXIT();
+
+ if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
+ pr_warn("Fail to write msr via ghcb.\n.");
+ else
+ *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
+ | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
+
+void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value)
+{
+ hv_ghcb_msr_read(msr, value);
+}
+EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb);
+
+void hv_sint_wrmsrl_ghcb(u64 msr, u64 value)
+{
+ hv_ghcb_msr_write(msr, value);
+
+ /* Write proxy bit via wrmsrl instruction. */
+ if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15)
+ wrmsrl(msr, value | 1 << 20);
+}
+EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb);
+
+inline void hv_signal_eom_ghcb(void)
+{
+ hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0);
+}
+EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb);
+
+enum hv_isolation_type hv_get_isolation_type(void)
+{
+ if (!(ms_hyperv.features_b & HV_ISOLATION))
+ return HV_ISOLATION_TYPE_NONE;
+ return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
+}
+EXPORT_SYMBOL_GPL(hv_get_isolation_type);
+
+bool hv_is_isolation_supported(void)
+{
+ return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
+}
+EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+
+bool hv_isolation_type_snp(void)
+{
+ return hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP;
+}
+EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
+
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
+{
+ struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
+ struct hv_input_modify_sparse_gpa_page_host_visibility *input;
+ u16 pages_processed;
+ u64 hv_status;
+ unsigned long flags;
+
+ /* no-op if partition isolation is not enabled */
+ if (!hv_is_isolation_supported())
+ return 0;
+
+ if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
+ pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
+ HV_MAX_MODIFY_GPA_REP_COUNT);
+ return -EINVAL;
+ }
+
+ local_irq_save(flags);
+ input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+ input = *input_pcpu;
+ if (unlikely(!input)) {
+ local_irq_restore(flags);
+ return -1;
+ }
+
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->host_visibility = visibility;
+ input->reserved0 = 0;
+ input->reserved1 = 0;
+ memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
+ hv_status = hv_do_rep_hypercall(
+ HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
+ 0, input, &pages_processed);
+ local_irq_restore(flags);
+
+ if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
+ return 0;
+
+ return -EFAULT;
+}
+EXPORT_SYMBOL(hv_mark_gpa_visibility);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 1e8275d35c1f..f624d72b99d3 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -269,6 +269,25 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility);
int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
+void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
+void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
+void hv_signal_eom_ghcb(void);
+void hv_ghcb_msr_write(u64 msr, u64 value);
+void hv_ghcb_msr_read(u64 msr, u64 *value);
+
+#define hv_get_synint_state_ghcb(int_num, val) \
+ hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
+#define hv_set_synint_state_ghcb(int_num, val) \
+ hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
+
+#define hv_get_simp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val)
+#define hv_set_simp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val)
+
+#define hv_get_siefp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val)
+#define hv_set_siefp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val)
+
+#define hv_get_synic_state_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
+#define hv_set_synic_state_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
@@ -287,9 +306,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
{
return -1;
}
+static inline void hv_signal_eom_ghcb(void) { };
#endif /* CONFIG_HYPERV */

-
#include <asm-generic/mshyperv.h>

#endif
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index d6c363456cbf..aeafd4017c89 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,8 +37,6 @@
bool hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);

-#define HV_PARTITION_ID_SELF ((u64)-1)
-
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);

@@ -481,47 +479,3 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
.init.init_platform = ms_hyperv_init_platform,
};
-
-int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
-{
- struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
- struct hv_input_modify_sparse_gpa_page_host_visibility *input;
- u16 pages_processed;
- u64 hv_status;
- unsigned long flags;
-
- /* no-op if partition isolation is not enabled */
- if (!hv_is_isolation_supported())
- return 0;
-
- if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
- pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
- HV_MAX_MODIFY_GPA_REP_COUNT);
- return -EINVAL;
- }
-
- local_irq_save(flags);
- input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
- input = *input_pcpu;
- if (unlikely(!input)) {
- local_irq_restore(flags);
- return -1;
- }
-
- input->partition_id = HV_PARTITION_ID_SELF;
- input->host_visibility = visibility;
- input->reserved0 = 0;
- input->reserved1 = 0;
- memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
- hv_status = hv_do_rep_hypercall(
- HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
- 0, input, &pages_processed);
- local_irq_restore(flags);
-
- if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
- return 0;
-
- return -EFAULT;
-}
-EXPORT_SYMBOL(hv_mark_gpa_visibility);
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 204e6f3598a5..f31b669a1ddf 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -247,7 +247,7 @@ int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility)
u64 *pfn_array;
int ret = 0;

- if (!hv_isolation_type_snp())
+ if (!hv_is_isolation_supported())
return 0;

pfn_array = vzalloc(HV_HYP_PAGE_SIZE);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index f202ac7f4b3d..28e28ccc2081 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -99,17 +99,24 @@ int hv_synic_alloc(void)
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);

- hv_cpu->synic_message_page =
- (void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->synic_message_page == NULL) {
- pr_err("Unable to allocate SYNIC message page\n");
- goto err;
- }
+ /*
+ * Synic message and event pages are allocated by paravisor.
+ * Skip these pages allocation here.
+ */
+ if (!hv_isolation_type_snp()) {
+ hv_cpu->synic_message_page =
+ (void *)get_zeroed_page(GFP_ATOMIC);
+ if (hv_cpu->synic_message_page == NULL) {
+ pr_err("Unable to allocate SYNIC message page\n");
+ goto err;
+ }

- hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
- if (hv_cpu->synic_event_page == NULL) {
- pr_err("Unable to allocate SYNIC event page\n");
- goto err;
+ hv_cpu->synic_event_page =
+ (void *)get_zeroed_page(GFP_ATOMIC);
+ if (hv_cpu->synic_event_page == NULL) {
+ pr_err("Unable to allocate SYNIC event page\n");
+ goto err;
+ }
}

hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
@@ -136,10 +143,17 @@ void hv_synic_free(void)
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
+ free_page((unsigned long)hv_cpu->post_msg_page);
+
+ /*
+ * Synic message and event pages are allocated by paravisor.
+ * Skip free these pages here.
+ */
+ if (hv_isolation_type_snp())
+ continue;

free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
- free_page((unsigned long)hv_cpu->post_msg_page);
}

kfree(hv_context.hv_numa_map);
@@ -161,35 +175,72 @@ void hv_synic_enable_regs(unsigned int cpu)
union hv_synic_sint shared_sint;
union hv_synic_scontrol sctrl;

- /* Setup the Synic's message page */
- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 1;
- simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
- >> HV_HYP_PAGE_SHIFT;
-
- hv_set_simp(simp.as_uint64);
-
- /* Setup the Synic's event page */
- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 1;
- siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
- >> HV_HYP_PAGE_SHIFT;
-
- hv_set_siefp(siefp.as_uint64);
-
- /* Setup the shared SINT. */
- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- shared_sint.vector = hv_get_vector();
- shared_sint.masked = false;
- shared_sint.auto_eoi = hv_recommend_using_aeoi();
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
- /* Enable the global synic bit */
- hv_get_synic_state(sctrl.as_uint64);
- sctrl.enable = 1;
-
- hv_set_synic_state(sctrl.as_uint64);
+ /*
+ * Setup Synic pages for CVM. Synic message and event page
+ * are allocated by paravisor in the SNP CVM.
+ */
+ if (hv_isolation_type_snp()) {
+ /* Setup the Synic's message. */
+ hv_get_simp_ghcb(&simp.as_uint64);
+ simp.simp_enabled = 1;
+ hv_cpu->synic_message_page
+ = ioremap_cache(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
+ PAGE_SIZE);
+ if (!hv_cpu->synic_message_page)
+ pr_warn("Fail to map syinc message page.\n");
+
+ hv_set_simp_ghcb(simp.as_uint64);
+
+ /* Setup the Synic's event page */
+ hv_get_siefp_ghcb(&siefp.as_uint64);
+ siefp.siefp_enabled = 1;
+ hv_cpu->synic_event_page = ioremap_cache(
+ siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, PAGE_SIZE);
+ if (!hv_cpu->synic_event_page)
+ pr_warn("Fail to map syinc event page.\n");
+ hv_set_siefp_ghcb(siefp.as_uint64);
+
+ /* Setup the shared SINT. */
+ hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
+ &shared_sint.as_uint64);
+ shared_sint.vector = hv_get_vector();
+ shared_sint.masked = false;
+ shared_sint.auto_eoi = hv_recommend_using_aeoi();
+ hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+
+ /* Enable the global synic bit */
+ hv_get_synic_state_ghcb(&sctrl.as_uint64);
+ sctrl.enable = 1;
+ hv_set_synic_state_ghcb(sctrl.as_uint64);
+ } else {
+ /* Setup the Synic's message. */
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 1;
+ simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
+ >> HV_HYP_PAGE_SHIFT;
+ hv_set_simp(simp.as_uint64);
+
+ /* Setup the Synic's event page */
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 1;
+ siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
+ >> HV_HYP_PAGE_SHIFT;
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Setup the shared SINT. */
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ shared_sint.vector = hv_get_vector();
+ shared_sint.masked = false;
+ shared_sint.auto_eoi = hv_recommend_using_aeoi();
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+ /* Enable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 1;
+ hv_set_synic_state(sctrl.as_uint64);
+ }
}

int hv_synic_init(unsigned int cpu)
@@ -211,30 +262,53 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;

- hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+ if (hv_isolation_type_snp()) {
+ hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
+ &shared_sint.as_uint64);
+ shared_sint.masked = 1;
+ hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+
+ hv_get_simp_ghcb(&simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;
+ hv_set_simp_ghcb(simp.as_uint64);
+
+ hv_get_siefp_ghcb(&siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;
+ hv_set_siefp_ghcb(siefp.as_uint64);

- shared_sint.masked = 1;
+ /* Disable the global synic bit */
+ hv_get_synic_state_ghcb(&sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state_ghcb(sctrl.as_uint64);
+ } else {
+ hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

- /* Need to correctly cleanup in the case of SMP!!! */
- /* Disable the interrupt */
- hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+ shared_sint.masked = 1;

- hv_get_simp(simp.as_uint64);
- simp.simp_enabled = 0;
- simp.base_simp_gpa = 0;
+ /* Need to correctly cleanup in the case of SMP!!! */
+ /* Disable the interrupt */
+ hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);

- hv_set_simp(simp.as_uint64);
+ hv_get_simp(simp.as_uint64);
+ simp.simp_enabled = 0;
+ simp.base_simp_gpa = 0;

- hv_get_siefp(siefp.as_uint64);
- siefp.siefp_enabled = 0;
- siefp.base_siefp_gpa = 0;
+ hv_set_simp(simp.as_uint64);

- hv_set_siefp(siefp.as_uint64);
+ hv_get_siefp(siefp.as_uint64);
+ siefp.siefp_enabled = 0;
+ siefp.base_siefp_gpa = 0;

- /* Disable the global synic bit */
- hv_get_synic_state(sctrl.as_uint64);
- sctrl.enable = 0;
- hv_set_synic_state(sctrl.as_uint64);
+ hv_set_siefp(siefp.as_uint64);
+
+ /* Disable the global synic bit */
+ hv_get_synic_state(sctrl.as_uint64);
+ sctrl.enable = 0;
+ hv_set_synic_state(sctrl.as_uint64);
+ }
}

int hv_synic_cleanup(unsigned int cpu)
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index ad0e33776668..6727f4073b5a 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -23,6 +23,7 @@
#include <linux/bitops.h>
#include <linux/cpumask.h>
#include <asm/ptrace.h>
+#include <asm/mshyperv.h>
#include <asm/hyperv-tlfs.h>

struct ms_hyperv_info {
@@ -52,7 +53,7 @@ extern struct ms_hyperv_info ms_hyperv;

extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
-
+extern bool hv_isolation_type_snp(void);

/* Generate the guest OS identifier as described in the Hyper-V TLFS */
static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
@@ -100,7 +101,11 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
* possibly deliver another msg from the
* hypervisor
*/
- hv_signal_eom();
+ if (hv_isolation_type_snp() &&
+ old_msg_type != HVMSG_TIMER_EXPIRED)
+ hv_signal_eom_ghcb();
+ else
+ hv_signal_eom();
}
}

@@ -186,6 +191,7 @@ bool hv_is_hyperv_initialized(void);
bool hv_is_hibernation_supported(void);
enum hv_isolation_type hv_get_isolation_type(void);
bool hv_is_isolation_supported(void);
+bool hv_isolation_type_snp(void);
void hyperv_cleanup(void);
#else /* CONFIG_HYPERV */
static inline bool hv_is_hyperv_initialized(void) { return false; }
--
2.25.1

2021-02-28 15:12:08

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 5/12] HV: Add ghcb hvcall support for SNP VM

From: Tianyu Lan <[email protected]>

Hyper-V provides a GHCB hvcall to handle the VMBus
HVCALL_SIGNAL_EVENT and HVCALL_POST_MESSAGE
messages in an SNP Isolation VM. Add such support.

Signed-off-by: Tianyu Lan <[email protected]>
---
arch/x86/hyperv/ivm.c | 69 +++++++++++++++++++++++++++++++++
arch/x86/include/asm/mshyperv.h | 1 +
drivers/hv/connection.c | 6 ++-
drivers/hv/hv.c | 8 +++-
4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 4332bf7aaf9b..feaabcd151f5 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -14,8 +14,77 @@

union hv_ghcb {
struct ghcb ghcb;
+ struct {
+ u64 hypercalldata[509];
+ u64 outputgpa;
+ union {
+ union {
+ struct {
+ u32 callcode : 16;
+ u32 isfast : 1;
+ u32 reserved1 : 14;
+ u32 isnested : 1;
+ u32 countofelements : 12;
+ u32 reserved2 : 4;
+ u32 repstartindex : 12;
+ u32 reserved3 : 4;
+ };
+ u64 asuint64;
+ } hypercallinput;
+ union {
+ struct {
+ u16 callstatus;
+ u16 reserved1;
+ u32 elementsprocessed : 12;
+ u32 reserved2 : 20;
+ };
+ u64 asunit64;
+ } hypercalloutput;
+ };
+ u64 reserved2;
+ } hypercall;
} __packed __aligned(PAGE_SIZE);

+u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
+{
+ union hv_ghcb *hv_ghcb;
+ void **ghcb_base;
+ unsigned long flags;
+
+ if (!ms_hyperv.ghcb_base)
+ return -EFAULT;
+
+ local_irq_save(flags);
+ ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
+ hv_ghcb = (union hv_ghcb *)*ghcb_base;
+ if (!hv_ghcb) {
+ local_irq_restore(flags);
+ return -EFAULT;
+ }
+
+ memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
+ hv_ghcb->ghcb.protocol_version = 1;
+ hv_ghcb->ghcb.ghcb_usage = 1;
+
+ hv_ghcb->hypercall.outputgpa = (u64)output;
+ hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
+ hv_ghcb->hypercall.hypercallinput.callcode = control;
+
+ if (input_size)
+ memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);
+
+ VMGEXIT();
+
+ hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
+ memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
+ sizeof(hv_ghcb->ghcb.save.valid_bitmap));
+
+ local_irq_restore(flags);
+
+ return hv_ghcb->hypercall.hypercalloutput.callstatus;
+}
+EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
+
void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f624d72b99d3..c8f66d269e5b 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -274,6 +274,7 @@ void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
void hv_signal_eom_ghcb(void);
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
+u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);

#define hv_get_synint_state_ghcb(int_num, val) \
hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index c83612cddb99..79bca653dce9 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -442,6 +442,10 @@ void vmbus_set_event(struct vmbus_channel *channel)

++channel->sig_events;

- hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
+ if (hv_isolation_type_snp())
+ hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
+ NULL, sizeof(u64));
+ else
+ hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
}
EXPORT_SYMBOL_GPL(vmbus_set_event);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 28e28ccc2081..6c64a7fd1ebd 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -60,7 +60,13 @@ int hv_post_message(union hv_connection_id connection_id,
aligned_msg->payload_size = payload_size;
memcpy((void *)aligned_msg->payload, payload, payload_size);

- status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
+ if (hv_isolation_type_snp())
+ status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
+ (void *)aligned_msg, NULL,
+ sizeof(struct hv_input_post_message));
+ else
+ status = hv_do_hypercall(HVCALL_POST_MESSAGE,
+ aligned_msg, NULL);

/* Preemption must remain disabled until after the hypercall
* so some other thread can't get scheduled onto this cpu and
--
2.25.1

2021-02-28 15:12:09

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 7/12] hv/vmbus: Initialize VMbus ring buffer for Isolation VM

From: Tianyu Lan <[email protected]>

The VMbus ring buffer is shared with the host and needs to
be accessed via the extra address space of an Isolation VM
with SNP support. This patch maps the ring buffer
address in the extra address space via ioremap(). The HV host
visibility hvcall smears data in the ring buffer, so
reset the ring buffer memory to zero after calling the
visibility hvcall.

Signed-off-by: Sunil Muthuswamy <[email protected]>
Co-Developed-by: Sunil Muthuswamy <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/hv/channel.c | 10 +++++
drivers/hv/hyperv_vmbus.h | 2 +
drivers/hv/ring_buffer.c | 83 +++++++++++++++++++++++++++++----------
mm/ioremap.c | 1 +
mm/vmalloc.c | 1 +
5 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index f31b669a1ddf..4c05b1488649 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -657,6 +657,16 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
if (err)
goto error_clean_ring;

+ err = hv_ringbuffer_post_init(&newchannel->outbound,
+ page, send_pages);
+ if (err)
+ goto error_free_gpadl;
+
+ err = hv_ringbuffer_post_init(&newchannel->inbound,
+ &page[send_pages], recv_pages);
+ if (err)
+ goto error_free_gpadl;
+
/* Create and init the channel open message */
open_info = kzalloc(sizeof(*open_info) +
sizeof(struct vmbus_channel_open_channel),
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 0778add21a9c..d78a04ad5490 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -172,6 +172,8 @@ extern int hv_synic_cleanup(unsigned int cpu);
/* Interface */

void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
+ struct page *pages, u32 page_cnt);

int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 pagecnt);
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 35833d4d1a1d..c8b0f7b45158 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -17,6 +17,8 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
+#include <linux/io.h>
+#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"

@@ -188,6 +190,44 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
mutex_init(&channel->outbound.ring_buffer_mutex);
}

+int hv_ringbuffer_post_init(struct hv_ring_buffer_info *ring_info,
+ struct page *pages, u32 page_cnt)
+{
+ struct vm_struct *area;
+ u64 physic_addr = page_to_pfn(pages) << PAGE_SHIFT;
+ unsigned long vaddr;
+ int err = 0;
+
+ if (!hv_isolation_type_snp())
+ return 0;
+
+ physic_addr += ms_hyperv.shared_gpa_boundary;
+ area = get_vm_area((2 * page_cnt - 1) * PAGE_SIZE, VM_IOREMAP);
+ if (!area || !area->addr)
+ return -EFAULT;
+
+ vaddr = (unsigned long)area->addr;
+ err = ioremap_page_range(vaddr, vaddr + page_cnt * PAGE_SIZE,
+ physic_addr, PAGE_KERNEL_IO);
+ err |= ioremap_page_range(vaddr + page_cnt * PAGE_SIZE,
+ vaddr + (2 * page_cnt - 1) * PAGE_SIZE,
+ physic_addr + PAGE_SIZE, PAGE_KERNEL_IO);
+ if (err) {
+ vunmap((void *)vaddr);
+ return -EFAULT;
+ }
+
+ /* Clean memory after setting host visibility. */
+ memset((void *)vaddr, 0x00, page_cnt * PAGE_SIZE);
+
+ ring_info->ring_buffer = (struct hv_ring_buffer *)vaddr;
+ ring_info->ring_buffer->read_index = 0;
+ ring_info->ring_buffer->write_index = 0;
+ ring_info->ring_buffer->feature_bits.value = 1;
+
+ return 0;
+}
+
/* Initialize the ring buffer. */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
struct page *pages, u32 page_cnt)
@@ -197,33 +237,34 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,

BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));

- /*
- * First page holds struct hv_ring_buffer, do wraparound mapping for
- * the rest.
- */
- pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
- GFP_KERNEL);
- if (!pages_wraparound)
- return -ENOMEM;
-
- pages_wraparound[0] = pages;
- for (i = 0; i < 2 * (page_cnt - 1); i++)
- pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+ if (!hv_isolation_type_snp()) {
+ /*
+ * First page holds struct hv_ring_buffer, do wraparound mapping for
+ * the rest.
+ */
+ pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pages_wraparound)
+ return -ENOMEM;

- ring_info->ring_buffer = (struct hv_ring_buffer *)
- vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+ pages_wraparound[0] = pages;
+ for (i = 0; i < 2 * (page_cnt - 1); i++)
+ pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];

- kfree(pages_wraparound);
+ ring_info->ring_buffer = (struct hv_ring_buffer *)
+ vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);

+ kfree(pages_wraparound);

- if (!ring_info->ring_buffer)
- return -ENOMEM;
+ if (!ring_info->ring_buffer)
+ return -ENOMEM;

- ring_info->ring_buffer->read_index =
- ring_info->ring_buffer->write_index = 0;
+ ring_info->ring_buffer->read_index =
+ ring_info->ring_buffer->write_index = 0;

- /* Set the feature bit for enabling flow control. */
- ring_info->ring_buffer->feature_bits.value = 1;
+ /* Set the feature bit for enabling flow control. */
+ ring_info->ring_buffer->feature_bits.value = 1;
+ }

ring_info->ring_size = page_cnt << PAGE_SHIFT;
ring_info->ring_size_div10_reciprocal =
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5fa1ab41d152..d63c4ba067f9 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -248,6 +248,7 @@ int ioremap_page_range(unsigned long addr,

return err;
}
+EXPORT_SYMBOL_GPL(ioremap_page_range);

#ifdef CONFIG_GENERIC_IOREMAP
void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e6f352bf0498..19724a8ebcb7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2131,6 +2131,7 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
NUMA_NO_NODE, GFP_KERNEL,
__builtin_return_address(0));
}
+EXPORT_SYMBOL_GPL(get_vm_area);

struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
const void *caller)
--
2.25.1

2021-02-28 15:12:25

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 8/12] x86/Hyper-V: Initialize bounce buffer page cache and list

From: Tianyu Lan <[email protected]>

Initialize/free bounce buffer resources when adding/deleting a
vmbus channel in an Isolation VM.

Signed-off-by: Sunil Muthuswamy <[email protected]>
Co-Developed-by: Sunil Muthuswamy <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/hv/Makefile | 2 +-
drivers/hv/channel_mgmt.c | 29 +++++++++++++++++----------
drivers/hv/hv_bounce.c | 42 +++++++++++++++++++++++++++++++++++++++
drivers/hv/hyperv_vmbus.h | 14 +++++++++++++
include/linux/hyperv.h | 22 ++++++++++++++++++++
5 files changed, 97 insertions(+), 12 deletions(-)
create mode 100644 drivers/hv/hv_bounce.c

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 94daf8240c95..b0c20fed9153 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -8,6 +8,6 @@ CFLAGS_hv_balloon.o = -I$(src)

hv_vmbus-y := vmbus_drv.o \
hv.o connection.o channel.o \
- channel_mgmt.o ring_buffer.o hv_trace.o
+ channel_mgmt.o ring_buffer.o hv_trace.o hv_bounce.o
hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f0ed730e2e4e..e2846cacfd70 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -336,6 +336,18 @@ bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,

EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

+/*
+ * free_channel - Release the resources used by the vmbus channel object
+ */
+static void free_channel(struct vmbus_channel *channel)
+{
+ tasklet_kill(&channel->callback_event);
+ vmbus_remove_channel_attr_group(channel);
+
+ kobject_put(&channel->kobj);
+ hv_free_channel_ivm(channel);
+}
+
/*
* alloc_channel - Allocate and initialize a vmbus channel object
*/
@@ -360,17 +372,6 @@ static struct vmbus_channel *alloc_channel(void)
return channel;
}

-/*
- * free_channel - Release the resources used by the vmbus channel object
- */
-static void free_channel(struct vmbus_channel *channel)
-{
- tasklet_kill(&channel->callback_event);
- vmbus_remove_channel_attr_group(channel);
-
- kobject_put(&channel->kobj);
-}
-
void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
@@ -510,6 +511,8 @@ static void vmbus_add_channel_work(struct work_struct *work)
if (vmbus_add_channel_kobj(dev, newchannel))
goto err_deq_chan;

+ hv_init_channel_ivm(newchannel);
+
if (primary_channel->sc_creation_callback != NULL)
primary_channel->sc_creation_callback(newchannel);

@@ -543,6 +546,10 @@ static void vmbus_add_channel_work(struct work_struct *work)
}

newchannel->probe_done = true;
+
+ if (hv_init_channel_ivm(newchannel))
+ goto err_deq_chan;
+
return;

err_deq_chan:
diff --git a/drivers/hv/hv_bounce.c b/drivers/hv/hv_bounce.c
new file mode 100644
index 000000000000..c5898325b238
--- /dev/null
+++ b/drivers/hv/hv_bounce.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bounce buffer code for Hyper-V Isolation VM support.
+ *
+ * Authors:
+ * Sunil Muthuswamy <[email protected]>
+ * Tianyu Lan <[email protected]>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "hyperv_vmbus.h"
+
+int hv_init_channel_ivm(struct vmbus_channel *channel)
+{
+ if (!hv_is_isolation_supported())
+ return 0;
+
+ INIT_LIST_HEAD(&channel->bounce_page_free_head);
+ INIT_LIST_HEAD(&channel->bounce_pkt_free_list_head);
+
+ channel->bounce_pkt_cache = KMEM_CACHE(hv_bounce_pkt, 0);
+ if (unlikely(!channel->bounce_pkt_cache))
+ return -ENOMEM;
+ channel->bounce_page_cache = KMEM_CACHE(hv_bounce_page_list, 0);
+ if (unlikely(!channel->bounce_page_cache))
+ return -ENOMEM;
+
+ return 0;
+}
+
+void hv_free_channel_ivm(struct vmbus_channel *channel)
+{
+ if (!hv_is_isolation_supported())
+ return;
+
+
+ cancel_delayed_work_sync(&channel->bounce_page_list_maintain);
+ hv_bounce_pkt_list_free(channel, &channel->bounce_pkt_free_list_head);
+ hv_bounce_page_list_free(channel, &channel->bounce_page_free_head);
+ kmem_cache_destroy(channel->bounce_pkt_cache);
+ kmem_cache_destroy(channel->bounce_page_cache);
+}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index d78a04ad5490..7edf2be60d2c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -19,6 +19,7 @@
#include <linux/hyperv.h>
#include <linux/interrupt.h>

+#include <asm/mshyperv.h>
#include "hv_trace.h"

/*
@@ -56,6 +57,19 @@ union hv_monitor_trigger_state {
};
};

+/*
+ * All vmbus channels initially start with zero bounce pages and are required
+ * to set any non-zero size, if needed.
+ */
+#define HV_DEFAULT_BOUNCE_BUFFER_PAGES 0
+
+/* MIN should be a power of 2 */
+#define HV_MIN_BOUNCE_BUFFER_PAGES 64
+
+extern int hv_init_channel_ivm(struct vmbus_channel *channel);
+
+extern void hv_free_channel_ivm(struct vmbus_channel *channel);
+
/* struct hv_monitor_page Layout */
/* ------------------------------------------------------ */
/* | 0 | TriggerState (4 bytes) | Rsvd1 (4 bytes) | */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 41cbaa2db567..d518aba17565 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -25,6 +25,9 @@
#include <linux/interrupt.h>
#include <linux/reciprocal_div.h>
#include <asm/hyperv-tlfs.h>
+#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/mempool.h>

#define MAX_PAGE_BUFFER_COUNT 32
#define MAX_MULTIPAGE_BUFFER_COUNT 32 /* 128K */
@@ -1007,9 +1010,28 @@ struct vmbus_channel {
u32 fuzz_testing_interrupt_delay;
u32 fuzz_testing_message_delay;

+
/* request/transaction ids for VMBus */
struct vmbus_requestor requestor;
u32 rqstor_size;
+ /*
+ * Minimum number of bounce resources (i.e bounce packets & pages) that
+ * should be allocated and reserved for this channel. Allocation is
+ * permitted to go beyond this limit, and the maintenance task takes
+ * care of releasing the extra allocated resources.
+ */
+ u32 min_bounce_resource_count;
+
+ /* The free list of bounce pages is LRU sorted based on last used */
+ struct list_head bounce_page_free_head;
+ u32 bounce_page_alloc_count;
+ struct delayed_work bounce_page_list_maintain;
+
+ struct kmem_cache *bounce_page_cache;
+ struct kmem_cache *bounce_pkt_cache;
+ struct list_head bounce_pkt_free_list_head;
+ u32 bounce_pkt_free_count;
+ spinlock_t bp_lock;
};

u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
--
2.25.1

2021-02-28 15:12:34

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 11/12] HV/Netvsc: Add Isolation VM support for netvsc driver

From: Tianyu Lan <[email protected]>

Add Isolation VM support for the netvsc driver. Map the send/receive
ring buffers into the extra address space in an SNP Isolation VM,
reserve bounce buffers for packets sent via
vmbus_sendpacket_pagebuffer(), and release the bounce buffer via
hv_pkt_bounce() when the send completion response is received from
the host.

Signed-off-by: Sunil Muthuswamy <[email protected]>
Co-Developed-by: Sunil Muthuswamy <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/net/hyperv/hyperv_net.h | 3 +
drivers/net/hyperv/netvsc.c | 97 ++++++++++++++++++++++++++++++---
2 files changed, 92 insertions(+), 8 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 11266b92bcf0..45d5838ff128 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -1027,14 +1027,17 @@ struct netvsc_device {

/* Receive buffer allocated by us but manages by NetVSP */
void *recv_buf;
+ void *recv_original_buf;
u32 recv_buf_size; /* allocated bytes */
u32 recv_buf_gpadl_handle;
u32 recv_section_cnt;
u32 recv_section_size;
u32 recv_completion_cnt;

+
/* Send buffer allocated by us */
void *send_buf;
+ void *send_original_buf;
u32 send_buf_size;
u32 send_buf_gpadl_handle;
u32 send_section_cnt;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 77657c5acc65..171af85e055d 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -26,7 +26,7 @@

#include "hyperv_net.h"
#include "netvsc_trace.h"
-
+#include "../../hv/hyperv_vmbus.h"
/*
* Switch the data path from the synthetic interface to the VF
* interface.
@@ -119,8 +119,21 @@ static void free_netvsc_device(struct rcu_head *head)
int i;

kfree(nvdev->extension);
- vfree(nvdev->recv_buf);
- vfree(nvdev->send_buf);
+
+ if (nvdev->recv_original_buf) {
+ iounmap(nvdev->recv_buf);
+ vfree(nvdev->recv_original_buf);
+ } else {
+ vfree(nvdev->recv_buf);
+ }
+
+ if (nvdev->send_original_buf) {
+ iounmap(nvdev->send_buf);
+ vfree(nvdev->send_original_buf);
+ } else {
+ vfree(nvdev->send_buf);
+ }
+
kfree(nvdev->send_section_map);

for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
@@ -241,13 +254,18 @@ static void netvsc_teardown_recv_gpadl(struct hv_device *device,
struct netvsc_device *net_device,
struct net_device *ndev)
{
+ void *recv_buf;
int ret;

if (net_device->recv_buf_gpadl_handle) {
+ if (net_device->recv_original_buf)
+ recv_buf = net_device->recv_original_buf;
+ else
+ recv_buf = net_device->recv_buf;
+
ret = vmbus_teardown_gpadl(device->channel,
net_device->recv_buf_gpadl_handle,
- net_device->recv_buf,
- net_device->recv_buf_size);
+ recv_buf, net_device->recv_buf_size);

/* If we failed here, we might as well return and have a leak
* rather than continue and a bugchk
@@ -265,13 +283,18 @@ static void netvsc_teardown_send_gpadl(struct hv_device *device,
struct netvsc_device *net_device,
struct net_device *ndev)
{
+ void *send_buf;
int ret;

if (net_device->send_buf_gpadl_handle) {
+ if (net_device->send_original_buf)
+ send_buf = net_device->send_original_buf;
+ else
+ send_buf = net_device->send_buf;
+
ret = vmbus_teardown_gpadl(device->channel,
net_device->send_buf_gpadl_handle,
- net_device->send_buf,
- net_device->send_buf_size);
+ send_buf, net_device->send_buf_size);

/* If we failed here, we might as well return and have a leak
* rather than continue and a bugchk
@@ -306,9 +329,19 @@ static int netvsc_init_buf(struct hv_device *device,
struct nvsp_1_message_send_receive_buffer_complete *resp;
struct net_device *ndev = hv_get_drvdata(device);
struct nvsp_message *init_packet;
+ struct vm_struct *area;
+ u64 extra_phys;
unsigned int buf_size;
+ unsigned long vaddr;
size_t map_words;
- int ret = 0;
+ int ret = 0, i;
+
+ ret = hv_bounce_resources_reserve(device->channel,
+ PAGE_SIZE * 1024);
+ if (ret) {
+ pr_warn("Fail to reserve bounce buffer.\n");
+ return -ENOMEM;
+ }

/* Get receive buffer area. */
buf_size = device_info->recv_sections * device_info->recv_section_size;
@@ -345,6 +378,28 @@ static int netvsc_init_buf(struct hv_device *device,
goto cleanup;
}

+ if (hv_isolation_type_snp()) {
+ area = get_vm_area(buf_size, VM_IOREMAP);
+ if (!area)
+ goto cleanup;
+
+ vaddr = (unsigned long)area->addr;
+ for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++) {
+ extra_phys = (virt_to_hvpfn(net_device->recv_buf + i * HV_HYP_PAGE_SIZE)
+ << HV_HYP_PAGE_SHIFT) + ms_hyperv.shared_gpa_boundary;
+ ret |= ioremap_page_range(vaddr + i * HV_HYP_PAGE_SIZE,
+ vaddr + (i + 1) * HV_HYP_PAGE_SIZE,
+ extra_phys, PAGE_KERNEL_IO);
+ }
+
+ if (ret)
+ goto cleanup;
+
+ net_device->recv_original_buf = net_device->recv_buf;
+ net_device->recv_buf = (void*)vaddr;
+ }
+
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -435,12 +490,36 @@ static int netvsc_init_buf(struct hv_device *device,
buf_size,
&net_device->send_buf_gpadl_handle,
VMBUS_PAGE_VISIBLE_READ_WRITE);
+
if (ret != 0) {
netdev_err(ndev,
"unable to establish send buffer's gpadl\n");
goto cleanup;
}

+ if (hv_isolation_type_snp()) {
+ area = get_vm_area(buf_size , VM_IOREMAP);
+ if (!area)
+ goto cleanup;
+
+ vaddr = (unsigned long)area->addr;
+
+ for (i = 0; i < buf_size / HV_HYP_PAGE_SIZE; i++) {
+ extra_phys = (virt_to_hvpfn(net_device->send_buf + i * HV_HYP_PAGE_SIZE)
+ << HV_HYP_PAGE_SHIFT) + ms_hyperv.shared_gpa_boundary;
+ ret |= ioremap_page_range(vaddr + i * HV_HYP_PAGE_SIZE,
+ vaddr + (i + 1) * HV_HYP_PAGE_SIZE,
+ extra_phys, PAGE_KERNEL_IO);
+ }
+
+ if (ret)
+ goto cleanup;
+
+ net_device->send_original_buf = net_device->send_buf;
+ net_device->send_buf = (void*)vaddr;
+ }
+
+
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
memset(init_packet, 0, sizeof(struct nvsp_message));
@@ -747,6 +826,8 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
tx_stats->bytes += packet->total_bytes;
u64_stats_update_end(&tx_stats->syncp);

+ if (desc->type == VM_PKT_COMP && packet->bounce_pkt)
+ hv_pkt_bounce(channel, packet->bounce_pkt);
napi_consume_skb(skb, budget);
}

--
2.25.1

2021-02-28 15:12:45

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 12/12] HV/Storvsc: Add bounce buffer support for Storvsc

From: Tianyu Lan <[email protected]>

The storvsc driver needs to reserve additional bounce
buffers to receive multi-page buffer packets, and to copy
data from the bounce buffer when it gets the response message
from the host.

Signed-off-by: Sunil Muthuswamy <[email protected]>
Co-Developed-by: Sunil Muthuswamy <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/scsi/storvsc_drv.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index c5b4974eb41f..4ae8e2a427e4 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -33,6 +33,8 @@
#include <scsi/scsi_transport.h>
#include <asm/mshyperv.h>

+#include "../hv/hyperv_vmbus.h"
+
/*
* All wire protocol details (storage protocol between the guest and the host)
* are consolidated here.
@@ -725,6 +727,10 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
/* Add the sub-channel to the array of available channels. */
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+
+ if (hv_bounce_resources_reserve(device->channel,
+ stor_device->max_transfer_bytes))
+ pr_warn("Fail to reserve bounce buffer\n");
}

static void handle_multichannel_storage(struct hv_device *device, int max_chns)
@@ -964,6 +970,18 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
stor_device->max_transfer_bytes =
vstor_packet->storage_channel_properties.max_transfer_bytes;

+ /*
+ * Reserve enough bounce resources to be able to support paging
+ * operations under low memory conditions, that cannot rely on
+ * additional resources to be allocated.
+ */
+ ret = hv_bounce_resources_reserve(device->channel,
+ stor_device->max_transfer_bytes);
+ if (ret < 0) {
+ pr_warn("Fail to reserve bounce buffer\n");
+ goto done;
+ }
+
if (!is_fc)
goto done;

@@ -1263,6 +1281,11 @@ static void storvsc_on_channel_callback(void *context)

request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst;

+ if (desc->type == VM_PKT_COMP && request->bounce_pkt) {
+ hv_pkt_bounce(channel, request->bounce_pkt);
+ request->bounce_pkt = NULL;
+ }
+
if (request == &stor_device->init_request ||
request == &stor_device->reset_request) {
memcpy(&request->vstor_packet, packet,
--
2.25.1

2021-02-28 15:21:53

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 9/12] x86/Hyper-V: Add new parameter for vmbus_sendpacket_pagebuffer()/mpb_desc()

From: Tianyu Lan <[email protected]>

Add the new parameters io_type and bounce_pkt (a struct hv_bounce_pkt
pointer) to vmbus_sendpacket_pagebuffer() and vmbus_sendpacket_mpb_desc()
in preparation for adding bounce buffer support later.

Signed-off-by: Sunil Muthuswamy <[email protected]>
Co-Developed-by: Sunil Muthuswamy <[email protected]>
Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/hv/channel.c | 7 +++++--
drivers/hv/hyperv_vmbus.h | 12 ++++++++++++
drivers/net/hyperv/hyperv_net.h | 1 +
drivers/net/hyperv/netvsc.c | 5 ++++-
drivers/scsi/storvsc_drv.c | 23 +++++++++++++++++------
include/linux/hyperv.h | 16 ++++++++++++++--
6 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 4c05b1488649..976ef99dda28 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -1044,7 +1044,8 @@ EXPORT_SYMBOL(vmbus_sendpacket);
int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
struct hv_page_buffer pagebuffers[],
u32 pagecount, void *buffer, u32 bufferlen,
- u64 requestid)
+ u64 requestid, u8 io_type,
+ struct hv_bounce_pkt **bounce_pkt)
{
int i;
struct vmbus_channel_packet_page_buffer desc;
@@ -1101,7 +1102,9 @@ EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer);
int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *desc,
u32 desc_size,
- void *buffer, u32 bufferlen, u64 requestid)
+ void *buffer, u32 bufferlen, u64 requestid,
+ u32 pfn_count, u8 io_type,
+ struct hv_bounce_pkt **bounce_pkt)
{
u32 packetlen;
u32 packetlen_aligned;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 7edf2be60d2c..7677f083d33a 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -57,6 +57,18 @@ union hv_monitor_trigger_state {
};
};

+/*
+ * Hyper-V bounce packet. Each in-use bounce packet is mapped to a vmbus
+ * transaction and contains a list of bounce pages for that transaction.
+ */
+struct hv_bounce_pkt {
+ /* Link to the next bounce packet, when it is in the free list */
+ struct list_head link;
+ struct list_head bounce_page_head;
+ u32 flags;
+};
+
+
/*
* All vmbus channels initially start with zero bounce pages and are required
* to set any non-zero size, if needed.
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index b3a43c4ec8ab..11266b92bcf0 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -130,6 +130,7 @@ struct hv_netvsc_packet {
u32 total_bytes;
u32 send_buf_index;
u32 total_data_buflen;
+ struct hv_bounce_pkt *bounce_pkt;
};

#define NETVSC_HASH_KEYLEN 40
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 08d73401bb28..77657c5acc65 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -926,14 +926,17 @@ static inline int netvsc_send_pkt(

trace_nvsp_send_pkt(ndev, out_channel, rpkt);

+ packet->bounce_pkt = NULL;
if (packet->page_buf_cnt) {
if (packet->cp_partial)
pb += packet->rmsg_pgcnt;

+ /* The I/O type is always 'write' for netvsc */
ret = vmbus_sendpacket_pagebuffer(out_channel,
pb, packet->page_buf_cnt,
&nvmsg, sizeof(nvmsg),
- req_id);
+ req_id, IO_TYPE_WRITE,
+ &packet->bounce_pkt);
} else {
ret = vmbus_sendpacket(out_channel,
&nvmsg, sizeof(nvmsg),
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 2e4fa77445fd..c5b4974eb41f 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -31,6 +31,7 @@
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_transport_fc.h>
#include <scsi/scsi_transport.h>
+#include <asm/mshyperv.h>

/*
* All wire protocol details (storage protocol between the guest and the host)
@@ -427,6 +428,7 @@ struct storvsc_cmd_request {
u32 payload_sz;

struct vstor_packet vstor_packet;
+ struct hv_bounce_pkt *bounce_pkt;
};


@@ -1390,7 +1392,8 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,


static int storvsc_do_io(struct hv_device *device,
- struct storvsc_cmd_request *request, u16 q_num)
+ struct storvsc_cmd_request *request, u16 q_num,
+ u32 pfn_count)
{
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
@@ -1493,14 +1496,18 @@ static int storvsc_do_io(struct hv_device *device,

vstor_packet->operation = VSTOR_OPERATION_EXECUTE_SRB;

+ request->bounce_pkt = NULL;
if (request->payload->range.len) {
+ struct vmscsi_request *vm_srb = &request->vstor_packet.vm_srb;

ret = vmbus_sendpacket_mpb_desc(outgoing_channel,
request->payload, request->payload_sz,
vstor_packet,
(sizeof(struct vstor_packet) -
vmscsi_size_delta),
- (unsigned long)request);
+ (unsigned long)request,
+ pfn_count,
+ vm_srb->data_in, &request->bounce_pkt);
} else {
ret = vmbus_sendpacket(outgoing_channel, vstor_packet,
(sizeof(struct vstor_packet) -
@@ -1510,8 +1517,10 @@ static int storvsc_do_io(struct hv_device *device,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}

- if (ret != 0)
+ if (ret != 0) {
+ request->bounce_pkt = NULL;
return ret;
+ }

atomic_inc(&stor_device->num_outstanding_req);

@@ -1825,14 +1834,16 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
cmd_request->payload_sz = payload_sz;

/* Invokes the vsc to start an IO */
- ret = storvsc_do_io(dev, cmd_request, get_cpu());
+ ret = storvsc_do_io(dev, cmd_request, get_cpu(), sg_count);
put_cpu();

- if (ret == -EAGAIN) {
+ if (ret) {
if (payload_sz > sizeof(cmd_request->mpb))
kfree(payload);
/* no more space */
- return SCSI_MLQUEUE_DEVICE_BUSY;
+ if (ret == -EAGAIN || ret == -ENOSPC)
+ return SCSI_MLQUEUE_DEVICE_BUSY;
+ return ret;
}

return 0;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d518aba17565..d1a936091665 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1184,19 +1184,31 @@ extern int vmbus_sendpacket(struct vmbus_channel *channel,
enum vmbus_packet_type type,
u32 flags);

+#define IO_TYPE_WRITE 0
+#define IO_TYPE_READ 1
+#define IO_TYPE_UNKNOWN 2
+
+struct hv_bounce_pkt;
+
extern int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel,
struct hv_page_buffer pagebuffers[],
u32 pagecount,
void *buffer,
u32 bufferlen,
- u64 requestid);
+ u64 requestid,
+ u8 io_type,
+ struct hv_bounce_pkt **bounce_pkt);

extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
struct vmbus_packet_mpb_array *mpb,
u32 desc_size,
void *buffer,
u32 bufferlen,
- u64 requestid);
+ u64 requestid,
+ u32 pfn_count,
+ u8 io_type,
+ struct hv_bounce_pkt **bounce_pkt);
+

extern int vmbus_establish_gpadl(struct vmbus_channel *channel,
void *kbuffer,
--
2.25.1

2021-02-28 15:23:22

by Tianyu Lan

[permalink] [raw]
Subject: [RFC PATCH 6/12] HV/Vmbus: Add SNP support for VMbus channel initiate message

From: Tianyu Lan <[email protected]>

For SNP support, the physical addresses of the monitor pages in the
CHANNELMSG_INITIATE_CONTACT message should be in the extra address
space. Inside the Linux guest these pages must also be accessed via
the extra address space, so remap the extra addresses with ioremap.

Signed-off-by: Tianyu Lan <[email protected]>
---
drivers/hv/connection.c | 62 +++++++++++++++++++++++++++++++++++++++
drivers/hv/hyperv_vmbus.h | 1 +
2 files changed, 63 insertions(+)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 79bca653dce9..a0be9c11d737 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -101,6 +101,12 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)

msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]);
msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]);
+
+ if (hv_isolation_type_snp()) {
+ msg->monitor_page1 += ms_hyperv.shared_gpa_boundary;
+ msg->monitor_page2 += ms_hyperv.shared_gpa_boundary;
+ }
+
msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU);

/*
@@ -145,6 +151,29 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
return -ECONNREFUSED;
}

+ if (hv_isolation_type_snp()) {
+ vmbus_connection.monitor_pages_va[0]
+ = vmbus_connection.monitor_pages[0];
+ vmbus_connection.monitor_pages[0]
+ = ioremap_cache(msg->monitor_page1, HV_HYP_PAGE_SIZE);
+ if (!vmbus_connection.monitor_pages[0])
+ return -ENOMEM;
+
+ vmbus_connection.monitor_pages_va[1]
+ = vmbus_connection.monitor_pages[1];
+ vmbus_connection.monitor_pages[1]
+ = ioremap_cache(msg->monitor_page2, HV_HYP_PAGE_SIZE);
+ if (!vmbus_connection.monitor_pages[1]) {
+ vunmap(vmbus_connection.monitor_pages[0]);
+ return -ENOMEM;
+ }
+
+ memset(vmbus_connection.monitor_pages[0], 0x00,
+ HV_HYP_PAGE_SIZE);
+ memset(vmbus_connection.monitor_pages[1], 0x00,
+ HV_HYP_PAGE_SIZE);
+ }
+
return ret;
}

@@ -156,6 +185,7 @@ int vmbus_connect(void)
struct vmbus_channel_msginfo *msginfo = NULL;
int i, ret = 0;
__u32 version;
+ u64 pfn[2];

/* Initialize the vmbus connection */
vmbus_connection.conn_state = CONNECTING;
@@ -213,6 +243,16 @@ int vmbus_connect(void)
goto cleanup;
}

+ if (hv_isolation_type_snp()) {
+ pfn[0] = virt_to_hvpfn(vmbus_connection.monitor_pages[0]);
+ pfn[1] = virt_to_hvpfn(vmbus_connection.monitor_pages[1]);
+ if (hv_mark_gpa_visibility(2, pfn,
+ VMBUS_PAGE_VISIBLE_READ_WRITE)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ }
+
msginfo = kzalloc(sizeof(*msginfo) +
sizeof(struct vmbus_channel_initiate_contact),
GFP_KERNEL);
@@ -279,6 +319,8 @@ int vmbus_connect(void)

void vmbus_disconnect(void)
{
+ u64 pfn[2];
+
/*
* First send the unload request to the host.
*/
@@ -298,6 +340,26 @@ void vmbus_disconnect(void)
vmbus_connection.int_page = NULL;
}

+ if (hv_isolation_type_snp()) {
+ if (vmbus_connection.monitor_pages_va[0]) {
+ vunmap(vmbus_connection.monitor_pages[0]);
+ vmbus_connection.monitor_pages[0]
+ = vmbus_connection.monitor_pages_va[0];
+ vmbus_connection.monitor_pages_va[0] = NULL;
+ }
+
+ if (vmbus_connection.monitor_pages_va[1]) {
+ vunmap(vmbus_connection.monitor_pages[1]);
+ vmbus_connection.monitor_pages[1]
+ = vmbus_connection.monitor_pages_va[1];
+ vmbus_connection.monitor_pages_va[1] = NULL;
+ }
+
+ pfn[0] = virt_to_hvpfn(vmbus_connection.monitor_pages[0]);
+ pfn[1] = virt_to_hvpfn(vmbus_connection.monitor_pages[1]);
+ hv_mark_gpa_visibility(2, pfn, VMBUS_PAGE_NOT_VISIBLE);
+ }
+
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9416e09ebd58..0778add21a9c 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -240,6 +240,7 @@ struct vmbus_connection {
* is child->parent notification
*/
struct hv_monitor_page *monitor_pages[2];
+ void *monitor_pages_va[2];
struct list_head chn_msg_list;
spinlock_t channelmsg_lock;

--
2.25.1

2021-03-01 06:57:22

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC PATCH 12/12] HV/Storvsc: Add bounce buffer support for Storvsc

This should be handled by the DMA mapping layer, just like for native
SEV support.

2021-03-03 03:36:23

by Tianyu Lan

[permalink] [raw]
Subject: Re: [RFC PATCH 12/12] HV/Storvsc: Add bounce buffer support for Storvsc

Hi Christoph:
Thanks a lot for your review. There are some reasons:
1) Vmbus drivers don't use the DMA API now.
2) The Hyper-V Vmbus channel ring buffer already plays the bounce buffer
role for most vmbus drivers. Only two kinds of packets from
netvsc/storvsc are not covered.
3) In an AMD SEV-SNP based Hyper-V guest, the physical address used to
access shared memory should be the bounce buffer memory physical address
plus a shared memory boundary (e.g., 48bit) reported by Hyper-V CPUID. It's
called virtual top of memory (vTom) in the AMD spec and works as a watermark.
So the guest needs to ioremap/memremap the associated physical addresses above
the shared memory boundary before accessing them. swiotlb_bounce() uses the
low-end physical address to access the bounce buffer, and this doesn't work
in this scenario. If I have something wrong, please correct me.

Thanks.


On 3/1/2021 2:54 PM, Christoph Hellwig wrote:
> This should be handled by the DMA mapping layer, just like for native
> SEV support.
>

2021-03-04 04:58:57

by Sunil Muthuswamy

[permalink] [raw]
Subject: RE: [EXTERNAL] Re: [RFC PATCH 12/12] HV/Storvsc: Add bounce buffer support for Storvsc

> Hi Christoph:
> Thanks a lot for your review. There are some reasons.
> 1) Vmbus drivers don't use DMA API now.
What is blocking us from making the Hyper-V drivers use the DMA APIs? They
will generally be a no-op when no bounce buffer support is needed.

> 2) Hyper-V Vmbus channel ring buffer already play bounce buffer
> role for most vmbus drivers. Just two kinds of packets from
> netvsc/storvsc are uncovered.
How does this make a difference here?

> 3) In AMD SEV-SNP based Hyper-V guest, the access physical address
> of shared memory should be bounce buffer memory physical address plus
> with a shared memory boundary(e.g, 48bit) reported Hyper-V CPUID. It's
> called virtual top of memory(vTom) in AMD spec and works as a watermark.
> So it needs to ioremap/memremap the associated physical address above
> the share memory boundary before accessing them. swiotlb_bounce() uses
> low end physical address to access bounce buffer and this doesn't work
> in this senario. If something wrong, please help me correct me.
>
There are alternative implementations of swiotlb on top of the core swiotlb
APIs. One option is to have Hyper-V specific swiotlb wrapper DMA APIs with
the custom logic above.

> Thanks.
>
>
> On 3/1/2021 2:54 PM, Christoph Hellwig wrote:
> > This should be handled by the DMA mapping layer, just like for native
> > SEV support.
I agree with Christoph's comment that, in principle, this should be handled using
the DMA APIs.

2021-03-04 06:34:38

by Tianyu Lan

[permalink] [raw]
Subject: Re: [EXTERNAL] Re: [RFC PATCH 12/12] HV/Storvsc: Add bounce buffer support for Storvsc

Hi Sunil:
Thanks for your review.

On 3/2/2021 3:45 AM, Sunil Muthuswamy wrote:
>> Hi Christoph:
>> Thanks a lot for your review. There are some reasons.
>> 1) Vmbus drivers don't use DMA API now.
> What is blocking us from making the Hyper-V drivers use the DMA API's? They
> will be a null-op generally, when there is no bounce buffer support needed.
>
>> 2) Hyper-V Vmbus channel ring buffer already play bounce buffer
>> role for most vmbus drivers. Just two kinds of packets from
>> netvsc/storvsc are uncovered.
> How does this make a difference here?
>
>> 3) In AMD SEV-SNP based Hyper-V guest, the access physical address
>> of shared memory should be bounce buffer memory physical address plus
>> with a shared memory boundary(e.g, 48bit) reported Hyper-V CPUID. It's
>> called virtual top of memory(vTom) in AMD spec and works as a watermark.
>> So it needs to ioremap/memremap the associated physical address above
>> the share memory boundary before accessing them. swiotlb_bounce() uses
>> low end physical address to access bounce buffer and this doesn't work
>> in this senario. If something wrong, please help me correct me.
>>
> There are alternative implementations of swiotlb on top of the core swiotlb
> API's. One option is to have Hyper-V specific swiotlb wrapper DMA API's with
> the custom logic above.

Agreed. Hyper-V should have its own DMA ops and put the Hyper-V bounce buffer
code in the DMA API callbacks. The vmbus channel ring buffer doesn't need
an additional bounce buffer, and there are two options: 1) don't call the DMA
API around it, or 2) pass a flag to the DMA API to notify the Hyper-V DMA
callback not to allocate a bounce buffer for it.

>
>> Thanks.
>>
>>
>> On 3/1/2021 2:54 PM, Christoph Hellwig wrote:
>>> This should be handled by the DMA mapping layer, just like for native
>>> SEV support.
> I agree with Christoph's comment that in principle, this should be handled using
> the DMA API's
>

2021-03-04 12:15:56

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [RFC PATCH 4/12] HV: Add Write/Read MSR registers via ghcb

Tianyu Lan <[email protected]> writes:

> From: Tianyu Lan <[email protected]>
>
> Hyper-V provides GHCB protocol to write Synthetic Interrupt
> Controller MSR registers and these registers are emulated by
> Hypervisor rather than paravisor.
>
> Hyper-V requests to write SINTx MSR registers twice(once via
> GHCB and once via wrmsr instruction including the proxy bit 21)
> Guest OS ID MSR also needs to be set via GHCB.
>
> Signed-off-by: Tianyu Lan <[email protected]>
> ---
> arch/x86/hyperv/Makefile | 2 +-
> arch/x86/hyperv/hv_init.c | 18 +--
> arch/x86/hyperv/ivm.c | 178 ++++++++++++++++++++++++++++++
> arch/x86/include/asm/mshyperv.h | 21 +++-
> arch/x86/kernel/cpu/mshyperv.c | 46 --------
> drivers/hv/channel.c | 2 +-
> drivers/hv/hv.c | 188 ++++++++++++++++++++++----------
> include/asm-generic/mshyperv.h | 10 +-
> 8 files changed, 343 insertions(+), 122 deletions(-)
> create mode 100644 arch/x86/hyperv/ivm.c
>
> diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
> index 48e2c51464e8..5d2de10809ae 100644
> --- a/arch/x86/hyperv/Makefile
> +++ b/arch/x86/hyperv/Makefile
> @@ -1,5 +1,5 @@
> # SPDX-License-Identifier: GPL-2.0-only
> -obj-y := hv_init.o mmu.o nested.o irqdomain.o
> +obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
> obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
>
> ifdef CONFIG_X86_64
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 90e65fbf4c58..87b1dd9c84d6 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -475,6 +475,9 @@ void __init hyperv_init(void)
>
> ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> *ghcb_base = ghcb_va;
> +
> + /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
> + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
> }
>
> rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> @@ -561,6 +564,7 @@ void hyperv_cleanup(void)
>
> /* Reset our OS id */
> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
> + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
>
> /*
> * Reset hypercall page reference before reset the page,
> @@ -668,17 +672,3 @@ bool hv_is_hibernation_supported(void)
> return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
> }
> EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
> -
> -enum hv_isolation_type hv_get_isolation_type(void)
> -{
> - if (!(ms_hyperv.features_b & HV_ISOLATION))
> - return HV_ISOLATION_TYPE_NONE;
> - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
> -}
> -EXPORT_SYMBOL_GPL(hv_get_isolation_type);
> -
> -bool hv_is_isolation_supported(void)
> -{
> - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> -}
> -EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> new file mode 100644
> index 000000000000..4332bf7aaf9b
> --- /dev/null
> +++ b/arch/x86/hyperv/ivm.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Hyper-V Isolation VM interface with paravisor and hypervisor
> + *
> + * Author:
> + * Tianyu Lan <[email protected]>
> + */
> +#include <linux/types.h>
> +#include <linux/bitfield.h>
> +#include <asm/io.h>
> +#include <asm/svm.h>
> +#include <asm/sev-es.h>
> +#include <asm/mshyperv.h>
> +
> +union hv_ghcb {
> + struct ghcb ghcb;
> +} __packed __aligned(PAGE_SIZE);
> +
> +void hv_ghcb_msr_write(u64 msr, u64 value)
> +{
> + union hv_ghcb *hv_ghcb;
> + void **ghcb_base;
> + unsigned long flags;
> +
> + if (!ms_hyperv.ghcb_base)
> + return;
> +
> + local_irq_save(flags);
> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
> + if (!hv_ghcb) {
> + local_irq_restore(flags);
> + return;
> + }
> +
> + memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
> +
> + hv_ghcb->ghcb.protocol_version = 1;
> + hv_ghcb->ghcb.ghcb_usage = 0;
> +
> + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
> + ghcb_set_rcx(&hv_ghcb->ghcb, msr);
> + ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
> + ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
> + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
> + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
> +
> + VMGEXIT();
> +
> + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
> + pr_warn("Fail to write msr via ghcb.\n.");
> +
> + local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
> +
> +void hv_ghcb_msr_read(u64 msr, u64 *value)
> +{
> + union hv_ghcb *hv_ghcb;
> + void **ghcb_base;
> + unsigned long flags;
> +
> + if (!ms_hyperv.ghcb_base)
> + return;
> +
> + local_irq_save(flags);
> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
> + if (!hv_ghcb) {
> + local_irq_restore(flags);
> + return;
> + }
> +
> + memset(hv_ghcb, 0x00, PAGE_SIZE);
> + hv_ghcb->ghcb.protocol_version = 1;
> + hv_ghcb->ghcb.ghcb_usage = 0;
> +
> + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
> + ghcb_set_rcx(&hv_ghcb->ghcb, msr);
> + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 0);
> + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
> +
> + VMGEXIT();
> +
> + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
> + pr_warn("Fail to write msr via ghcb.\n.");
> + else
> + *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
> + | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
> + local_irq_restore(flags);
> +}
> +EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
> +
> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value)
> +{
> + hv_ghcb_msr_read(msr, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb);
> +
> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value)
> +{
> + hv_ghcb_msr_write(msr, value);
> +
> + /* Write proxy bit vua wrmsrl instruction. */
> + if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15)
> + wrmsrl(msr, value | 1 << 20);
> +}
> +EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb);
> +
> +inline void hv_signal_eom_ghcb(void)
> +{
> + hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0);
> +}
> +EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb);
> +
> +enum hv_isolation_type hv_get_isolation_type(void)
> +{
> + if (!(ms_hyperv.features_b & HV_ISOLATION))
> + return HV_ISOLATION_TYPE_NONE;
> + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
> +}
> +EXPORT_SYMBOL_GPL(hv_get_isolation_type);
> +
> +bool hv_is_isolation_supported(void)
> +{
> + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
> +}
> +EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
> +
> +bool hv_isolation_type_snp(void)
> +{
> + return hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP;
> +}
> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
> +
> +int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
> +{
> + struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
> + struct hv_input_modify_sparse_gpa_page_host_visibility *input;
> + u16 pages_processed;
> + u64 hv_status;
> + unsigned long flags;
> +
> + /* no-op if partition isolation is not enabled */
> + if (!hv_is_isolation_supported())
> + return 0;
> +
> + if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
> + pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
> + HV_MAX_MODIFY_GPA_REP_COUNT);
> + return -EINVAL;
> + }
> +
> + local_irq_save(flags);
> + input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
> + this_cpu_ptr(hyperv_pcpu_input_arg);
> + input = *input_pcpu;
> + if (unlikely(!input)) {
> + local_irq_restore(flags);
> + return -1;
> + }
> +
> + input->partition_id = HV_PARTITION_ID_SELF;
> + input->host_visibility = visibility;
> + input->reserved0 = 0;
> + input->reserved1 = 0;
> + memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
> + hv_status = hv_do_rep_hypercall(
> + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
> + 0, input, &pages_processed);
> + local_irq_restore(flags);
> +
> + if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
> + return 0;
> +
> + return -EFAULT;
> +}
> +EXPORT_SYMBOL(hv_mark_gpa_visibility);

This looks like unneeded code churn: first, you implement this in
arch/x86/kernel/cpu/mshyperv.c and several patches later you move it to
the dedicated arch/x86/hyperv/ivm.c. Let's just introduce the new
arch/x86/hyperv/ivm.c from the very beginning.

> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 1e8275d35c1f..f624d72b99d3 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -269,6 +269,25 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
> int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
> int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility);
> int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
> +void hv_signal_eom_ghcb(void);
> +void hv_ghcb_msr_write(u64 msr, u64 value);
> +void hv_ghcb_msr_read(u64 msr, u64 *value);
> +
> +#define hv_get_synint_state_ghcb(int_num, val) \
> + hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
> +#define hv_set_synint_state_ghcb(int_num, val) \
> + hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
> +
> +#define hv_get_simp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val)
> +#define hv_set_simp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val)
> +
> +#define hv_get_siefp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val)
> +#define hv_set_siefp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val)
> +
> +#define hv_get_synic_state_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
> +#define hv_set_synic_state_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> static inline void hyperv_setup_mmu_ops(void) {}
> @@ -287,9 +306,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> {
> return -1;
> }
> +static inline void hv_signal_eom_ghcb(void) { };
> #endif /* CONFIG_HYPERV */
>
> -
> #include <asm-generic/mshyperv.h>
>
> #endif
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index d6c363456cbf..aeafd4017c89 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,8 +37,6 @@
> bool hv_root_partition;
> EXPORT_SYMBOL_GPL(hv_root_partition);
>
> -#define HV_PARTITION_ID_SELF ((u64)-1)
> -
> struct ms_hyperv_info ms_hyperv;
> EXPORT_SYMBOL_GPL(ms_hyperv);
>
> @@ -481,47 +479,3 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
> .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
> .init.init_platform = ms_hyperv_init_platform,
> };
> -
> -int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
> -{
> - struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
> - struct hv_input_modify_sparse_gpa_page_host_visibility *input;
> - u16 pages_processed;
> - u64 hv_status;
> - unsigned long flags;
> -
> - /* no-op if partition isolation is not enabled */
> - if (!hv_is_isolation_supported())
> - return 0;
> -
> - if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
> - pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
> - HV_MAX_MODIFY_GPA_REP_COUNT);
> - return -EINVAL;
> - }
> -
> - local_irq_save(flags);
> - input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
> - this_cpu_ptr(hyperv_pcpu_input_arg);
> - input = *input_pcpu;
> - if (unlikely(!input)) {
> - local_irq_restore(flags);
> - return -1;
> - }
> -
> - input->partition_id = HV_PARTITION_ID_SELF;
> - input->host_visibility = visibility;
> - input->reserved0 = 0;
> - input->reserved1 = 0;
> - memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
> - hv_status = hv_do_rep_hypercall(
> - HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
> - 0, input, &pages_processed);
> - local_irq_restore(flags);
> -
> - if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
> - return 0;
> -
> - return -EFAULT;
> -}
> -EXPORT_SYMBOL(hv_mark_gpa_visibility);
> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
> index 204e6f3598a5..f31b669a1ddf 100644
> --- a/drivers/hv/channel.c
> +++ b/drivers/hv/channel.c
> @@ -247,7 +247,7 @@ int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility)
> u64 *pfn_array;
> int ret = 0;
>
> - if (!hv_isolation_type_snp())
> + if (!hv_is_isolation_supported())
> return 0;
>
> pfn_array = vzalloc(HV_HYP_PAGE_SIZE);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index f202ac7f4b3d..28e28ccc2081 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -99,17 +99,24 @@ int hv_synic_alloc(void)
> tasklet_init(&hv_cpu->msg_dpc,
> vmbus_on_msg_dpc, (unsigned long) hv_cpu);
>
> - hv_cpu->synic_message_page =
> - (void *)get_zeroed_page(GFP_ATOMIC);
> - if (hv_cpu->synic_message_page == NULL) {
> - pr_err("Unable to allocate SYNIC message page\n");
> - goto err;
> - }
> + /*
> + * Synic message and event pages are allocated by paravisor.
> + * Skip these pages allocation here.
> + */
> + if (!hv_isolation_type_snp()) {
> + hv_cpu->synic_message_page =
> + (void *)get_zeroed_page(GFP_ATOMIC);
> + if (hv_cpu->synic_message_page == NULL) {
> + pr_err("Unable to allocate SYNIC message page\n");
> + goto err;
> + }
>
> - hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
> - if (hv_cpu->synic_event_page == NULL) {
> - pr_err("Unable to allocate SYNIC event page\n");
> - goto err;
> + hv_cpu->synic_event_page =
> + (void *)get_zeroed_page(GFP_ATOMIC);
> + if (hv_cpu->synic_event_page == NULL) {
> + pr_err("Unable to allocate SYNIC event page\n");
> + goto err;
> + }
> }
>
> hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
> @@ -136,10 +143,17 @@ void hv_synic_free(void)
> for_each_present_cpu(cpu) {
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
> + free_page((unsigned long)hv_cpu->post_msg_page);
> +
> + /*
> + * Synic message and event pages are allocated by paravisor.
> + * Skip free these pages here.
> + */
> + if (hv_isolation_type_snp())
> + continue;
>
> free_page((unsigned long)hv_cpu->synic_event_page);
> free_page((unsigned long)hv_cpu->synic_message_page);
> - free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> kfree(hv_context.hv_numa_map);
> @@ -161,35 +175,72 @@ void hv_synic_enable_regs(unsigned int cpu)
> union hv_synic_sint shared_sint;
> union hv_synic_scontrol sctrl;
>
> - /* Setup the Synic's message page */
> - hv_get_simp(simp.as_uint64);
> - simp.simp_enabled = 1;
> - simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> - >> HV_HYP_PAGE_SHIFT;
> -
> - hv_set_simp(simp.as_uint64);
> -
> - /* Setup the Synic's event page */
> - hv_get_siefp(siefp.as_uint64);
> - siefp.siefp_enabled = 1;
> - siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> - >> HV_HYP_PAGE_SHIFT;
> -
> - hv_set_siefp(siefp.as_uint64);
> -
> - /* Setup the shared SINT. */
> - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> -
> - shared_sint.vector = hv_get_vector();
> - shared_sint.masked = false;
> - shared_sint.auto_eoi = hv_recommend_using_aeoi();
> - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> -
> - /* Enable the global synic bit */
> - hv_get_synic_state(sctrl.as_uint64);
> - sctrl.enable = 1;
> -
> - hv_set_synic_state(sctrl.as_uint64);
> + /*
> + * Setup Synic pages for CVM. Synic message and event page
> + * are allocated by paravisor in the SNP CVM.
> + */
> + if (hv_isolation_type_snp()) {
> + /* Setup the Synic's message. */
> + hv_get_simp_ghcb(&simp.as_uint64);
> + simp.simp_enabled = 1;
> + hv_cpu->synic_message_page
> + = ioremap_cache(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> + PAGE_SIZE);
> + if (!hv_cpu->synic_message_page)
> + pr_warn("Fail to map syinc message page.\n");
> +
> + hv_set_simp_ghcb(simp.as_uint64);
> +
> + /* Setup the Synic's event page */
> + hv_get_siefp_ghcb(&siefp.as_uint64);
> + siefp.siefp_enabled = 1;
> + hv_cpu->synic_event_page = ioremap_cache(
> + siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, PAGE_SIZE);
> + if (!hv_cpu->synic_event_page)
> + pr_warn("Fail to map syinc event page.\n");
> + hv_set_siefp_ghcb(siefp.as_uint64);
> +
> + /* Setup the shared SINT. */
> + hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
> + &shared_sint.as_uint64);
> + shared_sint.vector = hv_get_vector();
> + shared_sint.masked = false;
> + shared_sint.auto_eoi = hv_recommend_using_aeoi();
> + hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
> + shared_sint.as_uint64);
> +
> + /* Enable the global synic bit */
> + hv_get_synic_state_ghcb(&sctrl.as_uint64);
> + sctrl.enable = 1;
> + hv_set_synic_state_ghcb(sctrl.as_uint64);
> + } else {
> + /* Setup the Synic's message. */
> + hv_get_simp(simp.as_uint64);
> + simp.simp_enabled = 1;
> + simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
> + >> HV_HYP_PAGE_SHIFT;
> + hv_set_simp(simp.as_uint64);
> +
> + /* Setup the Synic's event page */
> + hv_get_siefp(siefp.as_uint64);
> + siefp.siefp_enabled = 1;
> + siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
> + >> HV_HYP_PAGE_SHIFT;
> + hv_set_siefp(siefp.as_uint64);
> +
> + /* Setup the shared SINT. */
> + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +
> + shared_sint.vector = hv_get_vector();
> + shared_sint.masked = false;
> + shared_sint.auto_eoi = hv_recommend_using_aeoi();
> + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> +
> + /* Enable the global synic bit */
> + hv_get_synic_state(sctrl.as_uint64);
> + sctrl.enable = 1;
> + hv_set_synic_state(sctrl.as_uint64);

There's definitely some room for unification here. E.g. the part after
'Setup the shared SINT' looks identical, you can move it outside of the
if/else block.

> + }
> }
>
> int hv_synic_init(unsigned int cpu)
> @@ -211,30 +262,53 @@ void hv_synic_disable_regs(unsigned int cpu)
> union hv_synic_siefp siefp;
> union hv_synic_scontrol sctrl;
>
> - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> + if (hv_isolation_type_snp()) {
> + hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
> + &shared_sint.as_uint64);
> + shared_sint.masked = 1;
> + hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
> + shared_sint.as_uint64);
> +
> + hv_get_simp_ghcb(&simp.as_uint64);
> + simp.simp_enabled = 0;
> + simp.base_simp_gpa = 0;
> + hv_set_simp_ghcb(simp.as_uint64);
> +
> + hv_get_siefp_ghcb(&siefp.as_uint64);
> + siefp.siefp_enabled = 0;
> + siefp.base_siefp_gpa = 0;
> + hv_set_siefp_ghcb(siefp.as_uint64);
>
> - shared_sint.masked = 1;
> + /* Disable the global synic bit */
> + hv_get_synic_state_ghcb(&sctrl.as_uint64);
> + sctrl.enable = 0;
> + hv_set_synic_state_ghcb(sctrl.as_uint64);
> + } else {
> + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>
> - /* Need to correctly cleanup in the case of SMP!!! */
> - /* Disable the interrupt */
> - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
> + shared_sint.masked = 1;
>
> - hv_get_simp(simp.as_uint64);
> - simp.simp_enabled = 0;
> - simp.base_simp_gpa = 0;
> + /* Need to correctly cleanup in the case of SMP!!! */
> + /* Disable the interrupt */
> + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>
> - hv_set_simp(simp.as_uint64);
> + hv_get_simp(simp.as_uint64);
> + simp.simp_enabled = 0;
> + simp.base_simp_gpa = 0;
>
> - hv_get_siefp(siefp.as_uint64);
> - siefp.siefp_enabled = 0;
> - siefp.base_siefp_gpa = 0;
> + hv_set_simp(simp.as_uint64);
>
> - hv_set_siefp(siefp.as_uint64);
> + hv_get_siefp(siefp.as_uint64);
> + siefp.siefp_enabled = 0;
> + siefp.base_siefp_gpa = 0;
>
> - /* Disable the global synic bit */
> - hv_get_synic_state(sctrl.as_uint64);
> - sctrl.enable = 0;
> - hv_set_synic_state(sctrl.as_uint64);
> + hv_set_siefp(siefp.as_uint64);
> +
> + /* Disable the global synic bit */
> + hv_get_synic_state(sctrl.as_uint64);
> + sctrl.enable = 0;
> + hv_set_synic_state(sctrl.as_uint64);
> + }
> }
>
> int hv_synic_cleanup(unsigned int cpu)
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index ad0e33776668..6727f4073b5a 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -23,6 +23,7 @@
> #include <linux/bitops.h>
> #include <linux/cpumask.h>
> #include <asm/ptrace.h>
> +#include <asm/mshyperv.h>
> #include <asm/hyperv-tlfs.h>
>
> struct ms_hyperv_info {
> @@ -52,7 +53,7 @@ extern struct ms_hyperv_info ms_hyperv;
>
> extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
> extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
> -
> +extern bool hv_isolation_type_snp(void);
>
> /* Generate the guest OS identifier as described in the Hyper-V TLFS */
> static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
> @@ -100,7 +101,11 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
> * possibly deliver another msg from the
> * hypervisor
> */
> - hv_signal_eom();
> + if (hv_isolation_type_snp() &&
> + old_msg_type != HVMSG_TIMER_EXPIRED)
> + hv_signal_eom_ghcb();
> + else
> + hv_signal_eom();

Would it be better to hide the SNP specifics inside hv_signal_eom()? Also, out
of pure curiosity, why are timer messages special?

> }
> }
>
> @@ -186,6 +191,7 @@ bool hv_is_hyperv_initialized(void);
> bool hv_is_hibernation_supported(void);
> enum hv_isolation_type hv_get_isolation_type(void);
> bool hv_is_isolation_supported(void);
> +bool hv_isolation_type_snp(void);
> void hyperv_cleanup(void);
> #else /* CONFIG_HYPERV */
> static inline bool hv_is_hyperv_initialized(void) { return false; }

--
Vitaly

2021-03-04 12:19:01

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [RFC PATCH 5/12] HV: Add ghcb hvcall support for SNP VM

Tianyu Lan <[email protected]> writes:

> From: Tianyu Lan <[email protected]>
>
> Hyper-V provides ghcb hvcall to handle VMBus
> HVCALL_SIGNAL_EVENT and HVCALL_POST_MESSAGE
> msg in SNP Isolation VM. Add such support.
>
> Signed-off-by: Tianyu Lan <[email protected]>
> ---
> arch/x86/hyperv/ivm.c | 69 +++++++++++++++++++++++++++++++++
> arch/x86/include/asm/mshyperv.h | 1 +
> drivers/hv/connection.c | 6 ++-
> drivers/hv/hv.c | 8 +++-
> 4 files changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
> index 4332bf7aaf9b..feaabcd151f5 100644
> --- a/arch/x86/hyperv/ivm.c
> +++ b/arch/x86/hyperv/ivm.c
> @@ -14,8 +14,77 @@
>
> union hv_ghcb {
> struct ghcb ghcb;
> + struct {
> + u64 hypercalldata[509];
> + u64 outputgpa;
> + union {
> + union {
> + struct {
> + u32 callcode : 16;
> + u32 isfast : 1;
> + u32 reserved1 : 14;
> + u32 isnested : 1;
> + u32 countofelements : 12;
> + u32 reserved2 : 4;
> + u32 repstartindex : 12;
> + u32 reserved3 : 4;
> + };
> + u64 asuint64;
> + } hypercallinput;
> + union {
> + struct {
> + u16 callstatus;
> + u16 reserved1;
> + u32 elementsprocessed : 12;
> + u32 reserved2 : 20;
> + };
> + u64 asunit64;
> + } hypercalloutput;
> + };
> + u64 reserved2;
> + } hypercall;
> } __packed __aligned(PAGE_SIZE);
>
> +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
> +{
> + union hv_ghcb *hv_ghcb;
> + void **ghcb_base;
> + unsigned long flags;
> +
> + if (!ms_hyperv.ghcb_base)
> + return -EFAULT;
> +
> + local_irq_save(flags);
> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
> + if (!hv_ghcb) {
> + local_irq_restore(flags);
> + return -EFAULT;
> + }
> +
> + memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
> + hv_ghcb->ghcb.protocol_version = 1;
> + hv_ghcb->ghcb.ghcb_usage = 1;
> +
> + hv_ghcb->hypercall.outputgpa = (u64)output;
> + hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
> + hv_ghcb->hypercall.hypercallinput.callcode = control;
> +
> + if (input_size)
> + memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);
> +
> + VMGEXIT();
> +
> + hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
> + memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
> + sizeof(hv_ghcb->ghcb.save.valid_bitmap));
> +
> + local_irq_restore(flags);
> +
> + return hv_ghcb->hypercall.hypercalloutput.callstatus;
> +}
> +EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
> +
> void hv_ghcb_msr_write(u64 msr, u64 value)
> {
> union hv_ghcb *hv_ghcb;
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index f624d72b99d3..c8f66d269e5b 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -274,6 +274,7 @@ void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
> void hv_signal_eom_ghcb(void);
> void hv_ghcb_msr_write(u64 msr, u64 value);
> void hv_ghcb_msr_read(u64 msr, u64 *value);
> +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
>
> #define hv_get_synint_state_ghcb(int_num, val) \
> hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index c83612cddb99..79bca653dce9 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -442,6 +442,10 @@ void vmbus_set_event(struct vmbus_channel *channel)
>
> ++channel->sig_events;
>
> - hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
> + if (hv_isolation_type_snp())
> + hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
> + NULL, sizeof(u64));
> + else
> + hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);

vmbus_set_event() is a hotpath so I'd suggest we introduce a static
branch instead of checking hv_isolation_type_snp() every time.

> }
> EXPORT_SYMBOL_GPL(vmbus_set_event);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 28e28ccc2081..6c64a7fd1ebd 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -60,7 +60,13 @@ int hv_post_message(union hv_connection_id connection_id,
> aligned_msg->payload_size = payload_size;
> memcpy((void *)aligned_msg->payload, payload, payload_size);
>
> - status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
> + if (hv_isolation_type_snp())
> + status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
> + (void *)aligned_msg, NULL,
> + sizeof(struct hv_input_post_message));
> + else
> + status = hv_do_hypercall(HVCALL_POST_MESSAGE,
> + aligned_msg, NULL);

and, if we are to introduce a static branch, we could use it here
(though it doesn't matter much for messages).

>
> /* Preemption must remain disabled until after the hypercall
> * so some other thread can't get scheduled onto this cpu and

--
Vitaly

2021-03-05 06:39:12

by Tianyu Lan

[permalink] [raw]
Subject: Re: [RFC PATCH 4/12] HV: Add Write/Read MSR registers via ghcb



On 3/4/2021 1:16 AM, Vitaly Kuznetsov wrote:
> Tianyu Lan <[email protected]> writes:
>
>> From: Tianyu Lan <[email protected]>
>>
>> Hyper-V provides GHCB protocol to write Synthetic Interrupt
>> Controller MSR registers and these registers are emulated by
>> Hypervisor rather than paravisor.
>>
>> Hyper-V requests to write SINTx MSR registers twice(once via
>> GHCB and once via wrmsr instruction including the proxy bit 21)
>> Guest OS ID MSR also needs to be set via GHCB.
>>
>> Signed-off-by: Tianyu Lan <[email protected]>
>> ---
>> arch/x86/hyperv/Makefile | 2 +-
>> arch/x86/hyperv/hv_init.c | 18 +--
>> arch/x86/hyperv/ivm.c | 178 ++++++++++++++++++++++++++++++
>> arch/x86/include/asm/mshyperv.h | 21 +++-
>> arch/x86/kernel/cpu/mshyperv.c | 46 --------
>> drivers/hv/channel.c | 2 +-
>> drivers/hv/hv.c | 188 ++++++++++++++++++++++----------
>> include/asm-generic/mshyperv.h | 10 +-
>> 8 files changed, 343 insertions(+), 122 deletions(-)
>> create mode 100644 arch/x86/hyperv/ivm.c
>>
>> diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
>> index 48e2c51464e8..5d2de10809ae 100644
>> --- a/arch/x86/hyperv/Makefile
>> +++ b/arch/x86/hyperv/Makefile
>> @@ -1,5 +1,5 @@
>> # SPDX-License-Identifier: GPL-2.0-only
>> -obj-y := hv_init.o mmu.o nested.o irqdomain.o
>> +obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
>> obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
>>
>> ifdef CONFIG_X86_64
>> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
>> index 90e65fbf4c58..87b1dd9c84d6 100644
>> --- a/arch/x86/hyperv/hv_init.c
>> +++ b/arch/x86/hyperv/hv_init.c
>> @@ -475,6 +475,9 @@ void __init hyperv_init(void)
>>
>> ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
>> *ghcb_base = ghcb_va;
>> +
>> + /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
>> + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id);
>> }
>>
>> rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>> @@ -561,6 +564,7 @@ void hyperv_cleanup(void)
>>
>> /* Reset our OS id */
>> wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
>> + hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, 0);
>>
>> /*
>> * Reset hypercall page reference before reset the page,
>> @@ -668,17 +672,3 @@ bool hv_is_hibernation_supported(void)
>> return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
>> }
>> EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
>> -
>> -enum hv_isolation_type hv_get_isolation_type(void)
>> -{
>> - if (!(ms_hyperv.features_b & HV_ISOLATION))
>> - return HV_ISOLATION_TYPE_NONE;
>> - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
>> -}
>> -EXPORT_SYMBOL_GPL(hv_get_isolation_type);
>> -
>> -bool hv_is_isolation_supported(void)
>> -{
>> - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
>> -}
>> -EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
>> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
>> new file mode 100644
>> index 000000000000..4332bf7aaf9b
>> --- /dev/null
>> +++ b/arch/x86/hyperv/ivm.c
>> @@ -0,0 +1,178 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Hyper-V Isolation VM interface with paravisor and hypervisor
>> + *
>> + * Author:
>> + * Tianyu Lan <[email protected]>
>> + */
>> +#include <linux/types.h>
>> +#include <linux/bitfield.h>
>> +#include <asm/io.h>
>> +#include <asm/svm.h>
>> +#include <asm/sev-es.h>
>> +#include <asm/mshyperv.h>
>> +
>> +union hv_ghcb {
>> + struct ghcb ghcb;
>> +} __packed __aligned(PAGE_SIZE);
>> +
>> +void hv_ghcb_msr_write(u64 msr, u64 value)
>> +{
>> + union hv_ghcb *hv_ghcb;
>> + void **ghcb_base;
>> + unsigned long flags;
>> +
>> + if (!ms_hyperv.ghcb_base)
>> + return;
>> +
>> + local_irq_save(flags);
>> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
>> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
>> + if (!hv_ghcb) {
>> + local_irq_restore(flags);
>> + return;
>> + }
>> +
>> + memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
>> +
>> + hv_ghcb->ghcb.protocol_version = 1;
>> + hv_ghcb->ghcb.ghcb_usage = 0;
>> +
>> + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
>> + ghcb_set_rcx(&hv_ghcb->ghcb, msr);
>> + ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
>> + ghcb_set_rdx(&hv_ghcb->ghcb, value >> 32);
>> + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 1);
>> + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
>> +
>> + VMGEXIT();
>> +
>> + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
>> + pr_warn("Fail to write msr via ghcb.\n.");
>> +
>> + local_irq_restore(flags);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_ghcb_msr_write);
>> +
>> +void hv_ghcb_msr_read(u64 msr, u64 *value)
>> +{
>> + union hv_ghcb *hv_ghcb;
>> + void **ghcb_base;
>> + unsigned long flags;
>> +
>> + if (!ms_hyperv.ghcb_base)
>> + return;
>> +
>> + local_irq_save(flags);
>> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
>> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
>> + if (!hv_ghcb) {
>> + local_irq_restore(flags);
>> + return;
>> + }
>> +
>> + memset(hv_ghcb, 0x00, PAGE_SIZE);
>> + hv_ghcb->ghcb.protocol_version = 1;
>> + hv_ghcb->ghcb.ghcb_usage = 0;
>> +
>> + ghcb_set_sw_exit_code(&hv_ghcb->ghcb, SVM_EXIT_MSR);
>> + ghcb_set_rcx(&hv_ghcb->ghcb, msr);
>> + ghcb_set_sw_exit_info_1(&hv_ghcb->ghcb, 0);
>> + ghcb_set_sw_exit_info_2(&hv_ghcb->ghcb, 0);
>> +
>> + VMGEXIT();
>> +
>> + if ((hv_ghcb->ghcb.save.sw_exit_info_1 & 0xffffffff) == 1)
>> + pr_warn("Fail to write msr via ghcb.\n.");
>> + else
>> + *value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
>> + | ((u64)lower_32_bits(hv_ghcb->ghcb.save.rdx) << 32);
>> + local_irq_restore(flags);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
>> +
>> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value)
>> +{
>> + hv_ghcb_msr_read(msr, value);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_sint_rdmsrl_ghcb);
>> +
>> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value)
>> +{
>> + hv_ghcb_msr_write(msr, value);
>> +
>> + /* Write proxy bit via wrmsrl instruction. */
>> + if (msr >= HV_X64_MSR_SINT0 && msr <= HV_X64_MSR_SINT15)
>> + wrmsrl(msr, value | 1 << 20);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_sint_wrmsrl_ghcb);
>> +
>> +inline void hv_signal_eom_ghcb(void)
>> +{
>> + hv_sint_wrmsrl_ghcb(HV_X64_MSR_EOM, 0);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_signal_eom_ghcb);
>> +
>> +enum hv_isolation_type hv_get_isolation_type(void)
>> +{
>> + if (!(ms_hyperv.features_b & HV_ISOLATION))
>> + return HV_ISOLATION_TYPE_NONE;
>> + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_get_isolation_type);
>> +
>> +bool hv_is_isolation_supported(void)
>> +{
>> + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
>> +}
>> +EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
>> +
>> +bool hv_isolation_type_snp(void)
>> +{
>> + return hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP;
>> +}
>> +EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
>> +
>> +int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
>> +{
>> + struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
>> + struct hv_input_modify_sparse_gpa_page_host_visibility *input;
>> + u16 pages_processed;
>> + u64 hv_status;
>> + unsigned long flags;
>> +
>> + /* no-op if partition isolation is not enabled */
>> + if (!hv_is_isolation_supported())
>> + return 0;
>> +
>> + if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
>> + pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
>> + HV_MAX_MODIFY_GPA_REP_COUNT);
>> + return -EINVAL;
>> + }
>> +
>> + local_irq_save(flags);
>> + input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
>> + this_cpu_ptr(hyperv_pcpu_input_arg);
>> + input = *input_pcpu;
>> + if (unlikely(!input)) {
>> + local_irq_restore(flags);
>> + return -1;
>> + }
>> +
>> + input->partition_id = HV_PARTITION_ID_SELF;
>> + input->host_visibility = visibility;
>> + input->reserved0 = 0;
>> + input->reserved1 = 0;
>> + memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
>> + hv_status = hv_do_rep_hypercall(
>> + HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
>> + 0, input, &pages_processed);
>> + local_irq_restore(flags);
>> +
>> + if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
>> + return 0;
>> +
>> + return -EFAULT;
>> +}
>> +EXPORT_SYMBOL(hv_mark_gpa_visibility);
>
> This looks like unneeded code churn: first, you implement this in
> arch/x86/kernel/cpu/mshyperv.c and several patches later you move it to
> the dedicated arch/x86/hyperv/ivm.c. Let's just introduce this new
> arch/x86/hyperv/ivm.c from the very beginning.

OK. Will update.

>
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 1e8275d35c1f..f624d72b99d3 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -269,6 +269,25 @@ int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
>> int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
>> int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility);
>> int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
>> +void hv_sint_wrmsrl_ghcb(u64 msr, u64 value);
>> +void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
>> +void hv_signal_eom_ghcb(void);
>> +void hv_ghcb_msr_write(u64 msr, u64 value);
>> +void hv_ghcb_msr_read(u64 msr, u64 *value);
>> +
>> +#define hv_get_synint_state_ghcb(int_num, val) \
>> + hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
>> +#define hv_set_synint_state_ghcb(int_num, val) \
>> + hv_sint_wrmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
>> +
>> +#define hv_get_simp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIMP, val)
>> +#define hv_set_simp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIMP, val)
>> +
>> +#define hv_get_siefp_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SIEFP, val)
>> +#define hv_set_siefp_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SIEFP, val)
>> +
>> +#define hv_get_synic_state_ghcb(val) hv_sint_rdmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
>> +#define hv_set_synic_state_ghcb(val) hv_sint_wrmsrl_ghcb(HV_X64_MSR_SCONTROL, val)
>> #else /* CONFIG_HYPERV */
>> static inline void hyperv_init(void) {}
>> static inline void hyperv_setup_mmu_ops(void) {}
>> @@ -287,9 +306,9 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
>> {
>> return -1;
>> }
>> +static inline void hv_signal_eom_ghcb(void) { };
>> #endif /* CONFIG_HYPERV */
>>
>> -
>> #include <asm-generic/mshyperv.h>
>>
>> #endif
>> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
>> index d6c363456cbf..aeafd4017c89 100644
>> --- a/arch/x86/kernel/cpu/mshyperv.c
>> +++ b/arch/x86/kernel/cpu/mshyperv.c
>> @@ -37,8 +37,6 @@
>> bool hv_root_partition;
>> EXPORT_SYMBOL_GPL(hv_root_partition);
>>
>> -#define HV_PARTITION_ID_SELF ((u64)-1)
>> -
>> struct ms_hyperv_info ms_hyperv;
>> EXPORT_SYMBOL_GPL(ms_hyperv);
>>
>> @@ -481,47 +479,3 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
>> .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id,
>> .init.init_platform = ms_hyperv_init_platform,
>> };
>> -
>> -int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
>> -{
>> - struct hv_input_modify_sparse_gpa_page_host_visibility **input_pcpu;
>> - struct hv_input_modify_sparse_gpa_page_host_visibility *input;
>> - u16 pages_processed;
>> - u64 hv_status;
>> - unsigned long flags;
>> -
>> - /* no-op if partition isolation is not enabled */
>> - if (!hv_is_isolation_supported())
>> - return 0;
>> -
>> - if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
>> - pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
>> - HV_MAX_MODIFY_GPA_REP_COUNT);
>> - return -EINVAL;
>> - }
>> -
>> - local_irq_save(flags);
>> - input_pcpu = (struct hv_input_modify_sparse_gpa_page_host_visibility **)
>> - this_cpu_ptr(hyperv_pcpu_input_arg);
>> - input = *input_pcpu;
>> - if (unlikely(!input)) {
>> - local_irq_restore(flags);
>> - return -1;
>> - }
>> -
>> - input->partition_id = HV_PARTITION_ID_SELF;
>> - input->host_visibility = visibility;
>> - input->reserved0 = 0;
>> - input->reserved1 = 0;
>> - memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
>> - hv_status = hv_do_rep_hypercall(
>> - HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
>> - 0, input, &pages_processed);
>> - local_irq_restore(flags);
>> -
>> - if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
>> - return 0;
>> -
>> - return -EFAULT;
>> -}
>> -EXPORT_SYMBOL(hv_mark_gpa_visibility);
>> diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
>> index 204e6f3598a5..f31b669a1ddf 100644
>> --- a/drivers/hv/channel.c
>> +++ b/drivers/hv/channel.c
>> @@ -247,7 +247,7 @@ int hv_set_mem_host_visibility(void *kbuffer, u32 size, u32 visibility)
>> u64 *pfn_array;
>> int ret = 0;
>>
>> - if (!hv_isolation_type_snp())
>> + if (!hv_is_isolation_supported())
>> return 0;
>>
>> pfn_array = vzalloc(HV_HYP_PAGE_SIZE);
>> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
>> index f202ac7f4b3d..28e28ccc2081 100644
>> --- a/drivers/hv/hv.c
>> +++ b/drivers/hv/hv.c
>> @@ -99,17 +99,24 @@ int hv_synic_alloc(void)
>> tasklet_init(&hv_cpu->msg_dpc,
>> vmbus_on_msg_dpc, (unsigned long) hv_cpu);
>>
>> - hv_cpu->synic_message_page =
>> - (void *)get_zeroed_page(GFP_ATOMIC);
>> - if (hv_cpu->synic_message_page == NULL) {
>> - pr_err("Unable to allocate SYNIC message page\n");
>> - goto err;
>> - }
>> + /*
>> + * Synic message and event pages are allocated by paravisor.
>> + * Skip allocating these pages here.
>> + */
>> + if (!hv_isolation_type_snp()) {
>> + hv_cpu->synic_message_page =
>> + (void *)get_zeroed_page(GFP_ATOMIC);
>> + if (hv_cpu->synic_message_page == NULL) {
>> + pr_err("Unable to allocate SYNIC message page\n");
>> + goto err;
>> + }
>>
>> - hv_cpu->synic_event_page = (void *)get_zeroed_page(GFP_ATOMIC);
>> - if (hv_cpu->synic_event_page == NULL) {
>> - pr_err("Unable to allocate SYNIC event page\n");
>> - goto err;
>> + hv_cpu->synic_event_page =
>> + (void *)get_zeroed_page(GFP_ATOMIC);
>> + if (hv_cpu->synic_event_page == NULL) {
>> + pr_err("Unable to allocate SYNIC event page\n");
>> + goto err;
>> + }
>> }
>>
>> hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
>> @@ -136,10 +143,17 @@ void hv_synic_free(void)
>> for_each_present_cpu(cpu) {
>> struct hv_per_cpu_context *hv_cpu
>> = per_cpu_ptr(hv_context.cpu_context, cpu);
>> + free_page((unsigned long)hv_cpu->post_msg_page);
>> +
>> + /*
>> + * Synic message and event pages are allocated by paravisor.
>> + * Skip freeing these pages here.
>> + */
>> + if (hv_isolation_type_snp())
>> + continue;
>>
>> free_page((unsigned long)hv_cpu->synic_event_page);
>> free_page((unsigned long)hv_cpu->synic_message_page);
>> - free_page((unsigned long)hv_cpu->post_msg_page);
>> }
>>
>> kfree(hv_context.hv_numa_map);
>> @@ -161,35 +175,72 @@ void hv_synic_enable_regs(unsigned int cpu)
>> union hv_synic_sint shared_sint;
>> union hv_synic_scontrol sctrl;
>>
>> - /* Setup the Synic's message page */
>> - hv_get_simp(simp.as_uint64);
>> - simp.simp_enabled = 1;
>> - simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> - >> HV_HYP_PAGE_SHIFT;
>> -
>> - hv_set_simp(simp.as_uint64);
>> -
>> - /* Setup the Synic's event page */
>> - hv_get_siefp(siefp.as_uint64);
>> - siefp.siefp_enabled = 1;
>> - siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> - >> HV_HYP_PAGE_SHIFT;
>> -
>> - hv_set_siefp(siefp.as_uint64);
>> -
>> - /* Setup the shared SINT. */
>> - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> -
>> - shared_sint.vector = hv_get_vector();
>> - shared_sint.masked = false;
>> - shared_sint.auto_eoi = hv_recommend_using_aeoi();
>> - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> -
>> - /* Enable the global synic bit */
>> - hv_get_synic_state(sctrl.as_uint64);
>> - sctrl.enable = 1;
>> -
>> - hv_set_synic_state(sctrl.as_uint64);
>> + /*
>> + * Setup Synic pages for CVM. Synic message and event page
>> + * are allocated by paravisor in the SNP CVM.
>> + */
>> + if (hv_isolation_type_snp()) {
>> + /* Setup the Synic's message. */
>> + hv_get_simp_ghcb(&simp.as_uint64);
>> + simp.simp_enabled = 1;
>> + hv_cpu->synic_message_page
>> + = ioremap_cache(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
>> + PAGE_SIZE);
>> + if (!hv_cpu->synic_message_page)
>> + pr_warn("Fail to map syinc message page.\n");
>> +
>> + hv_set_simp_ghcb(simp.as_uint64);
>> +
>> + /* Setup the Synic's event page */
>> + hv_get_siefp_ghcb(&siefp.as_uint64);
>> + siefp.siefp_enabled = 1;
>> + hv_cpu->synic_event_page = ioremap_cache(
>> + siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, PAGE_SIZE);
>> + if (!hv_cpu->synic_event_page)
>> + pr_warn("Fail to map syinc event page.\n");
>> + hv_set_siefp_ghcb(siefp.as_uint64);
>> +
>> + /* Setup the shared SINT. */
>> + hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
>> + &shared_sint.as_uint64);
>> + shared_sint.vector = hv_get_vector();
>> + shared_sint.masked = false;
>> + shared_sint.auto_eoi = hv_recommend_using_aeoi();
>> + hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
>> + shared_sint.as_uint64);
>> +
>> + /* Enable the global synic bit */
>> + hv_get_synic_state_ghcb(&sctrl.as_uint64);
>> + sctrl.enable = 1;
>> + hv_set_synic_state_ghcb(sctrl.as_uint64);
>> + } else {
>> + /* Setup the Synic's message. */
>> + hv_get_simp(simp.as_uint64);
>> + simp.simp_enabled = 1;
>> + simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
>> + >> HV_HYP_PAGE_SHIFT;
>> + hv_set_simp(simp.as_uint64);
>> +
>> + /* Setup the Synic's event page */
>> + hv_get_siefp(siefp.as_uint64);
>> + siefp.siefp_enabled = 1;
>> + siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
>> + >> HV_HYP_PAGE_SHIFT;
>> + hv_set_siefp(siefp.as_uint64);
>> +
>> + /* Setup the shared SINT. */
>> + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> +
>> + shared_sint.vector = hv_get_vector();
>> + shared_sint.masked = false;
>> + shared_sint.auto_eoi = hv_recommend_using_aeoi();
>> + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> +
>> + /* Enable the global synic bit */
>> + hv_get_synic_state(sctrl.as_uint64);
>> + sctrl.enable = 1;
>> + hv_set_synic_state(sctrl.as_uint64);
>
> There's definitely some room for unification here. E.g. the part after
> 'Setup the shared SINT' looks identical, you can move it outside of the
> if/else block.

Yes, will rework it. Thanks.


>
>> + }
>> }
>>
>> int hv_synic_init(unsigned int cpu)
>> @@ -211,30 +262,53 @@ void hv_synic_disable_regs(unsigned int cpu)
>> union hv_synic_siefp siefp;
>> union hv_synic_scontrol sctrl;
>>
>> - hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> + if (hv_isolation_type_snp()) {
>> + hv_get_synint_state_ghcb(VMBUS_MESSAGE_SINT,
>> + &shared_sint.as_uint64);
>> + shared_sint.masked = 1;
>> + hv_set_synint_state_ghcb(VMBUS_MESSAGE_SINT,
>> + shared_sint.as_uint64);
>> +
>> + hv_get_simp_ghcb(&simp.as_uint64);
>> + simp.simp_enabled = 0;
>> + simp.base_simp_gpa = 0;
>> + hv_set_simp_ghcb(simp.as_uint64);
>> +
>> + hv_get_siefp_ghcb(&siefp.as_uint64);
>> + siefp.siefp_enabled = 0;
>> + siefp.base_siefp_gpa = 0;
>> + hv_set_siefp_ghcb(siefp.as_uint64);
>>
>> - shared_sint.masked = 1;
>> + /* Disable the global synic bit */
>> + hv_get_synic_state_ghcb(&sctrl.as_uint64);
>> + sctrl.enable = 0;
>> + hv_set_synic_state_ghcb(sctrl.as_uint64);
>> + } else {
>> + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>>
>> - /* Need to correctly cleanup in the case of SMP!!! */
>> - /* Disable the interrupt */
>> - hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>> + shared_sint.masked = 1;
>>
>> - hv_get_simp(simp.as_uint64);
>> - simp.simp_enabled = 0;
>> - simp.base_simp_gpa = 0;
>> + /* Need to correctly cleanup in the case of SMP!!! */
>> + /* Disable the interrupt */
>> + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
>>
>> - hv_set_simp(simp.as_uint64);
>> + hv_get_simp(simp.as_uint64);
>> + simp.simp_enabled = 0;
>> + simp.base_simp_gpa = 0;
>>
>> - hv_get_siefp(siefp.as_uint64);
>> - siefp.siefp_enabled = 0;
>> - siefp.base_siefp_gpa = 0;
>> + hv_set_simp(simp.as_uint64);
>>
>> - hv_set_siefp(siefp.as_uint64);
>> + hv_get_siefp(siefp.as_uint64);
>> + siefp.siefp_enabled = 0;
>> + siefp.base_siefp_gpa = 0;
>>
>> - /* Disable the global synic bit */
>> - hv_get_synic_state(sctrl.as_uint64);
>> - sctrl.enable = 0;
>> - hv_set_synic_state(sctrl.as_uint64);
>> + hv_set_siefp(siefp.as_uint64);
>> +
>> + /* Disable the global synic bit */
>> + hv_get_synic_state(sctrl.as_uint64);
>> + sctrl.enable = 0;
>> + hv_set_synic_state(sctrl.as_uint64);
>> + }
>> }
>>
>> int hv_synic_cleanup(unsigned int cpu)
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index ad0e33776668..6727f4073b5a 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -23,6 +23,7 @@
>> #include <linux/bitops.h>
>> #include <linux/cpumask.h>
>> #include <asm/ptrace.h>
>> +#include <asm/mshyperv.h>
>> #include <asm/hyperv-tlfs.h>
>>
>> struct ms_hyperv_info {
>> @@ -52,7 +53,7 @@ extern struct ms_hyperv_info ms_hyperv;
>>
>> extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
>> extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
>> -
>> +extern bool hv_isolation_type_snp(void);
>>
>> /* Generate the guest OS identifier as described in the Hyper-V TLFS */
>> static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
>> @@ -100,7 +101,11 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
>> * possibly deliver another msg from the
>> * hypervisor
>> */
>> - hv_signal_eom();
>> + if (hv_isolation_type_snp() &&
>> + old_msg_type != HVMSG_TIMER_EXPIRED)
>> + hv_signal_eom_ghcb();
>> + else
>> + hv_signal_eom();
>
> Would it be better to hide SNP specifics into hv_signal_eom()? Also, out
> of pure curiosity, why are timer messages special?
>
>> }
>> }
>>
>> @@ -186,6 +191,7 @@ bool hv_is_hyperv_initialized(void);
>> bool hv_is_hibernation_supported(void);
>> enum hv_isolation_type hv_get_isolation_type(void);
>> bool hv_is_isolation_supported(void);
>> +bool hv_isolation_type_snp(void);
>> void hyperv_cleanup(void);
>> #else /* CONFIG_HYPERV */
>> static inline bool hv_is_hyperv_initialized(void) { return false; }
>

2021-03-05 15:22:58

by Tianyu Lan

[permalink] [raw]
Subject: Re: [RFC PATCH 5/12] HV: Add ghcb hvcall support for SNP VM



On 3/4/2021 1:21 AM, Vitaly Kuznetsov wrote:
> Tianyu Lan <[email protected]> writes:
>
>> From: Tianyu Lan <[email protected]>
>>
>> Hyper-V provides ghcb hvcall to handle VMBus
>> HVCALL_SIGNAL_EVENT and HVCALL_POST_MESSAGE
>> msg in SNP Isolation VM. Add such support.
>>
>> Signed-off-by: Tianyu Lan <[email protected]>
>> ---
>> arch/x86/hyperv/ivm.c | 69 +++++++++++++++++++++++++++++++++
>> arch/x86/include/asm/mshyperv.h | 1 +
>> drivers/hv/connection.c | 6 ++-
>> drivers/hv/hv.c | 8 +++-
>> 4 files changed, 82 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
>> index 4332bf7aaf9b..feaabcd151f5 100644
>> --- a/arch/x86/hyperv/ivm.c
>> +++ b/arch/x86/hyperv/ivm.c
>> @@ -14,8 +14,77 @@
>>
>> union hv_ghcb {
>> struct ghcb ghcb;
>> + struct {
>> + u64 hypercalldata[509];
>> + u64 outputgpa;
>> + union {
>> + union {
>> + struct {
>> + u32 callcode : 16;
>> + u32 isfast : 1;
>> + u32 reserved1 : 14;
>> + u32 isnested : 1;
>> + u32 countofelements : 12;
>> + u32 reserved2 : 4;
>> + u32 repstartindex : 12;
>> + u32 reserved3 : 4;
>> + };
>> + u64 asuint64;
>> + } hypercallinput;
>> + union {
>> + struct {
>> + u16 callstatus;
>> + u16 reserved1;
>> + u32 elementsprocessed : 12;
>> + u32 reserved2 : 20;
>> + };
>> + u64 asunit64;
>> + } hypercalloutput;
>> + };
>> + u64 reserved2;
>> + } hypercall;
>> } __packed __aligned(PAGE_SIZE);
>>
>> +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
>> +{
>> + union hv_ghcb *hv_ghcb;
>> + void **ghcb_base;
>> + unsigned long flags;
>> +
>> + if (!ms_hyperv.ghcb_base)
>> + return -EFAULT;
>> +
>> + local_irq_save(flags);
>> + ghcb_base = (void **)this_cpu_ptr(ms_hyperv.ghcb_base);
>> + hv_ghcb = (union hv_ghcb *)*ghcb_base;
>> + if (!hv_ghcb) {
>> + local_irq_restore(flags);
>> + return -EFAULT;
>> + }
>> +
>> + memset(hv_ghcb, 0x00, HV_HYP_PAGE_SIZE);
>> + hv_ghcb->ghcb.protocol_version = 1;
>> + hv_ghcb->ghcb.ghcb_usage = 1;
>> +
>> + hv_ghcb->hypercall.outputgpa = (u64)output;
>> + hv_ghcb->hypercall.hypercallinput.asuint64 = 0;
>> + hv_ghcb->hypercall.hypercallinput.callcode = control;
>> +
>> + if (input_size)
>> + memcpy(hv_ghcb->hypercall.hypercalldata, input, input_size);
>> +
>> + VMGEXIT();
>> +
>> + hv_ghcb->ghcb.ghcb_usage = 0xffffffff;
>> + memset(hv_ghcb->ghcb.save.valid_bitmap, 0,
>> + sizeof(hv_ghcb->ghcb.save.valid_bitmap));
>> +
>> + local_irq_restore(flags);
>> +
>> + return hv_ghcb->hypercall.hypercalloutput.callstatus;
>> +}
>> +EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
>> +
>> void hv_ghcb_msr_write(u64 msr, u64 value)
>> {
>> union hv_ghcb *hv_ghcb;
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index f624d72b99d3..c8f66d269e5b 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -274,6 +274,7 @@ void hv_sint_rdmsrl_ghcb(u64 msr, u64 *value);
>> void hv_signal_eom_ghcb(void);
>> void hv_ghcb_msr_write(u64 msr, u64 value);
>> void hv_ghcb_msr_read(u64 msr, u64 *value);
>> +u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size);
>>
>> #define hv_get_synint_state_ghcb(int_num, val) \
>> hv_sint_rdmsrl_ghcb(HV_X64_MSR_SINT0 + int_num, val)
>> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
>> index c83612cddb99..79bca653dce9 100644
>> --- a/drivers/hv/connection.c
>> +++ b/drivers/hv/connection.c
>> @@ -442,6 +442,10 @@ void vmbus_set_event(struct vmbus_channel *channel)
>>
>> ++channel->sig_events;
>>
>> - hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
>> + if (hv_isolation_type_snp())
>> + hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event,
>> + NULL, sizeof(u64));
>> + else
>> + hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event);
>
> vmbus_set_event() is a hotpath so I'd suggest we introduce a static
> branch instead of checking hv_isolation_type_snp() every time.
>

Good suggestion. Will add it in the next version. Thanks.