2021-06-02 17:23:33

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 00/17] irqfd and ioeventfd support for mshv

This patch series adds irqfd and ioeventfd support for VMMs on Hyper-V.
It also adds support for an in-kernel MSI irq routing framework. Both of
these features are inspired by the kvm implementation, and all credit goes
to the kvm developers.

Patches 1-11 are preparatory patches for enabling irqfd/ioeventfd. Hyper-V
features like ports, connections, doorbells, etc. need to be enabled to
implement the irqfd and ioeventfd features.

Patches 12-15 implement irqfd and ioeventfd, and patches 16 and 17 implement
the in-kernel MSI routing framework.

This patchset is rebased on Nuno's root partition ioctl interface series:
https://lkml.org/lkml/2021/5/28/820
---

Vineeth Pillai (17):
hyperv: Few TLFS definitions
drivers: hv: vmbus: Use TLFS definition for VMBUS_MESSAGE_SINT
acpi: export node_to_pxm
hyperv: Wrapper for setting proximity_domain_info
mshv: SynIC event ring and event flags support
mshv: SynIC port and connection hypercalls
hyperv: Configure SINT for Doorbell
mshv: Port id management
mshv: Doorbell handler in hypercall ISR
mshv: Doorbell register/unregister API
mshv: HvClearVirtualInterrupt hypercall
mshv: Add irqfd support for mshv
mshv: Add ioeventfd support for mshv
mshv: Notifier framework for EOI for level triggered interrupts
mshv: Level-triggered interrupt support for irqfd
mshv: User space controlled MSI irq routing for mshv
mshv: Use in kernel MSI routing for irqfd

arch/x86/hyperv/hv_init.c | 32 +-
arch/x86/hyperv/hv_proc.c | 15 +-
arch/x86/include/asm/hyperv-tlfs.h | 2 +
arch/x86/include/asm/mshyperv.h | 2 +
arch/x86/include/uapi/asm/hyperv-tlfs.h | 2 +
drivers/acpi/numa/srat.c | 1 +
drivers/hv/Kconfig | 1 +
drivers/hv/Makefile | 3 +-
drivers/hv/hv_call.c | 181 ++++++
drivers/hv/hv_eventfd.c | 723 ++++++++++++++++++++++++
drivers/hv/hv_portid_table.c | 83 +++
drivers/hv/hv_synic.c | 383 +++++++++++--
drivers/hv/hyperv_vmbus.h | 2 +-
drivers/hv/mshv.h | 52 ++
drivers/hv/mshv_main.c | 96 +++-
drivers/hv/mshv_msi.c | 128 +++++
include/asm-generic/hyperv-tlfs.h | 106 +++-
include/asm-generic/mshyperv.h | 14 +
include/linux/hyperv.h | 9 -
include/linux/mshv.h | 65 ++-
include/linux/mshv_eventfd.h | 78 +++
include/uapi/asm-generic/hyperv-tlfs.h | 81 +++
include/uapi/linux/mshv.h | 48 ++
23 files changed, 2043 insertions(+), 64 deletions(-)
create mode 100644 drivers/hv/hv_eventfd.c
create mode 100644 drivers/hv/hv_portid_table.c
create mode 100644 drivers/hv/mshv_msi.c
create mode 100644 include/linux/mshv_eventfd.h

--
2.25.1


2021-06-02 17:23:33

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 09/17] mshv: Doorbell handler in hypercall ISR

Doorbell is a mechanism by which a parent partition can register for
notification if a specified mmio address is touched by a child partition.
The parent partition can set up the notification by specifying the mmio
address, the size of the data written (1/2/4/8 bytes) and optionally the
data as well.

Set up the doorbell signal to be delivered by the intercept interrupt and
handle the doorbell signal.

Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/hyperv/hv_init.c | 32 ++++-
arch/x86/include/asm/mshyperv.h | 2 +
drivers/hv/hv_synic.c | 175 +++++++++++++++++++++----
drivers/hv/mshv.h | 2 +-
include/uapi/asm-generic/hyperv-tlfs.h | 4 +
5 files changed, 186 insertions(+), 29 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 722bafdb2225..c295ccfdffd7 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -51,6 +51,16 @@ EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
void __percpu **hyperv_pcpu_output_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);

+/*
+ * Per-cpu array holding the tail pointer for the SynIC event ring buffer
+ * for each SINT.
+ *
+ * We cannot maintain this in mshv driver because the tail pointer should
+ * persist even if the mshv driver is unloaded.
+ */
+u8 __percpu **hv_synic_eventring_tail;
+EXPORT_SYMBOL_GPL(hv_synic_eventring_tail);
+
u32 hv_max_vp_index;
EXPORT_SYMBOL_GPL(hv_max_vp_index);

@@ -58,11 +68,13 @@ static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
+ unsigned int order = hv_root_partition ? 1 : 0;
+ u8 **synic_eventring_tail;
void **input_arg;
struct page *pg;

/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
- pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0);
+ pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, order);
if (unlikely(!pg))
return -ENOMEM;

@@ -73,6 +85,14 @@ static int hv_cpu_init(unsigned int cpu)

output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*output_arg = page_address(pg + 1);
+
+ synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+ *synic_eventring_tail = kcalloc(HV_SYNIC_SINT_COUNT, sizeof(u8),
+ irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
+ if (unlikely(!*synic_eventring_tail)) {
+ __free_pages(pg, order);
+ return -ENOMEM;
+ }
}

msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
@@ -197,6 +217,7 @@ EXPORT_SYMBOL_GPL(clear_hv_tscchange_cb);
static int hv_cpu_die(unsigned int cpu)
{
struct hv_reenlightenment_control re_ctrl;
+ u8 **synic_eventring_tail;
unsigned int new_cpu;
unsigned long flags;
void **input_arg;
@@ -212,6 +233,10 @@ static int hv_cpu_die(unsigned int cpu)

output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
*output_arg = NULL;
+
+ synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+ kfree(*synic_eventring_tail);
+ *synic_eventring_tail = NULL;
}

local_irq_restore(flags);
@@ -390,10 +415,13 @@ void __init hyperv_init(void)

BUG_ON(hyperv_pcpu_input_arg == NULL);

- /* Allocate the per-CPU state for output arg for root */
if (hv_root_partition) {
+ /* Allocate the per-CPU state for output arg for root */
hyperv_pcpu_output_arg = alloc_percpu(void *);
BUG_ON(hyperv_pcpu_output_arg == NULL);
+
+ hv_synic_eventring_tail = alloc_percpu(u8 *);
+ BUG_ON(hv_synic_eventring_tail == NULL);
}

/* Allocate percpu VP index */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c6eb01f3864d..f780ec35ac44 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -39,6 +39,8 @@ extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
extern void __percpu **hyperv_pcpu_output_arg;

+extern u8 __percpu **hv_synic_eventring_tail;
+
extern u64 hv_current_partition_id;

int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
index 6a00c66edc3f..e3262f6d3daa 100644
--- a/drivers/hv/hv_synic.c
+++ b/drivers/hv/hv_synic.c
@@ -17,34 +17,124 @@

#include "mshv.h"

-void mshv_isr(void)
+u32
+synic_event_ring_get_queued_port(u32 sint_index)
{
- struct hv_synic_pages *spages = this_cpu_ptr(mshv.synic_pages);
- struct hv_message_page **msg_page = &spages->synic_message_page;
- struct hv_message *msg;
- u32 message_type;
- struct mshv_partition *partition;
- struct mshv_vp *vp;
- u64 partition_id;
- u32 vp_index;
- int i;
- unsigned long flags;
- struct task_struct *task;
-
- if (unlikely(!(*msg_page))) {
- pr_err("%s: Missing synic page!\n", __func__);
- return;
+ struct hv_synic_event_ring_page **event_ring_page;
+ volatile struct hv_synic_event_ring *ring;
+ struct hv_synic_pages *spages;
+ u8 **synic_eventring_tail;
+ u32 message;
+ u8 tail;
+
+ spages = this_cpu_ptr(mshv.synic_pages);
+ event_ring_page = &spages->synic_event_ring_page;
+ synic_eventring_tail = (u8 **)this_cpu_ptr(hv_synic_eventring_tail);
+ tail = (*synic_eventring_tail)[sint_index];
+
+ if (unlikely(!(*event_ring_page))) {
+ pr_err("%s: Missing synic event ring page!\n", __func__);
+ return 0;
}

- msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);
+ ring = &(*event_ring_page)->sint_event_ring[sint_index];

/*
- * If the type isn't set, there isn't really a message;
- * it may be some other hyperv interrupt
+ * Get the message.
*/
- message_type = msg->header.message_type;
- if (message_type == HVMSG_NONE)
- return;
+ message = ring->data[tail];
+
+ if (!message) {
+ if (ring->ring_full) {
+ /*
+ * Ring is marked full, but we would have consumed all
+ * the messages. Notify the hypervisor that ring is now
+ * empty and check again.
+ */
+ ring->ring_full = 0;
+ hv_call_notify_port_ring_empty(sint_index);
+ message = ring->data[tail];
+ }
+
+ if (!message) {
+ ring->signal_masked = 0;
+ /*
+ * Unmask the signal and sync with hypervisor
+ * before one last check for any message.
+ */
+ mb();
+ message = ring->data[tail];
+
+ /*
+ * Ok, lets bail out.
+ */
+ if (!message)
+ return 0;
+ }
+
+ ring->signal_masked = 1;
+
+ }
+
+ /*
+ * Clear the message in the ring buffer.
+ */
+ ring->data[tail] = 0;
+
+ if (++tail == HV_SYNIC_EVENT_RING_MESSAGE_COUNT)
+ tail = 0;
+
+ (*synic_eventring_tail)[sint_index] = tail;
+
+ return message;
+}
+
+static bool
+mshv_doorbell_isr(struct hv_message *msg)
+{
+ struct hv_notification_message_payload *notification;
+ u32 port;
+
+ if (msg->header.message_type != HVMSG_SYNIC_SINT_INTERCEPT)
+ return false;
+
+ notification = (struct hv_notification_message_payload *)msg->u.payload;
+ if (notification->sint_index != HV_SYNIC_DOORBELL_SINT_INDEX)
+ return false;
+
+ while ((port = synic_event_ring_get_queued_port(
+ HV_SYNIC_DOORBELL_SINT_INDEX))) {
+ struct port_table_info ptinfo = { 0 };
+
+ if (hv_portid_lookup(port, &ptinfo)) {
+ pr_err("Failed to get port information from port_table!\n");
+ continue;
+ }
+
+ if (ptinfo.port_type != HV_PORT_TYPE_DOORBELL) {
+ pr_warn("Not a doorbell port!, port: %d, port_type: %d\n",
+ port, ptinfo.port_type);
+ continue;
+ }
+
+ /* Invoke the callback */
+ ptinfo.port_doorbell.doorbell_cb(port, ptinfo.port_doorbell.data);
+ }
+
+ return true;
+}
+
+static bool
+mshv_intercept_isr(struct hv_message *msg)
+{
+ struct mshv_partition *partition;
+ struct task_struct *task;
+ bool handled = false;
+ unsigned long flags;
+ struct mshv_vp *vp;
+ u64 partition_id;
+ u32 vp_index;
+ int i;

/* Look for the partition */
partition_id = msg->header.sender;
@@ -102,14 +192,47 @@ void mshv_isr(void)
*/
wake_up_process(task);

+ handled = true;
+
unlock_out:
spin_unlock_irqrestore(&mshv.partitions.lock, flags);

- /* Acknowledge message with hypervisor */
- msg->header.message_type = HVMSG_NONE;
- wrmsrl(HV_X64_MSR_EOM, 0);
+ return handled;
+}
+
+void mshv_isr(void)
+{
+ struct hv_synic_pages *spages = this_cpu_ptr(mshv.synic_pages);
+ struct hv_message_page **msg_page = &spages->synic_message_page;
+ struct hv_message *msg;
+ bool handled;
+
+ if (unlikely(!(*msg_page))) {
+ pr_err("%s: Missing synic page!\n", __func__);
+ return;
+ }
+
+ msg = &((*msg_page)->sint_message[HV_SYNIC_INTERCEPTION_SINT_INDEX]);
+
+ /*
+ * If the type isn't set, there isn't really a message;
+ * it may be some other hyperv interrupt
+ */
+ if (msg->header.message_type == HVMSG_NONE)
+ return;

- add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
+ handled = mshv_doorbell_isr(msg);
+
+ if (!handled)
+ handled = mshv_intercept_isr(msg);
+
+ if (handled) {
+ /* Acknowledge message with hypervisor */
+ msg->header.message_type = HVMSG_NONE;
+ wrmsrl(HV_X64_MSR_EOM, 0);
+
+ add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
+ }
}

static inline bool hv_recommend_using_aeoi(void)
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index ff5dc02cd8b6..07b0e7865a4c 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -44,7 +44,7 @@ int mshv_synic_cleanup(unsigned int cpu);
* NOTE: This is called in interrupt context. Callback
* should defer slow and sleeping logic to later.
*/
-typedef void (*doorbell_cb_t) (void *);
+typedef void (*doorbell_cb_t) (int doorbell_id, void *);

/*
* port table information
diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
index 2031115c6cce..ef91b11a12cf 100644
--- a/include/uapi/asm-generic/hyperv-tlfs.h
+++ b/include/uapi/asm-generic/hyperv-tlfs.h
@@ -31,6 +31,10 @@ enum hv_message_type {
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,

+ /* SynIC intercepts */
+ HVMSG_SYNIC_EVENT_INTERCEPT = 0x80000060,
+ HVMSG_SYNIC_SINT_INTERCEPT = 0x80000061,
+
/* Platform-specific processor intercept messages. */
HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
--
2.25.1

2021-06-02 17:23:34

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 11/17] mshv: HvClearVirtualInterrupt hypercall

For HvX64InterruptTypeExtInt interrupts, before asserting a new
interrupt, the previous interrupt's acknowledgment should be cleared
by the HvClearVirtualInterrupt hypercall. (TLFS 10.3.2)

This is to be used in a later patch in this series.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_call.c | 20 ++++++++++++++++++++
drivers/hv/mshv.h | 1 +
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 22 insertions(+)

diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
index 57db3a8ac94a..d5cdbe4e93da 100644
--- a/drivers/hv/hv_call.c
+++ b/drivers/hv/hv_call.c
@@ -743,6 +743,26 @@ int hv_call_translate_virtual_address(
}


+int
+hv_call_clear_virtual_interrupt(u64 partition_id)
+{
+ unsigned long flags;
+ int status;
+
+ local_irq_save(flags);
+ status = hv_do_fast_hypercall8(HVCALL_CLEAR_VIRTUAL_INTERRUPT,
+ partition_id) &
+ HV_HYPERCALL_RESULT_MASK;
+ local_irq_restore(flags);
+
+ if (status != HV_STATUS_SUCCESS) {
+ pr_err("%s: %s\n", __func__, hv_status_to_string(status));
+ return -hv_status_to_errno(status);
+ }
+
+ return 0;
+}
+
int
hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
u64 connection_partition_id,
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index 76cd00fd4b3f..404807c98512 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -117,6 +117,7 @@ int hv_call_assert_virtual_interrupt(
u32 vector,
u64 dest_addr,
union hv_interrupt_control control);
+int hv_call_clear_virtual_interrupt(u64 partition_id);
int hv_call_get_vp_state(
u32 vp_index,
u64 partition_id,
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 3ed4f532ed57..693d41192e9e 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -159,6 +159,7 @@ struct ms_hyperv_tsc_page {
#define HVCALL_GET_VP_REGISTERS 0x0050
#define HVCALL_SET_VP_REGISTERS 0x0051
#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
+#define HVCALL_CLEAR_VIRTUAL_INTERRUPT 0x0056
#define HVCALL_DELETE_PORT 0x0058
#define HVCALL_DISCONNECT_PORT 0x005b
#define HVCALL_POST_MESSAGE 0x005c
--
2.25.1

2021-06-02 17:23:39

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 03/17] acpi: export node_to_pxm

This is needed in the next patch in the series for a
code refactor. No intended change in functionality.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/acpi/numa/srat.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 6021a1013442..c9e610b4b642 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -48,6 +48,7 @@ int node_to_pxm(int node)
return PXM_INVAL;
return node_to_pxm_map[node];
}
+EXPORT_SYMBOL(node_to_pxm);

static void __acpi_map_pxm_to_node(int pxm, int node)
{
--
2.25.1

2021-06-02 17:23:48

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 13/17] mshv: Add ioeventfd support for mshv

ioeventfd is a mechanism to register PIO/MMIO regions to trigger an
eventfd signal when written to by a guest. Host userspace can register
any arbitrary IO address with a corresponding eventfd and then pass the
eventfd to a specific end-point of interest for handling.

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d34e6b175e61821026893ec5298cc8e7558df43a

Basic framework code is taken from kvm implementation. Credit goes to
kvm irqfd/ioeventfd developers.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_eventfd.c | 248 +++++++++++++++++++++++++++++++++--
drivers/hv/mshv_main.c | 20 ++-
include/linux/mshv.h | 4 +
include/linux/mshv_eventfd.h | 21 ++-
include/uapi/linux/mshv.h | 26 +++-
5 files changed, 300 insertions(+), 19 deletions(-)

diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c
index 11fcafd1df08..5ed77901fb0b 100644
--- a/drivers/hv/hv_eventfd.c
+++ b/drivers/hv/hv_eventfd.c
@@ -2,8 +2,8 @@
/*
* eventfd support for mshv
*
- * Heavily inspired from KVM implementation of irqfd. The basic framework
- * code is taken from the kvm implementation.
+ * Heavily inspired from KVM implementation of irqfd/ioeventfd. The basic
+ * framework code is taken from the kvm implementation.
*
* All credits to kvm developers.
*/
@@ -210,13 +210,6 @@ mshv_irqfd_assign(struct mshv_partition *partition,
return ret;
}

-void
-mshv_irqfd_init(struct mshv_partition *partition)
-{
- spin_lock_init(&partition->irqfds.lock);
- INIT_LIST_HEAD(&partition->irqfds.items);
-}
-
/*
* shutdown any irqfd's that match fd+gsi
*/
@@ -261,10 +254,10 @@ mshv_irqfd(struct mshv_partition *partition, struct mshv_irqfd *args)
}

/*
- * This function is called as the mshv VM fd is being released. Shutdown all
- * irqfds that still remain open
+ * This function is called as the mshv VM fd is being released.
+ * Shutdown all irqfds that still remain open
*/
-void
+static void
mshv_irqfd_release(struct mshv_partition *partition)
{
struct mshv_kernel_irqfd *irqfd, *tmp;
@@ -297,3 +290,234 @@ void mshv_irqfd_wq_cleanup(void)
{
destroy_workqueue(irqfd_cleanup_wq);
}
+
+/*
+ * --------------------------------------------------------------------
+ * ioeventfd: translate a MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a MMIO address with an eventfd for receiving
+ * notification when the memory has been touched.
+ *
+ * TODO: Implement eventfd for PIO as well.
+ * --------------------------------------------------------------------
+ */
+
+static void
+ioeventfd_release(struct kernel_mshv_ioeventfd *p, u64 partition_id)
+{
+ if (p->doorbell_id > 0)
+ hv_unregister_doorbell(partition_id, p->doorbell_id);
+ eventfd_ctx_put(p->eventfd);
+ list_del(&p->list);
+ kfree(p);
+}
+
+/* MMIO writes trigger an event if the addr/val match */
+static void
+ioeventfd_mmio_write(int doorbell_id, void *data)
+{
+ struct mshv_partition *partition = (struct mshv_partition *)data;
+ struct kernel_mshv_ioeventfd *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&partition->ioeventfds.lock, flags);
+ list_for_each_entry(p, &partition->ioeventfds.items, list) {
+ if (p->doorbell_id == doorbell_id) {
+ eventfd_signal(p->eventfd, 1);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&partition->ioeventfds.lock, flags);
+}
+
+static bool
+ioeventfd_check_collision(struct mshv_partition *partition,
+ struct kernel_mshv_ioeventfd *p)
+{
+ struct kernel_mshv_ioeventfd *_p;
+
+ list_for_each_entry(_p, &partition->ioeventfds.items, list)
+ if (_p->addr == p->addr && _p->length == p->length &&
+ (_p->wildcard || p->wildcard ||
+ _p->datamatch == p->datamatch))
+ return true;
+
+ return false;
+}
+
+static int
+mshv_assign_ioeventfd(struct mshv_partition *partition,
+ struct mshv_ioeventfd *args)
+{
+ struct kernel_mshv_ioeventfd *p;
+ struct eventfd_ctx *eventfd;
+ u64 doorbell_flags = 0;
+ unsigned long irqflags;
+ int ret;
+
+ if (args->flags & MSHV_IOEVENTFD_FLAG_PIO)
+ return -EOPNOTSUPP;
+
+ /* must be natural-word sized */
+ switch (args->len) {
+ case 0:
+ doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY;
+ break;
+ case 1:
+ doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE;
+ break;
+ case 2:
+ doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD;
+ break;
+ case 4:
+ doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD;
+ break;
+ case 8:
+ doorbell_flags = HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD;
+ break;
+ default:
+ pr_warn("ioeventfd: invalid length specified\n");
+ return -EINVAL;
+ }
+
+ /* check for range overflow */
+ if (args->addr + args->len < args->addr)
+ return -EINVAL;
+
+ /* check for extra flags that we don't understand */
+ if (args->flags & ~MSHV_IOEVENTFD_VALID_FLAG_MASK)
+ return -EINVAL;
+
+ eventfd = eventfd_ctx_fdget(args->fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ INIT_LIST_HEAD(&p->list);
+ p->addr = args->addr;
+ p->length = args->len;
+ p->eventfd = eventfd;
+
+ /* The datamatch feature is optional, otherwise this is a wildcard */
+ if (args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH)
+ p->datamatch = args->datamatch;
+ else {
+ p->wildcard = true;
+ doorbell_flags |= HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE;
+ }
+
+ spin_lock_irqsave(&partition->ioeventfds.lock, irqflags);
+
+ if (ioeventfd_check_collision(partition, p)) {
+ ret = -EEXIST;
+ goto unlock_fail;
+ }
+
+ ret = hv_register_doorbell(partition->id, ioeventfd_mmio_write,
+ (void *)partition, p->addr,
+ p->datamatch, doorbell_flags);
+ if (ret < 0) {
+ pr_err("Failed to register ioeventfd doorbell!\n");
+ goto unlock_fail;
+ }
+
+ p->doorbell_id = ret;
+ list_add_tail(&p->list, &partition->ioeventfds.items);
+
+ spin_unlock_irqrestore(&partition->ioeventfds.lock, irqflags);
+
+ return 0;
+
+unlock_fail:
+ spin_unlock_irqrestore(&partition->ioeventfds.lock, irqflags);
+
+ kfree(p);
+
+fail:
+ eventfd_ctx_put(eventfd);
+
+ return ret;
+}
+
+static int
+mshv_deassign_ioeventfd(struct mshv_partition *partition,
+ struct mshv_ioeventfd *args)
+{
+ struct kernel_mshv_ioeventfd *p, *tmp;
+ struct eventfd_ctx *eventfd;
+ unsigned long flags;
+ int ret = -ENOENT;
+
+ eventfd = eventfd_ctx_fdget(args->fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ spin_lock_irqsave(&partition->ioeventfds.lock, flags);
+
+ list_for_each_entry_safe(p, tmp, &partition->ioeventfds.items, list) {
+ bool wildcard = !(args->flags & MSHV_IOEVENTFD_FLAG_DATAMATCH);
+
+ if (p->eventfd != eventfd ||
+ p->addr != args->addr ||
+ p->length != args->len ||
+ p->wildcard != wildcard)
+ continue;
+
+ if (!p->wildcard && p->datamatch != args->datamatch)
+ continue;
+
+ ioeventfd_release(p, partition->id);
+ ret = 0;
+ break;
+ }
+
+ spin_unlock_irqrestore(&partition->ioeventfds.lock, flags);
+
+ eventfd_ctx_put(eventfd);
+
+ return ret;
+}
+
+int
+mshv_ioeventfd(struct mshv_partition *partition,
+ struct mshv_ioeventfd *args)
+{
+ /* PIO not yet implemented */
+ if (args->flags & MSHV_IOEVENTFD_FLAG_PIO)
+ return -EOPNOTSUPP;
+
+ if (args->flags & MSHV_IOEVENTFD_FLAG_DEASSIGN)
+ return mshv_deassign_ioeventfd(partition, args);
+
+ return mshv_assign_ioeventfd(partition, args);
+}
+
+void
+mshv_eventfd_init(struct mshv_partition *partition)
+{
+ spin_lock_init(&partition->irqfds.lock);
+ INIT_LIST_HEAD(&partition->irqfds.items);
+
+ spin_lock_init(&partition->ioeventfds.lock);
+ INIT_LIST_HEAD(&partition->ioeventfds.items);
+}
+
+void
+mshv_eventfd_release(struct mshv_partition *partition)
+{
+ struct kernel_mshv_ioeventfd *p, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&partition->ioeventfds.lock, flags);
+ list_for_each_entry_safe(p, tmp, &partition->ioeventfds.items, list) {
+ ioeventfd_release(p, partition->id);
+ }
+ spin_unlock_irqrestore(&partition->ioeventfds.lock, flags);
+
+ mshv_irqfd_release(partition);
+}
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index e124119e65eb..e1caecd27f09 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -828,6 +828,18 @@ mshv_partition_ioctl_assert_interrupt(struct mshv_partition *partition,
args.control);
}

+static long
+mshv_partition_ioctl_ioeventfd(struct mshv_partition *partition,
+ void __user *user_args)
+{
+ struct mshv_ioeventfd args;
+
+ if (copy_from_user(&args, user_args, sizeof(args)))
+ return -EFAULT;
+
+ return mshv_ioeventfd(partition, &args);
+}
+
static long
mshv_partition_ioctl_irqfd(struct mshv_partition *partition,
void __user *user_args)
@@ -882,6 +894,10 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
ret = mshv_partition_ioctl_irqfd(partition,
(void __user *)arg);
break;
+ case MSHV_IOEVENTFD:
+ ret = mshv_partition_ioctl_ioeventfd(partition,
+ (void __user *)arg);
+ break;
default:
ret = -ENOTTY;
}
@@ -972,7 +988,7 @@ mshv_partition_release(struct inode *inode, struct file *filp)
{
struct mshv_partition *partition = filp->private_data;

- mshv_irqfd_release(partition);
+ mshv_eventfd_release(partition);

mshv_partition_put(partition);

@@ -1068,7 +1084,7 @@ mshv_ioctl_create_partition(void __user *user_arg)

fd_install(fd, file);

- mshv_irqfd_init(partition);
+ mshv_eventfd_init(partition);

return fd;

diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 5707c457b72d..217c91725828 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -52,6 +52,10 @@ struct mshv_partition {
spinlock_t lock;
struct list_head items;
} irqfds;
+ struct {
+ spinlock_t lock;
+ struct list_head items;
+ } ioeventfds;
};

struct mshv_lapic_irq {
diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h
index 3e7b16d0717f..fd0012f72616 100644
--- a/include/linux/mshv_eventfd.h
+++ b/include/linux/mshv_eventfd.h
@@ -1,7 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
*
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest.
+ * ioeventfd: Allow an fd to be used to receive a signal from the guest.
* All credit goes to kvm developers.
*/

@@ -11,6 +12,9 @@
#include <linux/mshv.h>
#include <linux/poll.h>

+void mshv_eventfd_init(struct mshv_partition *partition);
+void mshv_eventfd_release(struct mshv_partition *partition);
+
struct mshv_kernel_irqfd {
struct mshv_partition *partition;
struct eventfd_ctx *eventfd;
@@ -26,10 +30,19 @@ struct mshv_kernel_irqfd {
int mshv_irqfd(struct mshv_partition *partition,
struct mshv_irqfd *args);

-void mshv_irqfd_init(struct mshv_partition *partition);
-void mshv_irqfd_release(struct mshv_partition *partition);
-
int mshv_irqfd_wq_init(void);
void mshv_irqfd_wq_cleanup(void);

+struct kernel_mshv_ioeventfd {
+ struct list_head list;
+ u64 addr;
+ int length;
+ struct eventfd_ctx *eventfd;
+ u64 datamatch;
+ int doorbell_id;
+ bool wildcard;
+};
+
+int mshv_ioeventfd(struct mshv_partition *kvm, struct mshv_ioeventfd *args);
+
#endif /* __LINUX_MSHV_EVENTFD_H */
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 792844276134..e32dee679360 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -93,6 +93,29 @@ struct mshv_irqfd {
__u8 pad[2];
};

+enum {
+ mshv_ioeventfd_flag_nr_datamatch,
+ mshv_ioeventfd_flag_nr_pio,
+ mshv_ioeventfd_flag_nr_deassign,
+ mshv_ioeventfd_flag_nr_max,
+};
+
+#define MSHV_IOEVENTFD_FLAG_DATAMATCH (1 << mshv_ioeventfd_flag_nr_datamatch)
+#define MSHV_IOEVENTFD_FLAG_PIO (1 << mshv_ioeventfd_flag_nr_pio)
+#define MSHV_IOEVENTFD_FLAG_DEASSIGN (1 << mshv_ioeventfd_flag_nr_deassign)
+
+#define MSHV_IOEVENTFD_VALID_FLAG_MASK ((1 << mshv_ioeventfd_flag_nr_max) - 1)
+
+struct mshv_ioeventfd {
+ __u64 datamatch;
+ __u64 addr; /* legal pio/mmio address */
+ __u32 len; /* 1, 2, 4, or 8 bytes */
+ __s32 fd;
+ __u32 flags;
+ __u8 pad[4];
+};
+
+
#define MSHV_IOCTL 0xB8

/* mshv device */
@@ -109,7 +132,8 @@ struct mshv_irqfd {
_IOW(MSHV_IOCTL, 0xC, struct mshv_partition_property)
#define MSHV_GET_PARTITION_PROPERTY \
_IOWR(MSHV_IOCTL, 0xD, struct mshv_partition_property)
-#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd)
+#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd)
+#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0xF, struct mshv_ioeventfd)

/* vp device */
#define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers)
--
2.25.1

2021-06-02 17:24:22

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 17/17] mshv: Use in kernel MSI routing for irqfd

Use the in-kernel routing information for irqfd interrupts.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_eventfd.c | 94 +++++++++++++++++++++++++++---------
drivers/hv/mshv_msi.c | 1 +
include/linux/mshv.h | 2 +
include/linux/mshv_eventfd.h | 26 +++++-----
include/uapi/linux/mshv.h | 6 ---
5 files changed, 87 insertions(+), 42 deletions(-)

diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c
index 6404624b3bc6..729b5f67aa9a 100644
--- a/drivers/hv/hv_eventfd.c
+++ b/drivers/hv/hv_eventfd.c
@@ -88,13 +88,32 @@ irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
static void
irqfd_inject(struct mshv_kernel_irqfd *irqfd)
{
+ struct mshv_partition *partition = irqfd->partition;
struct mshv_lapic_irq *irq = &irqfd->lapic_irq;
+ unsigned int seq;
+ int idx;

WARN_ON(irqfd->resampler &&
!irq->control.level_triggered);
+
+ idx = srcu_read_lock(&partition->irq_srcu);
+ if (irqfd->msi_entry.gsi) {
+ if (!irqfd->msi_entry.entry_valid) {
+ pr_warn("Invalid routing info for gsi %u",
+ irqfd->msi_entry.gsi);
+ srcu_read_unlock(&partition->irq_srcu, idx);
+ return;
+ }
+
+ do {
+ seq = read_seqcount_begin(&irqfd->msi_entry_sc);
+ } while (read_seqcount_retry(&irqfd->msi_entry_sc, seq));
+ }
+
hv_call_assert_virtual_interrupt(irqfd->partition->id,
irq->vector, irq->apic_id,
irq->control);
+ srcu_read_unlock(&partition->irq_srcu, idx);
}

static void
@@ -206,6 +225,27 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
return 0;
}

+/* Must be called under irqfds.lock */
+static void irqfd_update(struct mshv_partition *partition,
+ struct mshv_kernel_irqfd *irqfd)
+{
+ write_seqcount_begin(&irqfd->msi_entry_sc);
+ irqfd->msi_entry = mshv_msi_map_gsi(partition, irqfd->gsi);
+ mshv_set_msi_irq(&irqfd->msi_entry, &irqfd->lapic_irq);
+ write_seqcount_end(&irqfd->msi_entry_sc);
+}
+
+void mshv_irqfd_routing_update(struct mshv_partition *partition)
+{
+ struct mshv_kernel_irqfd *irqfd;
+
+ spin_lock_irq(&partition->irqfds.lock);
+ list_for_each_entry(irqfd, &partition->irqfds.items, list) {
+ irqfd_update(partition, irqfd);
+ }
+ spin_unlock_irq(&partition->irqfds.lock);
+}
+
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
@@ -221,29 +261,23 @@ static int
mshv_irqfd_assign(struct mshv_partition *partition,
struct mshv_irqfd *args)
{
+ struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
struct mshv_kernel_irqfd *irqfd, *tmp;
+ unsigned int events;
struct fd f;
- struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
- unsigned int events;
+ int idx;

irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
return -ENOMEM;

- if (args->flags & MSHV_IRQFD_FLAG_RESAMPLE &&
- !args->level_triggered)
- return -EINVAL;
-
irqfd->partition = partition;
irqfd->gsi = args->gsi;
- irqfd->lapic_irq.vector = args->vector;
- irqfd->lapic_irq.apic_id = args->apic_id;
- irqfd->lapic_irq.control.interrupt_type = args->interrupt_type;
- irqfd->lapic_irq.control.level_triggered = args->level_triggered;
- irqfd->lapic_irq.control.logical_dest_mode = args->logical_dest_mode;
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+ seqcount_spinlock_init(&irqfd->msi_entry_sc,
+ &partition->irqfds.lock);

f = fdget(args->fd);
if (!f.file) {
@@ -259,6 +293,31 @@ mshv_irqfd_assign(struct mshv_partition *partition,

irqfd->eventfd = eventfd;

+
+ spin_lock_irq(&partition->irqfds.lock);
+ idx = srcu_read_lock(&partition->irq_srcu);
+ irqfd_update(partition, irqfd);
+ srcu_read_unlock(&partition->irq_srcu, idx);
+
+ if (args->flags & MSHV_IRQFD_FLAG_RESAMPLE &&
+ !irqfd->lapic_irq.control.level_triggered) {
+ spin_unlock_irq(&partition->irqfds.lock);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = 0;
+ list_for_each_entry(tmp, &partition->irqfds.items, list) {
+ if (irqfd->eventfd != tmp->eventfd)
+ continue;
+ /* This fd is used for another irq already. */
+ ret = -EBUSY;
+ spin_unlock_irq(&partition->irqfds.lock);
+ goto fail;
+ }
+ list_add_tail(&irqfd->list, &partition->irqfds.items);
+ spin_unlock_irq(&partition->irqfds.lock);
+
if (args->flags & MSHV_IRQFD_FLAG_RESAMPLE) {
struct mshv_kernel_irqfd_resampler *resampler;

@@ -314,19 +373,6 @@ mshv_irqfd_assign(struct mshv_partition *partition,
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

- spin_lock_irq(&partition->irqfds.lock);
- ret = 0;
- list_for_each_entry(tmp, &partition->irqfds.items, list) {
- if (irqfd->eventfd != tmp->eventfd)
- continue;
- /* This fd is used for another irq already. */
- ret = -EBUSY;
- spin_unlock_irq(&partition->irqfds.lock);
- goto fail;
- }
- list_add_tail(&irqfd->list, &partition->irqfds.items);
- spin_unlock_irq(&partition->irqfds.lock);
-
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
diff --git a/drivers/hv/mshv_msi.c b/drivers/hv/mshv_msi.c
index ae25ed8dfef4..c5dc9c6fe4d5 100644
--- a/drivers/hv/mshv_msi.c
+++ b/drivers/hv/mshv_msi.c
@@ -71,6 +71,7 @@ int mshv_set_msi_routing(struct mshv_partition *partition,
spin_lock(&partition->irq_lock);
old = rcu_dereference_protected(partition->msi_routing, 1);
rcu_assign_pointer(partition->msi_routing, new);
+ mshv_irqfd_routing_update(partition);
spin_unlock(&partition->irq_lock);

synchronize_srcu_expedited(&partition->irq_srcu);
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index ec349be0ba91..fc655b60c5cd 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -104,6 +104,8 @@ struct mshv_kernel_msi_routing_entry mshv_msi_map_gsi(
void mshv_set_msi_irq(struct mshv_kernel_msi_routing_entry *e,
struct mshv_lapic_irq *irq);

+void mshv_irqfd_routing_update(struct mshv_partition *partition);
+
struct hv_synic_pages {
struct hv_message_page *synic_message_page;
struct hv_synic_event_flags_page *synic_event_flags_page;
diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h
index fa5d46d2eb85..a24ca167f3e9 100644
--- a/include/linux/mshv_eventfd.h
+++ b/include/linux/mshv_eventfd.h
@@ -39,20 +39,22 @@ struct mshv_kernel_irqfd_resampler {
};

struct mshv_kernel_irqfd {
- struct mshv_partition *partition;
- struct eventfd_ctx *eventfd;
- u32 gsi;
- struct mshv_lapic_irq lapic_irq;
- struct list_head list;
- poll_table pt;
- wait_queue_head_t *wqh;
- wait_queue_entry_t wait;
- struct work_struct shutdown;
+ struct mshv_partition *partition;
+ struct eventfd_ctx *eventfd;
+ struct mshv_kernel_msi_routing_entry msi_entry;
+ seqcount_spinlock_t msi_entry_sc;
+ u32 gsi;
+ struct mshv_lapic_irq lapic_irq;
+ struct list_head list;
+ poll_table pt;
+ wait_queue_head_t *wqh;
+ wait_queue_entry_t wait;
+ struct work_struct shutdown;

/* Resampler related */
- struct mshv_kernel_irqfd_resampler *resampler;
- struct eventfd_ctx *resamplefd;
- struct list_head resampler_link;
+ struct mshv_kernel_irqfd_resampler *resampler;
+ struct eventfd_ctx *resamplefd;
+ struct list_head resampler_link;
};

int mshv_irqfd(struct mshv_partition *partition,
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index ac58f2ded79c..b5826120ba7c 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -83,16 +83,10 @@ struct mshv_translate_gva {
#define MSHV_IRQFD_FLAG_RESAMPLE (1 << 1)

struct mshv_irqfd {
- __u64 apic_id;
__s32 fd;
__s32 resamplefd;
__u32 gsi;
- __u32 vector;
- __u32 interrupt_type;
__u32 flags;
- __u8 level_triggered;
- __u8 logical_dest_mode;
- __u8 pad[6];
};

enum {
--
2.25.1

2021-06-02 17:24:23

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 16/17] mshv: User space controlled MSI irq routing for mshv

Implementation of an in-kernel MSI irq routing mechanism for mshv.

Inspired by the KVM irq routing implementation, but adapted
for MSI interrupts only.
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=399ec807ddc38ecccf8c06dbde04531cbdc63e11

All credit goes to kvm developers.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/Makefile | 2 +-
drivers/hv/mshv_main.c | 34 ++++++++++
drivers/hv/mshv_msi.c | 127 ++++++++++++++++++++++++++++++++++++++
include/linux/mshv.h | 27 ++++++++
include/uapi/linux/mshv.h | 13 ++++
5 files changed, 202 insertions(+), 1 deletion(-)
create mode 100644 drivers/hv/mshv_msi.c

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 5cb738c10a2d..370d126252ef 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -14,4 +14,4 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o

mshv-y += mshv_main.o hv_call.o hv_synic.o hv_portid_table.o \
- hv_eventfd.o
+ hv_eventfd.o mshv_msi.o
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index 0f083447c553..f7ca0f082b75 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -852,6 +852,35 @@ mshv_partition_ioctl_irqfd(struct mshv_partition *partition,
return mshv_irqfd(partition, &args);
}

+/*
+ * Handler for the MSHV_SET_MSI_ROUTING partition ioctl.
+ *
+ * Copies the struct mshv_msi_routing header from user space, rejects
+ * requests with more than MSHV_MAX_MSI_ROUTES entries, duplicates the
+ * trailing entry array into kernel memory and hands it to
+ * mshv_set_msi_routing(). The kernel copy is released before returning.
+ *
+ * Returns -EFAULT if the header cannot be copied, -EINVAL if nr is too
+ * large, the vmemdup_user() error if the entries cannot be copied, and
+ * otherwise whatever mshv_set_msi_routing() returns.
+ */
+static long
+mshv_partition_ioctl_set_msi_routing(struct mshv_partition *partition,
+				     void __user *user_args)
+{
+	struct mshv_msi_routing __user *urouting = user_args;
+	struct mshv_msi_routing_entry *table;
+	struct mshv_msi_routing hdr;
+	long rc;
+
+	if (copy_from_user(&hdr, urouting, sizeof(hdr)))
+		return -EFAULT;
+
+	if (hdr.nr > MSHV_MAX_MSI_ROUTES)
+		return -EINVAL;
+
+	/* An empty request carries no entry array to copy in. */
+	if (hdr.nr == 0)
+		return mshv_set_msi_routing(partition, NULL, 0);
+
+	table = vmemdup_user(urouting->entries,
+			     array_size(sizeof(*table), hdr.nr));
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	rc = mshv_set_msi_routing(partition, table, hdr.nr);
+	kvfree(table);
+
+	return rc;
+}
+
static long
mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
@@ -898,6 +927,10 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
ret = mshv_partition_ioctl_ioeventfd(partition,
(void __user *)arg);
break;
+ case MSHV_SET_MSI_ROUTING:
+ ret = mshv_partition_ioctl_set_msi_routing(partition,
+ (void __user *)arg);
+ break;
default:
ret = -ENOTTY;
}
@@ -965,6 +998,7 @@ destroy_partition(struct mshv_partition *partition)
vfree(region->pages);
}

+ mshv_free_msi_routing(partition);
kfree(partition);
}

diff --git a/drivers/hv/mshv_msi.c b/drivers/hv/mshv_msi.c
new file mode 100644
index 000000000000..ae25ed8dfef4
--- /dev/null
+++ b/drivers/hv/mshv_msi.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020, Microsoft Corporation.
+ *
+ * Authors:
+ * Vineeth Remanan Pillai <[email protected]>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/mshv.h>
+#include <linux/mshv_eventfd.h>
+#include <linux/hyperv.h>
+#include <asm/mshyperv.h>
+
+#include "mshv.h"
+
+MODULE_AUTHOR("Microsoft");
+MODULE_LICENSE("GPL");
+
+/*
+ * mshv_set_msi_routing - replace a partition's GSI -> MSI routing table
+ * @partition: partition whose msi_routing pointer is replaced
+ * @ue: routing entries to install; not dereferenced when @nr is 0
+ * @nr: number of entries in @ue; 0 installs an empty (NULL) table
+ *
+ * Builds a table indexed by GSI and sized to the largest GSI in @ue,
+ * publishes it under partition->irq_lock, waits for SRCU readers of the
+ * previous table and then frees the previous table.
+ *
+ * Return: 0 on success; -EINVAL if a GSI is >= MSHV_MAX_MSI_ROUTES, an
+ * entry has a non-zero address_hi, or a GSI is listed more than once;
+ * -ENOMEM if the new table cannot be allocated.
+ */
+int mshv_set_msi_routing(struct mshv_partition *partition,
+			 const struct mshv_msi_routing_entry *ue,
+			 unsigned int nr)
+{
+	struct mshv_msi_routing_table *new = NULL, *old;
+	u32 i, nr_rt_entries = 0;
+	int r = 0;
+
+	if (nr == 0)
+		goto swap_routes;
+
+	/* Validate every entry and find the highest GSI to size the table. */
+	for (i = 0; i < nr; i++) {
+		if (ue[i].gsi >= MSHV_MAX_MSI_ROUTES)
+			return -EINVAL;
+
+		if (ue[i].address_hi)
+			return -EINVAL;
+
+		nr_rt_entries = max(nr_rt_entries, ue[i].gsi);
+	}
+	nr_rt_entries += 1;
+
+	new = kzalloc(struct_size(new, entries, nr_rt_entries),
+		      GFP_KERNEL_ACCOUNT);
+	if (!new)
+		return -ENOMEM;
+
+	new->nr_rt_entries = nr_rt_entries;
+	for (i = 0; i < nr; i++) {
+		struct mshv_kernel_msi_routing_entry *e;
+
+		e = &new->entries[ue[i].gsi];
+
+		/*
+		 * Allow only one to one mapping between GSI and MSI routing.
+		 * The slot's entry_valid flag is the duplicate marker: the
+		 * table is zero-allocated, and testing e->gsi instead would
+		 * never detect GSI 0 being listed twice.
+		 */
+		if (e->entry_valid) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		e->gsi = ue[i].gsi;
+		e->address_lo = ue[i].address_lo;
+		e->address_hi = ue[i].address_hi;
+		e->data = ue[i].data;
+		e->entry_valid = true;
+	}
+
+swap_routes:
+	spin_lock(&partition->irq_lock);
+	old = rcu_dereference_protected(partition->msi_routing, 1);
+	rcu_assign_pointer(partition->msi_routing, new);
+	spin_unlock(&partition->irq_lock);
+
+	synchronize_srcu_expedited(&partition->irq_srcu);
+	/* From here on "new" names the table to free: the replaced one. */
+	new = old;
+
+out:
+	kfree(new);
+
+	return r;
+}
+
+/*
+ * Free the partition's MSI routing table.
+ *
+ * Called only during VM destruction, when nobody can use the pointer any
+ * more, so the table is fetched and freed directly.
+ */
+void mshv_free_msi_routing(struct mshv_partition *partition)
+{
+	kfree(rcu_access_pointer(partition->msi_routing));
+}
+
+/*
+ * mshv_msi_map_gsi - look up the MSI routing entry for a GSI
+ * @partition: partition whose routing table is consulted
+ * @gsi: GSI to look up
+ *
+ * The table is dereferenced with srcu_dereference_check() against
+ * partition->irq_srcu, with partition->irq_lock accepted as the
+ * alternative protection.
+ *
+ * Return: a copy of the table slot for @gsi. When no table is installed,
+ * or @gsi lies beyond the end of the installed table, an entry with only
+ * .gsi set (entry_valid == 0) is returned instead.
+ */
+struct mshv_kernel_msi_routing_entry
+mshv_msi_map_gsi(struct mshv_partition *partition, u32 gsi)
+{
+	struct mshv_kernel_msi_routing_entry entry = { 0 };
+	struct mshv_msi_routing_table *msi_rt;
+
+	entry.gsi = gsi;
+
+	msi_rt = srcu_dereference_check(partition->msi_routing,
+					&partition->irq_srcu,
+					lockdep_is_held(&partition->irq_lock));
+	if (!msi_rt) {
+		pr_warn("No valid routing information found for gsi: %u\n",
+			gsi);
+		return entry;
+	}
+
+	/*
+	 * The table only has nr_rt_entries slots (highest routed GSI + 1);
+	 * indexing past that would read beyond the allocation.
+	 */
+	if (gsi >= msi_rt->nr_rt_entries)
+		return entry;
+
+	return msi_rt->entries[gsi];
+}
+
+/*
+ * Decode a kernel MSI routing entry into a local APIC interrupt description.
+ *
+ * @irq is always zeroed first and is left all-zero when @e is NULL or is
+ * not marked valid.
+ */
+void mshv_set_msi_irq(struct mshv_kernel_msi_routing_entry *e,
+		      struct mshv_lapic_irq *irq)
+{
+	u32 addr, data;
+
+	memset(irq, 0, sizeof(*irq));
+	if (e == NULL)
+		return;
+	if (!e->entry_valid)
+		return;
+
+	addr = e->address_lo;
+	data = e->data;
+
+	/* data[7:0] */
+	irq->vector = data & 0xFF;
+	/* address_lo[19:12] */
+	irq->apic_id = (addr >> 12) & 0xFF;
+	/* data[10:8] */
+	irq->control.interrupt_type = (data >> 8) & 0x7;
+	/* data[15] */
+	irq->control.level_triggered = (data >> 15) & 0x1;
+	/* address_lo[2] */
+	irq->control.logical_dest_mode = (addr >> 2) & 0x1;
+}
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 5968b49b9c27..ec349be0ba91 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -69,6 +69,7 @@ struct mshv_partition {
spinlock_t lock;
struct list_head items;
} ioeventfds;
+ struct mshv_msi_routing_table __rcu *msi_routing;
};

struct mshv_lapic_irq {
@@ -77,6 +78,32 @@ struct mshv_lapic_irq {
union hv_interrupt_control control;
};

+#define MSHV_MAX_MSI_ROUTES 4096
+
+/* Kernel-side copy of one GSI -> MSI route; stored at index gsi in the table. */
+struct mshv_kernel_msi_routing_entry {
+ u32 entry_valid; /* set to true when a route is installed in this slot */
+ u32 gsi;
+ u32 address_lo; /* low 32 bits of the MSI address */
+ u32 address_hi; /* high 32 bits; mshv_set_msi_routing() rejects non-zero */
+ u32 data; /* MSI data value */
+};
+
+/* GSI-indexed MSI routing table published through partition->msi_routing. */
+struct mshv_msi_routing_table {
+ u32 nr_rt_entries; /* number of slots in entries[] */
+ struct mshv_kernel_msi_routing_entry entries[];
+};
+
+int mshv_set_msi_routing(struct mshv_partition *partition,
+ const struct mshv_msi_routing_entry *entries,
+ unsigned int nr);
+void mshv_free_msi_routing(struct mshv_partition *partition);
+
+struct mshv_kernel_msi_routing_entry mshv_msi_map_gsi(
+ struct mshv_partition *partition, u32 gsi);
+
+void mshv_set_msi_irq(struct mshv_kernel_msi_routing_entry *e,
+ struct mshv_lapic_irq *irq);
+
struct hv_synic_pages {
struct hv_message_page *synic_message_page;
struct hv_synic_event_flags_page *synic_event_flags_page;
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 008e68bde56d..ac58f2ded79c 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -117,6 +117,18 @@ struct mshv_ioeventfd {
__u8 pad[4];
};

+/* One user-supplied GSI -> MSI route, passed via struct mshv_msi_routing. */
+struct mshv_msi_routing_entry {
+ __u32 gsi; /* must be below MSHV_MAX_MSI_ROUTES */
+ __u32 address_lo;
+ __u32 address_hi; /* must be 0; the kernel rejects any other value */
+ __u32 data;
+};
+
+/*
+ * Argument of the MSHV_SET_MSI_ROUTING ioctl: a fixed header followed by
+ * nr routing entries.
+ */
+struct mshv_msi_routing {
+	__u32 nr;	/* number of elements in entries[] */
+	__u32 pad;
+	/* C99 flexible array member rather than a zero-length array */
+	struct mshv_msi_routing_entry entries[];
+};

#define MSHV_IOCTL 0xB8

@@ -136,6 +148,7 @@ struct mshv_ioeventfd {
_IOWR(MSHV_IOCTL, 0xD, struct mshv_partition_property)
#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd)
#define MSHV_IOEVENTFD _IOW(MSHV_IOCTL, 0xF, struct mshv_ioeventfd)
+#define MSHV_SET_MSI_ROUTING _IOW(MSHV_IOCTL, 0x11, struct mshv_msi_routing)

/* vp device */
#define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers)
--
2.25.1

2021-06-02 17:24:28

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 12/17] mshv: Add irqfd support for mshv

irqfd is a mechanism to inject a specific interrupt to a guest using a
decoupled eventfd mechanism: Any legal signal on the irqfd (using
eventfd semantics from either userspace or kernel) will translate into
an injected interrupt in the guest at the next available interrupt window.

This is the implementation of irqfd feature in kvm:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=721eecbf4fe995ca94a9edec0c9843b1cc0eaaf3

The basic framework code is taken as is from the kvm implementation.
All credit goes to kvm irqfd/ioeventfd developers.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/Kconfig | 1 +
drivers/hv/Makefile | 3 +-
drivers/hv/hv_eventfd.c | 299 +++++++++++++++++++++++++++++++++++
drivers/hv/mshv_main.c | 26 +++
include/linux/mshv.h | 11 ++
include/linux/mshv_eventfd.h | 35 ++++
include/uapi/linux/mshv.h | 15 ++
7 files changed, 389 insertions(+), 1 deletion(-)
create mode 100644 drivers/hv/hv_eventfd.c
create mode 100644 include/linux/mshv_eventfd.h

diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index d618b1fab2bb..3bf911aac5c7 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -30,6 +30,7 @@ config HYPERV_BALLOON
config HYPERV_ROOT_API
tristate "Microsoft Hypervisor root partition interfaces: /dev/mshv"
depends on HYPERV
+ select EVENTFD
help
Provides access to interfaces for managing guest virtual machines
running under the Microsoft Hypervisor.
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 455a2c01f52c..5cb738c10a2d 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -13,4 +13,5 @@ hv_vmbus-y := vmbus_drv.o \
hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o

-mshv-y += mshv_main.o hv_call.o hv_synic.o hv_portid_table.o
+mshv-y += mshv_main.o hv_call.o hv_synic.o hv_portid_table.o \
+ hv_eventfd.o
diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c
new file mode 100644
index 000000000000..11fcafd1df08
--- /dev/null
+++ b/drivers/hv/hv_eventfd.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eventfd support for mshv
+ *
+ * Heavily inspired from KVM implementation of irqfd. The basic framework
+ * code is taken from the kvm implementation.
+ *
+ * All credits to kvm developers.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/eventfd.h>
+#include <linux/mshv.h>
+#include <linux/mshv_eventfd.h>
+
+#include "mshv.h"
+
+static struct workqueue_struct *irqfd_cleanup_wq;
+
+/*
+ * Assert the interrupt described by irqfd->lapic_irq in the irqfd's
+ * partition.
+ */
+static void
+irqfd_inject(struct mshv_kernel_irqfd *irqfd)
+{
+	hv_call_assert_virtual_interrupt(irqfd->partition->id,
+					 irqfd->lapic_irq.vector,
+					 irqfd->lapic_irq.apic_id,
+					 irqfd->lapic_irq.control);
+}
+
+/*
+ * Work handler that tears down an irqfd: detaches it from the eventfd
+ * wait queue, drops the eventfd reference and frees the irqfd.
+ */
+static void
+irqfd_shutdown(struct work_struct *work)
+{
+	struct mshv_kernel_irqfd *victim;
+
+	victim = container_of(work, struct mshv_kernel_irqfd, shutdown);
+
+	/*
+	 * Unhook from the wait queue first; this synchronizes with the
+	 * wake-up path and prevents any further events.
+	 */
+	remove_wait_queue(victim->wqh, &victim->wait);
+
+	/* Only now is it safe to release the object's resources. */
+	eventfd_ctx_put(victim->eventfd);
+	kfree(victim);
+}
+
+/*
+ * An irqfd is active for as long as it is linked on a list.
+ *
+ * assumes partition->irqfds.lock is held
+ */
+static bool
+irqfd_is_active(struct mshv_kernel_irqfd *irqfd)
+{
+	return !list_empty(&irqfd->list);
+}
+
+/*
+ * Unlink the irqfd (which marks it inactive) and queue its shutdown work
+ * on the cleanup workqueue. Deactivating an already inactive irqfd is a
+ * bug.
+ *
+ * assumes partition->irqfds.lock is held
+ */
+static void
+irqfd_deactivate(struct mshv_kernel_irqfd *irqfd)
+{
+	/* "active" means still linked on the list */
+	BUG_ON(list_empty(&irqfd->list));
+
+	list_del_init(&irqfd->list);
+	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
+}
+
+/*
+ * Wait-queue callback installed on the eventfd.
+ *
+ * POLLIN injects the interrupt; POLLHUP detaches the irqfd from its
+ * partition. Always returns 0.
+ *
+ * Called with wqh->lock held and interrupts disabled
+ */
+static int
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
+	     int sync, void *key)
+{
+	struct mshv_kernel_irqfd *irqfd =
+		container_of(wait, struct mshv_kernel_irqfd, wait);
+	unsigned long events = (unsigned long)key;
+	struct mshv_partition *partition;
+	unsigned long irq_flags;
+
+	/* An event has been signaled, inject an interrupt */
+	if (events & POLLIN)
+		irqfd_inject(irqfd);
+
+	if (!(events & POLLHUP))
+		return 0;
+
+	/* The eventfd is closing, detach from Partition */
+	partition = irqfd->partition;
+
+	spin_lock_irqsave(&partition->irqfds.lock, irq_flags);
+
+	/*
+	 * The item is deactivated from the mshv side before it is unhooked
+	 * from the wait-queue, so someone may have deactivated the irqfd
+	 * before we could acquire irqfds.lock. If so, simply return knowing
+	 * the other side will clean up for us. We cannot race against the
+	 * irqfd going away since the other side is required to acquire
+	 * wqh->lock, which we hold.
+	 */
+	if (irqfd_is_active(irqfd))
+		irqfd_deactivate(irqfd);
+
+	spin_unlock_irqrestore(&partition->irqfds.lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * poll_table queueing callback: remember the wait-queue head in the irqfd
+ * and add the irqfd's wait entry to it.
+ */
+static void
+irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
+			poll_table *pt)
+{
+	struct mshv_kernel_irqfd *owner;
+
+	owner = container_of(pt, struct mshv_kernel_irqfd, pt);
+	owner->wqh = wqh;
+	add_wait_queue(wqh, &owner->wait);
+}
+
+/*
+ * Create an irqfd for @partition from the user request in @args and hook
+ * it onto the eventfd named by args->fd.
+ *
+ * Returns 0 on success, -ENOMEM if the irqfd cannot be allocated, -EBADF
+ * if args->fd is not an open file, the eventfd_ctx_fileget() error if the
+ * file is not usable as an eventfd, or -EBUSY if the eventfd is already
+ * attached to an irqfd of this partition.
+ */
+static int
+mshv_irqfd_assign(struct mshv_partition *partition,
+ struct mshv_irqfd *args)
+{
+ struct mshv_kernel_irqfd *irqfd, *tmp;
+ struct fd f;
+ struct eventfd_ctx *eventfd = NULL;
+ int ret;
+ unsigned int events;
+
+ irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+ if (!irqfd)
+ return -ENOMEM;
+
+ /* The interrupt to inject is taken verbatim from the user request. */
+ irqfd->partition = partition;
+ irqfd->gsi = args->gsi;
+ irqfd->lapic_irq.vector = args->vector;
+ irqfd->lapic_irq.apic_id = args->apic_id;
+ irqfd->lapic_irq.control.interrupt_type = args->interrupt_type;
+ irqfd->lapic_irq.control.level_triggered = args->level_triggered;
+ irqfd->lapic_irq.control.logical_dest_mode = args->logical_dest_mode;
+ INIT_LIST_HEAD(&irqfd->list);
+ INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+
+ f = fdget(args->fd);
+ if (!f.file) {
+ ret = -EBADF;
+ goto out;
+ }
+
+ eventfd = eventfd_ctx_fileget(f.file);
+ if (IS_ERR(eventfd)) {
+ ret = PTR_ERR(eventfd);
+ goto fail;
+ }
+
+ irqfd->eventfd = eventfd;
+
+ /*
+ * Install our own custom wake-up handling so we are notified via
+ * a callback whenever someone signals the underlying eventfd
+ */
+ init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+ init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
+
+ /* Reject an eventfd that is already on the list, else link the irqfd. */
+ spin_lock_irq(&partition->irqfds.lock);
+ ret = 0;
+ list_for_each_entry(tmp, &partition->irqfds.items, list) {
+ if (irqfd->eventfd != tmp->eventfd)
+ continue;
+ /* This fd is used for another irq already. */
+ ret = -EBUSY;
+ spin_unlock_irq(&partition->irqfds.lock);
+ goto fail;
+ }
+ list_add_tail(&irqfd->list, &partition->irqfds.items);
+ spin_unlock_irq(&partition->irqfds.lock);
+
+ /*
+ * Check if there was an event already pending on the eventfd
+ * before we registered, and trigger it as if we didn't miss it.
+ * The poll also invokes irqfd_ptable_queue_proc() through irqfd->pt.
+ */
+ events = vfs_poll(f.file, &irqfd->pt);
+
+ if (events & POLLIN)
+ irqfd_inject(irqfd);
+
+ /*
+ * do not drop the file until the irqfd is fully initialized, otherwise
+ * we might race against the POLLHUP
+ */
+ fdput(f);
+
+ return 0;
+
+fail:
+ /* eventfd is NULL or an ERR_PTR if we failed before obtaining it. */
+ if (eventfd && !IS_ERR(eventfd))
+ eventfd_ctx_put(eventfd);
+
+ fdput(f);
+
+out:
+ kfree(irqfd);
+ return ret;
+}
+
+/* Initialise the partition's irqfd list and the lock that protects it. */
+void
+mshv_irqfd_init(struct mshv_partition *partition)
+{
+	INIT_LIST_HEAD(&partition->irqfds.items);
+	spin_lock_init(&partition->irqfds.lock);
+}
+
+/*
+ * Shut down every irqfd of @partition that matches both the eventfd named
+ * by args->fd and args->gsi.
+ *
+ * Returns 0, or the eventfd_ctx_fdget() error if args->fd cannot be
+ * resolved to an eventfd.
+ */
+static int
+mshv_irqfd_deassign(struct mshv_partition *partition,
+		    struct mshv_irqfd *args)
+{
+	struct eventfd_ctx *ctx = eventfd_ctx_fdget(args->fd);
+	struct mshv_kernel_irqfd *cur, *next;
+
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	spin_lock_irq(&partition->irqfds.lock);
+	list_for_each_entry_safe(cur, next, &partition->irqfds.items, list) {
+		if (cur->eventfd != ctx || cur->gsi != args->gsi)
+			continue;
+		irqfd_deactivate(cur);
+	}
+	spin_unlock_irq(&partition->irqfds.lock);
+
+	eventfd_ctx_put(ctx);
+
+	/*
+	 * Wait for all outstanding shutdown jobs so that no more interrupts
+	 * can arrive on this gsi once we return.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+
+	return 0;
+}
+
+/*
+ * Entry point for an irqfd request: MSHV_IRQFD_FLAG_DEASSIGN in
+ * args->flags selects removal, anything else creates a new irqfd.
+ */
+int
+mshv_irqfd(struct mshv_partition *partition, struct mshv_irqfd *args)
+{
+	if (!(args->flags & MSHV_IRQFD_FLAG_DEASSIGN))
+		return mshv_irqfd_assign(partition, args);
+
+	return mshv_irqfd_deassign(partition, args);
+}
+
+/*
+ * Called as the mshv VM fd is being released: shut down every irqfd that
+ * is still on the partition's list.
+ */
+void
+mshv_irqfd_release(struct mshv_partition *partition)
+{
+	struct mshv_kernel_irqfd *cur, *next;
+
+	spin_lock_irq(&partition->irqfds.lock);
+	list_for_each_entry_safe(cur, next, &partition->irqfds.items, list)
+		irqfd_deactivate(cur);
+	spin_unlock_irq(&partition->irqfds.lock);
+
+	/*
+	 * No mshv_partition reference is taken by the irqfds, so wait here
+	 * until all outstanding shutdown jobs have completed.
+	 */
+	flush_workqueue(irqfd_cleanup_wq);
+}
+
+/*
+ * Allocate the workqueue used for irqfd shutdown work.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int mshv_irqfd_wq_init(void)
+{
+	irqfd_cleanup_wq = alloc_workqueue("mshv-irqfd-cleanup", 0, 0);
+
+	return irqfd_cleanup_wq ? 0 : -ENOMEM;
+}
+
+/*
+ * Destroy the irqfd cleanup workqueue.
+ *
+ * Does nothing when no workqueue exists, i.e. when mshv_irqfd_wq_init()
+ * failed or when cleanup has already run, so it never passes a NULL or
+ * stale pointer to destroy_workqueue().
+ */
+void mshv_irqfd_wq_cleanup(void)
+{
+	if (!irqfd_cleanup_wq)
+		return;
+
+	destroy_workqueue(irqfd_cleanup_wq);
+	irqfd_cleanup_wq = NULL;
+}
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index ccf0971d0d39..e124119e65eb 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -19,6 +19,7 @@
#include <linux/cpuhotplug.h>
#include <linux/random.h>
#include <linux/mshv.h>
+#include <linux/mshv_eventfd.h>
#include <asm/mshyperv.h>

#include "mshv.h"
@@ -827,6 +828,18 @@ mshv_partition_ioctl_assert_interrupt(struct mshv_partition *partition,
args.control);
}

+/*
+ * Handler for the MSHV_IRQFD partition ioctl: copy the request from user
+ * space and pass it to mshv_irqfd().
+ *
+ * Returns -EFAULT if the request cannot be copied, otherwise the result
+ * of mshv_irqfd().
+ */
+static long
+mshv_partition_ioctl_irqfd(struct mshv_partition *partition,
+			   void __user *user_args)
+{
+	struct mshv_irqfd req;
+
+	if (copy_from_user(&req, user_args, sizeof(req)) != 0)
+		return -EFAULT;
+
+	return mshv_irqfd(partition, &req);
+}
+
static long
mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
@@ -865,6 +878,10 @@ mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
ret = mshv_partition_ioctl_set_property(partition,
(void __user *)arg);
break;
+ case MSHV_IRQFD:
+ ret = mshv_partition_ioctl_irqfd(partition,
+ (void __user *)arg);
+ break;
default:
ret = -ENOTTY;
}
@@ -955,6 +972,8 @@ mshv_partition_release(struct inode *inode, struct file *filp)
{
struct mshv_partition *partition = filp->private_data;

+ mshv_irqfd_release(partition);
+
mshv_partition_put(partition);

return 0;
@@ -1049,6 +1068,8 @@ mshv_ioctl_create_partition(void __user *user_arg)

fd_install(fd, file);

+ mshv_irqfd_init(partition);
+
return fd;

release_file:
@@ -1137,12 +1158,17 @@ __init mshv_init(void)
mshv_cpuhp_online = ret;
spin_lock_init(&mshv.partitions.lock);

+ if (mshv_irqfd_wq_init())
+ mshv_irqfd_wq_cleanup();
+
return 0;
}

static void
__exit mshv_exit(void)
{
+ mshv_irqfd_wq_cleanup();
+
cpuhp_remove_state(mshv_cpuhp_online);
free_percpu(mshv.synic_pages);

diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 679aa3fa8cdb..5707c457b72d 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -47,6 +47,17 @@ struct mshv_partition {
u32 count;
struct mshv_vp *array[MSHV_MAX_VPS];
} vps;
+
+ struct {
+ spinlock_t lock;
+ struct list_head items;
+ } irqfds;
+};
+
+struct mshv_lapic_irq {
+ u32 vector;
+ u64 apic_id;
+ union hv_interrupt_control control;
};

struct hv_synic_pages {
diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h
new file mode 100644
index 000000000000..3e7b16d0717f
--- /dev/null
+++ b/include/linux/mshv_eventfd.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * irqfd: Allows an fd to be used to inject an interrupt to the guest
+ * All credit goes to kvm developers.
+ */
+
+#ifndef __LINUX_MSHV_EVENTFD_H
+#define __LINUX_MSHV_EVENTFD_H
+
+#include <linux/mshv.h>
+#include <linux/poll.h>
+
+/* Kernel state for one eventfd bound to a guest interrupt. */
+struct mshv_kernel_irqfd {
+ struct mshv_partition *partition; /* partition the interrupt is asserted in */
+ struct eventfd_ctx *eventfd; /* eventfd being watched; reference held */
+ u32 gsi; /* matched together with the eventfd on deassign */
+ struct mshv_lapic_irq lapic_irq; /* interrupt asserted on POLLIN */
+ struct list_head list; /* link in partition->irqfds.items */
+ poll_table pt; /* used to hook onto the eventfd wait queue */
+ wait_queue_head_t *wqh; /* wait queue head that "wait" was added to */
+ wait_queue_entry_t wait; /* entry whose callback is irqfd_wakeup() */
+ struct work_struct shutdown; /* deferred teardown, see irqfd_shutdown() */
+};
+
+int mshv_irqfd(struct mshv_partition *partition,
+ struct mshv_irqfd *args);
+
+void mshv_irqfd_init(struct mshv_partition *partition);
+void mshv_irqfd_release(struct mshv_partition *partition);
+
+int mshv_irqfd_wq_init(void);
+void mshv_irqfd_wq_cleanup(void);
+
+#endif /* __LINUX_MSHV_EVENTFD_H */
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index 0c46ce77cbb3..792844276134 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -79,6 +79,20 @@ struct mshv_translate_gva {
__u64 *gpa;
};

+#define MSHV_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+/* Argument of the MSHV_IRQFD ioctl. */
+struct mshv_irqfd {
+ __u64 apic_id; /* copied into the irqfd's lapic_irq.apic_id */
+ __s32 fd; /* eventfd file descriptor to bind or unbind */
+ __u32 gsi; /* stored in the irqfd; used to match on deassign */
+ __u32 vector; /* copied into lapic_irq.vector */
+ __u32 interrupt_type; /* copied into lapic_irq.control.interrupt_type */
+ __u32 flags; /* MSHV_IRQFD_FLAG_* */
+ __u8 level_triggered; /* copied into lapic_irq.control.level_triggered */
+ __u8 logical_dest_mode; /* copied into lapic_irq.control.logical_dest_mode */
+ __u8 pad[2];
+};
+
#define MSHV_IOCTL 0xB8

/* mshv device */
@@ -95,6 +109,7 @@ struct mshv_translate_gva {
_IOW(MSHV_IOCTL, 0xC, struct mshv_partition_property)
#define MSHV_GET_PARTITION_PROPERTY \
_IOWR(MSHV_IOCTL, 0xD, struct mshv_partition_property)
+#define MSHV_IRQFD _IOW(MSHV_IOCTL, 0xE, struct mshv_irqfd)

/* vp device */
#define MSHV_GET_VP_REGISTERS _IOWR(MSHV_IOCTL, 0x05, struct mshv_vp_registers)
--
2.25.1

2021-06-02 17:24:40

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 10/17] mshv: Doorbell register/unregister API

Doorbell is a mechanism by which a parent partition can register for
notification if a specified mmio address is touched by a child partition.
Parent partition can setup the notification by specifying mmio address,
size of the data written (1/2/4/8 bytes) and optionally the data as well.

Add APIs for registering and unregistering doorbell notifications.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_synic.c | 81 +++++++++++++++++++++++++++++++++++++++++++
drivers/hv/mshv.h | 4 +++
2 files changed, 85 insertions(+)

diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
index e3262f6d3daa..af6653967209 100644
--- a/drivers/hv/hv_synic.c
+++ b/drivers/hv/hv_synic.c
@@ -9,6 +9,7 @@
*/

#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/random.h>
@@ -389,3 +390,83 @@ int mshv_synic_cleanup(unsigned int cpu)

return 0;
}
+
+/*
+ * hv_register_doorbell - register a doorbell notification for a partition
+ * @partition_id: partition passed as the connection partition to the
+ * create-port and connect-port hypercalls
+ * @doorbell_cb: callback stored in the port table entry
+ * @data: opaque pointer stored next to @doorbell_cb
+ * @gpa: guest physical address of the doorbell connection
+ * @val: trigger value of the doorbell connection
+ * @flags: flags of the doorbell connection
+ *
+ * Allocates a port id, creates a doorbell port in the current partition
+ * and connects it. Steps already completed are undone on failure.
+ *
+ * Return: the port id (also used as the doorbell id) on success, -ENOMEM
+ * if the port table entry cannot be allocated, otherwise the negative
+ * error from hv_portid_alloc() or from the failing hypercall.
+ */
+int
+hv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb, void *data,
+ u64 gpa, u64 val, u64 flags)
+{
+ struct hv_connection_info connection_info = { 0 };
+ union hv_connection_id connection_id = { 0 };
+ struct port_table_info *port_table_info;
+ struct hv_port_info port_info = { 0 };
+ union hv_port_id port_id = { 0 };
+ int ret;
+
+ port_table_info = kmalloc(sizeof(struct port_table_info),
+ GFP_KERNEL);
+ if (!port_table_info)
+ return -ENOMEM;
+
+ port_table_info->port_type = HV_PORT_TYPE_DOORBELL;
+ port_table_info->port_doorbell.doorbell_cb = doorbell_cb;
+ port_table_info->port_doorbell.data = data;
+ ret = hv_portid_alloc(port_table_info);
+ if (ret < 0) {
+ pr_err("Failed to create the doorbell port!\n");
+ kfree(port_table_info);
+ return ret;
+ }
+
+ /* A non-negative return from hv_portid_alloc() is the new port id. */
+ port_id.u.id = ret;
+ port_info.port_type = HV_PORT_TYPE_DOORBELL;
+ port_info.doorbell_port_info.target_sint = HV_SYNIC_DOORBELL_SINT_INDEX;
+ port_info.doorbell_port_info.target_vp = HV_ANY_VP;
+ ret = hv_call_create_port(hv_current_partition_id, port_id, partition_id,
+ &port_info,
+ 0, 0, NUMA_NO_NODE);
+
+ if (ret < 0) {
+ pr_err("Failed to create the port!\n");
+ /*
+ * NOTE(review): port_table_info is not freed here; presumably
+ * hv_portid_free() releases the entry - confirm.
+ */
+ hv_portid_free(port_id.u.id);
+ return ret;
+ }
+
+ /* The connection reuses the port's numeric id. */
+ connection_id.u.id = port_id.u.id;
+ connection_info.port_type = HV_PORT_TYPE_DOORBELL;
+ connection_info.doorbell_connection_info.gpa = gpa;
+ connection_info.doorbell_connection_info.trigger_value = val;
+ connection_info.doorbell_connection_info.flags = flags;
+
+ ret = hv_call_connect_port(hv_current_partition_id, port_id, partition_id,
+ connection_id, &connection_info, 0, NUMA_NO_NODE);
+ if (ret < 0) {
+ hv_call_delete_port(hv_current_partition_id, port_id);
+ hv_portid_free(port_id.u.id);
+ return ret;
+ }
+
+ // let's use the port_id as the doorbell_id
+ return port_id.u.id;
+}
+
+/*
+ * hv_unregister_doorbell - tear down a doorbell set up by hv_register_doorbell()
+ * @partition_id: partition passed to the disconnect-port hypercall
+ * @doorbell_portid: id returned by hv_register_doorbell()
+ *
+ * Disconnects the doorbell connection, deletes the port in the current
+ * partition and releases the port id. All three steps are attempted even
+ * when an earlier one fails.
+ *
+ * Return: the disconnect error if disconnecting failed, otherwise the
+ * result of deleting the port.
+ */
+int
+hv_unregister_doorbell(u64 partition_id, int doorbell_portid)
+{
+	union hv_connection_id connection_id = { 0 };
+	union hv_port_id port_id = { 0 };
+	int ret, err;
+
+	connection_id.u.id = doorbell_portid;
+	ret = hv_call_disconnect_port(partition_id, connection_id);
+	if (ret < 0)
+		pr_err("Failed to disconnect the doorbell connection!\n");
+
+	port_id.u.id = doorbell_portid;
+	err = hv_call_delete_port(hv_current_partition_id, port_id);
+	if (err < 0)
+		pr_err("Failed to delete the doorbell port!\n");
+
+	/* Do not let the delete result hide an earlier disconnect failure. */
+	if (ret >= 0)
+		ret = err;
+
+	hv_portid_free(doorbell_portid);
+
+	return ret;
+}
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index 07b0e7865a4c..76cd00fd4b3f 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -74,6 +74,10 @@ int hv_portid_alloc(struct port_table_info *info);
int hv_portid_lookup(int port_id, struct port_table_info *info);
void hv_portid_free(int port_id);

+int hv_register_doorbell(u64 partition_id, doorbell_cb_t doorbell_cb,
+ void *data, u64 gpa, u64 val, u64 flags);
+int hv_unregister_doorbell(u64 partition_id, int doorbell_portid);
+
/*
* Hyper-V hypercalls
*/
--
2.25.1

2021-06-02 17:24:55

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 04/17] hyperv: Wrapper for setting proximity_domain_info

Refactor the code to populate proximity_domain_info from numa node
as a wrapper function. This wrapper is needed in future patches in
this series. No intended change in functionality.

Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/hyperv/hv_proc.c | 15 ++++-----------
include/asm-generic/mshyperv.h | 14 ++++++++++++++
2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
index ec9b0c69603e..30c88f1ec558 100644
--- a/arch/x86/hyperv/hv_proc.c
+++ b/arch/x86/hyperv/hv_proc.c
@@ -3,7 +3,6 @@
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/clockchips.h>
-#include <linux/acpi.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
@@ -146,7 +145,6 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
u64 status;
unsigned long flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);

/*
* When adding a logical processor, the hypervisor may return
@@ -163,10 +161,8 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
input->lp_index = lp_index;
input->apic_id = apic_id;
input->flags = 0;
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info =
+ numa_node_to_proximity_domain_info(node);
status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
input, output);
local_irq_restore(flags);
@@ -191,7 +187,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
u64 status;
unsigned long irq_flags;
int ret = HV_STATUS_SUCCESS;
- int pxm = node_to_pxm(node);

/* Root VPs don't seem to need pages deposited */
if (partition_id != hv_current_partition_id) {
@@ -211,10 +206,8 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
input->flags = flags;
input->subnode_type = HvSubnodeAny;
if (node != NUMA_NO_NODE) {
- input->proximity_domain_info.domain_id = pxm;
- input->proximity_domain_info.flags.reserved = 0;
- input->proximity_domain_info.flags.proximity_info_valid = 1;
- input->proximity_domain_info.flags.proximity_preferred = 1;
+ input->proximity_domain_info =
+ numa_node_to_proximity_domain_info(node);
} else {
input->proximity_domain_info.as_uint64 = 0;
}
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index ec9afca749f0..d9b91b8f63c8 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -21,10 +21,24 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
#include <linux/cpumask.h>
#include <asm/ptrace.h>
#include <asm/hyperv-tlfs.h>

+/*
+ * Build the proximity domain descriptor for a NUMA node: the domain id is
+ * node_to_pxm(node), and the descriptor is marked valid and preferred.
+ */
+static inline union hv_proximity_domain_info
+numa_node_to_proximity_domain_info(int node)
+{
+	union hv_proximity_domain_info info;
+
+	info.domain_id = node_to_pxm(node);
+	info.flags.reserved = 0;
+	info.flags.proximity_info_valid = 1;
+	info.flags.proximity_preferred = 1;
+
+	return info;
+}
+
struct ms_hyperv_info {
u32 features;
u32 priv_high;
--
2.25.1

2021-06-02 17:25:12

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 06/17] mshv: SynIC port and connection hypercalls

Hyper-V enables inter-partition communication through the port and
connection constructs. More details about ports and connections can be
found in TLFS chapter 11.

Implement the hypercalls related to ports and connections to enable
inter-partition communication.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_call.c | 161 +++++++++++++++++++++++++
drivers/hv/mshv.h | 12 ++
include/asm-generic/hyperv-tlfs.h | 55 +++++++++
include/linux/hyperv.h | 9 --
include/uapi/asm-generic/hyperv-tlfs.h | 76 ++++++++++++
5 files changed, 304 insertions(+), 9 deletions(-)

diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
index 025d4e2b892f..57db3a8ac94a 100644
--- a/drivers/hv/hv_call.c
+++ b/drivers/hv/hv_call.c
@@ -742,3 +742,164 @@ int hv_call_translate_virtual_address(
return hv_status_to_errno(status);
}

+
+int
+hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
+ u64 connection_partition_id,
+ struct hv_port_info *port_info,
+ u8 port_vtl, u8 min_connection_vtl, int node)
+{
+ struct hv_create_port *input;
+ unsigned long flags;
+ int ret = 0;
+ int status;
+
+ do {
+ local_irq_save(flags);
+ input = (struct hv_create_port *)(*this_cpu_ptr(
+ hyperv_pcpu_input_arg));
+ memset(input, 0, sizeof(*input));
+
+ input->port_partition_id = port_partition_id;
+ input->port_id = port_id;
+ input->connection_partition_id = connection_partition_id;
+ input->port_info = *port_info;
+ input->port_vtl = port_vtl;
+ input->min_connection_vtl = min_connection_vtl;
+ input->proximity_domain_info =
+ numa_node_to_proximity_domain_info(node);
+ status = hv_do_hypercall(HVCALL_CREATE_PORT, input,
+ NULL) & HV_HYPERCALL_RESULT_MASK;
+ local_irq_restore(flags);
+ if (status == HV_STATUS_SUCCESS)
+ break;
+
+ if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
+ pr_err("%s: %s\n",
+ __func__, hv_status_to_string(status));
+ ret = -hv_status_to_errno(status);
+ break;
+ }
+ ret = hv_call_deposit_pages(NUMA_NO_NODE,
+ port_partition_id, 1);
+
+ } while (!ret);
+
+ return ret;
+}
+
+int
+hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id)
+{
+ union hv_delete_port input = { 0 };
+ unsigned long flags;
+ int status;
+
+ local_irq_save(flags);
+ input.port_partition_id = port_partition_id;
+ input.port_id = port_id;
+ status = hv_do_fast_hypercall16(HVCALL_DELETE_PORT,
+ input.as_uint64[0],
+ input.as_uint64[1]) &
+ HV_HYPERCALL_RESULT_MASK;
+ local_irq_restore(flags);
+
+ if (status != HV_STATUS_SUCCESS) {
+ pr_err("%s: %s\n", __func__, hv_status_to_string(status));
+ return -hv_status_to_errno(status);
+ }
+
+ return 0;
+}
+
+int
+hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
+ u64 connection_partition_id,
+ union hv_connection_id connection_id,
+ struct hv_connection_info *connection_info,
+ u8 connection_vtl, int node)
+{
+ struct hv_connect_port *input;
+ unsigned long flags;
+ int ret = 0, status;
+
+ do {
+ local_irq_save(flags);
+ input = (struct hv_connect_port *)(*this_cpu_ptr(
+ hyperv_pcpu_input_arg));
+ memset(input, 0, sizeof(*input));
+ input->port_partition_id = port_partition_id;
+ input->port_id = port_id;
+ input->connection_partition_id = connection_partition_id;
+ input->connection_id = connection_id;
+ input->connection_info = *connection_info;
+ input->connection_vtl = connection_vtl;
+ input->proximity_domain_info =
+ numa_node_to_proximity_domain_info(node);
+ status = hv_do_hypercall(HVCALL_CONNECT_PORT, input,
+ NULL) & HV_HYPERCALL_RESULT_MASK;
+
+ local_irq_restore(flags);
+ if (status == HV_STATUS_SUCCESS)
+ break;
+
+ if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
+ pr_err("%s: %s\n",
+ __func__, hv_status_to_string(status));
+ ret = -hv_status_to_errno(status);
+ break;
+ }
+ ret = hv_call_deposit_pages(NUMA_NO_NODE,
+ connection_partition_id, 1);
+ } while (!ret);
+
+ return ret;
+}
+
+int
+hv_call_disconnect_port(u64 connection_partition_id,
+ union hv_connection_id connection_id)
+{
+ union hv_disconnect_port input = { 0 };
+ unsigned long flags;
+ int status;
+
+ local_irq_save(flags);
+ input.connection_partition_id = connection_partition_id;
+ input.connection_id = connection_id;
+ input.is_doorbell = 1;
+ status = hv_do_fast_hypercall16(HVCALL_DISCONNECT_PORT,
+ input.as_uint64[0],
+ input.as_uint64[1]) &
+ HV_HYPERCALL_RESULT_MASK;
+ local_irq_restore(flags);
+
+ if (status != HV_STATUS_SUCCESS) {
+ pr_err("%s: %s\n", __func__, hv_status_to_string(status));
+ return -hv_status_to_errno(status);
+ }
+
+ return 0;
+}
+
+int
+hv_call_notify_port_ring_empty(u32 sint_index)
+{
+ union hv_notify_port_ring_empty input = { 0 };
+ unsigned long flags;
+ int status;
+
+ local_irq_save(flags);
+ input.sint_index = sint_index;
+ status = hv_do_fast_hypercall8(HVCALL_NOTIFY_PORT_RING_EMPTY,
+ input.as_uint64) &
+ HV_HYPERCALL_RESULT_MASK;
+ local_irq_restore(flags);
+
+ if (status != HV_STATUS_SUCCESS) {
+ pr_err("%s: %s\n", __func__, hv_status_to_string(status));
+ return -hv_status_to_errno(status);
+ }
+
+ return 0;
+}
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index 037291a0ad45..e16818e977b9 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -117,4 +117,16 @@ int hv_call_translate_virtual_address(
u64 *gpa,
union hv_translate_gva_result *result);

+int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
+ u64 connection_partition_id, struct hv_port_info *port_info,
+ u8 port_vtl, u8 min_connection_vtl, int node);
+int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
+int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
+ u64 connection_partition_id,
+ union hv_connection_id connection_id,
+ struct hv_connection_info *connection_info,
+ u8 connection_vtl, int node);
+int hv_call_disconnect_port(u64 connection_partition_id,
+ union hv_connection_id connection_id);
+int hv_call_notify_port_ring_empty(u32 sint_index);
#endif /* _MSHV_H */
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index f70391a3320f..42e0237b0da8 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -159,6 +159,8 @@ struct ms_hyperv_tsc_page {
#define HVCALL_GET_VP_REGISTERS 0x0050
#define HVCALL_SET_VP_REGISTERS 0x0051
#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
+#define HVCALL_DELETE_PORT 0x0058
+#define HVCALL_DISCONNECT_PORT 0x005b
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
#define HVCALL_POST_DEBUG_DATA 0x0069
@@ -168,7 +170,10 @@ struct ms_hyperv_tsc_page {
#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
#define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
+#define HVCALL_CREATE_PORT 0x0095
+#define HVCALL_CONNECT_PORT 0x0096
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
#define HVCALL_MAP_VP_STATE_PAGE 0x00e1
@@ -949,4 +954,54 @@ struct hv_translate_virtual_address_out {
u64 gpa_page;
} __packed;

+struct hv_create_port {
+ u64 port_partition_id;
+ union hv_port_id port_id;
+ u8 port_vtl;
+ u8 min_connection_vtl;
+ u16 padding;
+ u64 connection_partition_id;
+ struct hv_port_info port_info;
+ union hv_proximity_domain_info proximity_domain_info;
+} __packed;
+
+union hv_delete_port {
+ u64 as_uint64[2];
+ struct {
+ u64 port_partition_id;
+ union hv_port_id port_id;
+ u32 reserved;
+ } __packed;
+};
+
+union hv_notify_port_ring_empty {
+ u64 as_uint64;
+ struct {
+ u32 sint_index;
+ u32 reserved;
+ } __packed;
+};
+
+struct hv_connect_port {
+ u64 connection_partition_id;
+ union hv_connection_id connection_id;
+ u8 connection_vtl;
+ u8 rsvdz0;
+ u16 rsvdz1;
+ u64 port_partition_id;
+ union hv_port_id port_id;
+ u32 reserved2;
+ struct hv_connection_info connection_info;
+ union hv_proximity_domain_info proximity_domain_info;
+} __packed;
+
+union hv_disconnect_port {
+ u64 as_uint64[2];
+ struct {
+ u64 connection_partition_id;
+ union hv_connection_id connection_id;
+ u32 is_doorbell: 1;
+ u32 reserved: 31;
+ } __packed;
+};
#endif
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2e859d2f9609..76ff26579622 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -750,15 +750,6 @@ struct vmbus_close_msg {
struct vmbus_channel_close_channel msg;
};

-/* Define connection identifier type. */
-union hv_connection_id {
- u32 asu32;
- struct {
- u32 id:24;
- u32 reserved:8;
- } u;
-};
-
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
index 388c4eb29212..2031115c6cce 100644
--- a/include/uapi/asm-generic/hyperv-tlfs.h
+++ b/include/uapi/asm-generic/hyperv-tlfs.h
@@ -53,6 +53,25 @@ union hv_message_flags {
} __packed;
};

+enum hv_port_type {
+ HV_PORT_TYPE_MESSAGE = 1,
+ HV_PORT_TYPE_EVENT = 2,
+ HV_PORT_TYPE_MONITOR = 3,
+ HV_PORT_TYPE_DOORBELL = 4 // Root Partition only
+};
+
+
+/*
+ * Doorbell connection_info flags.
+ */
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_MASK 0x00000007
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY 0x00000000
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE 0x00000001
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD 0x00000002
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD 0x00000003
+#define HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD 0x00000004
+#define HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE 0x80000000
+
/* Define port identifier type. */
union hv_port_id {
__u32 asu32;
@@ -62,6 +81,63 @@ union hv_port_id {
} __packed u;
};

+struct hv_port_info {
+ enum hv_port_type port_type;
+ __u32 padding;
+ union {
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u64 rsvdz;
+ } message_port_info;
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u16 base_flag_number;
+ __u16 flag_count;
+ __u32 rsvdz;
+ } event_port_info;
+ struct {
+ __u64 monitor_address;
+ __u64 rsvdz;
+ } monitor_port_info;
+ struct {
+ __u32 target_sint;
+ __u32 target_vp;
+ __u64 rsvdz;
+ } doorbell_port_info;
+ };
+};
+
+union hv_connection_id {
+ __u32 asu32;
+ struct {
+ __u32 id:24;
+ __u32 reserved:8;
+ } u;
+};
+
+struct hv_connection_info {
+ enum hv_port_type port_type;
+ __u32 padding;
+ union {
+ struct {
+ __u64 rsvdz;
+ } message_connection_info;
+ struct {
+ __u64 rsvdz;
+ } event_connection_info;
+ struct {
+ __u64 monitor_address;
+ } monitor_connection_info;
+ struct {
+ __u64 gpa;
+ __u64 trigger_value;
+ __u64 flags;
+ } doorbell_connection_info;
+ };
+};
+
/* Define synthetic interrupt controller message header. */
struct hv_message_header {
__u32 message_type;
--
2.25.1

2021-06-02 17:25:13

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 05/17] mshv: SynIC event ring and event flags support

A doorbell is a mechanism by which a parent partition can register for
notification if a specified MMIO address is touched by a child partition.
The parent partition can set up the notification by specifying the MMIO
address, the size of the data written (1/2/4/8 bytes) and, optionally, the
data as well.

Hyper-V makes use of an event ring buffer to signal events. This
buffer is implemented as a GPA overlay page. Doorbell notifications are
delivered via this event ring buffer.

Enable SynIC event ring buffer.

While at it, enable SynIC event flags. Event flags are a lightweight
inter-partition communication mechanism used to signal events between
partitions or from the hypervisor to a partition.

Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/include/asm/hyperv-tlfs.h | 2 +
drivers/hv/hv_synic.c | 81 +++++++++++++++++++++++++++---
drivers/hv/mshv_main.c | 6 +--
include/asm-generic/hyperv-tlfs.h | 29 +++++++++++
include/linux/mshv.h | 8 ++-
5 files changed, 115 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 871f5d014ae0..e4b0eea1703e 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -189,6 +189,7 @@ enum hv_isolation_type {
#define HV_REGISTER_SIEFP 0x40000082
#define HV_REGISTER_SIMP 0x40000083
#define HV_REGISTER_EOM 0x40000084
+#define HV_REGISTER_SIRBP 0x40000085
#define HV_REGISTER_SINT0 0x40000090
#define HV_REGISTER_SINT1 0x40000091
#define HV_REGISTER_SINT2 0x40000092
@@ -252,6 +253,7 @@ enum hv_isolation_type {
#define HV_X64_MSR_SIEFP HV_REGISTER_SIEFP
#define HV_X64_MSR_VP_INDEX HV_REGISTER_VP_INDEX
#define HV_X64_MSR_EOM HV_REGISTER_EOM
+#define HV_X64_MSR_SIRBP HV_REGISTER_SIRBP
#define HV_X64_MSR_SINT0 HV_REGISTER_SINT0
#define HV_X64_MSR_SINT15 HV_REGISTER_SINT15
#define HV_X64_MSR_CRASH_P0 HV_REGISTER_CRASH_P0
diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
index 9800ae6693a9..a2f712acca82 100644
--- a/drivers/hv/hv_synic.c
+++ b/drivers/hv/hv_synic.c
@@ -19,8 +19,8 @@

void mshv_isr(void)
{
- struct hv_message_page **msg_page =
- this_cpu_ptr(mshv.synic_message_page);
+ struct hv_synic_pages *spages = this_cpu_ptr(mshv.synic_pages);
+ struct hv_message_page **msg_page = &spages->synic_message_page;
struct hv_message *msg;
u32 message_type;
struct mshv_partition *partition;
@@ -115,10 +115,16 @@ void mshv_isr(void)
int mshv_synic_init(unsigned int cpu)
{
union hv_synic_simp simp;
+ union hv_synic_siefp siefp;
+ union hv_synic_sirbp sirbp;
union hv_synic_sint sint;
union hv_synic_scontrol sctrl;
- struct hv_message_page **msg_page =
- this_cpu_ptr(mshv.synic_message_page);
+ struct hv_synic_pages *spages = this_cpu_ptr(mshv.synic_pages);
+ struct hv_message_page **msg_page = &spages->synic_message_page;
+ struct hv_synic_event_flags_page **event_flags_page =
+ &spages->synic_event_flags_page;
+ struct hv_synic_event_ring_page **event_ring_page =
+ &spages->synic_event_ring_page;

/* Setup the Synic's message page */
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
@@ -127,11 +133,35 @@ int mshv_synic_init(unsigned int cpu)
HV_HYP_PAGE_SIZE,
MEMREMAP_WB);
if (!(*msg_page)) {
- pr_err("%s: memremap failed\n", __func__);
+ pr_err("%s: SIMP memremap failed\n", __func__);
return -EFAULT;
}
hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

+ /* Setup the Synic's event flags page */
+ siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.siefp_enabled = true;
+ *event_flags_page = memremap(siefp.base_siefp_gpa << PAGE_SHIFT,
+ PAGE_SIZE, MEMREMAP_WB);
+
+ if (!(*event_flags_page)) {
+ pr_err("%s: SIEFP memremap failed\n", __func__);
+ goto cleanup;
+ }
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+
+ /* Setup the Synic's event ring page */
+ sirbp.as_uint64 = hv_get_register(HV_REGISTER_SIRBP);
+ sirbp.sirbp_enabled = true;
+ *event_ring_page = memremap(sirbp.base_sirbp_gpa << PAGE_SHIFT,
+ PAGE_SIZE, MEMREMAP_WB);
+
+ if (!(*event_ring_page)) {
+ pr_err("%s: SIRBP memremap failed\n", __func__);
+ goto cleanup;
+ }
+ hv_set_register(HV_REGISTER_SIRBP, sirbp.as_uint64);
+
/* Enable intercepts */
sint.as_uint64 = 0;
sint.vector = HYPERVISOR_CALLBACK_VECTOR;
@@ -150,15 +180,40 @@ int mshv_synic_init(unsigned int cpu)
hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

return 0;
+
+cleanup:
+ if (*event_ring_page) {
+ sirbp.sirbp_enabled = false;
+ hv_set_register(HV_REGISTER_SIRBP, sirbp.as_uint64);
+ memunmap(*event_ring_page);
+ }
+ if (*event_flags_page) {
+ siefp.siefp_enabled = false;
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ memunmap(*event_flags_page);
+ }
+ if (*msg_page) {
+ simp.simp_enabled = false;
+ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ memunmap(*msg_page);
+ }
+
+ return -EFAULT;
}

int mshv_synic_cleanup(unsigned int cpu)
{
union hv_synic_sint sint;
union hv_synic_simp simp;
+ union hv_synic_siefp siefp;
+ union hv_synic_sirbp sirbp;
union hv_synic_scontrol sctrl;
- struct hv_message_page **msg_page =
- this_cpu_ptr(mshv.synic_message_page);
+ struct hv_synic_pages *spages = this_cpu_ptr(mshv.synic_pages);
+ struct hv_message_page **msg_page = &spages->synic_message_page;
+ struct hv_synic_event_flags_page **event_flags_page =
+ &spages->synic_event_flags_page;
+ struct hv_synic_event_ring_page **event_ring_page =
+ &spages->synic_event_ring_page;

/* Disable the interrupt */
sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX);
@@ -166,6 +221,18 @@ int mshv_synic_cleanup(unsigned int cpu)
hv_set_register(HV_REGISTER_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
sint.as_uint64);

+ /* Disable Synic's event ring page */
+ sirbp.as_uint64 = hv_get_register(HV_REGISTER_SIRBP);
+ sirbp.sirbp_enabled = false;
+ hv_set_register(HV_REGISTER_SIRBP, sirbp.as_uint64);
+ memunmap(*event_ring_page);
+
+ /* Disable Synic's event flags page */
+ siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.siefp_enabled = false;
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ memunmap(*event_flags_page);
+
/* Disable Synic's message page */
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
simp.simp_enabled = false;
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index fe6fb2668d36..2adae676dba5 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -1118,8 +1118,8 @@ __init mshv_init(void)
return ret;
}

- mshv.synic_message_page = alloc_percpu(struct hv_message_page *);
- if (!mshv.synic_message_page) {
+ mshv.synic_pages = alloc_percpu(struct hv_synic_pages);
+ if (!mshv.synic_pages) {
pr_err("%s: failed to allocate percpu synic page\n", __func__);
misc_deregister(&mshv_dev);
return -ENOMEM;
@@ -1144,7 +1144,7 @@ static void
__exit mshv_exit(void)
{
cpuhp_remove_state(mshv_cpuhp_online);
- free_percpu(mshv.synic_message_page);
+ free_percpu(mshv.synic_pages);

misc_deregister(&mshv_dev);
}
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 8f08d0e9163d..f70391a3320f 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -279,8 +279,23 @@ struct hv_timer_message_payload {
__u64 delivery_time; /* When the message was delivered */
} __packed;

+/* Define the synthetic interrupt controller event ring format */
+#define HV_SYNIC_EVENT_RING_MESSAGE_COUNT 63
+
+struct hv_synic_event_ring {
+ u8 signal_masked;
+ u8 ring_full;
+ u16 reserved_z;
+ u32 data[HV_SYNIC_EVENT_RING_MESSAGE_COUNT];
+} __packed;
+
+struct hv_synic_event_ring_page {
+ volatile struct hv_synic_event_ring sint_event_ring[HV_SYNIC_SINT_COUNT];
+};
+
/* Define synthetic interrupt controller flag constants. */
#define HV_EVENT_FLAGS_COUNT (256 * 8)
+#define HV_EVENT_FLAGS_BYTE_COUNT (256)
#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))

/*
@@ -304,9 +319,14 @@ union hv_stimer_config {

/* Define the synthetic interrupt controller event flags format. */
union hv_synic_event_flags {
+ unsigned char flags8[HV_EVENT_FLAGS_BYTE_COUNT];
unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
};

+struct hv_synic_event_flags_page {
+ volatile union hv_synic_event_flags event_flags[HV_SYNIC_SINT_COUNT];
+};
+
/* Define SynIC control register. */
union hv_synic_scontrol {
u64 as_uint64;
@@ -349,6 +369,15 @@ union hv_synic_siefp {
} __packed;
};

+union hv_synic_sirbp {
+ u64 as_uint64;
+ struct {
+ u64 sirbp_enabled:1;
+ u64 preserved:11;
+ u64 base_sirbp_gpa:52;
+ } __packed;
+};
+
struct hv_vpset {
u64 format;
u64 valid_bank_mask;
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 33f4d0cfee11..679aa3fa8cdb 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -49,8 +49,14 @@ struct mshv_partition {
} vps;
};

+struct hv_synic_pages {
+ struct hv_message_page *synic_message_page;
+ struct hv_synic_event_flags_page *synic_event_flags_page;
+ struct hv_synic_event_ring_page *synic_event_ring_page;
+};
+
struct mshv {
- struct hv_message_page __percpu **synic_message_page;
+ struct hv_synic_pages __percpu *synic_pages;
struct {
spinlock_t lock;
u64 count;
--
2.25.1

2021-06-02 17:25:25

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 08/17] mshv: Port id management

Each port in the partition should be uniquely identified by an id.
The partition is responsible for managing the port ids.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/Makefile | 2 +-
drivers/hv/hv_portid_table.c | 83 ++++++++++++++++++++++++++++++++++++
drivers/hv/mshv.h | 35 +++++++++++++++
drivers/hv/mshv_main.c | 2 +
4 files changed, 121 insertions(+), 1 deletion(-)
create mode 100644 drivers/hv/hv_portid_table.c

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index a2b698661b5e..455a2c01f52c 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -13,4 +13,4 @@ hv_vmbus-y := vmbus_drv.o \
hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o

-mshv-y += mshv_main.o hv_call.o hv_synic.o
+mshv-y += mshv_main.o hv_call.o hv_synic.o hv_portid_table.o
diff --git a/drivers/hv/hv_portid_table.c b/drivers/hv/hv_portid_table.c
new file mode 100644
index 000000000000..3e8feefc3fc9
--- /dev/null
+++ b/drivers/hv/hv_portid_table.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <asm/mshyperv.h>
+
+#include "mshv.h"
+
+/*
+ * Ports and connections are hypervisor structures used for inter-partition
+ * communication. Port represents the source and connection represents
+ * the destination. Partitions are responsible for managing the port and
+ * connection ids.
+ *
+ */
+
+#define PORTID_MIN 1
+#define PORTID_MAX INT_MAX
+
+static DEFINE_IDR(port_table_idr);
+
+void
+hv_port_table_fini(void)
+{
+ struct port_table_info *port_info;
+ unsigned long i, tmp;
+
+ idr_lock(&port_table_idr);
+ if (!idr_is_empty(&port_table_idr)) {
+ idr_for_each_entry_ul(&port_table_idr, port_info, tmp, i) {
+ port_info = idr_remove(&port_table_idr, i);
+ kfree_rcu(port_info, rcu);
+ }
+ }
+ idr_unlock(&port_table_idr);
+}
+
+int
+hv_portid_alloc(struct port_table_info *info)
+{
+ int ret = 0;
+
+ idr_lock(&port_table_idr);
+ ret = idr_alloc(&port_table_idr, info, PORTID_MIN,
+ PORTID_MAX, GFP_KERNEL);
+ idr_unlock(&port_table_idr);
+
+ return ret;
+}
+
+void
+hv_portid_free(int port_id)
+{
+ struct port_table_info *info;
+
+ idr_lock(&port_table_idr);
+ info = idr_remove(&port_table_idr, port_id);
+ WARN_ON(!info);
+ idr_unlock(&port_table_idr);
+
+ synchronize_rcu();
+ kfree(info);
+}
+
+int
+hv_portid_lookup(int port_id, struct port_table_info *info)
+{
+ struct port_table_info *_info;
+ int ret = -ENOENT;
+
+ rcu_read_lock();
+ _info = idr_find(&port_table_idr, port_id);
+ rcu_read_unlock();
+
+ if (_info) {
+ *info = *_info;
+ ret = 0;
+ }
+
+ return ret;
+}
diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
index e16818e977b9..ff5dc02cd8b6 100644
--- a/drivers/hv/mshv.h
+++ b/drivers/hv/mshv.h
@@ -39,6 +39,41 @@ void mshv_isr(void);
int mshv_synic_init(unsigned int cpu);
int mshv_synic_cleanup(unsigned int cpu);

+/*
+ * Callback for doorbell events.
+ * NOTE: This is called in interrupt context. Callback
+ * should defer slow and sleeping logic to later.
+ */
+typedef void (*doorbell_cb_t) (void *);
+
+/*
+ * port table information
+ */
+struct port_table_info {
+ struct rcu_head rcu;
+ enum hv_port_type port_type;
+ union {
+ struct {
+ u64 reserved[2];
+ } port_message;
+ struct {
+ u64 reserved[2];
+ } port_event;
+ struct {
+ u64 reserved[2];
+ } port_monitor;
+ struct {
+ doorbell_cb_t doorbell_cb;
+ void *data;
+ } port_doorbell;
+ };
+};
+
+void hv_port_table_fini(void);
+int hv_portid_alloc(struct port_table_info *info);
+int hv_portid_lookup(int port_id, struct port_table_info *info);
+void hv_portid_free(int port_id);
+
/*
* Hyper-V hypercalls
*/
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index 2adae676dba5..ccf0971d0d39 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -1146,6 +1146,8 @@ __exit mshv_exit(void)
cpuhp_remove_state(mshv_cpuhp_online);
free_percpu(mshv.synic_pages);

+ hv_port_table_fini();
+
misc_deregister(&mshv_dev);
}

--
2.25.1

2021-06-02 17:25:55

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 14/17] mshv: Notifier framework for EOI for level triggered interrupts

Add a simple in-kernel notifier framework for EOI. Callbacks can be
registered to be invoked on EOI of level-triggered interrupts. This is
to be used for irqfd support for level-triggered interrupts.

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_eventfd.c | 38 ++++++++++++++++++++++++++++++++++++
drivers/hv/hv_synic.c | 22 +++++++++++++++++++++
drivers/hv/mshv_main.c | 4 ++++
include/linux/mshv.h | 9 +++++++++
include/linux/mshv_eventfd.h | 6 ++++++
5 files changed, 79 insertions(+)

diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c
index 5ed77901fb0b..0bfb088dcb80 100644
--- a/drivers/hv/hv_eventfd.c
+++ b/drivers/hv/hv_eventfd.c
@@ -22,6 +22,44 @@

static struct workqueue_struct *irqfd_cleanup_wq;

+void
+mshv_register_irq_ack_notifier(struct mshv_partition *partition,
+ struct mshv_irq_ack_notifier *mian)
+{
+ spin_lock(&partition->irq_lock);
+ hlist_add_head_rcu(&mian->link, &partition->irq_ack_notifier_list);
+ spin_unlock(&partition->irq_lock);
+}
+
+void
+mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
+ struct mshv_irq_ack_notifier *mian)
+{
+ spin_lock(&partition->irq_lock);
+ hlist_del_init_rcu(&mian->link);
+ spin_unlock(&partition->irq_lock);
+ synchronize_rcu();
+}
+
+bool
+mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi)
+{
+ struct mshv_irq_ack_notifier *mian;
+ bool acked = false;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(mian, &partition->irq_ack_notifier_list,
+ link) {
+ if (mian->gsi == gsi) {
+ mian->irq_acked(mian);
+ acked = true;
+ }
+ }
+ rcu_read_unlock();
+
+ return acked;
+}
+
static void
irqfd_inject(struct mshv_kernel_irqfd *irqfd)
{
diff --git a/drivers/hv/hv_synic.c b/drivers/hv/hv_synic.c
index af6653967209..1296928675e3 100644
--- a/drivers/hv/hv_synic.c
+++ b/drivers/hv/hv_synic.c
@@ -14,6 +14,7 @@
#include <linux/io.h>
#include <linux/random.h>
#include <linux/mshv.h>
+#include <linux/mshv_eventfd.h>
#include <asm/mshyperv.h>

#include "mshv.h"
@@ -158,6 +159,27 @@ mshv_intercept_isr(struct hv_message *msg)
goto unlock_out;
}

+ if (msg->header.message_type == HVMSG_X64_APIC_EOI) {
+ /*
+ * Check if this gsi is registered in the
+ * ack_notifier list and invoke the callback
+ * if registered.
+ */
+ struct hv_x64_apic_eoi_message *eoi_msg =
+ (struct hv_x64_apic_eoi_message *)msg->u.payload;
+
+ /*
+ * If there is a notifier, the ack callback is supposed
+ * to handle the VMEXIT. So we need not pass this message
+ * to vcpu thread.
+ */
+ if (mshv_notify_acked_gsi(partition,
+ eoi_msg->interrupt_vector)) {
+ handled = true;
+ goto unlock_out;
+ }
+ }
+
/*
* Since we directly index the vp, and it has to exist for us to be here
* (because the vp is only deleted when the partition is), no additional
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index e1caecd27f09..6f93813ad465 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -1047,6 +1047,10 @@ mshv_ioctl_create_partition(void __user *user_arg)

mutex_init(&partition->mutex);

+ spin_lock_init(&partition->irq_lock);
+
+ INIT_HLIST_HEAD(&partition->irq_ack_notifier_list);
+
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0) {
ret = fd;
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 217c91725828..2cee4832fc7f 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -35,6 +35,12 @@ struct mshv_mem_region {
struct page **pages;
};

+struct mshv_irq_ack_notifier {
+ struct hlist_node link;
+ unsigned int gsi;
+ void (*irq_acked)(struct mshv_irq_ack_notifier *mian);
+};
+
struct mshv_partition {
u64 id;
refcount_t ref_count;
@@ -48,6 +54,9 @@ struct mshv_partition {
struct mshv_vp *array[MSHV_MAX_VPS];
} vps;

+ spinlock_t irq_lock;
+ struct hlist_head irq_ack_notifier_list;
+
struct {
spinlock_t lock;
struct list_head items;
diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h
index fd0012f72616..b4d587208294 100644
--- a/include/linux/mshv_eventfd.h
+++ b/include/linux/mshv_eventfd.h
@@ -15,6 +15,12 @@
void mshv_eventfd_init(struct mshv_partition *partition);
void mshv_eventfd_release(struct mshv_partition *partition);

+void mshv_register_irq_ack_notifier(struct mshv_partition *partition,
+ struct mshv_irq_ack_notifier *mian);
+void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
+ struct mshv_irq_ack_notifier *mian);
+bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi);
+
struct mshv_kernel_irqfd {
struct mshv_partition *partition;
struct eventfd_ctx *eventfd;
--
2.25.1

2021-06-02 19:23:08

by Vineeth Pillai

[permalink] [raw]
Subject: [PATCH 15/17] mshv: Level-triggered interrupt support for irqfd

To emulate level-triggered interrupts, add a resample option to
MSHV_IRQFD. When specified, a new resamplefd is provided that notifies
the user when the VM EOIs a level-triggered interrupt. Also, in this mode,
posting an interrupt through an irqfd only asserts the interrupt.

Inspired from the KVM counterpart:
https://patchwork.kernel.org/project/kvm/patch/[email protected]/

Signed-off-by: Vineeth Pillai <[email protected]>
---
drivers/hv/hv_eventfd.c | 118 ++++++++++++++++++++++++++++++++++-
drivers/hv/mshv_main.c | 8 +++
include/linux/mshv.h | 4 ++
include/linux/mshv_eventfd.h | 22 +++++++
include/uapi/linux/mshv.h | 4 +-
5 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/hv_eventfd.c b/drivers/hv/hv_eventfd.c
index 0bfb088dcb80..6404624b3bc6 100644
--- a/drivers/hv/hv_eventfd.c
+++ b/drivers/hv/hv_eventfd.c
@@ -60,16 +60,66 @@ mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi)
return acked;
}

+static void
+irqfd_resampler_ack(struct mshv_irq_ack_notifier *mian)
+{
+ struct mshv_kernel_irqfd_resampler *resampler;
+ struct mshv_partition *partition;
+ struct mshv_kernel_irqfd *irqfd;
+ int idx;
+
+ resampler = container_of(mian,
+ struct mshv_kernel_irqfd_resampler, notifier);
+ partition = resampler->partition;
+
+ idx = srcu_read_lock(&partition->irq_srcu);
+
+ list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link) {
+ if (irqfd->lapic_irq.control.interrupt_type ==
+ HV_X64_INTERRUPT_TYPE_EXTINT)
+ hv_call_clear_virtual_interrupt(partition->id);
+
+ eventfd_signal(irqfd->resamplefd, 1);
+ }
+
+ srcu_read_unlock(&partition->irq_srcu, idx);
+}
+
static void
irqfd_inject(struct mshv_kernel_irqfd *irqfd)
{
struct mshv_lapic_irq *irq = &irqfd->lapic_irq;

+ WARN_ON(irqfd->resampler &&
+ !irq->control.level_triggered);
hv_call_assert_virtual_interrupt(irqfd->partition->id,
irq->vector, irq->apic_id,
irq->control);
}

+static void
+irqfd_resampler_shutdown(struct mshv_kernel_irqfd *irqfd)
+{
+ struct mshv_kernel_irqfd_resampler *resampler = irqfd->resampler;
+ struct mshv_partition *partition = resampler->partition;
+
+ mutex_lock(&partition->irqfds.resampler_lock);
+
+ list_del_rcu(&irqfd->resampler_link);
+ synchronize_srcu(&partition->irq_srcu);
+
+ if (list_empty(&resampler->list)) {
+ list_del(&resampler->link);
+ mshv_unregister_irq_ack_notifier(partition, &resampler->notifier);
+ kfree(resampler);
+ }
+
+ mutex_unlock(&partition->irqfds.resampler_lock);
+}
+
+/*
+ * Race-free decouple logic (ordering is critical)
+ */
static void
irqfd_shutdown(struct work_struct *work)
{
@@ -82,6 +132,11 @@ irqfd_shutdown(struct work_struct *work)
*/
remove_wait_queue(irqfd->wqh, &irqfd->wait);

+ if (irqfd->resampler) {
+ irqfd_resampler_shutdown(irqfd);
+ eventfd_ctx_put(irqfd->resamplefd);
+ }
+
/*
* It is now safe to release the object's resources
*/
@@ -168,7 +223,7 @@ mshv_irqfd_assign(struct mshv_partition *partition,
{
struct mshv_kernel_irqfd *irqfd, *tmp;
struct fd f;
- struct eventfd_ctx *eventfd = NULL;
+ struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
unsigned int events;

@@ -176,6 +231,10 @@ mshv_irqfd_assign(struct mshv_partition *partition,
if (!irqfd)
return -ENOMEM;

+ if (args->flags & MSHV_IRQFD_FLAG_RESAMPLE &&
+ !args->level_triggered)
+ return -EINVAL;
+
irqfd->partition = partition;
irqfd->gsi = args->gsi;
irqfd->lapic_irq.vector = args->vector;
@@ -200,6 +259,54 @@ mshv_irqfd_assign(struct mshv_partition *partition,

irqfd->eventfd = eventfd;

+ if (args->flags & MSHV_IRQFD_FLAG_RESAMPLE) {
+ struct mshv_kernel_irqfd_resampler *resampler;
+
+ resamplefd = eventfd_ctx_fdget(args->resamplefd);
+ if (IS_ERR(resamplefd)) {
+ ret = PTR_ERR(resamplefd);
+ goto fail;
+ }
+
+ irqfd->resamplefd = resamplefd;
+ INIT_LIST_HEAD(&irqfd->resampler_link);
+
+ mutex_lock(&partition->irqfds.resampler_lock);
+
+ list_for_each_entry(resampler,
+ &partition->irqfds.resampler_list, link) {
+ if (resampler->notifier.gsi == irqfd->gsi) {
+ irqfd->resampler = resampler;
+ break;
+ }
+ }
+
+ if (!irqfd->resampler) {
+ resampler = kzalloc(sizeof(*resampler),
+ GFP_KERNEL_ACCOUNT);
+ if (!resampler) {
+ ret = -ENOMEM;
+ mutex_unlock(&partition->irqfds.resampler_lock);
+ goto fail;
+ }
+
+ resampler->partition = partition;
+ INIT_LIST_HEAD(&resampler->list);
+ resampler->notifier.gsi = irqfd->gsi;
+ resampler->notifier.irq_acked = irqfd_resampler_ack;
+ INIT_LIST_HEAD(&resampler->link);
+
+ list_add(&resampler->link, &partition->irqfds.resampler_list);
+ mshv_register_irq_ack_notifier(partition,
+ &resampler->notifier);
+ irqfd->resampler = resampler;
+ }
+
+ list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
+
+ mutex_unlock(&partition->irqfds.resampler_lock);
+ }
+
/*
* Install our own custom wake-up handling so we are notified via
* a callback whenever someone signals the underlying eventfd
@@ -238,6 +345,12 @@ mshv_irqfd_assign(struct mshv_partition *partition,
return 0;

fail:
+ if (irqfd->resampler)
+ irqfd_resampler_shutdown(irqfd);
+
+ if (resamplefd && !IS_ERR(resamplefd))
+ eventfd_ctx_put(resamplefd);
+
if (eventfd && !IS_ERR(eventfd))
eventfd_ctx_put(eventfd);

@@ -541,6 +654,9 @@ mshv_eventfd_init(struct mshv_partition *partition)
spin_lock_init(&partition->irqfds.lock);
INIT_LIST_HEAD(&partition->irqfds.items);

+ INIT_LIST_HEAD(&partition->irqfds.resampler_list);
+ mutex_init(&partition->irqfds.resampler_lock);
+
spin_lock_init(&partition->ioeventfds.lock);
INIT_LIST_HEAD(&partition->ioeventfds.items);
}
diff --git a/drivers/hv/mshv_main.c b/drivers/hv/mshv_main.c
index 6f93813ad465..0f083447c553 100644
--- a/drivers/hv/mshv_main.c
+++ b/drivers/hv/mshv_main.c
@@ -990,6 +990,8 @@ mshv_partition_release(struct inode *inode, struct file *filp)

mshv_eventfd_release(partition);

+ cleanup_srcu_struct(&partition->irq_srcu);
+
mshv_partition_put(partition);

return 0;
@@ -1088,10 +1090,16 @@ mshv_ioctl_create_partition(void __user *user_arg)

fd_install(fd, file);

+ ret = init_srcu_struct(&partition->irq_srcu);
+ if (ret)
+ goto cleanup_irq_srcu;
+
mshv_eventfd_init(partition);

return fd;

+cleanup_irq_srcu:
+ cleanup_srcu_struct(&partition->irq_srcu);
release_file:
file->f_op->release(file->f_inode, file);
finalize_partition:
diff --git a/include/linux/mshv.h b/include/linux/mshv.h
index 2cee4832fc7f..5968b49b9c27 100644
--- a/include/linux/mshv.h
+++ b/include/linux/mshv.h
@@ -10,6 +10,7 @@
#include <linux/mutex.h>
#include <linux/semaphore.h>
#include <linux/sched.h>
+#include <linux/srcu.h>
#include <uapi/linux/mshv.h>

#define MSHV_MAX_PARTITIONS 128
@@ -55,11 +56,14 @@ struct mshv_partition {
} vps;

spinlock_t irq_lock;
+ struct srcu_struct irq_srcu;
struct hlist_head irq_ack_notifier_list;

struct {
spinlock_t lock;
struct list_head items;
+ struct mutex resampler_lock;
+ struct list_head resampler_list;
} irqfds;
struct {
spinlock_t lock;
diff --git a/include/linux/mshv_eventfd.h b/include/linux/mshv_eventfd.h
index b4d587208294..fa5d46d2eb85 100644
--- a/include/linux/mshv_eventfd.h
+++ b/include/linux/mshv_eventfd.h
@@ -21,6 +21,23 @@ void mshv_unregister_irq_ack_notifier(struct mshv_partition *partition,
struct mshv_irq_ack_notifier *mian);
bool mshv_notify_acked_gsi(struct mshv_partition *partition, int gsi);

+struct mshv_kernel_irqfd_resampler {
+ struct mshv_partition *partition;
+ /*
+ * List of irqfds sharing this gsi.
+ * Protected by irqfds.resampler_lock
+ * and irq_srcu.
+ */
+ struct list_head list;
+ struct mshv_irq_ack_notifier notifier;
+ /*
+ * Entry in the list of partition->irqfds.resampler_list.
+ * Protected by irqfds.resampler_lock
+ *
+ */
+ struct list_head link;
+};
+
struct mshv_kernel_irqfd {
struct mshv_partition *partition;
struct eventfd_ctx *eventfd;
@@ -31,6 +48,11 @@ struct mshv_kernel_irqfd {
wait_queue_head_t *wqh;
wait_queue_entry_t wait;
struct work_struct shutdown;
+
+ /* Resampler related */
+ struct mshv_kernel_irqfd_resampler *resampler;
+ struct eventfd_ctx *resamplefd;
+ struct list_head resampler_link;
};

int mshv_irqfd(struct mshv_partition *partition,
diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
index e32dee679360..008e68bde56d 100644
--- a/include/uapi/linux/mshv.h
+++ b/include/uapi/linux/mshv.h
@@ -80,17 +80,19 @@ struct mshv_translate_gva {
};

#define MSHV_IRQFD_FLAG_DEASSIGN (1 << 0)
+#define MSHV_IRQFD_FLAG_RESAMPLE (1 << 1)

struct mshv_irqfd {
__u64 apic_id;
__s32 fd;
+ __s32 resamplefd;
__u32 gsi;
__u32 vector;
__u32 interrupt_type;
__u32 flags;
__u8 level_triggered;
__u8 logical_dest_mode;
- __u8 pad[2];
+ __u8 pad[6];
};

enum {
--
2.25.1

2021-06-10 12:23:03

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls

Vineeth Pillai <[email protected]> writes:

> Hyper-V enables inter-partition communication through the port and
> connection constructs. More details about ports and connections in
> TLFS chapter 11.
>
> Implement hypercalls related to ports and connections for enabling
> inter-partition communication.
>
> Signed-off-by: Vineeth Pillai <[email protected]>
> ---
> drivers/hv/hv_call.c | 161 +++++++++++++++++++++++++
> drivers/hv/mshv.h | 12 ++
> include/asm-generic/hyperv-tlfs.h | 55 +++++++++
> include/linux/hyperv.h | 9 --
> include/uapi/asm-generic/hyperv-tlfs.h | 76 ++++++++++++
> 5 files changed, 304 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
> index 025d4e2b892f..57db3a8ac94a 100644
> --- a/drivers/hv/hv_call.c
> +++ b/drivers/hv/hv_call.c
> @@ -742,3 +742,164 @@ int hv_call_translate_virtual_address(
> return hv_status_to_errno(status);
> }
>
> +
> +int
> +hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + struct hv_port_info *port_info,
> + u8 port_vtl, u8 min_connection_vtl, int node)
> +{
> + struct hv_create_port *input;
> + unsigned long flags;
> + int ret = 0;
> + int status;
> +
> + do {
> + local_irq_save(flags);
> + input = (struct hv_create_port *)(*this_cpu_ptr(
> + hyperv_pcpu_input_arg));
> + memset(input, 0, sizeof(*input));
> +
> + input->port_partition_id = port_partition_id;
> + input->port_id = port_id;
> + input->connection_partition_id = connection_partition_id;
> + input->port_info = *port_info;
> + input->port_vtl = port_vtl;
> + input->min_connection_vtl = min_connection_vtl;
> + input->proximity_domain_info =
> + numa_node_to_proximity_domain_info(node);
> + status = hv_do_hypercall(HVCALL_CREATE_PORT, input,
> + NULL) & HV_HYPERCALL_RESULT_MASK;
> + local_irq_restore(flags);
> + if (status == HV_STATUS_SUCCESS)
> + break;
> +
> + if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> + pr_err("%s: %s\n",
> + __func__, hv_status_to_string(status));
> + ret = -hv_status_to_errno(status);

In Nuno's "x86/hyperv: convert hyperv statuses to linux error codes"
patch, hv_status_to_errno() already returns negatives:

+int hv_status_to_errno(u64 hv_status)
+{
+ switch (hv_result(hv_status)) {
+ case HV_STATUS_SUCCESS:
+ return 0;
+ case HV_STATUS_INVALID_PARAMETER:
+ case HV_STATUS_UNKNOWN_PROPERTY:
+ case HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE:
+ case HV_STATUS_INVALID_VP_INDEX:
+ case HV_STATUS_INVALID_REGISTER_VALUE:
+ case HV_STATUS_INVALID_LP_INDEX:
+ return -EINVAL;
+ case HV_STATUS_ACCESS_DENIED:
+ case HV_STATUS_OPERATION_DENIED:
+ return -EACCES;
+ case HV_STATUS_NOT_ACKNOWLEDGED:
+ case HV_STATUS_INVALID_VP_STATE:
+ case HV_STATUS_INVALID_PARTITION_STATE:
+ return -EBADFD;
+ }
+ return -ENOTRECOVERABLE;
+}
+EXPORT_SYMBOL_GPL(hv_status_to_errno);
+

> + break;
> + }
> + ret = hv_call_deposit_pages(NUMA_NO_NODE,
> + port_partition_id, 1);
> +
> + } while (!ret);
> +
> + return ret;
> +}
> +
> +int
> +hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id)
> +{
> + union hv_delete_port input = { 0 };
> + unsigned long flags;
> + int status;
> +
> + local_irq_save(flags);
> + input.port_partition_id = port_partition_id;
> + input.port_id = port_id;
> + status = hv_do_fast_hypercall16(HVCALL_DELETE_PORT,
> + input.as_uint64[0],
> + input.as_uint64[1]) &
> + HV_HYPERCALL_RESULT_MASK;
> + local_irq_restore(flags);
> +
> + if (status != HV_STATUS_SUCCESS) {
> + pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> + return -hv_status_to_errno(status);
> + }
> +
> + return 0;
> +}
> +
> +int
> +hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + union hv_connection_id connection_id,
> + struct hv_connection_info *connection_info,
> + u8 connection_vtl, int node)
> +{
> + struct hv_connect_port *input;
> + unsigned long flags;
> + int ret = 0, status;
> +
> + do {
> + local_irq_save(flags);
> + input = (struct hv_connect_port *)(*this_cpu_ptr(
> + hyperv_pcpu_input_arg));
> + memset(input, 0, sizeof(*input));
> + input->port_partition_id = port_partition_id;
> + input->port_id = port_id;
> + input->connection_partition_id = connection_partition_id;
> + input->connection_id = connection_id;
> + input->connection_info = *connection_info;
> + input->connection_vtl = connection_vtl;
> + input->proximity_domain_info =
> + numa_node_to_proximity_domain_info(node);
> + status = hv_do_hypercall(HVCALL_CONNECT_PORT, input,
> + NULL) & HV_HYPERCALL_RESULT_MASK;
> +
> + local_irq_restore(flags);
> + if (status == HV_STATUS_SUCCESS)
> + break;
> +
> + if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> + pr_err("%s: %s\n",
> + __func__, hv_status_to_string(status));
> + ret = -hv_status_to_errno(status);
> + break;
> + }
> + ret = hv_call_deposit_pages(NUMA_NO_NODE,
> + connection_partition_id, 1);
> + } while (!ret);
> +
> + return ret;
> +}
> +
> +int
> +hv_call_disconnect_port(u64 connection_partition_id,
> + union hv_connection_id connection_id)
> +{
> + union hv_disconnect_port input = { 0 };
> + unsigned long flags;
> + int status;
> +
> + local_irq_save(flags);
> + input.connection_partition_id = connection_partition_id;
> + input.connection_id = connection_id;
> + input.is_doorbell = 1;
> + status = hv_do_fast_hypercall16(HVCALL_DISCONNECT_PORT,
> + input.as_uint64[0],
> + input.as_uint64[1]) &
> + HV_HYPERCALL_RESULT_MASK;
> + local_irq_restore(flags);
> +
> + if (status != HV_STATUS_SUCCESS) {
> + pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> + return -hv_status_to_errno(status);
> + }
> +
> + return 0;
> +}
> +
> +int
> +hv_call_notify_port_ring_empty(u32 sint_index)
> +{
> + union hv_notify_port_ring_empty input = { 0 };
> + unsigned long flags;
> + int status;
> +
> + local_irq_save(flags);
> + input.sint_index = sint_index;
> + status = hv_do_fast_hypercall8(HVCALL_NOTIFY_PORT_RING_EMPTY,
> + input.as_uint64) &
> + HV_HYPERCALL_RESULT_MASK;
> + local_irq_restore(flags);
> +
> + if (status != HV_STATUS_SUCCESS) {
> + pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> + return -hv_status_to_errno(status);
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
> index 037291a0ad45..e16818e977b9 100644
> --- a/drivers/hv/mshv.h
> +++ b/drivers/hv/mshv.h
> @@ -117,4 +117,16 @@ int hv_call_translate_virtual_address(
> u64 *gpa,
> union hv_translate_gva_result *result);
>
> +int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id, struct hv_port_info *port_info,
> + u8 port_vtl, u8 min_connection_vtl, int node);
> +int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
> +int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + union hv_connection_id connection_id,
> + struct hv_connection_info *connection_info,
> + u8 connection_vtl, int node);
> +int hv_call_disconnect_port(u64 connection_partition_id,
> + union hv_connection_id connection_id);
> +int hv_call_notify_port_ring_empty(u32 sint_index);
> #endif /* _MSHV_H */
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index f70391a3320f..42e0237b0da8 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -159,6 +159,8 @@ struct ms_hyperv_tsc_page {
> #define HVCALL_GET_VP_REGISTERS 0x0050
> #define HVCALL_SET_VP_REGISTERS 0x0051
> #define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
> +#define HVCALL_DELETE_PORT 0x0058
> +#define HVCALL_DISCONNECT_PORT 0x005b
> #define HVCALL_POST_MESSAGE 0x005c
> #define HVCALL_SIGNAL_EVENT 0x005d
> #define HVCALL_POST_DEBUG_DATA 0x0069
> @@ -168,7 +170,10 @@ struct ms_hyperv_tsc_page {
> #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
> #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
> #define HVCALL_RETARGET_INTERRUPT 0x007e
> +#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
> #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
> +#define HVCALL_CREATE_PORT 0x0095
> +#define HVCALL_CONNECT_PORT 0x0096
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> #define HVCALL_MAP_VP_STATE_PAGE 0x00e1
> @@ -949,4 +954,54 @@ struct hv_translate_virtual_address_out {
> u64 gpa_page;
> } __packed;
>
> +struct hv_create_port {
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u8 port_vtl;
> + u8 min_connection_vtl;
> + u16 padding;
> + u64 connection_partition_id;
> + struct hv_port_info port_info;
> + union hv_proximity_domain_info proximity_domain_info;
> +} __packed;
> +
> +union hv_delete_port {
> + u64 as_uint64[2];
> + struct {
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u32 reserved;
> + } __packed;
> +};
> +
> +union hv_notify_port_ring_empty {
> + u64 as_uint64;
> + struct {
> + u32 sint_index;
> + u32 reserved;
> + } __packed;
> +};
> +
> +struct hv_connect_port {
> + u64 connection_partition_id;
> + union hv_connection_id connection_id;
> + u8 connection_vtl;
> + u8 rsvdz0;
> + u16 rsvdz1;
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u32 reserved2;
> + struct hv_connection_info connection_info;
> + union hv_proximity_domain_info proximity_domain_info;
> +} __packed;
> +
> +union hv_disconnect_port {
> + u64 as_uint64[2];
> + struct {
> + u64 connection_partition_id;
> + union hv_connection_id connection_id;
> + u32 is_doorbell: 1;
> + u32 reserved: 31;
> + } __packed;
> +};
> #endif
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 2e859d2f9609..76ff26579622 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -750,15 +750,6 @@ struct vmbus_close_msg {
> struct vmbus_channel_close_channel msg;
> };
>
> -/* Define connection identifier type. */
> -union hv_connection_id {
> - u32 asu32;
> - struct {
> - u32 id:24;
> - u32 reserved:8;
> - } u;
> -};
> -
> enum vmbus_device_type {
> HV_IDE = 0,
> HV_SCSI,
> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
> index 388c4eb29212..2031115c6cce 100644
> --- a/include/uapi/asm-generic/hyperv-tlfs.h
> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
> @@ -53,6 +53,25 @@ union hv_message_flags {
> } __packed;
> };
>
> +enum hv_port_type {
> + HV_PORT_TYPE_MESSAGE = 1,
> + HV_PORT_TYPE_EVENT = 2,
> + HV_PORT_TYPE_MONITOR = 3,
> + HV_PORT_TYPE_DOORBELL = 4 // Root Partition only
> +};
> +
> +
> +/*
> + * Doorbell connection_info flags.
> + */
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_MASK 0x00000007
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY 0x00000000
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE 0x00000001
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD 0x00000002
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD 0x00000003
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD 0x00000004
> +#define HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE 0x80000000
> +
> /* Define port identifier type. */
> union hv_port_id {
> __u32 asu32;
> @@ -62,6 +81,63 @@ union hv_port_id {
> } __packed u;
> };
>
> +struct hv_port_info {
> + enum hv_port_type port_type;
> + __u32 padding;
> + union {
> + struct {
> + __u32 target_sint;
> + __u32 target_vp;
> + __u64 rsvdz;
> + } message_port_info;
> + struct {
> + __u32 target_sint;
> + __u32 target_vp;
> + __u16 base_flag_number;
> + __u16 flag_count;
> + __u32 rsvdz;
> + } event_port_info;
> + struct {
> + __u64 monitor_address;
> + __u64 rsvdz;
> + } monitor_port_info;
> + struct {
> + __u32 target_sint;
> + __u32 target_vp;
> + __u64 rsvdz;
> + } doorbell_port_info;
> + };
> +};
> +
> +union hv_connection_id {
> + __u32 asu32;
> + struct {
> + __u32 id:24;
> + __u32 reserved:8;
> + } u;
> +};
> +
> +struct hv_connection_info {
> + enum hv_port_type port_type;
> + __u32 padding;
> + union {
> + struct {
> + __u64 rsvdz;
> + } message_connection_info;
> + struct {
> + __u64 rsvdz;
> + } event_connection_info;
> + struct {
> + __u64 monitor_address;
> + } monitor_connection_info;
> + struct {
> + __u64 gpa;
> + __u64 trigger_value;
> + __u64 flags;
> + } doorbell_connection_info;
> + };
> +};
> +
> /* Define synthetic interrupt controller message header. */
> struct hv_message_header {
> __u32 message_type;

--
Vitaly

2021-06-29 13:09:13

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls

On Wed, Jun 02, 2021 at 05:20:51PM +0000, Vineeth Pillai wrote:
> Hyper-V enables inter-partition communication through the port and
> connection constructs. More details about ports and connections in
> TLFS chapter 11.
>
> Implement hypercalls related to ports and connections for enabling
> inter-partition communication.
>
> Signed-off-by: Vineeth Pillai <[email protected]>

Vineeth, feel free to squash the following patch.

---8<---
From afb9ab422895364216acb4261399f6f5154eea17 Mon Sep 17 00:00:00 2001
From: Wei Liu <[email protected]>
Date: Tue, 29 Jun 2021 12:58:47 +0000
Subject: [PATCH] fixup! mshv: SynIC port and connection hypercalls

Signed-off-by: Wei Liu <[email protected]>
---
drivers/hv/hv_call.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
index d5cdbe4e93da..30aefcbdda85 100644
--- a/drivers/hv/hv_call.c
+++ b/drivers/hv/hv_call.c
@@ -797,7 +797,7 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
pr_err("%s: %s\n",
__func__, hv_status_to_string(status));
- ret = -hv_status_to_errno(status);
+ ret = hv_status_to_errno(status);
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
@@ -826,7 +826,7 @@ hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id)

if (status != HV_STATUS_SUCCESS) {
pr_err("%s: %s\n", __func__, hv_status_to_string(status));
- return -hv_status_to_errno(status);
+ return hv_status_to_errno(status);
}

return 0;
@@ -866,7 +866,7 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
pr_err("%s: %s\n",
__func__, hv_status_to_string(status));
- ret = -hv_status_to_errno(status);
+ ret = hv_status_to_errno(status);
break;
}
ret = hv_call_deposit_pages(NUMA_NO_NODE,
@@ -896,7 +896,7 @@ hv_call_disconnect_port(u64 connection_partition_id,

if (status != HV_STATUS_SUCCESS) {
pr_err("%s: %s\n", __func__, hv_status_to_string(status));
- return -hv_status_to_errno(status);
+ return hv_status_to_errno(status);
}

return 0;
@@ -918,7 +918,7 @@ hv_call_notify_port_ring_empty(u32 sint_index)

if (status != HV_STATUS_SUCCESS) {
pr_err("%s: %s\n", __func__, hv_status_to_string(status));
- return -hv_status_to_errno(status);
+ return hv_status_to_errno(status);
}

return 0;
--
2.30.2

2021-06-29 15:39:06

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls

On Thu, Jun 10, 2021 at 02:19:28PM +0200, Vitaly Kuznetsov wrote:
> Vineeth Pillai <[email protected]> writes:
>
> > Hyper-V enables inter-partition communication through the port and
> > connection constructs. More details about ports and connections in
> > TLFS chapter 11.
> >
> > Implement hypercalls related to ports and connections for enabling
> > inter-partition communication.
> >
> > Signed-off-by: Vineeth Pillai <[email protected]>
> > ---
> > drivers/hv/hv_call.c | 161 +++++++++++++++++++++++++
> > drivers/hv/mshv.h | 12 ++
> > include/asm-generic/hyperv-tlfs.h | 55 +++++++++
> > include/linux/hyperv.h | 9 --
> > include/uapi/asm-generic/hyperv-tlfs.h | 76 ++++++++++++
> > 5 files changed, 304 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
> > index 025d4e2b892f..57db3a8ac94a 100644
> > --- a/drivers/hv/hv_call.c
> > +++ b/drivers/hv/hv_call.c
> > @@ -742,3 +742,164 @@ int hv_call_translate_virtual_address(
> > return hv_status_to_errno(status);
> > }
> >
> > +
> > +int
> > +hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> > + u64 connection_partition_id,
> > + struct hv_port_info *port_info,
> > + u8 port_vtl, u8 min_connection_vtl, int node)
> > +{
> > + struct hv_create_port *input;
> > + unsigned long flags;
> > + int ret = 0;
> > + int status;
> > +
> > + do {
> > + local_irq_save(flags);
> > + input = (struct hv_create_port *)(*this_cpu_ptr(
> > + hyperv_pcpu_input_arg));
> > + memset(input, 0, sizeof(*input));
> > +
> > + input->port_partition_id = port_partition_id;
> > + input->port_id = port_id;
> > + input->connection_partition_id = connection_partition_id;
> > + input->port_info = *port_info;
> > + input->port_vtl = port_vtl;
> > + input->min_connection_vtl = min_connection_vtl;
> > + input->proximity_domain_info =
> > + numa_node_to_proximity_domain_info(node);
> > + status = hv_do_hypercall(HVCALL_CREATE_PORT, input,
> > + NULL) & HV_HYPERCALL_RESULT_MASK;
> > + local_irq_restore(flags);
> > + if (status == HV_STATUS_SUCCESS)
> > + break;
> > +
> > + if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> > + pr_err("%s: %s\n",
> > + __func__, hv_status_to_string(status));
> > + ret = -hv_status_to_errno(status);
>
> In Nuno's "x86/hyperv: convert hyperv statuses to linux error codes"
> patch, hv_status_to_errno() already returns negatives:

Yes, this needs to be fixed; otherwise one of the following patches has
its error handling check reversed.

Wei.

2021-06-29 16:24:30

by Vineeth Pillai

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls


On 6/29/2021 9:06 AM, Wei Liu wrote:
> On Wed, Jun 02, 2021 at 05:20:51PM +0000, Vineeth Pillai wrote:
>> Hyper-V enables inter-partition communication through the port and
>> connection constructs. More details about ports and connections in
>> TLFS chapter 11.
>>
>> Implement hypercalls related to ports and connections for enabling
>> inter-partition communication.
>>
>> Signed-off-by: Vineeth Pillai <[email protected]>
> Vineeth, feel free to squash the following patch.
Thanks Wei, I will have this in the next iteration.

~Vineeth
>
> ---8<---
> From afb9ab422895364216acb4261399f6f5154eea17 Mon Sep 17 00:00:00 2001
> From: Wei Liu <[email protected]>
> Date: Tue, 29 Jun 2021 12:58:47 +0000
> Subject: [PATCH] fixup! mshv: SynIC port and connection hypercalls
>
> Signed-off-by: Wei Liu <[email protected]>
> ---
> drivers/hv/hv_call.c | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
> index d5cdbe4e93da..30aefcbdda85 100644
> --- a/drivers/hv/hv_call.c
> +++ b/drivers/hv/hv_call.c
> @@ -797,7 +797,7 @@ hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> pr_err("%s: %s\n",
> __func__, hv_status_to_string(status));
> - ret = -hv_status_to_errno(status);
> + ret = hv_status_to_errno(status);
> break;
> }
> ret = hv_call_deposit_pages(NUMA_NO_NODE,
> @@ -826,7 +826,7 @@ hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id)
>
> if (status != HV_STATUS_SUCCESS) {
> pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> - return -hv_status_to_errno(status);
> + return hv_status_to_errno(status);
> }
>
> return 0;
> @@ -866,7 +866,7 @@ hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> pr_err("%s: %s\n",
> __func__, hv_status_to_string(status));
> - ret = -hv_status_to_errno(status);
> + ret = hv_status_to_errno(status);
> break;
> }
> ret = hv_call_deposit_pages(NUMA_NO_NODE,
> @@ -896,7 +896,7 @@ hv_call_disconnect_port(u64 connection_partition_id,
>
> if (status != HV_STATUS_SUCCESS) {
> pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> - return -hv_status_to_errno(status);
> + return hv_status_to_errno(status);
> }
>
> return 0;
> @@ -918,7 +918,7 @@ hv_call_notify_port_ring_empty(u32 sint_index)
>
> if (status != HV_STATUS_SUCCESS) {
> pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> - return -hv_status_to_errno(status);
> + return hv_status_to_errno(status);
> }
>
> return 0;

2021-06-29 17:01:18

by Vineeth Pillai

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls


On 6/29/2021 8:55 AM, Wei Liu wrote:
>
>>> + if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
>>> + pr_err("%s: %s\n",
>>> + __func__, hv_status_to_string(status));
>>> + ret = -hv_status_to_errno(status);
>> In Nuno's "x86/hyperv: convert hyperv statuses to linux error codes"
>> patch, hv_status_to_errno() already returns negatives:
> Yes, this needs to be fixed otherwise one of the following patch has the
> error handling check reversed.
Sorry, I missed replying to this. Thanks Vitaly and Wei, I will have this
fixed in the next iteration.

~Vineeth

2021-06-30 10:48:21

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls

On Wed, Jun 02, 2021 at 05:20:51PM +0000, Vineeth Pillai wrote:
[...]
> +int
> +hv_call_notify_port_ring_empty(u32 sint_index)
> +{
> + union hv_notify_port_ring_empty input = { 0 };
> + unsigned long flags;
> + int status;
> +
> + local_irq_save(flags);
> + input.sint_index = sint_index;
> + status = hv_do_fast_hypercall8(HVCALL_NOTIFY_PORT_RING_EMPTY,
> + input.as_uint64) &
> + HV_HYPERCALL_RESULT_MASK;
> + local_irq_restore(flags);
> +
> + if (status != HV_STATUS_SUCCESS) {
> + pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> + return -hv_status_to_errno(status);
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/hv/mshv.h b/drivers/hv/mshv.h
> index 037291a0ad45..e16818e977b9 100644
> --- a/drivers/hv/mshv.h
> +++ b/drivers/hv/mshv.h
> @@ -117,4 +117,16 @@ int hv_call_translate_virtual_address(
> u64 *gpa,
> union hv_translate_gva_result *result);
>
> +int hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id, struct hv_port_info *port_info,
> + u8 port_vtl, u8 min_connection_vtl, int node);
> +int hv_call_delete_port(u64 port_partition_id, union hv_port_id port_id);
> +int hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + union hv_connection_id connection_id,
> + struct hv_connection_info *connection_info,
> + u8 connection_vtl, int node);
> +int hv_call_disconnect_port(u64 connection_partition_id,
> + union hv_connection_id connection_id);
> +int hv_call_notify_port_ring_empty(u32 sint_index);
> #endif /* _MSHV_H */
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index f70391a3320f..42e0237b0da8 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -159,6 +159,8 @@ struct ms_hyperv_tsc_page {
> #define HVCALL_GET_VP_REGISTERS 0x0050
> #define HVCALL_SET_VP_REGISTERS 0x0051
> #define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
> +#define HVCALL_DELETE_PORT 0x0058
> +#define HVCALL_DISCONNECT_PORT 0x005b
> #define HVCALL_POST_MESSAGE 0x005c
> #define HVCALL_SIGNAL_EVENT 0x005d
> #define HVCALL_POST_DEBUG_DATA 0x0069
> @@ -168,7 +170,10 @@ struct ms_hyperv_tsc_page {
> #define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
> #define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
> #define HVCALL_RETARGET_INTERRUPT 0x007e
> +#define HVCALL_NOTIFY_PORT_RING_EMPTY 0x008b
> #define HVCALL_ASSERT_VIRTUAL_INTERRUPT 0x0094
> +#define HVCALL_CREATE_PORT 0x0095
> +#define HVCALL_CONNECT_PORT 0x0096
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
> #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> #define HVCALL_MAP_VP_STATE_PAGE 0x00e1
> @@ -949,4 +954,54 @@ struct hv_translate_virtual_address_out {
> u64 gpa_page;
> } __packed;
>
> +struct hv_create_port {
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u8 port_vtl;
> + u8 min_connection_vtl;
> + u16 padding;
> + u64 connection_partition_id;
> + struct hv_port_info port_info;
> + union hv_proximity_domain_info proximity_domain_info;
> +} __packed;
> +
> +union hv_delete_port {
> + u64 as_uint64[2];
> + struct {
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u32 reserved;
> + } __packed;
> +};
> +
> +union hv_notify_port_ring_empty {
> + u64 as_uint64;
> + struct {
> + u32 sint_index;
> + u32 reserved;
> + } __packed;
> +};
> +
> +struct hv_connect_port {
> + u64 connection_partition_id;
> + union hv_connection_id connection_id;
> + u8 connection_vtl;
> + u8 rsvdz0;
> + u16 rsvdz1;
> + u64 port_partition_id;
> + union hv_port_id port_id;
> + u32 reserved2;
> + struct hv_connection_info connection_info;
> + union hv_proximity_domain_info proximity_domain_info;
> +} __packed;
> +
> +union hv_disconnect_port {
> + u64 as_uint64[2];
> + struct {
> + u64 connection_partition_id;
> + union hv_connection_id connection_id;
> + u32 is_doorbell: 1;
> + u32 reserved: 31;
> + } __packed;
> +};
> #endif
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 2e859d2f9609..76ff26579622 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -750,15 +750,6 @@ struct vmbus_close_msg {
> struct vmbus_channel_close_channel msg;
> };
>
> -/* Define connection identifier type. */
> -union hv_connection_id {
> - u32 asu32;
> - struct {
> - u32 id:24;
> - u32 reserved:8;
> - } u;
> -};
> -
> enum vmbus_device_type {
> HV_IDE = 0,
> HV_SCSI,
> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-tlfs.h
> index 388c4eb29212..2031115c6cce 100644
> --- a/include/uapi/asm-generic/hyperv-tlfs.h
> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
> @@ -53,6 +53,25 @@ union hv_message_flags {
> } __packed;
> };
>
> +enum hv_port_type {
> + HV_PORT_TYPE_MESSAGE = 1,
> + HV_PORT_TYPE_EVENT = 2,
> + HV_PORT_TYPE_MONITOR = 3,
> + HV_PORT_TYPE_DOORBELL = 4 // Root Partition only
> +};
> +
> +
> +/*
> + * Doorbell connection_info flags.
> + */
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_MASK 0x00000007
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_ANY 0x00000000
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_BYTE 0x00000001
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_WORD 0x00000002
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_DWORD 0x00000003
> +#define HV_DOORBELL_FLAG_TRIGGER_SIZE_QWORD 0x00000004
> +#define HV_DOORBELL_FLAG_TRIGGER_ANY_VALUE 0x80000000
> +
> /* Define port identifier type. */
> union hv_port_id {
> __u32 asu32;
> @@ -62,6 +81,63 @@ union hv_port_id {
> } __packed u;
> };
>
> +struct hv_port_info {
> + enum hv_port_type port_type;

Can you please replace the enum in the input / output structures with
__u32? I don't think the C standard specifies the exact size of an enum,
so this is prone to error. You can see that in other places in this header
where we need to store an enum we use __u32.

Wei.

2021-06-30 11:14:31

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls

On Wed, Jun 02, 2021 at 05:20:51PM +0000, Vineeth Pillai wrote:
> Hyper-V enables inter-partition communication through the port and
> connection constructs. More details about ports and connections in
> TLFS chapter 11.
>
> Implement hypercalls related to ports and connections for enabling
> inter-partition communication.
>
> Signed-off-by: Vineeth Pillai <[email protected]>
> ---
> drivers/hv/hv_call.c | 161 +++++++++++++++++++++++++
> drivers/hv/mshv.h | 12 ++
> include/asm-generic/hyperv-tlfs.h | 55 +++++++++
> include/linux/hyperv.h | 9 --
> include/uapi/asm-generic/hyperv-tlfs.h | 76 ++++++++++++
> 5 files changed, 304 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/hv/hv_call.c b/drivers/hv/hv_call.c
> index 025d4e2b892f..57db3a8ac94a 100644
> --- a/drivers/hv/hv_call.c
> +++ b/drivers/hv/hv_call.c
> @@ -742,3 +742,164 @@ int hv_call_translate_virtual_address(
> return hv_status_to_errno(status);
> }
>
> +
> +int
> +hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + struct hv_port_info *port_info,
> + u8 port_vtl, u8 min_connection_vtl, int node)
> +{
> + struct hv_create_port *input;
> + unsigned long flags;
> + int ret = 0;
> + int status;
> +
> + do {
> + local_irq_save(flags);
> + input = (struct hv_create_port *)(*this_cpu_ptr(
> + hyperv_pcpu_input_arg));
> + memset(input, 0, sizeof(*input));
> +
> + input->port_partition_id = port_partition_id;
> + input->port_id = port_id;
> + input->connection_partition_id = connection_partition_id;
> + input->port_info = *port_info;
> + input->port_vtl = port_vtl;
> + input->min_connection_vtl = min_connection_vtl;
> + input->proximity_domain_info =
> + numa_node_to_proximity_domain_info(node);

This misses the check for NUMA_NO_NODE, as does the function for port
connection (see below).

I think it would actually be better to do the check inside
numa_node_to_proximity_domain_info itself to avoid problems like this.

Of course, adopting this approach means some call sites of that
function will need to be changed too.

---8<---
From 8705857c62b3e5f13d415736ca8b508c22e3f5ba Mon Sep 17 00:00:00 2001
From: Wei Liu <[email protected]>
Date: Wed, 30 Jun 2021 11:08:31 +0000
Subject: [PATCH] numa_node_to_proximity_domain_info should cope with
NUMA_NO_NODE

Signed-off-by: Wei Liu <[email protected]>
---
include/asm-generic/mshyperv.h | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index d9b91b8f63c8..44552b7a02ef 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -31,10 +31,14 @@ numa_node_to_proximity_domain_info(int node)
{
union hv_proximity_domain_info proximity_domain_info;

- proximity_domain_info.domain_id = node_to_pxm(node);
- proximity_domain_info.flags.reserved = 0;
- proximity_domain_info.flags.proximity_info_valid = 1;
- proximity_domain_info.flags.proximity_preferred = 1;
+ proximity_domain_info.as_uint64 = 0;
+
+ if (node != NUMA_NO_NODE) {
+ proximity_domain_info.domain_id = node_to_pxm(node);
+ proximity_domain_info.flags.reserved = 0;
+ proximity_domain_info.flags.proximity_info_valid = 1;
+ proximity_domain_info.flags.proximity_preferred = 1;
+ }

return proximity_domain_info;
}
--
2.30.2


[...]
> +int
> +hv_call_connect_port(u64 port_partition_id, union hv_port_id port_id,
> + u64 connection_partition_id,
> + union hv_connection_id connection_id,
> + struct hv_connection_info *connection_info,
> + u8 connection_vtl, int node)
> +{
> + struct hv_connect_port *input;
> + unsigned long flags;
> + int ret = 0, status;
> +
> + do {
> + local_irq_save(flags);
> + input = (struct hv_connect_port *)(*this_cpu_ptr(
> + hyperv_pcpu_input_arg));
> + memset(input, 0, sizeof(*input));
> + input->port_partition_id = port_partition_id;
> + input->port_id = port_id;
> + input->connection_partition_id = connection_partition_id;
> + input->connection_id = connection_id;
> + input->connection_info = *connection_info;
> + input->connection_vtl = connection_vtl;
> + input->proximity_domain_info =
> + numa_node_to_proximity_domain_info(node);

Here...

2021-06-30 15:05:38

by Vineeth Pillai

[permalink] [raw]
Subject: Re: [PATCH 06/17] mshv: SynIC port and connection hypercalls


>> +
>> +int
>> +hv_call_create_port(u64 port_partition_id, union hv_port_id port_id,
>> + u64 connection_partition_id,
>> + struct hv_port_info *port_info,
>> + u8 port_vtl, u8 min_connection_vtl, int node)
>> +{
>> + struct hv_create_port *input;
>> + unsigned long flags;
>> + int ret = 0;
>> + int status;
>> +
>> + do {
>> + local_irq_save(flags);
>> + input = (struct hv_create_port *)(*this_cpu_ptr(
>> + hyperv_pcpu_input_arg));
>> + memset(input, 0, sizeof(*input));
>> +
>> + input->port_partition_id = port_partition_id;
>> + input->port_id = port_id;
>> + input->connection_partition_id = connection_partition_id;
>> + input->port_info = *port_info;
>> + input->port_vtl = port_vtl;
>> + input->min_connection_vtl = min_connection_vtl;
>> + input->proximity_domain_info =
>> + numa_node_to_proximity_domain_info(node);
> This misses the check for NUMA_NO_NODE, as does the function for port
> connection (see below).
>
> I think it would actually be better to leave the check in
> numa_node_to_proximity_domain_info to avoid problems like this.
>
> Of course, adopting this approach means some call sites for that
> function will need to be changed too.
Thanks for catching and fixing this, Wei; I will roll it into my branch.

~Vineeth


>
> ---8<---
> From 8705857c62b3e5f13d415736ca8b508c22e3f5ba Mon Sep 17 00:00:00 2001
> From: Wei Liu <[email protected]>
> Date: Wed, 30 Jun 2021 11:08:31 +0000
> Subject: [PATCH] numa_node_to_proximity_domain_info should cope with
> NUMA_NO_NODE
>
> Signed-off-by: Wei Liu <[email protected]>
> ---
> include/asm-generic/mshyperv.h | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index d9b91b8f63c8..44552b7a02ef 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -31,10 +31,14 @@ numa_node_to_proximity_domain_info(int node)
> {
> union hv_proximity_domain_info proximity_domain_info;
>
> - proximity_domain_info.domain_id = node_to_pxm(node);
> - proximity_domain_info.flags.reserved = 0;
> - proximity_domain_info.flags.proximity_info_valid = 1;
> - proximity_domain_info.flags.proximity_preferred = 1;
> + proximity_domain_info.as_uint64 = 0;
> +
> + if (node != NUMA_NO_NODE) {
> + proximity_domain_info.domain_id = node_to_pxm(node);
> + proximity_domain_info.flags.reserved = 0;
> + proximity_domain_info.flags.proximity_info_valid = 1;
> + proximity_domain_info.flags.proximity_preferred = 1;
> + }
>
> return proximity_domain_info;
> }