2021-04-14 17:05:07

by Andrea Parri

[permalink] [raw]
Subject: [PATCH v2 0/3] Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE

Changes since v1[1]:
- rebase on hyperv-next
- split changes into three patches
- fix&simplify error handling in send_modifychannel_with_ack()
- remove rescind checks in send_modifychannel_with_ack()
- remove unneeded test in hv_synic_event_pending()
- add/amend inline comments
- style changes

[1] https://lkml.kernel.org/r/[email protected]

Andrea Parri (Microsoft) (3):
Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3
Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce
CHANNELMSG_MODIFYCHANNEL_RESPONSE
Drivers: hv: vmbus: Check for pending channel interrupts before taking
a CPU offline

drivers/hv/channel.c | 99 ++++++++++++++++++++++++++++++++-------
drivers/hv/channel_mgmt.c | 42 +++++++++++++++++
drivers/hv/connection.c | 3 +-
drivers/hv/hv.c | 49 +++++++++++++++++++
drivers/hv/hv_trace.h | 15 ++++++
drivers/hv/vmbus_drv.c | 4 +-
include/linux/hyperv.h | 13 ++++-
7 files changed, 205 insertions(+), 20 deletions(-)

--
2.25.1


2021-04-14 17:05:41

by Andrea Parri

[permalink] [raw]
Subject: [PATCH v2 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3

Hyper-V has added VMBus protocol version 5.3. Allow Linux guests to
negotiate the new version on version of Hyper-V that support it.

Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
---
drivers/hv/connection.c | 3 ++-
include/linux/hyperv.h | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 350e8c5cafa8c..dc19d5ae4373c 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -45,6 +45,7 @@ EXPORT_SYMBOL_GPL(vmbus_proto_version);
* Table of VMBus versions listed from newest to oldest.
*/
static __u32 vmbus_versions[] = {
+ VERSION_WIN10_V5_3,
VERSION_WIN10_V5_2,
VERSION_WIN10_V5_1,
VERSION_WIN10_V5,
@@ -60,7 +61,7 @@ static __u32 vmbus_versions[] = {
* Maximal VMBus protocol version guests can negotiate. Useful to cap the
* VMBus version for testing and debugging purpose.
*/
-static uint max_version = VERSION_WIN10_V5_2;
+static uint max_version = VERSION_WIN10_V5_3;

module_param(max_version, uint, S_IRUGO);
MODULE_PARM_DESC(max_version,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c18c8e768efe..d6a6f76040b5f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -234,6 +234,7 @@ static inline u32 hv_get_avail_to_write_percent(
* 5 . 0 (Newer Windows 10)
* 5 . 1 (Windows 10 RS4)
* 5 . 2 (Windows Server 2019, RS5)
+ * 5 . 3 (Windows Server 2021) // FIXME: use proper version number/name
*/

#define VERSION_WS2008 ((0 << 16) | (13))
@@ -245,6 +246,7 @@ static inline u32 hv_get_avail_to_write_percent(
#define VERSION_WIN10_V5 ((5 << 16) | (0))
#define VERSION_WIN10_V5_1 ((5 << 16) | (1))
#define VERSION_WIN10_V5_2 ((5 << 16) | (2))
+#define VERSION_WIN10_V5_3 ((5 << 16) | (3))

/* Make maximum size of pipe payload of 16K */
#define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384)
--
2.25.1

2021-04-15 00:31:48

by Andrea Parri

[permalink] [raw]
Subject: [PATCH v2 2/3] Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE

Introduce the CHANNELMSG_MODIFYCHANNEL_RESPONSE message type, and code
to receive and process such a message.

Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
---
drivers/hv/channel.c | 99 ++++++++++++++++++++++++++++++++-------
drivers/hv/channel_mgmt.c | 42 +++++++++++++++++
drivers/hv/hv_trace.h | 15 ++++++
drivers/hv/vmbus_drv.c | 4 +-
include/linux/hyperv.h | 11 ++++-
5 files changed, 152 insertions(+), 19 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index db30be8f9ccea..aa4ef75d8dee2 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -209,31 +209,96 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
}
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);

+static int send_modifychannel_without_ack(struct vmbus_channel *channel, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel msg;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ msg.child_relid = channel->offermsg.child_relid;
+ msg.target_vp = target_vp;
+
+ ret = vmbus_post_msg(&msg, sizeof(msg), true);
+ trace_vmbus_send_modifychannel(&msg, ret);
+
+ return ret;
+}
+
+static int send_modifychannel_with_ack(struct vmbus_channel *channel, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel *msg;
+ struct vmbus_channel_msginfo *info;
+ unsigned long flags;
+ int ret;
+
+ info = kzalloc(sizeof(struct vmbus_channel_msginfo) +
+ sizeof(struct vmbus_channel_modifychannel),
+ GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ init_completion(&info->waitevent);
+ info->waiting_channel = channel;
+
+ msg = (struct vmbus_channel_modifychannel *)info->msg;
+ msg->header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ msg->child_relid = channel->offermsg.child_relid;
+ msg->target_vp = target_vp;
+
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_add_tail(&info->msglistentry, &vmbus_connection.chn_msg_list);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+ ret = vmbus_post_msg(msg, sizeof(*msg), true);
+ trace_vmbus_send_modifychannel(msg, ret);
+ if (ret != 0) {
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_del(&info->msglistentry);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+ goto free_info;
+ }
+
+ /*
+ * Release channel_mutex; otherwise, vmbus_onoffer_rescind() could block on
+ * the mutex and be unable to signal the completion.
+ *
+ * See the caller target_cpu_store() for information about the usage of the
+ * mutex.
+ */
+ mutex_unlock(&vmbus_connection.channel_mutex);
+ wait_for_completion(&info->waitevent);
+ mutex_lock(&vmbus_connection.channel_mutex);
+
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+ list_del(&info->msglistentry);
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+
+ if (info->response.modify_response.status)
+ ret = -EAGAIN;
+
+free_info:
+ kfree(info);
+ return ret;
+}
+
/*
* Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
*
- * CHANNELMSG_MODIFYCHANNEL messages are aynchronous. Also, Hyper-V does not
- * ACK such messages. IOW we can't know when the host will stop interrupting
- * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
+ * CHANNELMSG_MODIFYCHANNEL messages are aynchronous. When VMbus version 5.3
+ * or later is negotiated, Hyper-V always sends an ACK in response to such a
+ * message. For VMbus version 5.2 and earlier, it never sends an ACK. With-
+ * out an ACK, we can not know when the host will stop interrupting the "old"
+ * vCPU and start interrupting the "new" vCPU for the given channel.
*
* The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
* VERSION_WIN10_V4_1.
*/
-int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
+int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp)
{
- struct vmbus_channel_modifychannel conn_msg;
- int ret;
-
- memset(&conn_msg, 0, sizeof(conn_msg));
- conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
- conn_msg.child_relid = child_relid;
- conn_msg.target_vp = target_vp;
-
- ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
-
- trace_vmbus_send_modifychannel(&conn_msg, ret);
-
- return ret;
+ if (vmbus_proto_version >= VERSION_WIN10_V5_3)
+ return send_modifychannel_with_ack(channel, target_vp);
+ return send_modifychannel_without_ack(channel, target_vp);
}
EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index f3cf4af01e102..4c9e45d1f462c 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -1311,6 +1311,46 @@ static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

+/*
+ * vmbus_onmodifychannel_response - Modify Channel response handler.
+ *
+ * This is invoked when we received a response to our channel modify request.
+ * Find the matching request, copy the response and signal the requesting thread.
+ */
+static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
+{
+ struct vmbus_channel_modifychannel_response *response;
+ struct vmbus_channel_msginfo *msginfo;
+ unsigned long flags;
+
+ response = (struct vmbus_channel_modifychannel_response *)hdr;
+
+ trace_vmbus_onmodifychannel_response(response);
+
+ /*
+ * Find the modify msg, copy the response and signal/unblock the wait event.
+ */
+ spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
+
+ list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
+ struct vmbus_channel_message_header *responseheader =
+ (struct vmbus_channel_message_header *)msginfo->msg;
+
+ if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
+ struct vmbus_channel_modifychannel *modifymsg;
+
+ modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
+ if (modifymsg->child_relid == response->child_relid) {
+ memcpy(&msginfo->response.modify_response, response,
+ sizeof(*response));
+ complete(&msginfo->waitevent);
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+}
+
/*
* vmbus_ongpadl_torndown - GPADL torndown handler.
*
@@ -1428,6 +1468,8 @@ channel_message_table[CHANNELMSG_COUNT] = {
{ CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
{ CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
{ CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
+ { CHANNELMSG_MODIFYCHANNEL_RESPONSE, 1, vmbus_onmodifychannel_response,
+ sizeof(struct vmbus_channel_modifychannel_response)},
};

/*
diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index 6063bb21bb137..c02a1719e92f2 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -103,6 +103,21 @@ TRACE_EVENT(vmbus_ongpadl_created,
)
);

+TRACE_EVENT(vmbus_onmodifychannel_response,
+ TP_PROTO(const struct vmbus_channel_modifychannel_response *response),
+ TP_ARGS(response),
+ TP_STRUCT__entry(
+ __field(u32, child_relid)
+ __field(u32, status)
+ ),
+ TP_fast_assign(__entry->child_relid = response->child_relid;
+ __entry->status = response->status;
+ ),
+ TP_printk("child_relid 0x%x, status %d",
+ __entry->child_relid, __entry->status
+ )
+ );
+
TRACE_EVENT(vmbus_ongpadl_torndown,
TP_PROTO(const struct vmbus_channel_gpadl_torndown *gpadltorndown),
TP_ARGS(gpadltorndown),
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 51c40d5e3c8ac..b12d6827b222b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1848,13 +1848,15 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu == origin_cpu)
goto cpu_store_unlock;

- if (vmbus_send_modifychannel(channel->offermsg.child_relid,
+ if (vmbus_send_modifychannel(channel,
hv_cpu_number_to_vp_number(target_cpu))) {
ret = -EIO;
goto cpu_store_unlock;
}

/*
+ * For version before VERSION_WIN10_V5_3, the following warning holds:
+ *
* Warning. At this point, there is *no* guarantee that the host will
* have successfully processed the vmbus_send_modifychannel() request.
* See the header comment of vmbus_send_modifychannel() for more info.
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d6a6f76040b5f..876003512d4d2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -477,6 +477,7 @@ enum vmbus_channel_message_type {
CHANNELMSG_TL_CONNECT_REQUEST = 21,
CHANNELMSG_MODIFYCHANNEL = 22,
CHANNELMSG_TL_CONNECT_RESULT = 23,
+ CHANNELMSG_MODIFYCHANNEL_RESPONSE = 24,
CHANNELMSG_COUNT
};

@@ -590,6 +591,13 @@ struct vmbus_channel_open_result {
u32 status;
} __packed;

+/* Modify Channel Result parameters */
+struct vmbus_channel_modifychannel_response {
+ struct vmbus_channel_message_header header;
+ u32 child_relid;
+ u32 status;
+} __packed;
+
/* Close channel parameters; */
struct vmbus_channel_close_channel {
struct vmbus_channel_message_header header;
@@ -722,6 +730,7 @@ struct vmbus_channel_msginfo {
struct vmbus_channel_gpadl_torndown gpadl_torndown;
struct vmbus_channel_gpadl_created gpadl_created;
struct vmbus_channel_version_response version_response;
+ struct vmbus_channel_modifychannel_response modify_response;
} response;

u32 msgsize;
@@ -1596,7 +1605,7 @@ extern __u32 vmbus_proto_version;

int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
const guid_t *shv_host_servie_id);
-int vmbus_send_modifychannel(u32 child_relid, u32 target_vp);
+int vmbus_send_modifychannel(struct vmbus_channel *channel, u32 target_vp);
void vmbus_set_event(struct vmbus_channel *channel);

/* Get the start of the ring buffer. */
--
2.25.1

2021-04-15 00:32:21

by Andrea Parri

[permalink] [raw]
Subject: [PATCH v2 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline

Check that enough time has passed such that the modify channel message
has been processed before taking a CPU offline.

Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
---
drivers/hv/hv.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)

diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 3e6ff83adff42..dc9aa1130b22f 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -15,6 +15,7 @@
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
+#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
@@ -292,6 +293,41 @@ void hv_synic_disable_regs(unsigned int cpu)
disable_percpu_irq(vmbus_irq);
}

+#define HV_MAX_TRIES 3
+/*
+ * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
+ * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
+ * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
+ *
+ * If a bit is set, that means there is a pending channel interrupt. The expectation is
+ * that the normal interrupt handling mechanism will find and process the channel interrupt
+ * "very soon", and in the process clear the bit.
+ */
+static bool hv_synic_event_pending(void)
+{
+ struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+ union hv_synic_event_flags *event =
+ (union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
+ unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
+ bool pending;
+ u32 relid;
+ int tries = 0;
+
+retry:
+ pending = false;
+ for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
+ /* Special case - VMBus channel protocol messages */
+ if (relid == 0)
+ continue;
+ pending = true;
+ break;
+ }
+ if (pending && tries++ < HV_MAX_TRIES) {
+ usleep_range(10000, 20000);
+ goto retry;
+ }
+ return pending;
+}

int hv_synic_cleanup(unsigned int cpu)
{
@@ -336,6 +372,19 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;

+ if (vmbus_proto_version >= VERSION_WIN10_V4_1) {
+ /*
+ * channel_found == false means that any channels that were previously
+ * assigned to the CPU have been reassigned elsewhere with a call of
+ * vmbus_send_modifychannel(). Scan the event flags page looking for
+ * bits that are set and waiting with a timeout for vmbus_chan_sched()
+ * to process such bits. If bits are still set after this operation
+ * and VMBus is connected, fail the CPU offlining operation.
+ */
+ if (hv_synic_event_pending() && vmbus_connection.conn_state == CONNECTED)
+ return -EBUSY;
+ }
+
hv_stimer_legacy_cleanup(cpu);

hv_synic_disable_regs(cpu);
--
2.25.1

2021-04-15 00:40:08

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v2 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3

From: Andrea Parri (Microsoft) <[email protected]> Sent: Wednesday, April 14, 2021 8:01 AM
>
> Hyper-V has added VMBus protocol version 5.3. Allow Linux guests to
> negotiate the new version on version of Hyper-V that support it.
>
> Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
> ---
> drivers/hv/connection.c | 3 ++-
> include/linux/hyperv.h | 2 ++
> 2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 350e8c5cafa8c..dc19d5ae4373c 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -45,6 +45,7 @@ EXPORT_SYMBOL_GPL(vmbus_proto_version);
> * Table of VMBus versions listed from newest to oldest.
> */
> static __u32 vmbus_versions[] = {
> + VERSION_WIN10_V5_3,
> VERSION_WIN10_V5_2,
> VERSION_WIN10_V5_1,
> VERSION_WIN10_V5,
> @@ -60,7 +61,7 @@ static __u32 vmbus_versions[] = {
> * Maximal VMBus protocol version guests can negotiate. Useful to cap the
> * VMBus version for testing and debugging purpose.
> */
> -static uint max_version = VERSION_WIN10_V5_2;
> +static uint max_version = VERSION_WIN10_V5_3;
>
> module_param(max_version, uint, S_IRUGO);
> MODULE_PARM_DESC(max_version,
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 2c18c8e768efe..d6a6f76040b5f 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -234,6 +234,7 @@ static inline u32 hv_get_avail_to_write_percent(
> * 5 . 0 (Newer Windows 10)
> * 5 . 1 (Windows 10 RS4)
> * 5 . 2 (Windows Server 2019, RS5)
> + * 5 . 3 (Windows Server 2021) // FIXME: use proper version number/name

The official name is now public information as "Windows Server 2022".

> */
>
> #define VERSION_WS2008 ((0 << 16) | (13))
> @@ -245,6 +246,7 @@ static inline u32 hv_get_avail_to_write_percent(
> #define VERSION_WIN10_V5 ((5 << 16) | (0))
> #define VERSION_WIN10_V5_1 ((5 << 16) | (1))
> #define VERSION_WIN10_V5_2 ((5 << 16) | (2))
> +#define VERSION_WIN10_V5_3 ((5 << 16) | (3))
>
> /* Make maximum size of pipe payload of 16K */
> #define MAX_PIPE_DATA_PAYLOAD (sizeof(u8) * 16384)
> --
> 2.25.1

2021-04-15 00:40:40

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v2 2/3] Drivers: hv: vmbus: Drivers: hv: vmbus: Introduce CHANNELMSG_MODIFYCHANNEL_RESPONSE

From: Andrea Parri (Microsoft) <[email protected]> Sent: Wednesday, April 14, 2021 8:01 AM
>
> Introduce the CHANNELMSG_MODIFYCHANNEL_RESPONSE message type, and code
> to receive and process such a message.
>
> Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
> ---
> drivers/hv/channel.c | 99 ++++++++++++++++++++++++++++++++-------
> drivers/hv/channel_mgmt.c | 42 +++++++++++++++++
> drivers/hv/hv_trace.h | 15 ++++++
> drivers/hv/vmbus_drv.c | 4 +-
> include/linux/hyperv.h | 11 ++++-
> 5 files changed, 152 insertions(+), 19 deletions(-)
>

Reviewed-by: Michael Kelley <[email protected]>

2021-04-15 00:42:08

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v2 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline

From: Andrea Parri (Microsoft) <[email protected]> Sent: Wednesday, April 14, 2021 8:01 AM
>
> Check that enough time has passed such that the modify channel message
> has been processed before taking a CPU offline.
>
> Signed-off-by: Andrea Parri (Microsoft) <[email protected]>
> ---
> drivers/hv/hv.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 49 insertions(+)
>
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 3e6ff83adff42..dc9aa1130b22f 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -15,6 +15,7 @@
> #include <linux/hyperv.h>
> #include <linux/random.h>
> #include <linux/clockchips.h>
> +#include <linux/delay.h>
> #include <linux/interrupt.h>
> #include <clocksource/hyperv_timer.h>
> #include <asm/mshyperv.h>
> @@ -292,6 +293,41 @@ void hv_synic_disable_regs(unsigned int cpu)
> disable_percpu_irq(vmbus_irq);
> }
>
> +#define HV_MAX_TRIES 3
> +/*
> + * Scan the event flags page of 'this' CPU looking for any bit that is set. If we find one
> + * bit set, then wait for a few milliseconds. Repeat these steps for a maximum of 3 times.
> + * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
> + *
> + * If a bit is set, that means there is a pending channel interrupt. The expectation is
> + * that the normal interrupt handling mechanism will find and process the channel
> interrupt
> + * "very soon", and in the process clear the bit.
> + */
> +static bool hv_synic_event_pending(void)
> +{
> + struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
> + union hv_synic_event_flags *event =
> + (union hv_synic_event_flags *)hv_cpu->synic_event_page +
> VMBUS_MESSAGE_SINT;
> + unsigned long *recv_int_page = event->flags; /* assumes VMBus version >=
> VERSION_WIN8 */
> + bool pending;
> + u32 relid;
> + int tries = 0;
> +
> +retry:
> + pending = false;
> + for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
> + /* Special case - VMBus channel protocol messages */
> + if (relid == 0)
> + continue;
> + pending = true;
> + break;
> + }
> + if (pending && tries++ < HV_MAX_TRIES) {
> + usleep_range(10000, 20000);
> + goto retry;
> + }
> + return pending;
> +}
>
> int hv_synic_cleanup(unsigned int cpu)
> {
> @@ -336,6 +372,19 @@ int hv_synic_cleanup(unsigned int cpu)
> if (channel_found && vmbus_connection.conn_state == CONNECTED)
> return -EBUSY;
>
> + if (vmbus_proto_version >= VERSION_WIN10_V4_1) {
> + /*
> + * channel_found == false means that any channels that were previously
> + * assigned to the CPU have been reassigned elsewhere with a call of
> + * vmbus_send_modifychannel(). Scan the event flags page looking for
> + * bits that are set and waiting with a timeout for vmbus_chan_sched()
> + * to process such bits. If bits are still set after this operation
> + * and VMBus is connected, fail the CPU offlining operation.
> + */
> + if (hv_synic_event_pending() && vmbus_connection.conn_state == CONNECTED)
> + return -EBUSY;
> + }
> +

Perhaps the test for conn_state == CONNECTED could be factored out as follows. If we're
not CONNECTED (i.e., in the panic or kexec path) we should be able to also skip the search
for channels that are bound to the CPU since we will ignore the result anyway.

if (vmbus_connection.conn_state != CONNECTED)
goto always_cleanup;

if (cpu == VMBUS_CONNECT_CPU)
return -EBUSY;

[Code to search for channels that are bound to the CPU we're about to clean up]

if (channel_found)
return -EBUSY;

/*
* channel_found == false means that any channels that were previously
* assigned to the CPU have been reassigned elsewhere with a call of
* vmbus_send_modifychannel(). Scan the event flags page looking for
* bits that are set and waiting with a timeout for vmbus_chan_sched()
* to process such bits. If bits are still set after this operation
* and VMBus is connected, fail the CPU offlining operation.
*/
if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
return -EBUSY;

always_cleanup:

> hv_stimer_legacy_cleanup(cpu);
>
> hv_synic_disable_regs(cpu);
> --
> 2.25.1

2021-04-15 11:11:55

by Andrea Parri

[permalink] [raw]
Subject: Re: [PATCH v2 1/3] Drivers: hv: vmbus: Introduce and negotiate VMBus protocol version 5.3

> > @@ -234,6 +234,7 @@ static inline u32 hv_get_avail_to_write_percent(
> > * 5 . 0 (Newer Windows 10)
> > * 5 . 1 (Windows 10 RS4)
> > * 5 . 2 (Windows Server 2019, RS5)
> > + * 5 . 3 (Windows Server 2021) // FIXME: use proper version number/name
>
> The official name is now public information as "Windows Server 2022".

Thank you, I've updated the name and removed the FIXME.

Andrea

2021-04-15 11:41:46

by Andrea Parri

[permalink] [raw]
Subject: Re: [PATCH v2 3/3] Drivers: hv: vmbus: Check for pending channel interrupts before taking a CPU offline

> > @@ -336,6 +372,19 @@ int hv_synic_cleanup(unsigned int cpu)
> > if (channel_found && vmbus_connection.conn_state == CONNECTED)
> > return -EBUSY;
> >
> > + if (vmbus_proto_version >= VERSION_WIN10_V4_1) {
> > + /*
> > + * channel_found == false means that any channels that were previously
> > + * assigned to the CPU have been reassigned elsewhere with a call of
> > + * vmbus_send_modifychannel(). Scan the event flags page looking for
> > + * bits that are set and waiting with a timeout for vmbus_chan_sched()
> > + * to process such bits. If bits are still set after this operation
> > + * and VMBus is connected, fail the CPU offlining operation.
> > + */
> > + if (hv_synic_event_pending() && vmbus_connection.conn_state == CONNECTED)
> > + return -EBUSY;
> > + }
> > +
>
> Perhaps the test for conn_state == CONNECTED could be factored out as follows. If we're
> not CONNECTED (i.e., in the panic or kexec path) we should be able to also skip the search
> for channels that are bound to the CPU since we will ignore the result anyway.
>
> if (vmbus_connection.conn_state != CONNECTED)
> goto always_cleanup;
>
> if (cpu == VMBUS_CONNECT_CPU)
> return -EBUSY;
>
> [Code to search for channels that are bound to the CPU we're about to clean up]
>
> if (channel_found)
> return -EBUSY;
>
> /*
> * channel_found == false means that any channels that were previously
> * assigned to the CPU have been reassigned elsewhere with a call of
> * vmbus_send_modifychannel(). Scan the event flags page looking for
> * bits that are set and waiting with a timeout for vmbus_chan_sched()
> * to process such bits. If bits are still set after this operation
> * and VMBus is connected, fail the CPU offlining operation.
> */
> if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
> return -EBUSY;
>
> always_cleanup:

Agreed, applied. Thank you for the suggestion,

Andrea