2022-05-28 09:47:31

by Stephen Hemminger

[permalink] [raw]
Subject: RE: [PATCH v2] Drivers: hv: vmbus: Don't assign VMbus channel interrupts to isolated CPUs

Would this have an impact on DPDK applications using isolated CPUs?

-----Original Message-----
From: Saurabh Sengar <[email protected]>
Sent: Friday, May 27, 2022 12:22 AM
To: KY Srinivasan <[email protected]>; Haiyang Zhang <[email protected]>; Stephen Hemminger <[email protected]>; [email protected]; Dexuan Cui <[email protected]>; [email protected]; [email protected]; Saurabh Singh Sengar <[email protected]>; Michael Kelley (LINUX) <[email protected]>
Subject: [PATCH v2] Drivers: hv: vmbus: Don't assign VMbus channel interrupts to isolated CPUs

When initially assigning a VMbus channel interrupt to a CPU, don't choose
a managed IRQ isolated CPU (as specified on the kernel boot line with
parameter 'isolcpus=managed_irq,<#cpu>'). Also, when using sysfs to change
the CPU that a VMbus channel will interrupt, don't allow changing to a
managed IRQ isolated CPU.

Signed-off-by: Saurabh Sengar <[email protected]>
---
v2: * better commit message
* Added back empty line, removed by mistake
* Removed error print for sysfs error

drivers/hv/channel_mgmt.c | 18 ++++++++++++------
drivers/hv/vmbus_drv.c | 4 ++++
2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 97d8f56..e1fe029 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
+#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

@@ -728,16 +729,20 @@ static void init_vp_index(struct vmbus_channel *channel)
u32 i, ncpu = num_online_cpus();
cpumask_var_t available_mask;
struct cpumask *allocated_mask;
+ const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
u32 target_cpu;
int numa_node;

if (!perf_chn ||
- !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+ !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
+ cpumask_empty(hk_mask)) {
/*
* If the channel is not a performance critical
* channel, bind it to VMBUS_CONNECT_CPU.
* In case alloc_cpumask_var() fails, bind it to
* VMBUS_CONNECT_CPU.
+ * If all the cpus are isolated, bind it to
+ * VMBUS_CONNECT_CPU.
*/
channel->target_cpu = VMBUS_CONNECT_CPU;
if (perf_chn)
@@ -758,17 +763,19 @@ static void init_vp_index(struct vmbus_channel *channel)
}
allocated_mask = &hv_context.hv_numa_map[numa_node];

- if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
+retry:
+ cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
+ cpumask_and(available_mask, available_mask, hk_mask);
+
+ if (cpumask_empty(available_mask)) {
/*
* We have cycled through all the CPUs in the node;
* reset the allocated map.
*/
cpumask_clear(allocated_mask);
+ goto retry;
}

- cpumask_xor(available_mask, allocated_mask,
- cpumask_of_node(numa_node));
-
target_cpu = cpumask_first(available_mask);
cpumask_set_cpu(target_cpu, allocated_mask);

@@ -778,7 +785,6 @@ static void init_vp_index(struct vmbus_channel *channel)
}

channel->target_cpu = target_cpu;
-
free_cpumask_var(available_mask);
}

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 714d549..547ae33 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -21,6 +21,7 @@
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>

#include <linux/delay.h>
@@ -1770,6 +1771,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
if (target_cpu >= nr_cpumask_bits)
return -EINVAL;

+ if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+ return -EINVAL;
+
/* No CPUs should come up or down during this. */
cpus_read_lock();

--
1.8.3.1


2022-05-28 19:30:57

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v2] Drivers: hv: vmbus: Don't assign VMbus channel interrupts to isolated CPUs

From: Stephen Hemminger <[email protected]> Sent: Friday, May 27, 2022 8:41 AM
>
> Would this have impact for DPDK applications using isolated cpus?

I don't have any existing knowledge of DPDK's use of isolated CPUs,
so anyone with more expertise should feel free to correct me.

From what I see in the DPDK documentation (Section 8.3 here:
https://doc.dpdk.org/guides/linux_gsg/enable_func.html), there's
no impact. The example in that documentation does CPU isolation
only for the purpose of scheduling, not for interrupts. The
example kernel command line is:

isolcpus=2,4,6

which defaults to "domain" as the "flag" and is equivalent to:

isolcpus=domain,2,4,6.

VMbus channel interrupts are affected only if "managed_irq" is
specified as the flag per the commit message below.

And FWIW, cpusets provide a better way of doing scheduler
isolation than the isolcpus kernel boot option. Perhaps the
DPDK documentation should be updated. :-)

Michael

>
> -----Original Message-----
> From: Saurabh Sengar <[email protected]>
> Sent: Friday, May 27, 2022 12:22 AM
> To: KY Srinivasan <[email protected]>; Haiyang Zhang <[email protected]>;
> Stephen Hemminger <[email protected]>; [email protected]; Dexuan Cui
> <[email protected]>; [email protected]; [email protected];
> Saurabh Singh Sengar <[email protected]>; Michael Kelley (LINUX)
> <[email protected]>
> Subject: [PATCH v2] Drivers: hv: vmbus: Don't assign VMbus channel interrupts to
> isolated CPUs
>
> When initially assigning a VMbus channel interrupt to a CPU, don’t choose
> a managed IRQ isolated CPU (as specified on the kernel boot line with
> parameter 'isolcpus=managed_irq,<#cpu>'). Also, when using sysfs to change
> the CPU that a VMbus channel will interrupt, don't allow changing to a
> managed IRQ isolated CPU.
>
> Signed-off-by: Saurabh Sengar <[email protected]>
> ---
> v2: * better commit message
> * Added back empty line, removed by mistake
> * Removed error print for sysfs error
>
> drivers/hv/channel_mgmt.c | 18 ++++++++++++------
> drivers/hv/vmbus_drv.c | 4 ++++
> 2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 97d8f56..e1fe029 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -21,6 +21,7 @@
> #include <linux/cpu.h>
> #include <linux/hyperv.h>
> #include <asm/mshyperv.h>
> +#include <linux/sched/isolation.h>
>
> #include "hyperv_vmbus.h"
>
> @@ -728,16 +729,20 @@ static void init_vp_index(struct vmbus_channel *channel)
> u32 i, ncpu = num_online_cpus();
> cpumask_var_t available_mask;
> struct cpumask *allocated_mask;
> + const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
> u32 target_cpu;
> int numa_node;
>
> if (!perf_chn ||
> - !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
> + !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
> + cpumask_empty(hk_mask)) {
> /*
> * If the channel is not a performance critical
> * channel, bind it to VMBUS_CONNECT_CPU.
> * In case alloc_cpumask_var() fails, bind it to
> * VMBUS_CONNECT_CPU.
> + * If all the cpus are isolated, bind it to
> + * VMBUS_CONNECT_CPU.
> */
> channel->target_cpu = VMBUS_CONNECT_CPU;
> if (perf_chn)
> @@ -758,17 +763,19 @@ static void init_vp_index(struct vmbus_channel *channel)
> }
> allocated_mask = &hv_context.hv_numa_map[numa_node];
>
> - if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
> +retry:
> + cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
> + cpumask_and(available_mask, available_mask, hk_mask);
> +
> + if (cpumask_empty(available_mask)) {
> /*
> * We have cycled through all the CPUs in the node;
> * reset the allocated map.
> */
> cpumask_clear(allocated_mask);
> + goto retry;
> }
>
> - cpumask_xor(available_mask, allocated_mask,
> - cpumask_of_node(numa_node));
> -
> target_cpu = cpumask_first(available_mask);
> cpumask_set_cpu(target_cpu, allocated_mask);
>
> @@ -778,7 +785,6 @@ static void init_vp_index(struct vmbus_channel *channel)
> }
>
> channel->target_cpu = target_cpu;
> -
> free_cpumask_var(available_mask);
> }
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 714d549..547ae33 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -21,6 +21,7 @@
> #include <linux/kernel_stat.h>
> #include <linux/clockchips.h>
> #include <linux/cpu.h>
> +#include <linux/sched/isolation.h>
> #include <linux/sched/task_stack.h>
>
> #include <linux/delay.h>
> @@ -1770,6 +1771,9 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
> if (target_cpu >= nr_cpumask_bits)
> return -EINVAL;
>
> + if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
> + return -EINVAL;
> +
> /* No CPUs should come up or down during this. */
> cpus_read_lock();
>
> --
> 1.8.3.1