2014-11-26 13:52:32

by Vitaly Kuznetsov

[permalink] [raw]
Subject: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

When an SMP Hyper-V guest is running on top of 2012R2 Server and secondary
cpus are sent offline (with echo 0 > /sys/devices/system/cpu/cpu$cpu/online),
a system freeze is observed. This happens because on newer
hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are distributed
across all cpus (see init_vp_index() function in drivers/hv/channel_mgmt.c)
and on cpu offlining nobody reassigns them to CPU0. Prevent cpu offlining
when vmbus is loaded until the issue is fixed host-side.

This patch also disables hibernation, but that is OK as hibernation is
already broken (an MCE error is hit on resume). Suspend still works.

Tested with WS2008R2 and WS2012R2.

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
drivers/hv/vmbus_drv.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4d6b269..9a82249 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -32,6 +32,7 @@
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
+#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
@@ -671,6 +672,13 @@ static void vmbus_isr(void)
tasklet_schedule(&msg_dpc);
}

+#ifdef CONFIG_HOTPLUG_CPU
+static int hyperv_cpu_disable(void)
+{
+ return -1;
+}
+#endif
+
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
*
@@ -711,6 +719,12 @@ static int vmbus_bus_init(int irq)
if (ret)
goto err_alloc;

+#ifdef CONFIG_HOTPLUG_CPU
+ if ((vmbus_proto_version != VERSION_WS2008) &&
+ (vmbus_proto_version != VERSION_WIN7))
+ smp_ops.cpu_disable = hyperv_cpu_disable;
+#endif
+
vmbus_request_offers();

return 0;
@@ -964,6 +978,11 @@ static void __exit vmbus_exit(void)
bus_unregister(&hv_bus);
hv_cleanup();
acpi_bus_unregister_driver(&vmbus_acpi_driver);
+#ifdef CONFIG_HOTPLUG_CPU
+ if ((vmbus_proto_version != VERSION_WS2008) &&
+ (vmbus_proto_version != VERSION_WIN7))
+ smp_ops.cpu_disable = native_cpu_disable;
+#endif
}


--
1.9.3


2014-11-27 03:04:16

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

On Wed, Nov 26, 2014 at 02:52:22PM +0100, Vitaly Kuznetsov wrote:
> When an SMP Hyper-V guest is running on top of 2012R2 Server and secondary
> cpus are sent offline (with echo 0 > /sys/devices/system/cpu/cpu$cpu/online)
> the system freeze is observed. This happens due to the fact that on newer
> hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are distributed
> across all cpus (see init_vp_index() function in drivers/hv/channel_mgmt.c)
> and on cpu offlining nobody reassigns them to CPU0. Prevent cpu offlining
> when vmbus is loaded until the issue is fixed host-side.
>
> This patch also disables hibernation but it is OK as it is also broken (MCE
> error is hit on resume). Suspend still works.
>
> Tested with WS2008R2 and WS2012R2.
>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> drivers/hv/vmbus_drv.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 4d6b269..9a82249 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -32,6 +32,7 @@
> #include <linux/completion.h>
> #include <linux/hyperv.h>
> #include <linux/kernel_stat.h>
> +#include <linux/cpu.h>
> #include <asm/hyperv.h>
> #include <asm/hypervisor.h>
> #include <asm/mshyperv.h>
> @@ -671,6 +672,13 @@ static void vmbus_isr(void)
> tasklet_schedule(&msg_dpc);
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int hyperv_cpu_disable(void)
> +{
> + return -1;
> +}
> +#endif
> +
> /*
> * vmbus_bus_init -Main vmbus driver initialization routine.
> *
> @@ -711,6 +719,12 @@ static int vmbus_bus_init(int irq)
> if (ret)
> goto err_alloc;
>
> +#ifdef CONFIG_HOTPLUG_CPU
> + if ((vmbus_proto_version != VERSION_WS2008) &&
> + (vmbus_proto_version != VERSION_WIN7))
> + smp_ops.cpu_disable = hyperv_cpu_disable;
> +#endif
> +
> vmbus_request_offers();
>
> return 0;
> @@ -964,6 +978,11 @@ static void __exit vmbus_exit(void)
> bus_unregister(&hv_bus);
> hv_cleanup();
> acpi_bus_unregister_driver(&vmbus_acpi_driver);
> +#ifdef CONFIG_HOTPLUG_CPU
> + if ((vmbus_proto_version != VERSION_WS2008) &&
> + (vmbus_proto_version != VERSION_WIN7))
> + smp_ops.cpu_disable = native_cpu_disable;
> +#endif
> }

#ifdefs in a .c file should be avoided if at all possible; please
put this in only one place, using a function call to "hide" the mess.

greg k-h

2014-11-27 03:40:03

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

> -----Original Message-----
> From: devel [mailto:[email protected]] On
> Behalf Of Greg Kroah-Hartman
> Sent: Thursday, November 27, 2014 11:03 AM
> To: Vitaly Kuznetsov
> Cc: [email protected]; Haiyang Zhang; linux-
> [email protected]
> Subject: Re: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer
> hypervisors
>
> On Wed, Nov 26, 2014 at 02:52:22PM +0100, Vitaly Kuznetsov wrote:
> > When an SMP Hyper-V guest is running on top of 2012R2 Server and
> secondary
> > cpus are sent offline (with echo 0 >
> /sys/devices/system/cpu/cpu$cpu/online)
> > the system freeze is observed. This happens due to the fact that on newer
> > hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are
> distributed
> > across all cpus (see init_vp_index() function in
> drivers/hv/channel_mgmt.c)
> > and on cpu offlining nobody reassigns them to CPU0. Prevent cpu
> offlining
> > when vmbus is loaded until the issue is fixed host-side.
> >
> > This patch also disables hibernation but it is OK as it is also broken (MCE
> > error is hit on resume). Suspend still works.
> >
> > Tested with WS2008R2 and WS2012R2.
> >
> > Signed-off-by: Vitaly Kuznetsov <[email protected]>
> > ---
> > drivers/hv/vmbus_drv.c | 19 +++++++++++++++++++
> > 1 file changed, 19 insertions(+)
> >
> > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> > index 4d6b269..9a82249 100644
> > --- a/drivers/hv/vmbus_drv.c
> > +++ b/drivers/hv/vmbus_drv.c
> > @@ -32,6 +32,7 @@
> > #include <linux/completion.h>
> > #include <linux/hyperv.h>
> > #include <linux/kernel_stat.h>
> > +#include <linux/cpu.h>
> > #include <asm/hyperv.h>
> > #include <asm/hypervisor.h>
> > #include <asm/mshyperv.h>
> > @@ -671,6 +672,13 @@ static void vmbus_isr(void)
> > tasklet_schedule(&msg_dpc);
> > }
> >
> > +#ifdef CONFIG_HOTPLUG_CPU
> > +static int hyperv_cpu_disable(void)
> > +{
> > + return -1;
> > +}
> > +#endif
> > +
> > /*
> > * vmbus_bus_init -Main vmbus driver initialization routine.
> > *
> > @@ -711,6 +719,12 @@ static int vmbus_bus_init(int irq)
> > if (ret)
> > goto err_alloc;
> >
> > +#ifdef CONFIG_HOTPLUG_CPU
> > + if ((vmbus_proto_version != VERSION_WS2008) &&
> > + (vmbus_proto_version != VERSION_WIN7))
> > + smp_ops.cpu_disable = hyperv_cpu_disable;
> > +#endif
> > +
> > vmbus_request_offers();
> >
> > return 0;
> > @@ -964,6 +978,11 @@ static void __exit vmbus_exit(void)
> > bus_unregister(&hv_bus);
> > hv_cleanup();
> > acpi_bus_unregister_driver(&vmbus_acpi_driver);
> > +#ifdef CONFIG_HOTPLUG_CPU
> > + if ((vmbus_proto_version != VERSION_WS2008) &&
> > + (vmbus_proto_version != VERSION_WIN7))
> > + smp_ops.cpu_disable = native_cpu_disable;
> > +#endif
> > }
>
> #ifdef in a .c file is not a good idea to do if at all possible, please
> only put this in one place, using a function call to "hide" the mess.
>
> greg k-h

Hi Vitaly,
The idea of the patch looks good to me.

I agree with Greg.
BTW, maybe hv_cpu_hotplug_quirk() is a better name?

Thanks,
-- Dexuan

2014-11-27 09:52:18

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

Dexuan Cui <[email protected]> writes:

>> -----Original Message-----
>> From: devel [mailto:[email protected]] On
>> Behalf Of Greg Kroah-Hartman
>> Sent: Thursday, November 27, 2014 11:03 AM
>> To: Vitaly Kuznetsov
>> Cc: [email protected]; Haiyang Zhang; linux-
>> [email protected]
>> Subject: Re: [PATCH] Drivers: hv: vmbus: prevent cpu offlining on newer
>> hypervisors
>>
>> On Wed, Nov 26, 2014 at 02:52:22PM +0100, Vitaly Kuznetsov wrote:
>> > When an SMP Hyper-V guest is running on top of 2012R2 Server and
>> secondary
>> > cpus are sent offline (with echo 0 >
>> /sys/devices/system/cpu/cpu$cpu/online)
>> > the system freeze is observed. This happens due to the fact that on newer
>> > hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are
>> distributed
>> > across all cpus (see init_vp_index() function in
>> drivers/hv/channel_mgmt.c)
>> > and on cpu offlining nobody reassigns them to CPU0. Prevent cpu
>> offlining
>> > when vmbus is loaded until the issue is fixed host-side.
>> >
>> > This patch also disables hibernation but it is OK as it is also broken (MCE
>> > error is hit on resume). Suspend still works.
>> >
>> > Tested with WS2008R2 and WS2012R2.
>> >
>> > Signed-off-by: Vitaly Kuznetsov <[email protected]>
>> > ---
>> > drivers/hv/vmbus_drv.c | 19 +++++++++++++++++++
>> > 1 file changed, 19 insertions(+)
>> >
>> > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
>> > index 4d6b269..9a82249 100644
>> > --- a/drivers/hv/vmbus_drv.c
>> > +++ b/drivers/hv/vmbus_drv.c
>> > @@ -32,6 +32,7 @@
>> > #include <linux/completion.h>
>> > #include <linux/hyperv.h>
>> > #include <linux/kernel_stat.h>
>> > +#include <linux/cpu.h>
>> > #include <asm/hyperv.h>
>> > #include <asm/hypervisor.h>
>> > #include <asm/mshyperv.h>
>> > @@ -671,6 +672,13 @@ static void vmbus_isr(void)
>> > tasklet_schedule(&msg_dpc);
>> > }
>> >
>> > +#ifdef CONFIG_HOTPLUG_CPU
>> > +static int hyperv_cpu_disable(void)
>> > +{
>> > + return -1;
>> > +}
>> > +#endif
>> > +
>> > /*
>> > * vmbus_bus_init -Main vmbus driver initialization routine.
>> > *
>> > @@ -711,6 +719,12 @@ static int vmbus_bus_init(int irq)
>> > if (ret)
>> > goto err_alloc;
>> >
>> > +#ifdef CONFIG_HOTPLUG_CPU
>> > + if ((vmbus_proto_version != VERSION_WS2008) &&
>> > + (vmbus_proto_version != VERSION_WIN7))
>> > + smp_ops.cpu_disable = hyperv_cpu_disable;
>> > +#endif
>> > +
>> > vmbus_request_offers();
>> >
>> > return 0;
>> > @@ -964,6 +978,11 @@ static void __exit vmbus_exit(void)
>> > bus_unregister(&hv_bus);
>> > hv_cleanup();
>> > acpi_bus_unregister_driver(&vmbus_acpi_driver);
>> > +#ifdef CONFIG_HOTPLUG_CPU
>> > + if ((vmbus_proto_version != VERSION_WS2008) &&
>> > + (vmbus_proto_version != VERSION_WIN7))
>> > + smp_ops.cpu_disable = native_cpu_disable;
>> > +#endif
>> > }
>>
>> #ifdef in a .c file is not a good idea to do if at all possible, please
>> only put this in one place, using a function call to "hide" the mess.
>>
>> greg k-h
>
> Hi Vitaly,
> The idea of the patch is good to me.
>
> I agree with Greg.
> BTW, maybe hv_cpu_hotplug_quirk() is a better name?

My idea was that eventually this function will start doing something
real (e.g. switching channels to cpu0 if it doesn't happen fully
host-side), so I gave it the general name 'hyperv_cpu_disable'.

I'll try addressing your and Greg's comments in v2, thanks!

>
> Thanks,
> -- Dexuan

--
Vitaly

2014-12-01 10:52:51

by Vitaly Kuznetsov

[permalink] [raw]
Subject: [PATCH v2] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

When an SMP Hyper-V guest is running on top of 2012R2 Server and secondary
cpus are sent offline (with echo 0 > /sys/devices/system/cpu/cpu$cpu/online),
a system freeze is observed. This happens because on newer
hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are distributed
across all cpus (see init_vp_index() function in drivers/hv/channel_mgmt.c)
and on cpu offlining nobody reassigns them to CPU0. Prevent cpu offlining
when vmbus is loaded until the issue is fixed host-side.

This patch also disables hibernation, but that is OK as hibernation is
already broken (an MCE error is hit on resume). Suspend still works.

Tested with WS2008R2 and WS2012R2.

Signed-off-by: Vitaly Kuznetsov <[email protected]>

---
Changes since v1:
- introduce hv_cpu_hotplug_quirk() function to not spread #ifdefs [Greg KH]
- add pr_notice() message "hv_vmbus: CPU offlining is not supported by hypervisor"
---
drivers/hv/vmbus_drv.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4d6b269..2e6b38e 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -32,6 +32,7 @@
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
+#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
@@ -671,6 +672,36 @@ static void vmbus_isr(void)
tasklet_schedule(&msg_dpc);
}

+#ifdef CONFIG_HOTPLUG_CPU
+static int hyperv_cpu_disable(void)
+{
+ return -1;
+}
+
+static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
+{
+ /*
+ * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8,
+ * ...) is not supported at this moment as channel interrupts are
+ * distributed across all of them.
+ */
+
+ if ((vmbus_proto_version == VERSION_WS2008) ||
+ (vmbus_proto_version == VERSION_WIN7))
+ return;
+
+ if (vmbus_loaded) {
+ smp_ops.cpu_disable = hyperv_cpu_disable;
+ pr_notice("CPU offlining is not supported by hypervisor");
+ } else
+ smp_ops.cpu_disable = native_cpu_disable;
+}
+#else
+static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
+{
+}
+#endif
+
/*
* vmbus_bus_init -Main vmbus driver initialization routine.
*
@@ -711,6 +742,7 @@ static int vmbus_bus_init(int irq)
if (ret)
goto err_alloc;

+ hv_cpu_hotplug_quirk(true);
vmbus_request_offers();

return 0;
@@ -964,6 +996,7 @@ static void __exit vmbus_exit(void)
bus_unregister(&hv_bus);
hv_cleanup();
acpi_bus_unregister_driver(&vmbus_acpi_driver);
+ hv_cpu_hotplug_quirk(false);
}


--
1.9.3

2014-12-01 11:13:46

by Dexuan Cui

[permalink] [raw]
Subject: RE: [PATCH v2] Drivers: hv: vmbus: prevent cpu offlining on newer hypervisors

> -----Original Message-----
> From: Vitaly Kuznetsov [mailto:[email protected]]
> Sent: Monday, December 1, 2014 18:53 PM
> To: KY Srinivasan; Haiyang Zhang
> Cc: [email protected]; Greg Kroah-Hartman; linux-
> [email protected]; Dexuan Cui
> Subject: [PATCH v2] Drivers: hv: vmbus: prevent cpu offlining on newer
> hypervisors
>
> When an SMP Hyper-V guest is running on top of 2012R2 Server and
> secondary
> cpus are sent offline (with echo 0 > /sys/devices/system/cpu/cpu$cpu/online)
> the system freeze is observed. This happens due to the fact that on newer
> hypervisors (Win8, WS2012R2, ...) vmbus channel handlers are distributed
> across all cpus (see init_vp_index() function in drivers/hv/channel_mgmt.c)
> and on cpu offlining nobody reassigns them to CPU0. Prevent cpu offlining
> when vmbus is loaded until the issue is fixed host-side.
>
> This patch also disables hibernation but it is OK as it is also broken (MCE
> error is hit on resume). Suspend still works.
>
> Tested with WS2008R2 and WS2012R2.
>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
>
> ---
> Changes since v1:
> - introduce hv_cpu_hotplug_quirk() function to not spread #ifdefs [Greg KH]
> - add pr_notice() message "hv_vmbus: CPU offlining is not supported by
> hypervisor"
> ---
> drivers/hv/vmbus_drv.c | 33 +++++++++++++++++++++++++++++++++
> 1 file changed, 33 insertions(+)
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 4d6b269..2e6b38e 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -32,6 +32,7 @@
> #include <linux/completion.h>
> #include <linux/hyperv.h>
> #include <linux/kernel_stat.h>
> +#include <linux/cpu.h>
> #include <asm/hyperv.h>
> #include <asm/hypervisor.h>
> #include <asm/mshyperv.h>
> @@ -671,6 +672,36 @@ static void vmbus_isr(void)
> tasklet_schedule(&msg_dpc);
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int hyperv_cpu_disable(void)
> +{
> + return -1;
> +}
> +
> +static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
> +{
> + /*
> + * Offlining a CPU when running on newer hypervisors (WS2012R2,
> Win8,
> + * ...) is not supported at this moment as channel interrupts are
> + * distributed across all of them.
> + */
> +
> + if ((vmbus_proto_version == VERSION_WS2008) ||
> + (vmbus_proto_version == VERSION_WIN7))
> + return;
> +
> + if (vmbus_loaded) {
> + smp_ops.cpu_disable = hyperv_cpu_disable;
> + pr_notice("CPU offlining is not supported by hypervisor");
> + } else
> + smp_ops.cpu_disable = native_cpu_disable;
> +}
> +#else
> +static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
> +{
> +}
> +#endif
> +
> /*
> * vmbus_bus_init -Main vmbus driver initialization routine.
> *
> @@ -711,6 +742,7 @@ static int vmbus_bus_init(int irq)
> if (ret)
> goto err_alloc;
>
> + hv_cpu_hotplug_quirk(true);
> vmbus_request_offers();
>
> return 0;
> @@ -964,6 +996,7 @@ static void __exit vmbus_exit(void)
> bus_unregister(&hv_bus);
> hv_cleanup();
> acpi_bus_unregister_driver(&vmbus_acpi_driver);
> + hv_cpu_hotplug_quirk(false);
> }

Acked-by: Dexuan Cui <[email protected]>