cpu hotplug may be disabled via cpu_hotplug_enable/cpu_hotplug_disable.
When disabled, cpu_down and cpu_up will fail with -EBUSY. Users of the
cpu_up/cpu_down should handle this situation as this is mostly temporal
disablement and exception should be made for EBUSY, assuming that EBUSY
always stands for this situation and is worth repeating execution. One
of the users of cpu_hotplug_enable/disable is pci_device_probe yielding
errors on bringing cpu cores up/down if pci devices are getting probed.
Problem was observed on x86 board by having partitioning of the system
to RT/NRT cpu sets failing (of which part is to bring cpus down/up via
sysfs) if pci devices would be getting probed at the same time. This is
confusing for userspace as dependency to pci devices is not clear.
Fix this behavior by waiting for cpu hotplug to be ready. Return -EBUSY
only after hotplugging was not enabled for about 10 seconds.
Fixes: 1ddd45f8d76f ("PCI: Use cpu_hotplug_disable() instead of get_online_cpus()")
Signed-off-by: Matija Glavinic Pecotic <[email protected]>
---
kernel/cpu.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 40 insertions(+), 10 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 4dc279e..2e06ca9 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -31,6 +31,7 @@
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
@@ -278,11 +279,22 @@ void cpu_maps_update_done(void)
}
/*
- * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
+ * If set, cpu_up and cpu_down will retry for cpu_hotplug_retries and
+ * eventually return -EBUSY if unsuccessful.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
+/*
+ * waitqueue for waiting on cpu_hotplug_disabled
+ */
+static DECLARE_WAIT_QUEUE_HEAD(wait_cpu_hp_enabled);
+
+/*
+ * Retries for cpu_hotplug to be enabled by cpu_up/cpu_down.
+ */
+static int cpu_hotplug_retries = 10;
+
#ifdef CONFIG_HOTPLUG_CPU
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
@@ -341,7 +353,7 @@ static void lockdep_release_cpus_lock(void)
/*
* Wait for currently running CPU hotplug operations to complete (if any) and
- * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
+ * briefly disable CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
* the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
* hotplug path before performing hotplug operations. So acquiring that lock
* guarantees mutual exclusion from any currently running hotplug operations.
@@ -366,6 +378,7 @@ void cpu_hotplug_enable(void)
cpu_maps_update_begin();
__cpu_hotplug_enable();
cpu_maps_update_done();
+ wake_up(&wait_cpu_hp_enabled);
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@ -1044,11 +1057,21 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
- int err;
+ int err = -EBUSY, retries = cpu_hotplug_retries;
- cpu_maps_update_begin();
- err = cpu_down_maps_locked(cpu, target);
- cpu_maps_update_done();
+ while (retries--) {
+ wait_event_timeout(wait_cpu_hp_enabled,
+ !cpu_hotplug_disabled,
+ HZ);
+ cpu_maps_update_begin();
+ if (cpu_hotplug_disabled) {
+ cpu_maps_update_done();
+ continue;
+ }
+ err = _cpu_down(cpu, 0, target);
+ cpu_maps_update_done();
+ break;
+ }
return err;
}
@@ -1166,7 +1189,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
- int err = 0;
+ int err = 0, retries = cpu_hotplug_retries;
if (!cpu_possible(cpu)) {
pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
@@ -1181,9 +1204,16 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
if (err)
return err;
- cpu_maps_update_begin();
-
- if (cpu_hotplug_disabled) {
+ while (--retries) {
+ wait_event_timeout(wait_cpu_hp_enabled,
+ !cpu_hotplug_disabled,
+ HZ);
+ cpu_maps_update_begin();
+ if (!cpu_hotplug_disabled)
+ break;
+ cpu_maps_update_done();
+ }
+ if (!retries) {
err = -EBUSY;
goto out;
}
--
2.10.2
Hi!
On 24/01/2020 09:18, Matija Glavinic Pecotic wrote:
> cpu hotplug may be disabled via cpu_hotplug_enable/cpu_hotplug_disable.
> When disabled, cpu_down and cpu_up will fail with -EBUSY. Users of the
> cpu_up/cpu_down should handle this situation as this is mostly temporal
> disablement and exception should be made for EBUSY, assuming that EBUSY
> always stands for this situation and is worth repeating execution. One
> of the users of cpu_hotplug_enable/disable is pci_device_probe yielding
> errors on bringing cpu cores up/down if pci devices are getting probed.
>
> Problem was observed on x86 board by having partitioning of the system
> to RT/NRT cpu sets failing (of which part is to bring cpus down/up via
> sysfs) if pci devices would be getting probed at the same time. This is
> confusing for userspace as dependency to pci devices is not clear.
>
> Fix this behavior by waiting for cpu hotplug to be ready. Return -EBUSY
> only after hotplugging was not enabled for about 10 seconds.
>
> Fixes: 1ddd45f8d76f ("PCI: Use cpu_hotplug_disable() instead of get_online_cpus()")
> Signed-off-by: Matija Glavinic Pecotic <[email protected]>
Reviewed-by: Alexander Sverdlin <[email protected]>
> ---
> kernel/cpu.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 40 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 4dc279e..2e06ca9 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -31,6 +31,7 @@
> #include <linux/relay.h>
> #include <linux/slab.h>
> #include <linux/percpu-rwsem.h>
> +#include <linux/wait.h>
>
> #include <trace/events/power.h>
> #define CREATE_TRACE_POINTS
> @@ -278,11 +279,22 @@ void cpu_maps_update_done(void)
> }
>
> /*
> - * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
> + * If set, cpu_up and cpu_down will retry for cpu_hotplug_retries and
> + * eventually return -EBUSY if unsuccessful.
> * Should always be manipulated under cpu_add_remove_lock
> */
> static int cpu_hotplug_disabled;
>
> +/*
> + * waitqueue for waiting on cpu_hotplug_disabled
> + */
> +static DECLARE_WAIT_QUEUE_HEAD(wait_cpu_hp_enabled);
> +
> +/*
> + * Retries for cpu_hotplug to be enabled by cpu_up/cpu_down.
> + */
> +static int cpu_hotplug_retries = 10;
> +
> #ifdef CONFIG_HOTPLUG_CPU
>
> DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
> @@ -341,7 +353,7 @@ static void lockdep_release_cpus_lock(void)
>
> /*
> * Wait for currently running CPU hotplug operations to complete (if any) and
> - * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
> + * briefly disable CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
> * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
> * hotplug path before performing hotplug operations. So acquiring that lock
> * guarantees mutual exclusion from any currently running hotplug operations.
> @@ -366,6 +378,7 @@ void cpu_hotplug_enable(void)
> cpu_maps_update_begin();
> __cpu_hotplug_enable();
> cpu_maps_update_done();
> + wake_up(&wait_cpu_hp_enabled);
> }
> EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
>
> @@ -1044,11 +1057,21 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
>
> static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
> {
> - int err;
> + int err = -EBUSY, retries = cpu_hotplug_retries;
>
> - cpu_maps_update_begin();
> - err = cpu_down_maps_locked(cpu, target);
> - cpu_maps_update_done();
> + while (retries--) {
> + wait_event_timeout(wait_cpu_hp_enabled,
> + !cpu_hotplug_disabled,
> + HZ);
> + cpu_maps_update_begin();
> + if (cpu_hotplug_disabled) {
> + cpu_maps_update_done();
> + continue;
> + }
> + err = _cpu_down(cpu, 0, target);
> + cpu_maps_update_done();
> + break;
> + }
> return err;
> }
>
> @@ -1166,7 +1189,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
>
> static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
> {
> - int err = 0;
> + int err = 0, retries = cpu_hotplug_retries;
>
> if (!cpu_possible(cpu)) {
> pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
> @@ -1181,9 +1204,16 @@ static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
> if (err)
> return err;
>
> - cpu_maps_update_begin();
> -
> - if (cpu_hotplug_disabled) {
> + while (--retries) {
> + wait_event_timeout(wait_cpu_hp_enabled,
> + !cpu_hotplug_disabled,
> + HZ);
> + cpu_maps_update_begin();
> + if (!cpu_hotplug_disabled)
> + break;
> + cpu_maps_update_done();
> + }
> + if (!retries) {
> err = -EBUSY;
> goto out;
> }
>
--
Best regards,
Alexander Sverdlin.