This series improves the startup of SMP processors for MIPS. Firstly,
the bitmask of CPUs used to detect secondary CPUs starting is replaced
with a completion event. This change means that if a secondary CPU fails
to start, the failure is detected and handled rather than hanging the
kernel.
The second patch removes the now redundant CPU bitmask.
The third patch improves error handling in the CPS SMP implementation.
In an unlikely corner case where no online CPU is available in a core
to start a secondary VPE, the kernel would previously BUG(). This patch
instead causes a warning to be printed and the situation to be handled
more gracefully.
This series is based on v4.9-rc1 and has been tested on Boston, Malta,
SEAD3, Octeon and Pistachio Ci40 platforms.
Matt Redfearn (3):
MIPS: smp: Use a completion event to signal CPU up
MIPS: smp: Remove cpu_callin_map
MIPS: smp-cps: Don't BUG if a CPU fails to start
arch/mips/cavium-octeon/smp.c | 1 -
arch/mips/include/asm/smp.h | 2 --
arch/mips/kernel/process.c | 4 +---
arch/mips/kernel/smp-bmips.c | 1 -
arch/mips/kernel/smp-cps.c | 7 +++++--
arch/mips/kernel/smp.c | 17 +++++++++--------
arch/mips/loongson64/loongson-3/smp.c | 1 -
7 files changed, 15 insertions(+), 18 deletions(-)
--
2.7.4
If there is no online CPU within a core which could receive the IPI to
start another VP in that core, a BUG() is triggered. Instead print a
warning and gracefully handle the failure such that the system remains
usable, albeit without the requested secondary CPU.
Signed-off-by: Matt Redfearn <[email protected]>
---
arch/mips/kernel/smp-cps.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 44339b470ef4..a2544c2394e4 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -326,7 +326,11 @@ static void cps_boot_secondary(int cpu, struct task_struct *idle)
if (cpu_online(remote))
break;
}
- BUG_ON(remote >= NR_CPUS);
+ if (remote >= NR_CPUS) {
+ pr_crit("No online CPU in core %u to start CPU%d\n",
+ core, cpu);
+ goto out;
+ }
err = smp_call_function_single(remote, remote_vpe_boot,
NULL, 1);
--
2.7.4
The previous commit made cpu_callin_map redundant, since it is no longer
used to signal secondary CPUs starting, or going offline. Remove it now.
Signed-off-by: Matt Redfearn <[email protected]>
---
arch/mips/cavium-octeon/smp.c | 1 -
arch/mips/include/asm/smp.h | 2 --
arch/mips/kernel/smp-bmips.c | 1 -
arch/mips/kernel/smp-cps.c | 1 -
arch/mips/kernel/smp.c | 2 --
arch/mips/loongson64/loongson-3/smp.c | 1 -
6 files changed, 8 deletions(-)
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 256fe6f65cf2..edaf59647da8 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -272,7 +272,6 @@ static int octeon_cpu_disable(void)
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
- cpumask_clear_cpu(cpu, &cpu_callin_map);
octeon_fixup_irqs();
__flush_cache_all();
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index 060f23ff1817..f8c5faa93584 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -46,8 +46,6 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_DUMP 0x8
#define SMP_ASK_C0COUNT 0x10
-extern cpumask_t cpu_callin_map;
-
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 6d0f1321e084..f6700dc2fb09 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -364,7 +364,6 @@ static int bmips_cpu_disable(void)
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
- cpumask_clear_cpu(cpu, &cpu_callin_map);
clear_c0_status(IE_IRQ5);
local_flush_tlb_all();
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index 6183ad84cc73..44339b470ef4 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -399,7 +399,6 @@ static int cps_cpu_disable(void)
smp_mb__after_atomic();
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
- cpumask_clear_cpu(cpu, &cpu_callin_map);
return 0;
}
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 03daf9008124..0a831f63b0ec 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -48,8 +48,6 @@
#include <asm/setup.h>
#include <asm/maar.h>
-cpumask_t cpu_callin_map; /* Bitmask of started secondaries */
-
int __cpu_number_map[NR_CPUS]; /* Map physical to logical */
EXPORT_SYMBOL(__cpu_number_map);
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 99aab9f85904..cfcf240cedbe 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -418,7 +418,6 @@ static int loongson3_cpu_disable(void)
set_cpu_online(cpu, false);
calculate_cpu_foreign_map();
- cpumask_clear_cpu(cpu, &cpu_callin_map);
local_irq_save(flags);
fixup_irqs();
local_irq_restore(flags);
--
2.7.4
If a secondary CPU failed to start, for any reason, the CPU requesting
the secondary to start would get stuck in the loop waiting for the
secondary to be present in the cpu_callin_map.
Rather than that, use a completion event to signal that the secondary
CPU has started and is waiting to synchronise counters.
Since the CPU presence will no longer be marked in cpu_callin_map,
remove the redundant test from arch_cpu_idle_dead().
Signed-off-by: Matt Redfearn <[email protected]>
---
arch/mips/kernel/process.c | 4 +---
arch/mips/kernel/smp.c | 15 +++++++++------
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 9514e5f2209f..9a8f61d7c83e 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -49,9 +49,7 @@
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
- /* What the heck is this check doing ? */
- if (!cpumask_test_cpu(smp_processor_id(), &cpu_callin_map))
- play_dead();
+ play_dead();
}
#endif
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 7ebb1918e2ac..03daf9008124 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -68,6 +68,8 @@ EXPORT_SYMBOL(cpu_sibling_map);
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
+static DECLARE_COMPLETION(cpu_running);
+
/*
* A logcal cpu mask containing only one VPE per core to
* reduce the number of IPIs on large MT systems.
@@ -369,7 +371,7 @@ asmlinkage void start_secondary(void)
cpumask_set_cpu(cpu, &cpu_coherent_mask);
notify_cpu_starting(cpu);
- cpumask_set_cpu(cpu, &cpu_callin_map);
+ complete(&cpu_running);
synchronise_count_slave(cpu);
set_cpu_online(cpu, true);
@@ -430,7 +432,6 @@ void smp_prepare_boot_cpu(void)
{
set_cpu_possible(0, true);
set_cpu_online(0, true);
- cpumask_set_cpu(0, &cpu_callin_map);
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
@@ -438,11 +439,13 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
mp_ops->boot_secondary(cpu, tidle);
/*
- * Trust is futile. We should really have timeouts ...
+ * We must check for timeout here, as the CPU will not be marked
+ * online until the counters are synchronised.
*/
- while (!cpumask_test_cpu(cpu, &cpu_callin_map)) {
- udelay(100);
- schedule();
+ if (!wait_for_completion_timeout(&cpu_running,
+ msecs_to_jiffies(1000))) {
+ pr_crit("CPU%u: failed to start\n", cpu);
+ return -EIO;
}
synchronise_count_master(cpu);
--
2.7.4