2021-09-26 02:53:31

by Xiaoming Ni

[permalink] [raw]
Subject: [PATCH] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

When CONFIG_SMP=y, timebase synchronization is required when the second
kernel is started.
arch/powerpc/kernel/smp.c:
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
...
if (smp_ops->give_timebase)
smp_ops->give_timebase();
...
}

void start_secondary(void *unused)
{
...
if (smp_ops->take_timebase)
smp_ops->take_timebase();
...
}

When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, both
smp_85xx_ops.give_timebase and smp_85xx_ops.take_timebase are NULL.
As a result, the timebase is not synchronized.

Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU.

Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
Cc: [email protected] #v4.6
Signed-off-by: Xiaoming Ni <[email protected]>
---
arch/powerpc/platforms/85xx/Makefile | 2 +-
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 ++++
arch/powerpc/platforms/85xx/smp.c | 9 ++++-----
3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 60e4e97a929d..71ce1f6b6966 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -3,7 +3,7 @@
# Makefile for the PowerPC 85xx linux kernel.
#
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o

obj-y += common.o

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
index 7c0133f558d0..a5656b3e9701 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -17,6 +17,7 @@

static struct ccsr_guts __iomem *guts;

+#ifdef CONFIG_FSL_PMC
static void mpc85xx_irq_mask(int cpu)
{

@@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu)
{

}
+#endif

static void mpc85xx_freeze_time_base(bool freeze)
{
@@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = {

static const struct fsl_pm_ops mpc85xx_pm_ops = {
.freeze_time_base = mpc85xx_freeze_time_base,
+#ifdef CONFIG_FSL_PMC
.irq_mask = mpc85xx_irq_mask,
.irq_unmask = mpc85xx_irq_unmask,
.cpu_die = mpc85xx_cpu_die,
.cpu_up_prepare = mpc85xx_cpu_up_prepare,
+#endif
};

int __init mpc85xx_setup_pmc(void)
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index c6df294054fe..349298cd9671 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -40,7 +40,6 @@ struct epapr_spin_table {
u32 pir;
};

-#ifdef CONFIG_HOTPLUG_CPU
static u64 timebase;
static int tb_req;
static int tb_valid;
@@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void)
local_irq_restore(flags);
}

+#ifdef CONFIG_HOTPLUG_CPU
static void smp_85xx_cpu_offline_self(void)
{
unsigned int cpu = smp_processor_id();
@@ -499,17 +499,16 @@ void __init mpc85xx_smp_init(void)
#ifdef CONFIG_FSL_CORENET_RCPM
fsl_rcpm_init();
#endif
-
-#ifdef CONFIG_FSL_PMC
- mpc85xx_setup_pmc();
#endif
+ mpc85xx_setup_pmc();
if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
smp_85xx_ops.cpu_die = qoriq_cpu_kill;
- }
#endif
+ }
smp_ops = &smp_85xx_ops;

#ifdef CONFIG_KEXEC_CORE
--
2.27.0


2021-09-26 12:36:44

by Xiaoming Ni

[permalink] [raw]
Subject: Re: [PATCH] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

On 2021/9/26 10:51, Xiaoming Ni wrote:
> When CONFIG_SMP=y, timebase synchronization is required when the second
> kernel is started.
> arch/powerpc/kernel/smp.c:
> int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> {
> ...
> if (smp_ops->give_timebase)
> smp_ops->give_timebase();
> ...
> }
>
> void start_secondary(void *unused)
> {
> ...
> if (smp_ops->take_timebase)
> smp_ops->take_timebase();
> ...
> }
>
> When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n,
> smp_85xx_ops.give_timebase is NULL,
> smp_85xx_ops.take_timebase is NULL,
> As a result, the timebase is not synchronized.
>
> Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU.
>
> Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
> Cc: [email protected] #v4.6
> Signed-off-by: Xiaoming Ni <[email protected]>
> ---
> arch/powerpc/platforms/85xx/Makefile | 2 +-
> arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 ++++
> arch/powerpc/platforms/85xx/smp.c | 9 ++++-----
> 3 files changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
> index 60e4e97a929d..71ce1f6b6966 100644
> --- a/arch/powerpc/platforms/85xx/Makefile
> +++ b/arch/powerpc/platforms/85xx/Makefile
> @@ -3,7 +3,7 @@
> # Makefile for the PowerPC 85xx linux kernel.
> #
> obj-$(CONFIG_SMP) += smp.o
> -obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o
> +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
>
> obj-y += common.o
>
> diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> index 7c0133f558d0..a5656b3e9701 100644
> --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
> @@ -17,6 +17,7 @@
>
> static struct ccsr_guts __iomem *guts;
>
> +#ifdef CONFIG_FSL_PMC
> static void mpc85xx_irq_mask(int cpu)
> {
>
> @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu)
> {
>
> }
> +#endif
>
> static void mpc85xx_freeze_time_base(bool freeze)
> {
> @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = {
>
> static const struct fsl_pm_ops mpc85xx_pm_ops = {
> .freeze_time_base = mpc85xx_freeze_time_base,
> +#ifdef CONFIG_FSL_PMC
> .irq_mask = mpc85xx_irq_mask,
> .irq_unmask = mpc85xx_irq_unmask,
> .cpu_die = mpc85xx_cpu_die,
> .cpu_up_prepare = mpc85xx_cpu_up_prepare,
> +#endif
> };
>
> int __init mpc85xx_setup_pmc(void)
> diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
> index c6df294054fe..349298cd9671 100644
> --- a/arch/powerpc/platforms/85xx/smp.c
> +++ b/arch/powerpc/platforms/85xx/smp.c
> @@ -40,7 +40,6 @@ struct epapr_spin_table {
> u32 pir;
> };
>
> -#ifdef CONFIG_HOTPLUG_CPU
> static u64 timebase;
> static int tb_req;
> static int tb_valid;
> @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void)
> local_irq_restore(flags);
> }
>
> +#ifdef CONFIG_HOTPLUG_CPU
> static void smp_85xx_cpu_offline_self(void)
> {
> unsigned int cpu = smp_processor_id();
> @@ -499,17 +499,16 @@ void __init mpc85xx_smp_init(void)
> #ifdef CONFIG_FSL_CORENET_RCPM
> fsl_rcpm_init();
> #endif
> -
> -#ifdef CONFIG_FSL_PMC
> - mpc85xx_setup_pmc();
> #endif
> + mpc85xx_setup_pmc();
> if (qoriq_pm_ops) {
> smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
> smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
> +#ifdef CONFIG_HOTPLUG_CPU
> smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
> smp_85xx_ops.cpu_die = qoriq_cpu_kill;
> - }
> #endif
> + }
> smp_ops = &smp_85xx_ops;
>
> #ifdef CONFIG_KEXEC_CORE
>


I found inconsistent time values on different CPUs on my mpc8572 and
used this patch to fix it.
But today I found in ppc64 testing that this patch causes the system
to trigger an oops in the function mpc85xx_freeze_time_base(), because
the variable "guts" is a NULL pointer.

I'm sorry to bother you.
I'll fix it and resend it as v2 later.

Thanks
Xiaoming Ni

2021-09-29 03:39:00

by Xiaoming Ni

[permalink] [raw]
Subject: [PATCH v2 0/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

When CONFIG_SMP=y, timebase synchronization is required for mpc8572 when
the second kernel is started.
arch/powerpc/kernel/smp.c:
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
...
if (smp_ops->give_timebase)
smp_ops->give_timebase();
...
}

void start_secondary(void *unused)
{
...
if (smp_ops->take_timebase)
smp_ops->take_timebase();
...
}

When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, both
smp_85xx_ops.give_timebase and smp_85xx_ops.take_timebase are NULL.
As a result, the timebase is not synchronized.

test code:
for i in $(seq 1 3); do taskset 1 date; taskset 2 date; sleep 1; echo;done
log:
Sat Sep 25 18:50:00 CST 2021
Sat Sep 25 19:07:47 CST 2021

Sat Sep 25 18:50:01 CST 2021
Sat Sep 25 19:07:48 CST 2021

Sat Sep 25 18:50:02 CST 2021
Sat Sep 25 19:07:49 CST 2021

Code snippet about give_timebase and take_timebase assignments:
arch/powerpc/platforms/85xx/smp.c:
#ifdef CONFIG_HOTPLUG_CPU
#ifdef CONFIG_FSL_CORENET_RCPM
fsl_rcpm_init();
#endif
#ifdef CONFIG_FSL_PMC
mpc85xx_setup_pmc();
#endif
if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;

config dependency:
FSL_CORENET_RCPM depends on the PPC_E500MC.
FSL_PMC depends on SUSPEND.
SUSPEND depends on ARCH_SUSPEND_POSSIBLE.
ARCH_SUSPEND_POSSIBLE depends on !PPC_E500MC.

CONFIG_HOTPLUG_CPU and CONFIG_FSL_PMC both require the timebase functions,
but timebase synchronization itself should not depend on CONFIG_HOTPLUG_CPU
or CONFIG_FSL_PMC. Therefore, adjust the scope of the #ifdef guards so that
the corresponding timebase hook functions are assigned (non-NULL) whenever
the dtsi node is configured.

-----
Changes in v2:
1. Add a new patch: "powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node
cannot be found"
2. Use !CONFIG_FSL_CORENET_RCPM to select the timebase code for !PPC_E500MC

v1:
https://lore.kernel.org/lkml/[email protected]
------

Xiaoming Ni (2):
powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found
powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

arch/powerpc/platforms/85xx/Makefile | 4 +++-
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 7 +++++--
arch/powerpc/platforms/85xx/smp.c | 12 ++++++------
3 files changed, 14 insertions(+), 9 deletions(-)

--
2.27.0

2021-09-29 04:01:21

by Xiaoming Ni

[permalink] [raw]
Subject: [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

When CONFIG_SMP=y, timebase synchronization is required when the second
kernel is started.
arch/powerpc/kernel/smp.c:
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
...
if (smp_ops->give_timebase)
smp_ops->give_timebase();
...
}

void start_secondary(void *unused)
{
...
if (smp_ops->take_timebase)
smp_ops->take_timebase();
...
}

When CONFIG_HOTPLUG_CPU=n and CONFIG_KEXEC_CORE=n, both
smp_85xx_ops.give_timebase and smp_85xx_ops.take_timebase are NULL.
As a result, the timebase is not synchronized.

Timebase synchronization does not depend on CONFIG_HOTPLUG_CPU.

Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
Cc: [email protected] #v4.6
Signed-off-by: Xiaoming Ni <[email protected]>
---
arch/powerpc/platforms/85xx/Makefile | 4 +++-
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 4 ++++
arch/powerpc/platforms/85xx/smp.c | 12 ++++++------
3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 60e4e97a929d..260fbad7967b 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -3,7 +3,9 @@
# Makefile for the PowerPC 85xx linux kernel.
#
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o
+ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
+endif

obj-y += common.o

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
index ffa8a7a6a2db..4a8af80011a6 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -17,6 +17,7 @@

static struct ccsr_guts __iomem *guts;

+#ifdef CONFIG_FSL_PMC
static void mpc85xx_irq_mask(int cpu)
{

@@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu)
{

}
+#endif

static void mpc85xx_freeze_time_base(bool freeze)
{
@@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = {

static const struct fsl_pm_ops mpc85xx_pm_ops = {
.freeze_time_base = mpc85xx_freeze_time_base,
+#ifdef CONFIG_FSL_PMC
.irq_mask = mpc85xx_irq_mask,
.irq_unmask = mpc85xx_irq_unmask,
.cpu_die = mpc85xx_cpu_die,
.cpu_up_prepare = mpc85xx_cpu_up_prepare,
+#endif
};

int __init mpc85xx_setup_pmc(void)
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index c6df294054fe..83f4a6389a28 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -40,7 +40,6 @@ struct epapr_spin_table {
u32 pir;
};

-#ifdef CONFIG_HOTPLUG_CPU
static u64 timebase;
static int tb_req;
static int tb_valid;
@@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void)
local_irq_restore(flags);
}

+#ifdef CONFIG_HOTPLUG_CPU
static void smp_85xx_cpu_offline_self(void)
{
unsigned int cpu = smp_processor_id();
@@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void)
smp_85xx_ops.probe = NULL;
}

-#ifdef CONFIG_HOTPLUG_CPU
#ifdef CONFIG_FSL_CORENET_RCPM
+ /* Assign a value to qoriq_pm_ops on PPC_E500MC */
fsl_rcpm_init();
-#endif
-
-#ifdef CONFIG_FSL_PMC
+#else
+ /* Assign a value to qoriq_pm_ops on !PPC_E500MC */
mpc85xx_setup_pmc();
#endif
if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
smp_85xx_ops.cpu_die = qoriq_cpu_kill;
- }
#endif
+ }
smp_ops = &smp_85xx_ops;

#ifdef CONFIG_KEXEC_CORE
--
2.27.0

2021-09-29 04:31:02

by Xiaoming Ni

[permalink] [raw]
Subject: [PATCH v2 1/2] powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found

When no node matching mpc85xx_smp_guts_ids[] is configured in the dtb,
mpc85xx_setup_pmc() does not assign a value to the "guts" variable, yet
it still sets qoriq_pm_ops. As a result, an oops is triggered when
mpc85xx_freeze_time_base() is executed.

Fixes: 56f1ba280719 ("powerpc/mpc85xx: refactor the PM operations")
Cc: [email protected] #v4.6
Signed-off-by: Xiaoming Ni <[email protected]>
---
arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
index 7c0133f558d0..ffa8a7a6a2db 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void)
pr_err("Could not map guts node address\n");
return -ENOMEM;
}
+ qoriq_pm_ops = &mpc85xx_pm_ops;
}

- qoriq_pm_ops = &mpc85xx_pm_ops;
-
return 0;
}
--
2.27.0

2021-10-11 16:23:58

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

On Wed, 29 Sep 2021 11:36:44 +0800, Xiaoming Ni wrote:
> When CONFIG_SMP=y, timebase synchronization is required for mpc8572 when
> the second kernel is started
> arch/powerpc/kernel/smp.c:
> int __cpu_up(unsigned int cpu, struct task_struct *tidle)
> {
> ...
> if (smp_ops->give_timebase)
> smp_ops->give_timebase();
> ...
> }
>
> [...]

Applied to powerpc/next.

[1/2] powerpc:85xx:Fix oops when mpc85xx_smp_guts_ids node cannot be found
https://git.kernel.org/powerpc/c/3c2172c1c47b4079c29f0e6637d764a99355ebcd
[2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n
https://git.kernel.org/powerpc/c/c45361abb9185b1e172bd75eff51ad5f601ccae4

cheers

2021-11-25 04:23:03

by Martin Kennedy

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

Hi there,

I have bisected OpenWrt master, and then the Linux kernel down to this
change, to confirm that this change causes a kernel panic on my
P1020RDB-based, dual-core Aerohive HiveAP 370, at initialization of
the second CPU:

:
[ 0.000000] Linux version 5.10.80 (labby@lobon)
(powerpc-openwrt-linux-musl-gcc (OpenWrt GCC 11.2.0
r18111+1-ebb6f9287e) 11.2.0, GNU ld (GNU Binutils) 2.37) #0 SMP Thu
Nov 25 02:49:35 2021
[ 0.000000] Using P1020 RDB machine description
:
[ 0.627233] smp: Bringing up secondary CPUs ...
[ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
[ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
[ 0.848899] Faulting instruction address: 0x00000000
[ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
[ 0.972851] BE PAGE_SIZE=4K SMP NR_CPUS=2 P1020 RDB
[ 1.031179] Modules linked in:
[ 1.067640] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.80 #0
[ 1.139507] NIP: 00000000 LR: c0021d2c CTR: 00000000
[ 1.199921] REGS: c1051cf0 TRAP: 0400 Not tainted (5.10.80)
[ 1.269705] MSR: 00021000 <CE,ME> CR: 84020202 XER: 00000000
[ 1.340534]
[ 1.340534] GPR00: c0021cb8 c1051da8 c1048000 00000001 00029000
00000000 00000001 00000000
[ 1.340534] GPR08: 00000001 00000000 c08b0000 00000040 22000208
00000000 c00032c4 00000000
[ 1.340534] GPR16: 00000000 00000000 00000000 00000000 00000000
00000000 00029000 00000001
[ 1.340534] GPR24: 1ffff240 20000000 dffff240 c080a1f4 00000001
c08ae0a8 00000001 dffff240
[ 1.758220] NIP [00000000] 0x0
[ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
[ 1.856126] Call Trace:
[ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
[ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
[ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
[ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
[ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
[ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
[ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
[ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
[ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c
[ 2.507125] Instruction dump:
[ 2.542541] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
XXXXXXXX XXXXXXXX
[ 2.635242] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
XXXXXXXX XXXXXXXX
[ 2.727952] ---[ end trace 9b796a4bafb6bc14 ]---
[ 2.783149]
[ 3.800879] Kernel panic - not syncing: Fatal exception
[ 3.862353] Rebooting in 1 seconds..
[ 5.905097] System Halted, OK to turn off power

Without this patch, the kernel no longer panics:

[ 0.627232] smp: Bringing up secondary CPUs ...
[ 0.681857] smp: Brought up 1 node, 2 CPUs

Here is the kernel configuration for this built kernel:
https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob_plain;f=target/linux/mpc85xx/config-5.10;hb=HEAD

In case a force-push is needed for the source repository
(https://github.com/Hurricos/openwrt/commit/ad19bdfc77d60ee1c52b41bb4345fdd02284c4cf),
here is the device tree for this board:
https://paste.c-net.org/TrousersSliced

Martin

2021-11-25 07:25:35

by Xiaoming Ni

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

On 2021/11/25 12:20, Martin Kennedy wrote:
> Hi there,
>
> I have bisected OpenWrt master, and then the Linux kernel down to this
> change, to confirm that this change causes a kernel panic on my
> P1020RDB-based, dual-core Aerohive HiveAP 370, at initialization of
> the second CPU:
>
> :
> [ 0.000000] Linux version 5.10.80 (labby@lobon)
> (powerpc-openwrt-linux-musl-gcc (OpenWrt GCC 11.2.0
> r18111+1-ebb6f9287e) 11.2.0, GNU ld (GNU Binutils) 2.37) #0 SMP Thu
> Nov 25 02:49:35 2021
> [ 0.000000] Using P1020 RDB machine description
> :
> [ 0.627233] smp: Bringing up secondary CPUs ...
> [ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
> [ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
> [ 0.848899] Faulting instruction address: 0x00000000
> [ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
> [ 0.972851] BE PAGE_SIZE=4K SMP NR_CPUS=2 P1020 RDB
> [ 1.031179] Modules linked in:
> [ 1.067640] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.80 #0
> [ 1.139507] NIP: 00000000 LR: c0021d2c CTR: 00000000
> [ 1.199921] REGS: c1051cf0 TRAP: 0400 Not tainted (5.10.80)
> [ 1.269705] MSR: 00021000 <CE,ME> CR: 84020202 XER: 00000000
> [ 1.340534]
> [ 1.340534] GPR00: c0021cb8 c1051da8 c1048000 00000001 00029000
> 00000000 00000001 00000000
> [ 1.340534] GPR08: 00000001 00000000 c08b0000 00000040 22000208
> 00000000 c00032c4 00000000
> [ 1.340534] GPR16: 00000000 00000000 00000000 00000000 00000000
> 00000000 00029000 00000001
> [ 1.340534] GPR24: 1ffff240 20000000 dffff240 c080a1f4 00000001
> c08ae0a8 00000001 dffff240
> [ 1.758220] NIP [00000000] 0x0
> [ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
> [ 1.856126] Call Trace:
> [ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
> [ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
> [ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
> [ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
> [ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
> [ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
> [ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
> [ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
> [ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c
> [ 2.507125] Instruction dump:
> [ 2.542541] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX
> [ 2.635242] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> XXXXXXXX XXXXXXXX
> [ 2.727952] ---[ end trace 9b796a4bafb6bc14 ]---
> [ 2.783149]
> [ 3.800879] Kernel panic - not syncing: Fatal exception
> [ 3.862353] Rebooting in 1 seconds..
> [ 5.905097] System Halted, OK to turn off power
>
> Without this patch, the kernel no longer panics:
>
> [ 0.627232] smp: Bringing up secondary CPUs ...
> [ 0.681857] smp: Brought up 1 node, 2 CPUs
>
> Here is the kernel configuration for this built kernel:
> https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob_plain;f=target/linux/mpc85xx/config-5.10;hb=HEAD
>
> In case a force-push is needed for the source repository
> (https://github.com/Hurricos/openwrt/commit/ad19bdfc77d60ee1c52b41bb4345fdd02284c4cf),
> here is the device tree for this board:
> https://paste.c-net.org/TrousersSliced
>
> Martin
> .
>
When CONFIG_FSL_PMC is set to n, cpu_up_prepare is not assigned in
mpc85xx_pm_ops. I suspect that this is the cause of the current NULL
pointer access.
I do not have a test environment for this board. Could you help me
test whether the following patch solves the problem?

diff --git a/arch/powerpc/platforms/85xx/smp.c
b/arch/powerpc/platforms/85xx/smp.c
index 83f4a6389a28..d7081e9af65c 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu)
local_irq_save(flags);
hard_irq_disable();

- if (qoriq_pm_ops)
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
qoriq_pm_ops->cpu_up_prepare(cpu);

/* if cpu is not spinning, reset it */
@@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr)
booting_thread_hwid = cpu_thread_in_core(nr);
primary = cpu_first_thread_sibling(nr);

- if (qoriq_pm_ops)
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
qoriq_pm_ops->cpu_up_prepare(nr);

/*



2021-11-25 14:36:57

by Martin Kennedy

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

Hi there,

Yes, I can test this patch.

I have added it to my tree and removed the reversion, and can confirm
that the second CPU comes up correctly now.

Martin

On Thu, Nov 25, 2021 at 2:23 AM Xiaoming Ni <[email protected]> wrote:
>
> On 2021/11/25 12:20, Martin Kennedy wrote:
> > Hi there,
> >
> > I have bisected OpenWrt master, and then the Linux kernel down to this
> > change, to confirm that this change causes a kernel panic on my
> > P1020RDB-based, dual-core Aerohive HiveAP 370, at initialization of
> > the second CPU:
> >
> > :
> > [ 0.000000] Linux version 5.10.80 (labby@lobon)
> > (powerpc-openwrt-linux-musl-gcc (OpenWrt GCC 11.2.0
> > r18111+1-ebb6f9287e) 11.2.0, GNU ld (GNU Binutils) 2.37) #0 SMP Thu
> > Nov 25 02:49:35 2021
> > [ 0.000000] Using P1020 RDB machine description
> > :
> > [ 0.627233] smp: Bringing up secondary CPUs ...
> > [ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
> > [ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
> > [ 0.848899] Faulting instruction address: 0x00000000
> > [ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
> > [ 0.972851] BE PAGE_SIZE=4K SMP NR_CPUS=2 P1020 RDB
> > [ 1.031179] Modules linked in:
> > [ 1.067640] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.80 #0
> > [ 1.139507] NIP: 00000000 LR: c0021d2c CTR: 00000000
> > [ 1.199921] REGS: c1051cf0 TRAP: 0400 Not tainted (5.10.80)
> > [ 1.269705] MSR: 00021000 <CE,ME> CR: 84020202 XER: 00000000
> > [ 1.340534]
> > [ 1.340534] GPR00: c0021cb8 c1051da8 c1048000 00000001 00029000
> > 00000000 00000001 00000000
> > [ 1.340534] GPR08: 00000001 00000000 c08b0000 00000040 22000208
> > 00000000 c00032c4 00000000
> > [ 1.340534] GPR16: 00000000 00000000 00000000 00000000 00000000
> > 00000000 00029000 00000001
> > [ 1.340534] GPR24: 1ffff240 20000000 dffff240 c080a1f4 00000001
> > c08ae0a8 00000001 dffff240
> > [ 1.758220] NIP [00000000] 0x0
> > [ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
> > [ 1.856126] Call Trace:
> > [ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
> > [ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
> > [ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
> > [ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
> > [ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
> > [ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
> > [ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
> > [ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
> > [ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c
> > [ 2.507125] Instruction dump:
> > [ 2.542541] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> > XXXXXXXX XXXXXXXX
> > [ 2.635242] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
> > XXXXXXXX XXXXXXXX
> > [ 2.727952] ---[ end trace 9b796a4bafb6bc14 ]---
> > [ 2.783149]
> > [ 3.800879] Kernel panic - not syncing: Fatal exception
> > [ 3.862353] Rebooting in 1 seconds..
> > [ 5.905097] System Halted, OK to turn off power
> >
> > Without this patch, the kernel no longer panics:
> >
> > [ 0.627232] smp: Bringing up secondary CPUs ...
> > [ 0.681857] smp: Brought up 1 node, 2 CPUs
> >
> > Here is the kernel configuration for this built kernel:
> > https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob_plain;f=target/linux/mpc85xx/config-5.10;hb=HEAD
> >
> > In case a force-push is needed for the source repository
> > (https://github.com/Hurricos/openwrt/commit/ad19bdfc77d60ee1c52b41bb4345fdd02284c4cf),
> > here is the device tree for this board:
> > https://paste.c-net.org/TrousersSliced
> >
> > Martin
> > .
> >
> When CONFIG_FSL_PMC is set to n, cpu_up_prepare is not assigned to
> mpc85xx_pm_ops. I suspect that this is the cause of the current null
> pointer access.
> I do not have the corresponding board test environment. Can you help me
> to test whether the following patch solves the problem?
>
> diff --git a/arch/powerpc/platforms/85xx/smp.c
> b/arch/powerpc/platforms/85xx/smp.c
> index 83f4a6389a28..d7081e9af65c 100644
> --- a/arch/powerpc/platforms/85xx/smp.c
> +++ b/arch/powerpc/platforms/85xx/smp.c
> @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu)
> local_irq_save(flags);
> hard_irq_disable();
>
> - if (qoriq_pm_ops)
> + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
> qoriq_pm_ops->cpu_up_prepare(cpu);
>
> /* if cpu is not spinning, reset it */
> @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr)
> booting_thread_hwid = cpu_thread_in_core(nr);
> primary = cpu_first_thread_sibling(nr);
>
> - if (qoriq_pm_ops)
> + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
> qoriq_pm_ops->cpu_up_prepare(nr);
>
> /*
>
>

2021-11-26 01:24:08

by Xiaoming Ni

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] powerpc:85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n

On 2021/11/25 22:34, Martin Kennedy wrote:
> Hi there,
>
> Yes, I can test this patch.
>
> I have added it to my tree and removed the reversion, and can confirm
> that the second CPU comes up correctly now.
>
> Martin
>
Thank you very much for your report and testing. I'll send a patch later.

Thanks
Xiaoming Ni

> On Thu, Nov 25, 2021 at 2:23 AM Xiaoming Ni <[email protected]> wrote:
>>
>> On 2021/11/25 12:20, Martin Kennedy wrote:
>>> Hi there,
>>>
>>> I have bisected OpenWrt master, and then the Linux kernel down to this
>>> change, to confirm that this change causes a kernel panic on my
>>> P1020RDB-based, dual-core Aerohive HiveAP 370, at initialization of
>>> the second CPU:
>>>
>>> :
>>> [ 0.000000] Linux version 5.10.80 (labby@lobon)
>>> (powerpc-openwrt-linux-musl-gcc (OpenWrt GCC 11.2.0
>>> r18111+1-ebb6f9287e) 11.2.0, GNU ld (GNU Binutils) 2.37) #0 SMP Thu
>>> Nov 25 02:49:35 2021
>>> [ 0.000000] Using P1020 RDB machine description
>>> :
>>> [ 0.627233] smp: Bringing up secondary CPUs ...
>>> [ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
>>> [ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
>>> [ 0.848899] Faulting instruction address: 0x00000000
>>> [ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
>>> [ 0.972851] BE PAGE_SIZE=4K SMP NR_CPUS=2 P1020 RDB
>>> [ 1.031179] Modules linked in:
>>> [ 1.067640] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.80 #0
>>> [ 1.139507] NIP: 00000000 LR: c0021d2c CTR: 00000000
>>> [ 1.199921] REGS: c1051cf0 TRAP: 0400 Not tainted (5.10.80)
>>> [ 1.269705] MSR: 00021000 <CE,ME> CR: 84020202 XER: 00000000
>>> [ 1.340534]
>>> [ 1.340534] GPR00: c0021cb8 c1051da8 c1048000 00000001 00029000
>>> 00000000 00000001 00000000
>>> [ 1.340534] GPR08: 00000001 00000000 c08b0000 00000040 22000208
>>> 00000000 c00032c4 00000000
>>> [ 1.340534] GPR16: 00000000 00000000 00000000 00000000 00000000
>>> 00000000 00029000 00000001
>>> [ 1.340534] GPR24: 1ffff240 20000000 dffff240 c080a1f4 00000001
>>> c08ae0a8 00000001 dffff240
>>> [ 1.758220] NIP [00000000] 0x0
>>> [ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
>>> [ 1.856126] Call Trace:
>>> [ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
>>> [ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
>>> [ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
>>> [ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
>>> [ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
>>> [ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
>>> [ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
>>> [ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
>>> [ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c
>>> [ 2.507125] Instruction dump:
>>> [ 2.542541] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
>>> XXXXXXXX XXXXXXXX
>>> [ 2.635242] XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX
>>> XXXXXXXX XXXXXXXX
>>> [ 2.727952] ---[ end trace 9b796a4bafb6bc14 ]---
>>> [ 2.783149]
>>> [ 3.800879] Kernel panic - not syncing: Fatal exception
>>> [ 3.862353] Rebooting in 1 seconds..
>>> [ 5.905097] System Halted, OK to turn off power
>>>
>>> Without this patch, the kernel no longer panics:
>>>
>>> [ 0.627232] smp: Bringing up secondary CPUs ...
>>> [ 0.681857] smp: Brought up 1 node, 2 CPUs
>>>
>>> Here is the kernel configuration for this built kernel:
>>> https://git.openwrt.org/?p=openwrt/openwrt.git;a=blob_plain;f=target/linux/mpc85xx/config-5.10;hb=HEAD
>>>
>>> In case a force-push is needed for the source repository
>>> (https://github.com/Hurricos/openwrt/commit/ad19bdfc77d60ee1c52b41bb4345fdd02284c4cf),
>>> here is the device tree for this board:
>>> https://paste.c-net.org/TrousersSliced
>>>
>>> Martin
>>> .
>>>
>> When CONFIG_FSL_PMC is set to n, cpu_up_prepare is not assigned to
>> mpc85xx_pm_ops. I suspect that this is the cause of the current null
>> pointer access.
>> I do not have the corresponding board test environment. Can you help me
>> to test whether the following patch solves the problem?
>>
>> diff --git a/arch/powerpc/platforms/85xx/smp.c
>> b/arch/powerpc/platforms/85xx/smp.c
>> index 83f4a6389a28..d7081e9af65c 100644
>> --- a/arch/powerpc/platforms/85xx/smp.c
>> +++ b/arch/powerpc/platforms/85xx/smp.c
>> @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu)
>> local_irq_save(flags);
>> hard_irq_disable();
>>
>> - if (qoriq_pm_ops)
>> + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
>> qoriq_pm_ops->cpu_up_prepare(cpu);
>>
>> /* if cpu is not spinning, reset it */
>> @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr)
>> booting_thread_hwid = cpu_thread_in_core(nr);
>> primary = cpu_first_thread_sibling(nr);
>>
>> - if (qoriq_pm_ops)
>> + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
>> qoriq_pm_ops->cpu_up_prepare(nr);
>>
>> /*
>>
>>
> .
>


2021-11-26 04:14:00

by Xiaoming Ni

[permalink] [raw]
Subject: [PATCH] powerpc/85xx: fix oops when CONFIG_FSL_PMC=n

When CONFIG_FSL_PMC is set to n, no value is assigned to cpu_up_prepare
in the mpc85xx_pm_ops structure. As a result, an oops is triggered when
smp_85xx_start_cpu() or smp_85xx_kick_cpu() calls the NULL hook.

[ 0.627233] smp: Bringing up secondary CPUs ...
[ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
[ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
[ 0.848899] Faulting instruction address: 0x00000000
[ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
...
[ 1.758220] NIP [00000000] 0x0
[ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
[ 1.856126] Call Trace:
[ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
[ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
[ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
[ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
[ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
[ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
[ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
[ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
[ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c

Fixes: c45361abb9185b ("powerpc/85xx: fix timebase sync issue when CONFIG_HOTPLUG_CPU=n")
Link: https://lore.kernel.org/lkml/CANA18Uyba4kMJQrbCSZVTFep2Exe5izE45whNJgwwUvNSEcNLg@mail.gmail.com/
Reported-by: Martin Kennedy <[email protected]>
Signed-off-by: Xiaoming Ni <[email protected]>
Tested-by: Martin Kennedy <[email protected]>
Cc: [email protected]
---
arch/powerpc/platforms/85xx/smp.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 83f4a6389a28..d7081e9af65c 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu)
local_irq_save(flags);
hard_irq_disable();

- if (qoriq_pm_ops)
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
qoriq_pm_ops->cpu_up_prepare(cpu);

/* if cpu is not spinning, reset it */
@@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr)
booting_thread_hwid = cpu_thread_in_core(nr);
primary = cpu_first_thread_sibling(nr);

- if (qoriq_pm_ops)
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
qoriq_pm_ops->cpu_up_prepare(nr);

/*
--
2.27.0


2021-12-03 11:53:33

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH] powerpc/85xx: fix oops when CONFIG_FSL_PMC=n

On Fri, 26 Nov 2021 12:11:53 +0800, Xiaoming Ni wrote:
> When CONFIG_FSL_PMC is set to n, no value is assigned to cpu_up_prepare
> in the mpc85xx_pm_ops structure. As a result, an oops is triggered in
> smp_85xx_start_cpu().
>
> [ 0.627233] smp: Bringing up secondary CPUs ...
> [ 0.681659] kernel tried to execute user page (0) - exploit attempt? (uid: 0)
> [ 0.766618] BUG: Unable to handle kernel instruction fetch (NULL pointer?)
> [ 0.848899] Faulting instruction address: 0x00000000
> [ 0.908273] Oops: Kernel access of bad area, sig: 11 [#1]
> ...
> [ 1.758220] NIP [00000000] 0x0
> [ 1.794688] LR [c0021d2c] smp_85xx_kick_cpu+0xe8/0x568
> [ 1.856126] Call Trace:
> [ 1.885295] [c1051da8] [c0021cb8] smp_85xx_kick_cpu+0x74/0x568 (unreliable)
> [ 1.968633] [c1051de8] [c0011460] __cpu_up+0xc0/0x228
> [ 2.029038] [c1051e18] [c0031bbc] bringup_cpu+0x30/0x224
> [ 2.092572] [c1051e48] [c0031f3c] cpu_up.constprop.0+0x180/0x33c
> [ 2.164443] [c1051e88] [c00322e8] bringup_nonboot_cpus+0x88/0xc8
> [ 2.236326] [c1051eb8] [c07e67bc] smp_init+0x30/0x78
> [ 2.295698] [c1051ed8] [c07d9e28] kernel_init_freeable+0x118/0x2a8
> [ 2.369641] [c1051f18] [c00032d8] kernel_init+0x14/0x124
> [ 2.433176] [c1051f38] [c0010278] ret_from_kernel_thread+0x14/0x1c
>
> [...]

Applied to powerpc/fixes.

[1/1] powerpc/85xx: fix oops when CONFIG_FSL_PMC=n
https://git.kernel.org/powerpc/c/3dc709e518b47386e6af937eaec37bb36539edfd

cheers