Greetings,
Today's mainline kernel panics when booting on my powerpc lpar
devtmpfs: initialized
clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604462750000 ns
futex hash table entries: 8192 (order: 4, 1048576 bytes, linear)
thermal_sys: Registered thermal governor 'fair_share'
thermal_sys: Registered thermal governor 'step_wise'
BUG: Unable to handle kernel data access on read at 0xc000006000000000
Faulting instruction address: 0xc000000000393b9c
Oops: Kernel access of bad area, sig: 11 [#1]
LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
Modules linked in:
CPU: 17 PID: 1 Comm: swapper/0 Not tainted 5.7.0+ #1
NIP: c000000000393b9c LR: c00000000039598c CTR: 0000000000000000
REGS: c000000777503660 TRAP: 0300 Not tainted (5.7.0+)
MSR: 8000000000009033 <SF,EE,ME,IR,DR,RI,LE> CR: 84024484 XER: 00000006
CFAR: c000000000393aa0 DAR: c000006000000000 DSISR: 40000000 IRQMASK: 0
GPR00: c00000000039598c c0000007775038f0 c00000000157fa00 6000000060000000
GPR04: ffffffffffffffff 00000000000003ff 0000000000000018 0000000000000000
GPR08: 0000000000000060 0000000000000008 0000000060000000 0000000000000100
GPR12: 0000000024024282 c000000007fa6600 c000006000000000 c008000000000000
GPR16: 800000000000018e 0000000000000000 0000000000000000 c008000000010000
GPR20: c008000000010000 c00800000000ffff 0000000000000000 c000000001533390
GPR24: c008000000010000 c00000000162d9e0 0000000000000001 0000000000000001
GPR28: c000000001600400 c00000000162d9e8 0000000000000000 c000000000000000
NIP [c000000000393b9c] map_kernel_range_noflush+0x23c/0x550
LR [c00000000039598c] __vmalloc_node_range+0x2fc/0x3f0
Call Trace:
[c0000007775039c0] [c00000000039598c] __vmalloc_node_range+0x2fc/0x3f0
[c000000777503a70] [c000000000395bb8] __vmalloc+0x58/0x70
[c000000777503ae0] [c0000000002e5f3c] bpf_prog_alloc_no_stats+0x4c/0x120
[c000000777503b20] [c0000000002e6040] bpf_prog_alloc+0x30/0xe0
[c000000777503b60] [c0000000009c4678] bpf_prog_create+0x68/0x100
[c000000777503ba0] [c000000000fbfafc] ptp_classifier_init+0x4c/0x80
[c000000777503be0] [c000000000fbeac0] sock_init+0xe0/0x100
[c000000777503c10] [c000000000011444] do_one_initcall+0x64/0x2c0
[c000000777503ce0] [c000000000f643d0] kernel_init_freeable+0x2d8/0x39c
[c000000777503db0] [c000000000011b14] kernel_init+0x2c/0x148
[c000000777503e20] [c00000000000cba8] ret_from_kernel_thread+0x5c/0x74
Instruction dump:
7ca507b4 5148463e 79ce07c6 7cc62838 7dce4378 78c61f24 79ce1564 79cef082
788ef00e 7dce3215 4182030c 38c90010 <e8ae0000> 3ad3ffff 7f663036 7d867a14
---[ end trace 598b3823c4d29f01 ]---
Kernel panic - not syncing: Fatal exception
The faulting instruction address 0xc000000000393b9c is in map_kernel_range_noflush
(mm/vmalloc.c:227)
System : Power 9 Power VM LPAR
kernel : mainline 5.7.0
test : boot
The following patch series could have caused this regression:
6c0c7d mm/ioremap: track which page-table levels were modified
2ba3e6 mm/vmalloc: track which page-table levels were modified << bad commit
d86261 mm: add functions to track page directory modifications
@Joerg, Could you please have a look?
Attaching the kernel config
--
Regards,
Abdul Haleem
IBM Linux Technology Centre
Hi Abdul,
On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> Greeting's
>
> Today's mainline kernel panics when booting on my powerpc lpar
Thanks for the report, I am looking into it with my limited powerpc
knowledge. But I have an idea and will send you something to test later
today.
Thanks,
Joerg
On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> @Joerg, Could you please have a look?
Can you please try the attached patch?
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index 58046ddc08d0..afbab31fbd7e 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -17,6 +17,11 @@
((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
NULL : pud_offset(p4d, address))
+#define pud_alloc_track(mm, p4d, address, mask) \
+ ((unlikely(pgd_none(*(p4d))) && \
+ (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
+ NULL : pud_offset(p4d, address))
+
#define p4d_alloc(mm, pgd, address) (pgd)
#define p4d_alloc_track(mm, pgd, address, mask) (pgd)
#define p4d_offset(pgd, start) (pgd)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7e07f4f490cb..d46bf03b804f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
NULL : pud_offset(p4d, address);
}
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
unsigned long address,
pgtbl_mod_mask *mod_mask)
-
{
- if (unlikely(pgd_none(*pgd))) {
- if (__p4d_alloc(mm, pgd, address))
+ if (unlikely(p4d_none(*p4d))) {
+ if (__pud_alloc(mm, p4d, address))
return NULL;
- *mod_mask |= PGTBL_PGD_MODIFIED;
+ *mod_mask |= PGTBL_P4D_MODIFIED;
}
- return p4d_offset(pgd, address);
+ return pud_offset(p4d, address);
}
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
unsigned long address,
pgtbl_mod_mask *mod_mask)
+
{
- if (unlikely(p4d_none(*p4d))) {
- if (__pud_alloc(mm, p4d, address))
+ if (unlikely(pgd_none(*pgd))) {
+ if (__p4d_alloc(mm, pgd, address))
return NULL;
- *mod_mask |= PGTBL_P4D_MODIFIED;
+ *mod_mask |= PGTBL_PGD_MODIFIED;
}
- return pud_offset(p4d, address);
+ return p4d_offset(pgd, address);
}
+#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
On Wed, Jun 03, 2020 at 03:32:57PM +0200, Joerg Roedel wrote:
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
>
> Can you please try the attached patch?
Hi Joerg,
I hit a similar boot failure on a Power9 bare-metal box (see the Note below), and
your patch below fixed it in my environment;
I am now able to boot the system fine.
...
Fedora 31 (Thirty One)
Kernel 5.7.0-gd6f9469a0-dirty on an ppc64le (hvc0)
login:
Tested-by: Satheesh Rajendran <[email protected]>
Note: for the record, here is the boot failure call trace.
[ 0.023555] mempolicy: Enabling automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl
[ 0.023582] pid_max: default: 163840 minimum: 1280
[ 0.035014] BUG: Unable to handle kernel data access on read at 0xc000006000000000
[ 0.035058] Faulting instruction address: 0xc000000000382304
[ 0.035074] Oops: Kernel access of bad area, sig: 11 [#1]
[ 0.035097] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
[ 0.035113] Modules linked in:
[ 0.035136] CPU: 24 PID: 0 Comm: swapper/24 Not tainted 5.7.0-gd6f9469a0 #1
[ 0.035161] NIP: c000000000382304 LR: c00000000038407c CTR: 0000000000000000
[ 0.035197] REGS: c00000000167f930 TRAP: 0300 Not tainted (5.7.0-gd6f9469a0)
[ 0.035241] MSR: 9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE> CR: 42022422 XER: 00000000
[ 0.035294] CFAR: c0000000003822fc DAR: c000006000000000 DSISR: 40000000 IRQMASK: 0
[ 0.035294] GPR00: c00000000038407c c00000000167fbc0 c00000000168090[ 150.252645597,5] OPAL: Reboot request...
[ 150.252928266,5] RESET: Initiating fast reboot 1...
0 c008000000000000
[ 0.035294] GPR04: ffffffffffffffff 00000000000001ff c0080000001fffff 0000000000000060
[ 0.035294] GPR08: 0000000060000000 0000000000000005 c000006000000000 c008000000200000
[ 0.035294] GPR12: 0000000022022422 c000000001870000 c000000000000000 c008000000000000
[ 0.035294] GPR16: c008000007ffffff c008000000200000 0000000000000000 c000006000000000
[ 0.035294] GPR20: c008000008000000 c008000008000000 c008000007ffffff c008000007ffffff
[ 0.035294] GPR24: c00000000163f7c8 c00000000172d0c0 0000000000000001 0000000000000001
[ 0.035294] GPR28: c000000001708000 c00000000172d0c8 0000000000000000 c008000008000000
[ 0.035622] NIP [c000000000382304] map_kernel_range_noflush+0x274/0x510
[ 0.035657] LR [c00000000038407c] __vmalloc_node_range+0x2ec/0x3a0
[ 0.035690] Call Trace:
[ 0.035709] [c00000000167fbc0] [c00000000038d848] __alloc_pages_nodemask+0x158/0x3f0 (unreliable)
[ 0.035750] [c00000000167fc90] [c00000000038407c] __vmalloc_node_range+0x2ec/0x3a0
[ 0.035787] [c00000000167fd40] [c000000000384268] __vmalloc+0x58/0x70
[ 0.035823] [c00000000167fdb0] [c000000001056db8] alloc_large_system_hash+0x204/0x304
[ 0.035870] [c00000000167fe60] [c00000000105c1f0] vfs_caches_init+0xd8/0x138
[ 0.035916] [c00000000167fee0] [c0000000010242a0] start_kernel+0x644/0x6ec
[ 0.035960] [c00000000167ff90] [c00000000000ca9c] start_here_common+0x1c/0x400
[ 0.036004] Instruction dump:
[ 0.036016] 3af4ffff 60000000 60000000 38c90010 7f663036 7d667a14 7cc600d0 7d713038
[ 0.036038] 38d1ffff 7c373040 41810008 7e91a378 <e8b30000> 2c250000 418201b4 7f464830
[ 0.036083] ---[ end trace c7e72029dfacc217 ]---
[ 0.036114]
[ 1.036223] Kernel panic - not syncing: Attempted to kill the idle task!
[ 1.036858] Rebooting in 10 seconds..
Regards,
-Satheesh.
>
> diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
> ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
> NULL : pud_offset(p4d, address))
>
> +#define pud_alloc_track(mm, p4d, address, mask) \
> + ((unlikely(pgd_none(*(p4d))) && \
> + (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
> + NULL : pud_offset(p4d, address))
> +
> #define p4d_alloc(mm, pgd, address) (pgd)
> #define p4d_alloc_track(mm, pgd, address, mask) (pgd)
> #define p4d_offset(pgd, start) (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7e07f4f490cb..d46bf03b804f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
> NULL : pud_offset(p4d, address);
> }
>
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> unsigned long address,
> pgtbl_mod_mask *mod_mask)
> -
> {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
> return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
> }
>
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
> }
>
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> unsigned long address,
> pgtbl_mod_mask *mod_mask)
> +
> {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
> return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
> }
>
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
> }
>
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
> static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
> {
> return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
On Wed, 2020-06-03 at 15:32 +0200, Joerg Roedel wrote:
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
>
> Can you please try the attached patch?
Thanks Joerg, the given patch fixes the boot problem.
Please add a Reported-by tag to the fix commit.
Reported-by: Abdul Haleem <[email protected]>
>
> diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
> ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
> NULL : pud_offset(p4d, address))
>
> +#define pud_alloc_track(mm, p4d, address, mask) \
> + ((unlikely(pgd_none(*(p4d))) && \
> + (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? \
> + NULL : pud_offset(p4d, address))
> +
> #define p4d_alloc(mm, pgd, address) (pgd)
> #define p4d_alloc_track(mm, pgd, address, mask) (pgd)
> #define p4d_offset(pgd, start) (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7e07f4f490cb..d46bf03b804f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
> NULL : pud_offset(p4d, address);
> }
>
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> unsigned long address,
> pgtbl_mod_mask *mod_mask)
> -
> {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
> return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
> }
>
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
> }
>
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> unsigned long address,
> pgtbl_mod_mask *mod_mask)
> +
> {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
> return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
> }
>
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
> }
>
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
> static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
> {
> return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
--
Regards,
Abdul Haleem
IBM Linux Technology Centre
On Wed, 3 Jun 2020 at 19:03, Joerg Roedel <[email protected]> wrote:
>
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
>
> Can you please try the attached patch?
@Joerg, the Linaro test farm noticed this kernel crash on an NXP LS2088
(Machine model: Freescale Layerscape 2088A RDB Board)
while booting the Linux mainline 5.7.0 kernel.
Applying your proposed patch fixed the boot problem.
Tested-by: Naresh Kamboju <[email protected]>
Test ref:
https://lavalab.nxp.com/scheduler/job/23787#L426
Here is the kernel crash log from before the patch was applied:
[ 0.000000] Linux version 5.7.0-03887-gf6aee505c71b
(TuxBuild@ecb9ef34f06f) (gcc version 9.3.0 (Debian 9.3.0-8), GNU ld
(GNU Binutils for Debian) 2.34) #1 SMP PREEMPT Wed Jun 3 18:21:26 UTC
2020
[ 0.000000] Machine model: Freescale Layerscape 2088A RDB Board
<>
[ 0.000000] NR_IRQS: 64, nr_irqs: 64, preallocated irqs: 0
[ 0.000000] Unable to handle kernel paging request at virtual
address fffeffff80000000
[ 0.000000] Mem abort info:
[ 0.000000] ESR = 0x96000004
[ 0.000000] EC = 0x25: DABT (current EL), IL = 32 bits
[ 0.000000] SET = 0, FnV = 0
[ 0.000000] EA = 0, S1PTW = 0
[ 0.000000] Data abort info:
[ 0.000000] ISV = 0, ISS = 0x00000004
[ 0.000000] CM = 0, WnR = 0
[ 0.000000] [fffeffff80000000] address between user and kernel address ranges
[ 0.000000] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 0.000000] Modules linked in:
[ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.7.0-03887-gf6aee505c71b #1
[ 0.000000] Hardware name: Freescale Layerscape 2088A RDB Board (DT)
[ 0.000000] pstate: 80000085 (Nzcv daIf -PAN -UAO BTYPE=--)
[ 0.000000] pc : map_kernel_range_noflush+0xc0/0x280
[ 0.000000] lr : __vmalloc_node_range+0x154/0x2a0
[ 0.000000] sp : ffffb3b1dcbc3e20
[ 0.000000] x29: ffffb3b1dcbc3e20 x28: fffeffff80000000
[ 0.000000] x27: ffff800010004000 x26: ffff800010000000
[ 0.000000] x25: 0000000000402dc2 x24: ffffb3b1dc53c000
[ 0.000000] x23: 0068000000000f13 x22: 0000000000000004
[ 0.000000] x21: ffffb3b1dc53cf48 x20: 0000000000000000
[ 0.000000] x19: ffffb3b1dc627800 x18: 00000000000000c0
[ 0.000000] x17: 0000000000000000 x16: 0000000000000007
[ 0.000000] x15: dead000000000100 x14: fffffe020b990600
[ 0.000000] x13: dead000000000122 x12: 0000000000000001
[ 0.000000] x11: 0000000000000000 x10: ffff0082fe3fdec0
[ 0.000000] x9 : ffff0082fe342d58 x8 : ffff4cd121ba5000
[ 0.000000] x7 : ffff808010000000 x6 : 0000000000000004
[ 0.000000] x5 : 000000000000fffd x4 : 0000000000004000
[ 0.000000] x3 : ffff800050000000 x2 : 0001000080000000
[ 0.000000] x1 : 0000000000000000 x0 : ffff800010003fff
[ 0.000000] Call trace:
[ 0.000000] map_kernel_range_noflush+0xc0/0x280
[ 0.000000] __vmalloc_node_range+0x154/0x2a0
[ 0.000000] __vmalloc_node+0x5c/0x70
[ 0.000000] init_IRQ+0xac/0xf8
[ 0.000000] start_kernel+0x2d0/0x4dc
[ 0.000000] Code: f90047e0 d503201f d2a80003 8b030343 (f9400380)
[ 0.000000] random: get_random_bytes called from
print_oops_end_marker+0x2c/0x58 with crng_init=0
[ 0.000000] ---[ end trace 0000000000000000 ]---
[ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
ref:
https://lavalab.nxp.com/scheduler/job/23596#L603
--
Linaro LKFT
https://lkft.linaro.org