Greetings,
Today's mainline kernel produces Oops messages during a memory hot-unplug
operation.
Machine: Power 8 bare-metal
Kernel: 4.15.0
Config: attached
gcc: 4.8.5
Test: Memory hot-unplug
echo offline > /sys/devices/system/memory/memory<x>/state
The above command triggered two kernel Oops messages. The faulting
instruction address from the first Oops maps to:
# gdb -batch vmlinux -ex 'list *(0xc000000000a15a18)'
0xc000000000a15a18 is in _raw_spin_lock
(./arch/powerpc/include/asm/spinlock.h:82).
77 */
78 static inline unsigned long __arch_spin_trylock(arch_spinlock_t
*lock)
79 {
80 unsigned long tmp, token;
81
82 token = LOCK_TOKEN;
83 __asm__ __volatile__(
84 "1: " PPC_LWARX(%0,0,%2,1) "\n\
85 cmpwi 0,%0,0\n\
86 bne- 2f\n\
and the faulting instruction address from the second Oops maps to:
# gdb -batch vmlinux -ex 'list *(0xc00000000029f7c8)'
0xc00000000029f7c8 is in page_vma_mapped_walk
(./arch/powerpc/include/asm/book3s/64/pgtable.h:571).
566 }
567 #endif /* CONFIG_NUMA_BALANCING */
568
569 static inline int pte_present(pte_t pte)
570 {
571 return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
572 }
573
574 #ifdef CONFIG_PPC_MEM_KEYS
575 extern bool arch_pte_access_permitted(u64 pte, bool write, bool
execute);
Trace logs:
Offlined Pages 4096
Offlined Pages 4096
Offlined Pages 4096
Unable to handle kernel paging request for data at address 0xf000400000000030
Faulting instruction address: 0xc000000000a15a18
Oops: Kernel access of bad area, sig: 11 [#1]
LE SMP NR_CPUS=2048 NUMA PowerNV
Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4
iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4
xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge
stp llc kvm_hv kvm iptable_filter vmx_crypto ipmi_powernv ipmi_devintf
ipmi_msghandler powernv_rng leds_powernv led_class powernv_op_panel
rng_core nfsd binfmt_misc ip_tables x_tables autofs4
CPU: 2 PID: 50585 Comm: stress Not tainted 4.15.0-11704-ga2e5790-dirty #1
NIP: c000000000a15a18 LR: c00000000028ea14 CTR: c000000000280ca0
REGS: c0000007f704f8a0 TRAP: 0300 Not tainted (4.15.0-11704-ga2e5790-dirty)
MSR: 900000000280b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 28824828 XER: 00000000
CFAR: c00000000000884c DAR: f000400000000030 DSISR: 40000000 SOFTE: 0
GPR00: c00000000028ea14 c0000007f704fb20 c0000000010e3300 f000400000000030
GPR04: 000000000020bc02 0000000002bc2000 0000000000000000 00000000000004b0
GPR08: c100000000000000 0008000000000040 0000000080000002 c000000000000000
GPR12: 0000000000004400 c00000000fd00c00 0000000110960000 0000000000000000
GPR16: c0000007f704c000 0000000000000000 c000000001281b70 c0000007a11ccf00
GPR20: 0000000000000000 0000000000000000 c1000000000004b0 fe7fffffffffefff
GPR24: c0000007e861a500 0000000000011096 f000400000000000 0000000000000000
GPR28: c0000007f1518880 c0000007f0720880 0000000110970000 f000400000000030
NIP [c000000000a15a18] _raw_spin_lock+0x28/0xc0
LR [c00000000028ea14] copy_page_range+0x604/0x1390
Call Trace:
[c0000007f704fb50] [c00000000028ea14] copy_page_range+0x604/0x1390
[c0000007f704fce0] [c0000000000ea84c] copy_process.isra.40.part.41+0xbdc/0x18b0
[c0000007f704fdc0] [c0000000000eb704] _do_fork+0xd4/0x4a0
[c0000007f704fe30] [c00000000000bbc8] ppc_clone+0x8/0xc
Instruction dump:
990d028c 4bffffc8 3c4c006d 3842d910 7c0802a6 fbe1fff8 7c7f1b78 f8010010
f821ffd1 39400000 994d028c 814d0008 <7d201829> 2c090000 40c20010 7d40192d
---[ end trace b21abd323ba17f9c ]---
Unable to handle kernel paging request for data at address 0xc1000000000004a8
Faulting instruction address: 0xc00000000029f7c8
Oops: Kernel access of bad area, sig: 11 [#2]
LE SMP NR_CPUS=2048 NUMA PowerNV
Modules linked in: iptable_mangle ipt_MASQUERADE nf_nat_masquerade_ipv4
iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4
xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp tun bridge
stp llc kvm_hv kvm iptable_filter vmx_crypto ipmi_powernv ipmi_devintf
ipmi_msghandler powernv_rng leds_powernv led_class powernv_op_panel
rng_core nfsd binfmt_misc ip_tables x_tables autofs4
CPU: 14 PID: 1025 Comm: kswapd0 Tainted: G D 4.15.0-11704-ga2e5790-dirty #1
NIP: c00000000029f7c8 LR: c00000000029f39c CTR: c0000000002a1170
REGS: c0000007f1d0f580 TRAP: 0380 Tainted: G D (4.15.0-11704-ga2e5790-dirty)
MSR: 9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE> CR: 28002084 XER: 00000000
CFAR: c00000000029f624 SOFTE: 0
GPR00: 00000007e8fad800 c0000007f1d0f800 c0000000010e3300 000000000000000a
GPR04: 0000000000000000 c0000007f0720880 0000000000000000 0000000000000000
GPR08: c1000000000004a8 00000000000004a8 c100000000000000 0000000000000000
GPR12: c0000000002a1170 c00000000fd05400 f000000001f61ca0 c0000007fffffe00
GPR16: f000000001f61c80 c0000007ffff6300 c0000007fc055808 0000000000000003
GPR20: c000000800006300 c0000007f1d0fa30 0000000000000001 0000000000000000
GPR24: c0000007a11ccf00 0000000000000000 0000000000000001 f000000001f61c80
GPR28: 0000000000000000 00000000f0720000 c000000001281b70 c0000007f1d0f890
NIP [c00000000029f7c8] page_vma_mapped_walk+0x6b8/0x960
LR [c00000000029f39c] page_vma_mapped_walk+0x28c/0x960
Call Trace:
[c0000007f1d0f800] [c0800007d8720105] 0xc0800007d8720105 (unreliable)
[c0000007f1d0f870] [c0000000002a122c] page_referenced_one+0xbc/0x250
[c0000007f1d0f900] [c0000000002a1a90] rmap_walk_file+0x160/0x340
[c0000007f1d0f970] [c0000000002a4604] page_referenced+0x184/0x200
[c0000007f1d0fa00] [c00000000025efac] shrink_active_list+0x24c/0x580
[c0000007f1d0fb00] [c00000000025f664] shrink_node_memcg+0x384/0x7d0
[c0000007f1d0fc00] [c00000000025fc0c] shrink_node+0x15c/0x3f0
[c0000007f1d0fcb0] [c0000000002611a0] kswapd+0x370/0x9a0
[c0000007f1d0fdc0] [c0000000001188cc] kthread+0x15c/0x1a0
[c0000007f1d0fe30] [c00000000000bca0] ret_from_kernel_thread+0x5c/0xbc
Instruction dump:
713e0002 41820088 2fbd0000 419efcdc 892d028c 2f890000 40de026c 7c2004ac
39200000 9138005c 38600000 4bfff9e4 <7d29502a> 7923cfe3 4082fea4 e87f0000
---[ end trace b21abd323ba17f9d ]---
--
Regards,
Abdul Haleem
IBM Linux Technology Centre