Hi Wensong,
0day kernel testing robot got the below dmesg.
+-------------------------------------------------------+----+
| boot_successes | 26 |
| boot_failures | 4 |
| BUG:unable_to_handle_kernel_NULL_pointer_dereference | 4 |
| Oops | 4 |
| EIP_is_at_ip_vs_stop_estimator | 4 |
| Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 4 |
| backtrace:cleanup_net | 4 |
+-------------------------------------------------------+----+
[child0:2725] process_vm_readv (347) returned ENOSYS, marking as inactive.
[child0:2725] uid changed! Was: 0, now -788547075
Bailing main loop. Exit reason: UID changed.
[ 12.182233] BUG: unable to handle kernel NULL pointer dereference at 00000004
[ 12.183011] IP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e
[ 12.183011] *pdpt = 0000000000000000 *pde = f000ff53f000ff53
[ 12.183011] Oops: 0002 [#1] DEBUG_PAGEALLOC
[ 12.183011] Modules linked in:
[ 12.183011] CPU: 0 PID: 57 Comm: kworker/u2:1 Not tainted 3.15.0-rc8 #1
[ 12.183011] Workqueue: netns cleanup_net
[ 12.183011] task: 528773f0 ti: 52878000 task.ti: 52878000
[ 12.183011] EIP: 0060:[<4c2f6567>] EFLAGS: 00010206 CPU: 0
[ 12.183011] EIP is at ip_vs_stop_estimator+0x20/0x3e
[ 12.183011] EAX: 00000000 EBX: 51c39a54 ECX: 00000000 EDX: 00000000
[ 12.183011] ESI: 51c39bf0 EDI: 51c399cc EBP: 52879e70 ESP: 52879e68
[ 12.183011] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
[ 12.183011] CR0: 8005003b CR2: 00000004 CR3: 12fbe000 CR4: 000006b0
[ 12.183011] DR0: 080cb000 DR1: c0100220 DR2: 00000000 DR3: 00000000
[ 12.183011] DR6: ffff0ff0 DR7: 00000600
[ 12.183011] Stack:
[ 12.183011] 51c39800 51c3d340 52879e88 4c2f3400 51c39b14 51c3d340 4c7630bc 52879eb8
[ 12.183011] 52879e94 4c2ef3b2 51c3d340 52879ea8 4c2b7509 4c7630bc 52879eb8 00000000
[ 12.183011] 52879ecc 4c2b7c5b 51c3d370 51c3d370 51c3d378 51c3d378 52817b80 00000000
[ 12.183011] Call Trace:
[ 12.183011] [<4c2f3400>] ip_vs_control_net_cleanup+0x8b/0x9c
[ 12.183011] [<4c2ef3b2>] __ip_vs_cleanup+0x27/0x3b
[ 12.183011] [<4c2b7509>] ops_exit_list+0x2e/0x3b
[ 12.183011] [<4c2b7c5b>] cleanup_net+0xbf/0x138
[ 12.183011] [<4be33e6a>] process_one_work+0x1c5/0x2dc
[ 12.183011] [<4be33de7>] ? process_one_work+0x142/0x2dc
[ 12.183011] [<4be348cc>] worker_thread+0x126/0x1c3
[ 12.183011] [<4be347a6>] ? rescuer_thread+0x203/0x203
[ 12.183011] [<4be37e01>] kthread+0x86/0x8b
[ 12.183011] [<4c41b380>] ret_from_kernel_thread+0x20/0x30
[ 12.183011] [<4be37d7b>] ? init_completion+0x1e/0x1e
[ 12.183011] Code: e8 0b 40 12 00 5b 5b 5e 5f 5d c3 55 89 e5 56 53 8b b0 38 09 00 00 89 d3 81 c6 f0 03 00 00 89 f0 e8 f2 3d 12 00 8b 43 34 8b 53 30 <89> 42 04 89 10 89 f0 c7 43 30 00 01 10 00 c7 43 34 00 02 20 00
[ 12.183011] EIP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e SS:ESP 0068:52879e68
[ 12.183011] CR2: 0000000000000004
[ 12.183011] ---[ end trace ac697d9f45d2a83f ]---
[ 12.183011] Kernel panic - not syncing: Fatal exception in interrupt
git bisect bad fad01e866afdbe01a1f3ec06a39c3a8b9e197014 # 15:45 0- 5 Linux 3.15-rc8
git bisect bad fad01e866afdbe01a1f3ec06a39c3a8b9e197014 # 15:45 0- 5 Linux 3.15-rc8
git bisect bad 455c6fdbd219161bd09b1165f11699d6d73de11c # 15:50 18- 1 Linux 3.14
git bisect bad d8ec26d7f8287f5788a494f56e8814210f0e64be # 15:53 3- 1 Linux 3.13
git bisect bad 5e01dc7b26d9f24f39abace5da98ccbd6a5ceb52 # 16:04 18- 1 Linux 3.12
git bisect bad 6e4664525b1db28f8c4e1130957f70a94c19213e # 16:10 1- 1 Linux 3.11
git bisect bad 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 # 16:13 5- 2 Linux 3.10
git bisect bad c1be5a5b1b355d40e6cf79cc979eb66dafa24ad1 # 16:25 37- 6 Linux 3.9
git bisect bad 19f949f52599ba7c3f67a5897ac6be14bfcb1200 # 16:27 28- 32 Linux 3.8
git bisect bad 29594404d7fe73cd80eaa4ee8c43dcc53970c60e # 16:35 41- 39 Linux 3.7
git bisect bad a0d271cbfed1dd50278c6b06bead3d00ba0a88f9 # 16:41 17- 19 Linux 3.6
git bisect bad 28a33cbc24e4256c143dce96c7d93bf423229f92 # 16:45 149- 104 Linux 3.5
git bisect bad 76e10d158efb6d4516018846f60c2ab5501900bc # 16:48 20- 21 Linux 3.4
git bisect bad c16fa4f2ad19908a47c63d8fa436a1178438c7e7 # 16:50 13- 14 Linux 3.3
git bisect bad 805a6af8dba5dfdd35ec35dc52ec0122400b2610 # 16:53 7- 8 Linux 3.2
git bisect bad c3b92c8787367a8bb53d57d9789b558f1295cc96 # 16:57 17- 18 Linux 3.1
git bisect bad 02f8c6aee8df3cdc935e9bdd4f2d020306035dbe # 17:03 2- 3 Linux 3.0
This script may reproduce the error.
-----------------------------------------------------------------------------
#!/bin/bash
kernel=$1
kvm=(
qemu-system-x86_64 -cpu kvm64 -enable-kvm
-kernel $kernel
-smp 2
-m 256M
-net nic,vlan=0,macaddr=00:00:00:00:00:00,model=virtio
-net user,vlan=0
-net nic,vlan=1,model=e1000
-net user,vlan=1
-boot order=nc
-no-reboot
-watchdog i6300esb
-serial stdio
-display none
-monitor null
)
append=(
debug
sched_debug
apic=debug
ignore_loglevel
sysrq_always_enabled
panic=10
prompt_ramdisk=0
earlyprintk=ttyS0,115200
console=ttyS0,115200
console=tty0
vga=normal
root=/dev/ram0
rw
)
"${kvm[@]}" --append "${append[*]}"
-----------------------------------------------------------------------------
Thanks,
Jet
Hello,
On Wed, 11 Jun 2014, Jet Chen wrote:
> Hi Wensong,
>
> 0day kernel testing robot got the below dmesg.
>
> +-------------------------------------------------------+----+
> | boot_successes | 26 |
> | boot_failures | 4 |
> | BUG:unable_to_handle_kernel_NULL_pointer_dereference | 4 |
> | Oops | 4 |
> | EIP_is_at_ip_vs_stop_estimator | 4 |
> | Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 4 |
> | backtrace:cleanup_net | 4 |
> +-------------------------------------------------------+----+
>
>
> [child0:2725] process_vm_readv (347) returned ENOSYS, marking as inactive.
> [child0:2725] uid changed! Was: 0, now -788547075
> Bailing main loop. Exit reason: UID changed.
> [ 12.182233] BUG: unable to handle kernel NULL pointer dereference at 00000004
> [ 12.183011] IP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e
> [ 12.183011] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 12.183011] Oops: 0002 [#1] DEBUG_PAGEALLOC
> [ 12.183011] Modules linked in:
> [ 12.183011] CPU: 0 PID: 57 Comm: kworker/u2:1 Not tainted 3.15.0-rc8 #1
> [ 12.183011] Workqueue: netns cleanup_net
> [ 12.183011] task: 528773f0 ti: 52878000 task.ti: 52878000
> [ 12.183011] EIP: 0060:[<4c2f6567>] EFLAGS: 00010206 CPU: 0
> [ 12.183011] EIP is at ip_vs_stop_estimator+0x20/0x3e
> [ 12.183011] EAX: 00000000 EBX: 51c39a54 ECX: 00000000 EDX: 00000000
ip_vs_stop_estimator fails at list_del(&est->list),
on the mov %eax,0x4(%edx) instruction, and EDX is 0. This means
the estimator was either never started (its list is initialized with
INIT_LIST_HEAD in ip_vs_start_estimator) or was already stopped
earlier with the same list_del.
At first glance this looks strange, but I think the reason
is that CONFIG_SYSCTL is not set. ip_vs_control_net_cleanup
fails at ip_vs_stop_estimator(net, &ipvs->tot_stats)
because that call is made regardless of CONFIG_SYSCTL, whereas
without CONFIG_SYSCTL ip_vs_start_estimator is never
called.
Can you test the following patch?
ipvs: stop tot_stats estimator only under CONFIG_SYSCTL
The tot_stats estimator is started only when CONFIG_SYSCTL
is defined. But it is stopped without checking CONFIG_SYSCTL.
Fix the crash by moving ip_vs_stop_estimator into
ip_vs_control_net_cleanup_sysctl.
Signed-off-by: Julian Anastasov <[email protected]>
---
net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c42e83d..581a658 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
+ ip_vs_stop_estimator(net, &ipvs->tot_stats);
}
#else
@@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
ip_vs_control_net_cleanup_sysctl(net);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
--
1.9.0
> [ 12.183011] ESI: 51c39bf0 EDI: 51c399cc EBP: 52879e70 ESP: 52879e68
> [ 12.183011] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
> [ 12.183011] CR0: 8005003b CR2: 00000004 CR3: 12fbe000 CR4: 000006b0
> [ 12.183011] DR0: 080cb000 DR1: c0100220 DR2: 00000000 DR3: 00000000
> [ 12.183011] DR6: ffff0ff0 DR7: 00000600
> [ 12.183011] Stack:
> [ 12.183011] 51c39800 51c3d340 52879e88 4c2f3400 51c39b14 51c3d340 4c7630bc 52879eb8
> [ 12.183011] 52879e94 4c2ef3b2 51c3d340 52879ea8 4c2b7509 4c7630bc 52879eb8 00000000
> [ 12.183011] 52879ecc 4c2b7c5b 51c3d370 51c3d370 51c3d378 51c3d378 52817b80 00000000
> [ 12.183011] Call Trace:
> [ 12.183011] [<4c2f3400>] ip_vs_control_net_cleanup+0x8b/0x9c
> [ 12.183011] [<4c2ef3b2>] __ip_vs_cleanup+0x27/0x3b
> [ 12.183011] [<4c2b7509>] ops_exit_list+0x2e/0x3b
> [ 12.183011] [<4c2b7c5b>] cleanup_net+0xbf/0x138
> [ 12.183011] [<4be33e6a>] process_one_work+0x1c5/0x2dc
> [ 12.183011] [<4be33de7>] ? process_one_work+0x142/0x2dc
> [ 12.183011] [<4be348cc>] worker_thread+0x126/0x1c3
> [ 12.183011] [<4be347a6>] ? rescuer_thread+0x203/0x203
> [ 12.183011] [<4be37e01>] kthread+0x86/0x8b
> [ 12.183011] [<4c41b380>] ret_from_kernel_thread+0x20/0x30
> [ 12.183011] [<4be37d7b>] ? init_completion+0x1e/0x1e
> [ 12.183011] Code: e8 0b 40 12 00 5b 5b 5e 5f 5d c3 55 89 e5 56 53 8b b0 38 09 00 00 89 d3 81 c6 f0 03 00 00 89 f0 e8 f2 3d 12 00 8b 43 34 8b 53 30 <89> 42 04 89 10 89 f0 c7 43 30 00 01 10 00 c7 43 34 00 02 20 00
> [ 12.183011] EIP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e SS:ESP 0068:52879e68
> [ 12.183011] CR2: 0000000000000004
> [ 12.183011] ---[ end trace ac697d9f45d2a83f ]---
> [ 12.183011] Kernel panic - not syncing: Fatal exception in interrupt
Regards
--
Julian Anastasov <[email protected]>
On 06/11/2014 01:59 PM, Julian Anastasov wrote:
>
> Hello,
>
> On Wed, 11 Jun 2014, Jet Chen wrote:
>
>> Hi Wensong,
>>
>> 0day kernel testing robot got the below dmesg.
>>
>> +-------------------------------------------------------+----+
>> | boot_successes | 26 |
>> | boot_failures | 4 |
>> | BUG:unable_to_handle_kernel_NULL_pointer_dereference | 4 |
>> | Oops | 4 |
>> | EIP_is_at_ip_vs_stop_estimator | 4 |
>> | Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 4 |
>> | backtrace:cleanup_net | 4 |
>> +-------------------------------------------------------+----+
>>
>>
>> [child0:2725] process_vm_readv (347) returned ENOSYS, marking as inactive.
>> [child0:2725] uid changed! Was: 0, now -788547075
>> Bailing main loop. Exit reason: UID changed.
>> [ 12.182233] BUG: unable to handle kernel NULL pointer dereference at 00000004
>> [ 12.183011] IP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e
>> [ 12.183011] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 12.183011] Oops: 0002 [#1] DEBUG_PAGEALLOC
>> [ 12.183011] Modules linked in:
>> [ 12.183011] CPU: 0 PID: 57 Comm: kworker/u2:1 Not tainted 3.15.0-rc8 #1
>> [ 12.183011] Workqueue: netns cleanup_net
>> [ 12.183011] task: 528773f0 ti: 52878000 task.ti: 52878000
>> [ 12.183011] EIP: 0060:[<4c2f6567>] EFLAGS: 00010206 CPU: 0
>> [ 12.183011] EIP is at ip_vs_stop_estimator+0x20/0x3e
>> [ 12.183011] EAX: 00000000 EBX: 51c39a54 ECX: 00000000 EDX: 00000000
>
> ip_vs_stop_estimator fails at list_del(&est->list)
> on mov %eax,0x4(%edx) instruction and EDX is 0. It means,
> this estimator was never started (initialized with
> INIT_LIST_HEAD in ip_vs_start_estimator) or stopped
> before with the same list_del.
>
> At first look, it is strange but I think the reason
> is the missing CONFIG_SYSCTL. ip_vs_control_net_cleanup
> fails at ip_vs_stop_estimator(net, &ipvs->tot_stats)
> because it is called not depending on CONFIG_SYSCTL but
> without CONFIG_SYSCTL ip_vs_start_estimator was never
> called.
>
> Can you test such patch?
Julian, your patch works. Thanks.
Tested-by: Jet Chen <[email protected]>
>
> ipvs: stop tot_stats estimator only under CONFIG_SYSCTL
>
> The tot_stats estimator is started only when CONFIG_SYSCTL
> is defined. But it is stopped without checking CONFIG_SYSCTL.
> Fix the crash by moving ip_vs_stop_estimator into
> ip_vs_control_net_cleanup_sysctl.
>
> Signed-off-by: Julian Anastasov <[email protected]>
> ---
> net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index c42e83d..581a658 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
> cancel_delayed_work_sync(&ipvs->defense_work);
> cancel_work_sync(&ipvs->defense_work.work);
> unregister_net_sysctl_table(ipvs->sysctl_hdr);
> + ip_vs_stop_estimator(net, &ipvs->tot_stats);
> }
>
> #else
> @@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
> struct netns_ipvs *ipvs = net_ipvs(net);
>
> ip_vs_trash_cleanup(net);
> - ip_vs_stop_estimator(net, &ipvs->tot_stats);
> ip_vs_control_net_cleanup_sysctl(net);
> remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
> remove_proc_entry("ip_vs_stats", net->proc_net);
>
On Wed, Jun 11, 2014 at 04:34:19PM +0800, Jet Chen wrote:
> On 06/11/2014 01:59 PM, Julian Anastasov wrote:
> >
> > Hello,
> >
> > On Wed, 11 Jun 2014, Jet Chen wrote:
> >
> >> Hi Wensong,
> >>
> >> 0day kernel testing robot got the below dmesg.
> >>
> >> +-------------------------------------------------------+----+
> >> | boot_successes | 26 |
> >> | boot_failures | 4 |
> >> | BUG:unable_to_handle_kernel_NULL_pointer_dereference | 4 |
> >> | Oops | 4 |
> >> | EIP_is_at_ip_vs_stop_estimator | 4 |
> >> | Kernel_panic-not_syncing:Fatal_exception_in_interrupt | 4 |
> >> | backtrace:cleanup_net | 4 |
> >> +-------------------------------------------------------+----+
> >>
> >>
> >> [child0:2725] process_vm_readv (347) returned ENOSYS, marking as inactive.
> >> [child0:2725] uid changed! Was: 0, now -788547075
> >> Bailing main loop. Exit reason: UID changed.
> >> [ 12.182233] BUG: unable to handle kernel NULL pointer dereference at 00000004
> >> [ 12.183011] IP: [<4c2f6567>] ip_vs_stop_estimator+0x20/0x3e
> >> [ 12.183011] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 12.183011] Oops: 0002 [#1] DEBUG_PAGEALLOC
> >> [ 12.183011] Modules linked in:
> >> [ 12.183011] CPU: 0 PID: 57 Comm: kworker/u2:1 Not tainted 3.15.0-rc8 #1
> >> [ 12.183011] Workqueue: netns cleanup_net
> >> [ 12.183011] task: 528773f0 ti: 52878000 task.ti: 52878000
> >> [ 12.183011] EIP: 0060:[<4c2f6567>] EFLAGS: 00010206 CPU: 0
> >> [ 12.183011] EIP is at ip_vs_stop_estimator+0x20/0x3e
> >> [ 12.183011] EAX: 00000000 EBX: 51c39a54 ECX: 00000000 EDX: 00000000
> >
> > ip_vs_stop_estimator fails at list_del(&est->list)
> > on mov %eax,0x4(%edx) instruction and EDX is 0. It means,
> > this estimator was never started (initialized with
> > INIT_LIST_HEAD in ip_vs_start_estimator) or stopped
> > before with the same list_del.
> >
> > At first look, it is strange but I think the reason
> > is the missing CONFIG_SYSCTL. ip_vs_control_net_cleanup
> > fails at ip_vs_stop_estimator(net, &ipvs->tot_stats)
> > because it is called not depending on CONFIG_SYSCTL but
> > without CONFIG_SYSCTL ip_vs_start_estimator was never
> > called.
> >
> > Can you test such patch?
>
> Julian, your patch works. Thanks.
>
> Tested-by: Jet Chen <[email protected]>
Thanks, Julian, should I take this one?
I'm assuming this problem has been present for quite a number of releases.
> > ipvs: stop tot_stats estimator only under CONFIG_SYSCTL
> >
> > The tot_stats estimator is started only when CONFIG_SYSCTL
> > is defined. But it is stopped without checking CONFIG_SYSCTL.
> > Fix the crash by moving ip_vs_stop_estimator into
> > ip_vs_control_net_cleanup_sysctl.
> >
> > Signed-off-by: Julian Anastasov <[email protected]>
> > ---
> > net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> > index c42e83d..581a658 100644
> > --- a/net/netfilter/ipvs/ip_vs_ctl.c
> > +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> > @@ -3778,6 +3778,7 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
> > cancel_delayed_work_sync(&ipvs->defense_work);
> > cancel_work_sync(&ipvs->defense_work.work);
> > unregister_net_sysctl_table(ipvs->sysctl_hdr);
> > + ip_vs_stop_estimator(net, &ipvs->tot_stats);
> > }
> >
> > #else
> > @@ -3840,7 +3841,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
> > struct netns_ipvs *ipvs = net_ipvs(net);
> >
> > ip_vs_trash_cleanup(net);
> > - ip_vs_stop_estimator(net, &ipvs->tot_stats);
> > ip_vs_control_net_cleanup_sysctl(net);
> > remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
> > remove_proc_entry("ip_vs_stats", net->proc_net);
> >
>
Hello,
On Thu, 12 Jun 2014, Simon Horman wrote:
> Thanks, Julian, should I take this one?
> I'm assuming this problem has been present for quite a number of releases.
I'll post a new version with extended comments.
Regards
--
Julian Anastasov <[email protected]>
Hello,
On Wed, 11 Jun 2014, Jet Chen wrote:
> On 06/11/2014 01:59 PM, Julian Anastasov wrote:
> >
> > At first look, it is strange but I think the reason
> > is the missing CONFIG_SYSCTL. ip_vs_control_net_cleanup
> > fails at ip_vs_stop_estimator(net, &ipvs->tot_stats)
> > because it is called not depending on CONFIG_SYSCTL but
> > without CONFIG_SYSCTL ip_vs_start_estimator was never
> > called.
> >
> > Can you test such patch?
>
> Julian, your patch works. Thanks.
>
> Tested-by: Jet Chen <[email protected]>
Thanks for the confirmation!
Regards
--
Julian Anastasov <[email protected]>