2014-02-15 12:17:49

by John

[permalink] [raw]
Subject: [BUG] unable to handle kernel NULL pointer dereference

When booting into linux v3.13.3, I am unable to mount an nfs share on this particular hardware. I get the same problem using v3.12.11. Only the 3.10.x series allows normal operation. Partial dmesg output shown inline, additional logs available upon request.

PLEASE cc me on my replies as I am not subscribed to lkml.

Hardware: Athlon XP 3200+ on an NVIDIA nForce2 Ultra 400 motherboard.
Distro: Arch Linux i686.

% dmesg
...
[ 137.616014] NFS: Registering the id_resolver key type
[ 137.616036] Key type id_resolver registered
[ 137.616038] Key type id_legacy registered
[ 137.686758] BUG: unable to handle kernel NULL pointer dereference at 00000858
[ 137.689996] IP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss]
[ 137.689996] *pde = 00000000
[ 137.689996] Oops: 0000 [#1] PREEMPT SMP
[ 137.689996] Modules linked in: rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 asb100 hwmon_vid snd_wavefront ir_mce_kbd_decoder ir_lirc_codec ir_rc5_sz_decoder ir_sony_decoder lirc_dev ir_rc5_decoder ir_jvc_decoder ir_sanyo_decoder ir_rc6_decoder ir_nec_decoder rc_streamzap streamzap mousedev snd_cs4236 rc_core snd_intel8x0 snd_wss_lib snd_opl3_lib snd_hwdep snd_ac97_codec evdev snd_mpu401 ac97_bus snd_mpu401_uart snd_pcm snd_rawmidi snd_page_alloc snd_seq_device snd_timer snd pcspkr skge shpchp i2c_nforce2 i2c_core soundcore ns558 gameport processor button nvidia_agp agpgart nfs lockd sunrpc fscache ext4 crc16 mbcache jbd2 hid_generic usbhid hid sr_mod cdrom sd_mod ata_generic pata_acpi sata_sil pata_amd libata ehci_pci ohci_pci ohci_hcd ehci_hcd scsi_mod usbcore usb_common
[ 137.689996] CPU: 0 PID: 534 Comm: rpc.gssd Not tainted 3.13.3-1-ARCH #1
[ 137.689996] Hardware name: ASUSTeK Computer INC. A7N8X-E/A7N8X-E, BIOS ASUS A7N8X-E Deluxe ACPI BIOS Rev 1013 11/12/2004
[ 137.689996] task: f4633210 ti: f568e000 task.ti: f568e000
[ 137.689996] EIP: 0060:[<f8aa2d99>] EFLAGS: 00010202 CPU: 0
[ 137.689996] EIP is at put_pipe_version+0x19/0x60 [auth_rpcgss]
[ 137.689996] EAX: f4633210 EBX: 00000001 ECX: f56efca8 EDX: 00000296
[ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
[ 137.689996] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
[ 137.689996] CR0: 8005003b CR2: 00000858 CR3: 34523000 CR4: 000007d0
[ 137.689996] Stack:
[ 137.689996] f56efc00 f6c64f78 f568fef4 f8aa2e05 00000010 f568ff40 f8aa3b38 00000374
[ 137.689996] 00000080 b74dde40 f4644a80 f568ff30 00000246 f8ac1080 ffff41c9 f6c64f78
[ 137.689996] fffffff3 00000010 f4460140 f44d5820 f44d5810 f53df7ec f57595a0 f8aa93e8
[ 137.689996] Call Trace:
[ 137.689996] [<f8aa2e05>] gss_release_msg+0x25/0x70 [auth_rpcgss]
[ 137.689996] [<f8aa3b38>] gss_pipe_downcall+0x208/0x4b0 [auth_rpcgss]
[ 137.689996] [<f8a2f9ab>] rpc_pipe_write+0x3b/0x60 [sunrpc]
[ 137.689996] [<f8a2f970>] ? rpc_pipe_poll+0x90/0x90 [sunrpc]
[ 137.689996] [<c1156bd5>] vfs_write+0x95/0x1c0
[ 137.689996] [<c11572a1>] SyS_write+0x51/0x90
[ 137.689996] [<c145cc0d>] sysenter_do_call+0x12/0x28
[ 137.689996] Code: f8 e8 4f b8 9a c8 31 c0 eb c6 90 8d b4 26 00 00 00 00 55 89 e5 56 53 3e 8d 74 26 00 8b 1d 28 e9 a3 f8 89 c6 e8 59 64 5f c8 85 db <8b> 86 58 08 00 00 74 3a 3b 18 77 36 8b 5c 98 08 e8 32 66 5f c8
[ 137.689996] EIP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss] SS:ESP 0068:f568fee0
[ 137.689996] CR2: 0000000000000858
[ 138.578433] ---[ end trace 3dcb8d5c35b64fbd ]---
[ 142.979263] type=1006 audit(1392415950.632:4): pid=540 uid=0 old auid=4294967295 new auid=1000 old ses=4294967295 new ses=3 res=1


2014-02-15 20:08:40

by John

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

> When booting into linux v3.13.3, I am unable to mount an nfs share on this

> particular hardware. I get the same problem using v3.12.11. Only the 3.10.x
> series allows normal operation. Partial dmesg output shown inline, additional
> logs available upon request.
>
> PLEASE cc me on my replies as I am not subscribed to lkml.
>
> Hardware: Athlon XP 3200+ on an NVIDIA nForce2 Ultra 400 motherboard.
> Distro: Arch Linux i686.
>
> % dmesg
> ...
> [ 137.616014] NFS: Registering the id_resolver key type
> [ 137.616036] Key type id_resolver registered
> [ 137.616038] Key type id_legacy registered
> [ 137.686758] BUG: unable to handle kernel NULL pointer dereference at 00000858
> [ 137.689996] IP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss]
> [ 137.689996] *pde = 00000000
> [ 137.689996] Oops: 0000 [#1] PREEMPT SMP
> [ 137.689996] Modules linked in: rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4
> asb100 hwmon_vid snd_wavefront ir_mce_kbd_decoder ir_lirc_codec
> ir_rc5_sz_decoder ir_sony_decoder lirc_dev ir_rc5_decoder ir_jvc_decoder
> ir_sanyo_decoder ir_rc6_decoder ir_nec_decoder rc_streamzap streamzap mousedev
> snd_cs4236 rc_core snd_intel8x0 snd_wss_lib snd_opl3_lib snd_hwdep
> snd_ac97_codec evdev snd_mpu401 ac97_bus snd_mpu401_uart snd_pcm snd_rawmidi
> snd_page_alloc snd_seq_device snd_timer snd pcspkr skge shpchp i2c_nforce2
> i2c_core soundcore ns558 gameport processor button nvidia_agp agpgart nfs lockd
> sunrpc fscache ext4 crc16 mbcache jbd2 hid_generic usbhid hid sr_mod cdrom
> sd_mod ata_generic pata_acpi sata_sil pata_amd libata ehci_pci ohci_pci ohci_hcd
> ehci_hcd scsi_mod usbcore usb_common
> [ 137.689996] CPU: 0 PID: 534 Comm: rpc.gssd Not tainted 3.13.3-1-ARCH #1
> [ 137.689996] Hardware name: ASUSTeK Computer INC. A7N8X-E/A7N8X-E, BIOS ASUS
> A7N8X-E Deluxe ACPI BIOS Rev 1013 11/12/2004
> [ 137.689996] task: f4633210 ti: f568e000 task.ti: f568e000
> [ 137.689996] EIP: 0060:[<f8aa2d99>] EFLAGS: 00010202 CPU: 0
> [ 137.689996] EIP is at put_pipe_version+0x19/0x60 [auth_rpcgss]
> [ 137.689996] EAX: f4633210 EBX: 00000001 ECX: f56efca8 EDX: 00000296
> [ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
> [ 137.689996] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
> [ 137.689996] CR0: 8005003b CR2: 00000858 CR3: 34523000 CR4: 000007d0
> [ 137.689996] Stack:
> [ 137.689996] f56efc00 f6c64f78 f568fef4 f8aa2e05 00000010 f568ff40 f8aa3b38
> 00000374
> [ 137.689996] 00000080 b74dde40 f4644a80 f568ff30 00000246 f8ac1080 ffff41c9
> f6c64f78
> [ 137.689996] fffffff3 00000010 f4460140 f44d5820 f44d5810 f53df7ec f57595a0
> f8aa93e8
> [ 137.689996] Call Trace:
> [ 137.689996] [<f8aa2e05>] gss_release_msg+0x25/0x70 [auth_rpcgss]
> [ 137.689996] [<f8aa3b38>] gss_pipe_downcall+0x208/0x4b0 [auth_rpcgss]
> [ 137.689996] [<f8a2f9ab>] rpc_pipe_write+0x3b/0x60 [sunrpc]
> [ 137.689996] [<f8a2f970>] ? rpc_pipe_poll+0x90/0x90 [sunrpc]
> [ 137.689996] [<c1156bd5>] vfs_write+0x95/0x1c0
> [ 137.689996] [<c11572a1>] SyS_write+0x51/0x90
> [ 137.689996] [<c145cc0d>] sysenter_do_call+0x12/0x28
> [ 137.689996] Code: f8 e8 4f b8 9a c8 31 c0 eb c6 90 8d b4 26 00 00 00 00 55 89
> e5 56 53 3e 8d 74 26 00 8b 1d 28 e9 a3 f8 89 c6 e8 59 64 5f c8 85 db <8b>
> 86 58 08 00 00 74 3a 3b 18 77 36 8b 5c 98 08 e8 32 66 5f c8
> [ 137.689996] EIP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss]
> SS:ESP 0068:f568fee0
> [ 137.689996] CR2: 0000000000000858
> [ 138.578433] ---[ end trace 3dcb8d5c35b64fbd ]---
> [ 142.979263] type=1006 audit(1392415950.632:4): pid=540 uid=0 old
> auid=4294967295 new auid=1000 old ses=4294967295 new ses=3 res=1


I should add that if I test the same kernel version (v3.13.3 compiled for i686) on a similar machine of the same vintage, there is not a problem. When I looked into the `lspci -v` output on the machine that has the problems, I found that it seems to be related to the skge driver as shown below; the similar machine that does not have the problem is using the forcedeth driver so I am hypothesizing that the error is with the skge driver.

01:04.0 Ethernet controller: Marvell Technology Group Ltd. 88E8001 Gigabit Ethernet Controller (rev 13)
        Subsystem: ASUSTeK Computer Inc. Marvell 88E8001 Gigabit Ethernet Controller (Asus)
        Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 17
        Memory at d5000000 (32-bit, non-prefetchable) [size=16K]
        I/O ports at a000 [size=256]
        [virtual] Expansion ROM at 80080000 [disabled] [size=128K]
        Capabilities: [48] Power Management version 2
        Capabilities: [50] Vital Product Data
        Kernel driver in use: skge
        Kernel modules: skge

2014-02-15 20:30:21

by Borislav Petkov

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

If I'd have to guess, that's trying to rcu deref that struct net_generic
*ng in net_generic() but this is only guesswork as I don't have your
.config.

Anyway, adding some more people to CC.

[ 137.689996] Code: f8 e8 4f b8 9a c8 31 c0 eb c6 90 8d b4 26 00 00 00 00 55 89 e5 56 53 3e 8d 74 26 00 8b 1d 28 e9 a3 f8 89 c6 e8 59 64 5f c8 85 db <8b> 86 58 08 00 00 74 3a 3b 18 77 36 8b 5c 98 08 e8 32 66 5f c8
All code
========
0: f8 clc
1: e8 4f b8 9a c8 call 0xc89ab855
6: 31 c0 xor %eax,%eax
8: eb c6 jmp 0xffffffd0
a: 90 nop
b: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi
12: 55 push %ebp
13: 89 e5 mov %esp,%ebp
15: 56 push %esi
16: 53 push %ebx
17: 3e 8d 74 26 00 lea %ds:0x0(%esi,%eiz,1),%esi
1c: 8b 1d 28 e9 a3 f8 mov 0xf8a3e928,%ebx
22: 89 c6 mov %eax,%esi
24: e8 59 64 5f c8 call 0xc85f6482
29: 85 db test %ebx,%ebx
2b:* 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping instruction
31: 74 3a je 0x6d
33: 3b 18 cmp (%eax),%ebx
35: 77 36 ja 0x6d
37: 8b 5c 98 08 mov 0x8(%eax,%ebx,4),%ebx
3b: e8 32 66 5f c8 call 0xc85f6672

Code starting with the faulting instruction
===========================================
0: 8b 86 58 08 00 00 mov 0x858(%esi),%eax
6: 74 3a je 0x42
8: 3b 18 cmp (%eax),%ebx
a: 77 36 ja 0x42
c: 8b 5c 98 08 mov 0x8(%eax,%ebx,4),%ebx
10: e8 32 66 5f c8 call 0xc85f6647


On Sat, Feb 15, 2014 at 12:08:37PM -0800, John wrote:
> > When booting into linux v3.13.3, I am unable to mount an nfs share on this 
>
> > particular hardware.  I get the same problem using v3.12.11.  Only the 3.10.x
> > series allows normal operation.  Partial dmesg output shown inline, additional
> > logs available upon request.
> >
> > PLEASE cc me on my replies as I am not subscribed to lkml.
> >
> > Hardware: Athlon XP 3200+ on an NVIDIA nForce2 Ultra 400 motherboard.
> > Distro: Arch Linux i686.
> >
> > % dmesg
> > ...
> > [ 137.616014] NFS: Registering the id_resolver key type
> > [ 137.616036] Key type id_resolver registered
> > [ 137.616038] Key type id_legacy registered
> > [ 137.686758] BUG: unable to handle kernel NULL pointer dereference at 00000858
> > [ 137.689996] IP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss]
> > [ 137.689996] *pde = 00000000 
> > [ 137.689996] Oops: 0000 [#1] PREEMPT SMP 
> > [ 137.689996] Modules linked in: rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4
> > asb100 hwmon_vid snd_wavefront ir_mce_kbd_decoder ir_lirc_codec
> > ir_rc5_sz_decoder ir_sony_decoder lirc_dev ir_rc5_decoder ir_jvc_decoder
> > ir_sanyo_decoder ir_rc6_decoder ir_nec_decoder rc_streamzap streamzap mousedev
> > snd_cs4236 rc_core snd_intel8x0 snd_wss_lib snd_opl3_lib snd_hwdep
> > snd_ac97_codec evdev snd_mpu401 ac97_bus snd_mpu401_uart snd_pcm snd_rawmidi
> > snd_page_alloc snd_seq_device snd_timer snd pcspkr skge shpchp i2c_nforce2
> > i2c_core soundcore ns558 gameport processor button nvidia_agp agpgart nfs lockd
> > sunrpc fscache ext4 crc16 mbcache jbd2 hid_generic usbhid hid sr_mod cdrom
> > sd_mod ata_generic pata_acpi sata_sil pata_amd libata ehci_pci ohci_pci ohci_hcd
> > ehci_hcd scsi_mod usbcore usb_common
> > [ 137.689996] CPU: 0 PID: 534 Comm: rpc.gssd Not tainted 3.13.3-1-ARCH #1
> > [ 137.689996] Hardware name: ASUSTeK Computer INC. A7N8X-E/A7N8X-E, BIOS ASUS
> > A7N8X-E Deluxe ACPI BIOS Rev 1013 11/12/2004
> > [ 137.689996] task: f4633210 ti: f568e000 task.ti: f568e000
> > [ 137.689996] EIP: 0060:[<f8aa2d99>] EFLAGS: 00010202 CPU: 0
> > [ 137.689996] EIP is at put_pipe_version+0x19/0x60 [auth_rpcgss]
> > [ 137.689996] EAX: f4633210 EBX: 00000001 ECX: f56efca8 EDX: 00000296
> > [ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
> > [ 137.689996] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
> > [ 137.689996] CR0: 8005003b CR2: 00000858 CR3: 34523000 CR4: 000007d0
> > [ 137.689996] Stack:
> > [ 137.689996] f56efc00 f6c64f78 f568fef4 f8aa2e05 00000010 f568ff40 f8aa3b38
> > 00000374
> > [ 137.689996] 00000080 b74dde40 f4644a80 f568ff30 00000246 f8ac1080 ffff41c9
> > f6c64f78
> > [ 137.689996] fffffff3 00000010 f4460140 f44d5820 f44d5810 f53df7ec f57595a0
> > f8aa93e8
> > [ 137.689996] Call Trace:
> > [ 137.689996] [<f8aa2e05>] gss_release_msg+0x25/0x70 [auth_rpcgss]
> > [ 137.689996] [<f8aa3b38>] gss_pipe_downcall+0x208/0x4b0 [auth_rpcgss]
> > [ 137.689996] [<f8a2f9ab>] rpc_pipe_write+0x3b/0x60 [sunrpc]
> > [ 137.689996] [<f8a2f970>] ? rpc_pipe_poll+0x90/0x90 [sunrpc]
> > [ 137.689996] [<c1156bd5>] vfs_write+0x95/0x1c0
> > [ 137.689996] [<c11572a1>] SyS_write+0x51/0x90
> > [ 137.689996] [<c145cc0d>] sysenter_do_call+0x12/0x28
> > [ 137.689996] Code: f8 e8 4f b8 9a c8 31 c0 eb c6 90 8d b4 26 00 00 00 00 55 89
> > e5 56 53 3e 8d 74 26 00 8b 1d 28 e9 a3 f8 89 c6 e8 59 64 5f c8 85 db <8b>
> > 86 58 08 00 00 74 3a 3b 18 77 36 8b 5c 98 08 e8 32 66 5f c8
> > [ 137.689996] EIP: [<f8aa2d99>] put_pipe_version+0x19/0x60 [auth_rpcgss]
> > SS:ESP 0068:f568fee0
> > [ 137.689996] CR2: 0000000000000858
> > [ 138.578433] ---[ end trace 3dcb8d5c35b64fbd ]---
> > [ 142.979263] type=1006 audit(1392415950.632:4): pid=540 uid=0 old
> > auid=4294967295 new auid=1000 old ses=4294967295 new ses=3 res=1
>
>
> I should add that if I test the same kernel version (v3.13.3 compiled for i686) on a similar machine of the same vintage, there is not a problem.  When I looked into the `lspci -v` output on the machine that has the problems, I found that it seems to be related to the skge driver as shown below; the similar machine that does not have the problem is using the forcedeth driver so I am hypothesizing that the error is with the skge driver.
>
> 01:04.0 Ethernet controller: Marvell Technology Group Ltd. 88E8001 Gigabit Ethernet Controller (rev 13)
>         Subsystem: ASUSTeK Computer Inc. Marvell 88E8001 Gigabit Ethernet Controller (Asus)
>         Flags: bus master, 66MHz, medium devsel, latency 32, IRQ 17
>         Memory at d5000000 (32-bit, non-prefetchable) [size=16K]
>         I/O ports at a000 [size=256]
>         [virtual] Expansion ROM at 80080000 [disabled] [size=128K]
>         Capabilities: [48] Power Management version 2
>         Capabilities: [50] Vital Product Data
>         Kernel driver in use: skge
>         Kernel modules: skge
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2014-02-15 21:07:24

by John

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference



----- Original Message -----
> From: Borislav Petkov <>
> Sent: Saturday, February 15, 2014 3:30 PM
> Subject: Re: [BUG] unable to handle kernel NULL pointer dereference
>
> If I'd have to guess, that's trying to rcu deref that struct net_generic
> *ng in net_generic() but this is only guesswork as I don't have your
> .config.
>
> Anyway, adding some more people to CC.
>


Thanks for the reply, Boris. The .config is unmodified from the Arch Distro default for 3.13.3-1 which can be found here: http://pastebin.com/LPGZ8ZqA

2014-02-15 23:25:13

by Borislav Petkov

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

On Sat, Feb 15, 2014 at 01:04:22PM -0800, John wrote:
> Thanks for the reply, Boris.  The .config is unmodified
> from the Arch Distro default for 3.13.3-1 which can be found
> here: http://pastebin.com/LPGZ8ZqA

Yep, it is that struct net *net argument to put_pipe_version() which is NULL:

12: 55 push %ebp
13: 89 e5 mov %esp,%ebp
15: 56 push %esi
16: 53 push %ebx
17: 3e 8d 74 26 00 lea %ds:0x0(%esi,%eiz,1),%esi
1c: 8b 1d 28 e9 a3 f8 mov 0xf8a3e928,%ebx
22: 89 c6 mov %eax,%esi
24: e8 59 64 5f c8 call 0xc85f6482
29: 85 db test %ebx,%ebx
2b:* 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping instruction

put_pipe_version:
pushl %ebp #
movl %esp, %ebp #,
pushl %esi #
pushl %ebx #
call mcount
movl sunrpc_net_id, %ebx # sunrpc_net_id, sunrpc_net_id.130
movl %eax, %esi # net, net
call __rcu_read_lock #
testl %ebx, %ebx # sunrpc_net_id.130
movl 2136(%esi), %eax # MEM[(struct net_generic * const *)net_4(D) + 2136B], ng <-- trapping insn


[ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
^^^^^^^^

Here's the c/asm interleaved version:

static void put_pipe_version(struct net *net)
{
d80: 55 push %ebp
d81: 89 e5 mov %esp,%ebp
d83: 56 push %esi
d84: 53 push %ebx
d85: e8 fc ff ff ff call d86 <put_pipe_version+0x6>
d86: R_386_PC32 mcount
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
d8a: 8b 1d 00 00 00 00 mov 0x0,%ebx
d8c: R_386_32 sunrpc_net_id
spin_unlock(&pipe_version_lock);
return ret;
}

static void put_pipe_version(struct net *net)
{
d90: 89 c6 mov %eax,%esi
* block, but only when acquiring spinlocks that are subject to priority
* inheritance.
*/
static inline void rcu_read_lock(void)
{
__rcu_read_lock();
d92: e8 fc ff ff ff call d93 <put_pipe_version+0x13>
d93: R_386_PC32 __rcu_read_lock
struct net_generic *ng;
void *ptr;

rcu_read_lock();
ng = rcu_dereference(net->gen);
BUG_ON(id == 0 || id > ng->len);
d97: 85 db test %ebx,%ebx
{
struct net_generic *ng;
void *ptr;

rcu_read_lock();
ng = rcu_dereference(net->gen);
d99: 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping insn


I guess you could avoid the crash if you did

if (!net)
return;

in put_pipe_version() but this hardly is the right solution. Someone
else has to make sense of this thing, not me. :-)

HTH.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2014-02-16 02:16:50

by John

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference





----- Original Message -----
> From: Borislav Petkov <>
> Sent: Saturday, February 15, 2014 6:25 PM
> Subject: Re: [BUG] unable to handle kernel NULL pointer dereference
>
> On Sat, Feb 15, 2014 at 01:04:22PM -0800, John wrote:
>> Thanks for the reply, Boris. The .config is unmodified
>> from the Arch Distro default for 3.13.3-1 which can be found
>> here: http://pastebin.com/LPGZ8ZqA
>
> Yep, it is that struct net *net argument to put_pipe_version() which is NULL:
>
> 12: 55 push %ebp
> 13: 89 e5 mov %esp,%ebp
> 15: 56 push %esi
> 16: 53 push %ebx
> 17: 3e 8d 74 26 00 lea %ds:0x0(%esi,%eiz,1),%esi
> 1c: 8b 1d 28 e9 a3 f8 mov 0xf8a3e928,%ebx
> 22: 89 c6 mov %eax,%esi
> 24: e8 59 64 5f c8 call 0xc85f6482
> 29: 85 db test %ebx,%ebx
> 2b:* 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping
> instruction
>
> put_pipe_version:
> pushl %ebp #
> movl %esp, %ebp #,
> pushl %esi #
> pushl %ebx #
> call mcount
> movl sunrpc_net_id, %ebx # sunrpc_net_id, sunrpc_net_id.130
> movl %eax, %esi # net, net
> call __rcu_read_lock #
> testl %ebx, %ebx # sunrpc_net_id.130
> movl 2136(%esi), %eax # MEM[(struct net_generic * const *)net_4(D) +
> 2136B], ng <-- trapping insn
>
>
> [ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
> ^^^^^^^^
>
> Here's the c/asm interleaved version:
>
> static void put_pipe_version(struct net *net)
> {
> d80: 55 push %ebp
> d81: 89 e5 mov %esp,%ebp
> d83: 56 push %esi
> d84: 53 push %ebx
> d85: e8 fc ff ff ff call d86 <put_pipe_version+0x6>
> d86: R_386_PC32 mcount
> struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
> d8a: 8b 1d 00 00 00 00 mov 0x0,%ebx
> d8c: R_386_32 sunrpc_net_id
> spin_unlock(&pipe_version_lock);
> return ret;
> }
>
> static void put_pipe_version(struct net *net)
> {
> d90: 89 c6 mov %eax,%esi
> * block, but only when acquiring spinlocks that are subject to priority
> * inheritance.
> */
> static inline void rcu_read_lock(void)
> {
> __rcu_read_lock();
> d92: e8 fc ff ff ff call d93 <put_pipe_version+0x13>
> d93: R_386_PC32 __rcu_read_lock
> struct net_generic *ng;
> void *ptr;
>
> rcu_read_lock();
> ng = rcu_dereference(net->gen);
> BUG_ON(id == 0 || id > ng->len);
> d97: 85 db test %ebx,%ebx
> {
> struct net_generic *ng;
> void *ptr;
>
> rcu_read_lock();
> ng = rcu_dereference(net->gen);
> d99: 8b 86 58 08 00 00 mov 0x858(%esi),%eax
> <-- trapping insn
>
>
> I guess you could avoid the crash if you did
>
>     if (!net)
>         return;
>
> in put_pipe_version() but this hardly is the right solution. Someone
> else has to make sense of this thing, not me. :-)
>
> HTH.


I hope someone you cc'ed on this understands it. I have no idea what you wrote :)

2014-02-16 17:27:39

by Trond Myklebust

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

Please ensure that you post to the [email protected] when
reporting NFS and RPC related bugs.

On Sun, 2014-02-16 at 00:25 +0100, Borislav Petkov wrote:
> On Sat, Feb 15, 2014 at 01:04:22PM -0800, John wrote:
> > Thanks for the reply, Boris. The .config is unmodified
> > from the Arch Distro default for 3.13.3-1 which can be found
> > here: http://pastebin.com/LPGZ8ZqA
>
> Yep, it is that struct net *net argument to put_pipe_version() which is NULL:
>
> 12: 55 push %ebp
> 13: 89 e5 mov %esp,%ebp
> 15: 56 push %esi
> 16: 53 push %ebx
> 17: 3e 8d 74 26 00 lea %ds:0x0(%esi,%eiz,1),%esi
> 1c: 8b 1d 28 e9 a3 f8 mov 0xf8a3e928,%ebx
> 22: 89 c6 mov %eax,%esi
> 24: e8 59 64 5f c8 call 0xc85f6482
> 29: 85 db test %ebx,%ebx
> 2b:* 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping instruction
>
> put_pipe_version:
> pushl %ebp #
> movl %esp, %ebp #,
> pushl %esi #
> pushl %ebx #
> call mcount
> movl sunrpc_net_id, %ebx # sunrpc_net_id, sunrpc_net_id.130
> movl %eax, %esi # net, net
> call __rcu_read_lock #
> testl %ebx, %ebx # sunrpc_net_id.130
> movl 2136(%esi), %eax # MEM[(struct net_generic * const *)net_4(D) + 2136B], ng <-- trapping insn
>
>
> [ 137.689996] ESI: 00000000 EDI: f56efc00 EBP: f568fee8 ESP: f568fee0
> ^^^^^^^^
>
> Here's the c/asm interleaved version:
>
> static void put_pipe_version(struct net *net)
> {
> d80: 55 push %ebp
> d81: 89 e5 mov %esp,%ebp
> d83: 56 push %esi
> d84: 53 push %ebx
> d85: e8 fc ff ff ff call d86 <put_pipe_version+0x6>
> d86: R_386_PC32 mcount
> struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
> d8a: 8b 1d 00 00 00 00 mov 0x0,%ebx
> d8c: R_386_32 sunrpc_net_id
> spin_unlock(&pipe_version_lock);
> return ret;
> }
>
> static void put_pipe_version(struct net *net)
> {
> d90: 89 c6 mov %eax,%esi
> * block, but only when acquiring spinlocks that are subject to priority
> * inheritance.
> */
> static inline void rcu_read_lock(void)
> {
> __rcu_read_lock();
> d92: e8 fc ff ff ff call d93 <put_pipe_version+0x13>
> d93: R_386_PC32 __rcu_read_lock
> struct net_generic *ng;
> void *ptr;
>
> rcu_read_lock();
> ng = rcu_dereference(net->gen);
> BUG_ON(id == 0 || id > ng->len);
> d97: 85 db test %ebx,%ebx
> {
> struct net_generic *ng;
> void *ptr;
>
> rcu_read_lock();
> ng = rcu_dereference(net->gen);
> d99: 8b 86 58 08 00 00 mov 0x858(%esi),%eax <-- trapping insn
>
>
> I guess you could avoid the crash if you did
>
> if (!net)
> return;
>
> in put_pipe_version() but this hardly is the right solution. Someone
> else has to make sense of this thing, not me. :-)

Does the following patch help?

8<-------------------------------------------------------------------
From 0e57b109cd7b17d6e6f16c3454427372a583b18a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <[email protected]>
Date: Sun, 16 Feb 2014 12:14:13 -0500
Subject: [PATCH] SUNRPC: Ensure that gss_auth isn't freed before its upcall
messages

Fix a race in which the RPC client is shutting down while the
gss daemon is processing a downcall. If the RPC client manages to
shut down before the gss daemon is done, then the struct gss_auth
used in gss_release_msg() may have already been freed.

Link: http://lkml.kernel.org/r/[email protected]
Reported-by: John <[email protected]>
Reported-by: Borislav Petkov <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/auth_gss/auth_gss.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 44a61e8fda6f..1ba1fd114912 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -108,6 +108,7 @@ struct gss_auth {
static DEFINE_SPINLOCK(pipe_version_lock);
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
+static void gss_put_auth(struct gss_auth *gss_auth);

static void gss_free_ctx(struct gss_cl_ctx *);
static const struct rpc_pipe_ops gss_upcall_ops_v0;
@@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
+ gss_put_auth(gss_msg->auth);
kfree(gss_msg);
}

@@ -500,6 +502,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
if (err)
goto err_free_msg;
};
+ kref_get(&gss_auth->kref);
return gss_msg;
err_free_msg:
kfree(gss_msg);
@@ -1064,6 +1067,12 @@ gss_free_callback(struct kref *kref)
}

static void
+gss_put_auth(struct gss_auth *gss_auth)
+{
+ kref_put(&gss_auth->kref, gss_free_callback);
+}
+
+static void
gss_destroy(struct rpc_auth *auth)
{
struct gss_auth *gss_auth = container_of(auth,
@@ -1084,7 +1093,7 @@ gss_destroy(struct rpc_auth *auth)
gss_auth->gss_pipe[1] = NULL;
rpcauth_destroy_credcache(auth);

- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}

/*
@@ -1255,7 +1264,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
gss_put_ctx(ctx);
- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}

static void
--
1.8.5.3


--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
[email protected]

2014-02-16 17:35:53

by Borislav Petkov

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

On Sun, Feb 16, 2014 at 12:27:33PM -0500, Trond Myklebust wrote:
> Please ensure that you post to the [email protected] when
> reporting NFS and RPC related bugs.

Sorry, get_maintainer.pl gave it too but far down in an already too
long list and I wasn't sure who to spam so I picked up supporter and
maintainer:

$ ./scripts/get_maintainer.pl -f net/sunrpc/auth_gss/auth_gss.c
"J. Bruce Fields" <[email protected]> (supporter:KERNEL NFSD, SUNR...,commit_signer:3/26=12%)
Trond Myklebust <[email protected]> (maintainer:NFS, SUNRPC, AND...,commit_signer:24/26=92%,authored:14/26=54%,added_lines:394/475=83%,removed_lines:189/215=88%)
"David S. Miller" <[email protected]> (maintainer:NETWORKING [GENERAL])
Andy Adamson <[email protected]> (commit_signer:3/26=12%,authored:3/26=12%,added_lines:56/475=12%)
Chuck Lever <[email protected]> (commit_signer:3/26=12%,authored:3/26=12%)
Jeff Layton <[email protected]> (commit_signer:3/26=12%,authored:3/26=12%,removed_lines:19/215=9%)
[email protected] (open list:KERNEL NFSD, SUNR...)
[email protected] (open list:NETWORKING [GENERAL])
[email protected] (open list)

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2014-02-17 20:19:56

by John

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference





----- Original Message -----
> From: Trond Myklebust <[email protected]>
> To: Borislav Petkov <[email protected]>; Linux NFS Mailing List <[email protected]>
> Cc: John <[email protected]>; lkml <[email protected]>; "[email protected]" <[email protected]>; "[email protected]" <[email protected]>; "[email protected]" <[email protected]>; J. Bruce Fields <[email protected]>
> Sent: Sunday, February 16, 2014 12:27 PM
> Subject: Re: [BUG] unable to handle kernel NULL pointer dereference
>
> Please ensure that you post to the [email protected] when
> reporting NFS and RPC related bugs.
>
> Does the following patch help?
>


Trond. Yes, your patch fixes the regression for me; tested on v3.13.3. I do not know the process by which patches get into the next stable release (minor version). My hope is that once peer-reviewed, this patch gets into the 3.13.4 and since the 3.12 series is not EOL yet, into 3.12.12 as well. Thank you for the time and effort!

2014-02-17 20:31:03

by Borislav Petkov

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference

On Mon, Feb 17, 2014 at 12:12:54PM -0800, John wrote:
> Trond.  Yes, your patch fixes the regression for me; tested on
> v3.13.3,  I do not know the process by which patches get into
> the next stable release (minor version).  My hope is that once
> peer-reviewed, this patch gets into the 3.13.4 and since the 3.12
> series is not EOL yet, into 3.12.12 as well.  Thank you for the time
> and effort!

Basically you say

Tested-by: John <[email protected]>

Trond adds stable to CC, sends it to Linus and it trickles down to
3.12.x and 3.13.x stable.

--
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

2014-02-17 20:41:26

by John

[permalink] [raw]
Subject: Re: [BUG] unable to handle kernel NULL pointer dereference





----- Original Message -----
> From: Trond Myklebust <[email protected]>
> To: Borislav Petkov <[email protected]>; Linux NFS Mailing List <[email protected]>
> Cc: John <[email protected]>; lkml <[email protected]>; "[email protected]" <[email protected]>; "[email protected]" <[email protected]>; "[email protected]" <[email protected]>; J. Bruce Fields <[email protected]>
> Sent: Sunday, February 16, 2014 12:27 PM
> Subject: Re: [BUG] unable to handle kernel NULL pointer dereference
>
> Please ensure that you post to the [email protected] when
> reporting NFS and RPC related bugs.
>
> Does the following patch help?
>
> 8<-------------------------------------------------------------------
> From 0e57b109cd7b17d6e6f16c3454427372a583b18a Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <[email protected]>
> Date: Sun, 16 Feb 2014 12:14:13 -0500
> Subject: [PATCH] SUNRPC: Ensure that gss_auth isn't freed before its upcall
> messages
>
> Fix a race in which the RPC client is shutting down while the
> gss daemon is processing a downcall. If the RPC client manages to
> shut down before the gss daemon is done, then the struct gss_auth
> used in gss_release_msg() may have already been freed.
>
> Link:
> http://lkml.kernel.org/r/[email protected]
> Reported-by: John <[email protected]>
> Reported-by: Borislav Petkov <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> net/sunrpc/auth_gss/auth_gss.c | 13 +++++++++++--
> 1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
> index 44a61e8fda6f..1ba1fd114912 100644
> --- a/net/sunrpc/auth_gss/auth_gss.c
> +++ b/net/sunrpc/auth_gss/auth_gss.c
> @@ -108,6 +108,7 @@ struct gss_auth {
> static DEFINE_SPINLOCK(pipe_version_lock);
> static struct rpc_wait_queue pipe_version_rpc_waitqueue;
> static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
> +static void gss_put_auth(struct gss_auth *gss_auth);
>
> static void gss_free_ctx(struct gss_cl_ctx *);
> static const struct rpc_pipe_ops gss_upcall_ops_v0;
> @@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
> 	if (gss_msg->ctx != NULL)
> 		gss_put_ctx(gss_msg->ctx);
> 	rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
> +	gss_put_auth(gss_msg->auth);
> 	kfree(gss_msg);
> }
>
> @@ -500,6 +502,7 @@ gss_alloc_msg(struct gss_auth *gss_auth,
> 		if (err)
> 			goto err_free_msg;
> 	};
> +	kref_get(&gss_auth->kref);
> 	return gss_msg;
> err_free_msg:
> 	kfree(gss_msg);
> @@ -1064,6 +1067,12 @@ gss_free_callback(struct kref *kref)
> }
>
> static void
> +gss_put_auth(struct gss_auth *gss_auth)
> +{
> +	kref_put(&gss_auth->kref, gss_free_callback);
> +}
> +
> +static void
> gss_destroy(struct rpc_auth *auth)
> {
> 	struct gss_auth *gss_auth = container_of(auth,
> @@ -1084,7 +1093,7 @@ gss_destroy(struct rpc_auth *auth)
> 	gss_auth->gss_pipe[1] = NULL;
> 	rpcauth_destroy_credcache(auth);
>
> -	kref_put(&gss_auth->kref, gss_free_callback);
> +	gss_put_auth(gss_auth);
> }
>
> /*
> @@ -1255,7 +1264,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
> 	call_rcu(&cred->cr_rcu, gss_free_cred_callback);
> 	if (ctx)
> 		gss_put_ctx(ctx);
> -	kref_put(&gss_auth->kref, gss_free_callback);
> +	gss_put_auth(gss_auth);
>
> }
>
> static void
> --


Tested-by: John <[email protected]>


Fixes the problem on 3.13.3 for me (i686). Thank you.