LinuxLists.cc - [syzbot] WARNING: bad unlock balance in rxrpc_do

[permalink] [raw]

Subject: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

Syzkaller reports bad unlock balance bug as follows:
------------[ cut here ]------------
WARNING: bad unlock balance detected!
syz-executor.0/4094 is trying to release lock (&call->user_mutex) at:
[<ffffffff87c1d8d1>] rxrpc_do_sendmsg+0x851/0x1110 net/rxrpc/sendmsg.c:754
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor.0/4094.

stack backtrace:
[...]
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0x57/0x7d lib/dump_stack.c:106
print_unlock_imbalance_bug include/trace/events/lock.h:69 [inline]
__lock_release kernel/locking/lockdep.c:5333 [inline]
lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5686
__mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:907
rxrpc_do_sendmsg+0x851/0x1110 net/rxrpc/sendmsg.c:754
sock_sendmsg_nosec net/socket.c:714 [inline]
sock_sendmsg+0xab/0xe0 net/socket.c:734
____sys_sendmsg+0x5c2/0x7a0 net/socket.c:2485
___sys_sendmsg+0xdb/0x160 net/socket.c:2539
__sys_sendmsg+0xc3/0x160 net/socket.c:2568
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
[...]
</TASK>
------------------------------------

When kernel wants to send a message through an RxRPC socket in
rxrpc_do_sendmsg(), kernel should hold the call->user_mutex lock,
or it will triggers bug when releasing this lock before returning
from rxrpc_do_sendmsg().

Yet the problem is that during rxrpc_do_sendmsg(), kernel may call
rxrpc_wait_for_tx_window_intr() to wait for space to appear in the
tx queue or a signal to occur. When kernel fails the
mutex_lock_interruptible(), kernel will returns from the
rxrpc_wait_for_tx_window_intr() without acquiring the mutex lock, then
triggers bug when releasing the mutex lock in rxrpc_do_sendmsg().

This patch solves it by acquiring the call->user_mutex lock, when
kernel fails the mutex_lock_interruptible() before returning from
the rxrpc_wait_for_tx_window_intr().

Reported-and-tested-by: [email protected]
Signed-off-by: Hawkins Jiawei <[email protected]>
---
net/rxrpc/sendmsg.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..e13043d357d5 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -53,8 +53,10 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ mutex_lock(&call->user_mutex);
return sock_intr_errno(*timeo);
+ }
}
}

--
2.25.1

2022-08-21 16:51:53

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Sun, Aug 21, 2022 at 6:58 PM Hawkins Jiawei <[email protected]> wrote:
>
> Syzkaller reports bad unlock balance bug as follows:
> ------------[ cut here ]------------
> WARNING: bad unlock balance detected!
> syz-executor.0/4094 is trying to release lock (&call->user_mutex) at:
> [<ffffffff87c1d8d1>] rxrpc_do_sendmsg+0x851/0x1110 net/rxrpc/sendmsg.c:754
> but there are no more locks to release!
>
> other info that might help us debug this:
> no locks held by syz-executor.0/4094.
>
> stack backtrace:
> [...]
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0x57/0x7d lib/dump_stack.c:106
> print_unlock_imbalance_bug include/trace/events/lock.h:69 [inline]
> __lock_release kernel/locking/lockdep.c:5333 [inline]
> lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5686
> __mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:907
> rxrpc_do_sendmsg+0x851/0x1110 net/rxrpc/sendmsg.c:754
> sock_sendmsg_nosec net/socket.c:714 [inline]
> sock_sendmsg+0xab/0xe0 net/socket.c:734
> ____sys_sendmsg+0x5c2/0x7a0 net/socket.c:2485
> ___sys_sendmsg+0xdb/0x160 net/socket.c:2539
> __sys_sendmsg+0xc3/0x160 net/socket.c:2568
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> [...]
> </TASK>
> ------------------------------------
>
> When kernel wants to send a message through an RxRPC socket in
> rxrpc_do_sendmsg(), kernel should hold the call->user_mutex lock,
> or it will triggers bug when releasing this lock before returning
> from rxrpc_do_sendmsg().
>
> Yet the problem is that during rxrpc_do_sendmsg(), kernel may call
> rxrpc_wait_for_tx_window_intr() to wait for space to appear in the
> tx queue or a signal to occur. When kernel fails the
> mutex_lock_interruptible(), kernel will returns from the
> rxrpc_wait_for_tx_window_intr() without acquiring the mutex lock, then
> triggers bug when releasing the mutex lock in rxrpc_do_sendmsg().
>
> This patch solves it by acquiring the call->user_mutex lock, when
> kernel fails the mutex_lock_interruptible() before returning from
> the rxrpc_wait_for_tx_window_intr().
>
> Reported-and-tested-by: [email protected]
> Signed-off-by: Hawkins Jiawei <[email protected]>
> ---
> net/rxrpc/sendmsg.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
> index 1d38e279e2ef..e13043d357d5 100644
> --- a/net/rxrpc/sendmsg.c
> +++ b/net/rxrpc/sendmsg.c
> @@ -53,8 +53,10 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
> trace_rxrpc_transmit(call, rxrpc_transmit_wait);
> mutex_unlock(&call->user_mutex);
> *timeo = schedule_timeout(*timeo);
> - if (mutex_lock_interruptible(&call->user_mutex) < 0)
> + if (mutex_lock_interruptible(&call->user_mutex) < 0) {
> + mutex_lock(&call->user_mutex);

The interruptible version fails to acquire the lock. So why is it okay to
force it to acquire the mutex_lock since we are in the interrupt context?
> return sock_intr_errno(*timeo);
> + }
> }
> }

thanks,
-- Khalid Masum

2022-08-21 17:11:41

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Sun, Aug 21, 2022 at 9:58 PM Khalid Masum <[email protected]> wrote:
>
> On Sun, Aug 21, 2022 at 6:58 PM Hawkins Jiawei <[email protected]> wrote:
> >
> The interruptible version fails to acquire the lock. So why is it okay to
> force it to acquire the mutex_lock since we are in the interrupt context?

Sorry, I mean, won't the function lose its ability of being interruptible?
Since we are forcing it to acquire the lock.
> > return sock_intr_errno(*timeo);
> > + }
> > }
> > }
>
> thanks,
> -- Khalid Masum

2022-08-21 19:30:54

[permalink] [raw]

Subject: Re: [syzbot] WARNING: bad unlock balance in rxrpc_do_sendmsg

Maybe we do not need to lock since no other timer_schedule needs
it.

Test if this fixes the issue.
---
#syz test https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 568035b01cfb
net/rxrpc/sendmsg.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..640e2ab2cc35 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,8 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);

trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
+ return sock_intr_errno(*timeo);
}
}

--
2.37.1

2022-08-21 23:34:13

by syzbot

[permalink] [raw]

Subject: Re: [syzbot] WARNING: bad unlock balance in rxrpc_do_sendmsg

Hello,

syzbot tried to test the proposed patch but the build/boot failed:

: BNEP filters: protocol multicast
[ 11.858163][ T1] Bluetooth: BNEP socket layer initialized
[ 11.864007][ T1] Bluetooth: CMTP (CAPI Emulation) ver 1.0
[ 11.870006][ T1] Bluetooth: CMTP socket layer initialized
[ 11.876108][ T1] Bluetooth: HIDP (Human Interface Emulation) ver 1.2
[ 11.883027][ T1] Bluetooth: HIDP socket layer initialized
[ 11.892802][ T1] NET: Registered PF_RXRPC protocol family
[ 11.898816][ T1] Key type rxrpc registered
[ 11.903368][ T1] Key type rxrpc_s registered
[ 11.909035][ T1] NET: Registered PF_KCM protocol family
[ 11.915665][ T1] lec:lane_module_init: lec.c: initialized
[ 11.921574][ T1] mpoa:atm_mpoa_init: mpc.c: initialized
[ 11.928018][ T1] l2tp_core: L2TP core driver, V2.0
[ 11.933453][ T1] l2tp_ppp: PPPoL2TP kernel driver, V2.0
[ 11.939099][ T1] l2tp_ip: L2TP IP encapsulation support (L2TPv3)
[ 11.945982][ T1] l2tp_netlink: L2TP netlink interface
[ 11.951559][ T1] l2tp_eth: L2TP ethernet pseudowire support (L2TPv3)
[ 11.958477][ T1] l2tp_ip6: L2TP IP encapsulation support for IPv6 (L2TPv3)
[ 11.966178][ T1] NET: Registered PF_PHONET protocol family
[ 11.972631][ T1] 8021q: 802.1Q VLAN Support v1.8
[ 11.987664][ T1] DCCP: Activated CCID 2 (TCP-like)
[ 11.993261][ T1] DCCP: Activated CCID 3 (TCP-Friendly Rate Control)
[ 12.000884][ T1] sctp: Hash tables configured (bind 32/56)
[ 12.009112][ T1] NET: Registered PF_RDS protocol family
[ 12.015798][ T1] Registered RDS/infiniband transport
[ 12.022034][ T1] Registered RDS/tcp transport
[ 12.027101][ T1] tipc: Activated (version 2.0.0)
[ 12.032836][ T1] NET: Registered PF_TIPC protocol family
[ 12.039346][ T1] tipc: Started in single node mode
[ 12.045614][ T1] NET: Registered PF_SMC protocol family
[ 12.052053][ T1] 9pnet: Installing 9P2000 support
[ 12.058118][ T1] NET: Registered PF_CAIF protocol family
[ 12.069167][ T1] NET: Registered PF_IEEE802154 protocol family
[ 12.075802][ T1] Key type dns_resolver registered
[ 12.080965][ T1] Key type ceph registered
[ 12.085898][ T1] libceph: loaded (mon/osd proto 15/24)
[ 12.092926][ T1] batman_adv: B.A.T.M.A.N. advanced 2022.2 (compatibility version 15) loaded
[ 12.101941][ T1] openvswitch: Open vSwitch switching datapath
[ 12.111525][ T1] NET: Registered PF_VSOCK protocol family
[ 12.117955][ T1] mpls_gso: MPLS GSO support
[ 12.134629][ T1] IPI shorthand broadcast: enabled
[ 12.139859][ T1] AVX2 version of gcm_enc/dec engaged.
[ 12.145876][ T1] AES CTR mode by8 optimization enabled
[ 12.155779][ T1] sched_clock: Marking stable (12138076585, 17368961)->(12161137287, -5691741)
[ 12.167112][ T1] registered taskstats version 1
[ 12.234177][ T1] Loading compiled-in X.509 certificates
[ 12.245779][ T1] Loaded X.509 cert 'Build time autogenerated kernel key: 16fa2afaa7210bc2438dbf8447100de29a0bc70d'
[ 12.259289][ T1] zswap: loaded using pool lzo/zbud
[ 12.266413][ T1] debug_vm_pgtable: [debug_vm_pgtable ]: Validating architecture page table helpers
[ 13.815072][ T1] Key type ._fscrypt registered
[ 13.820356][ T1] Key type .fscrypt registered
[ 13.825167][ T1] Key type fscrypt-provisioning registered
[ 13.837059][ T1] kAFS: Red Hat AFS client v0.1 registering.
[ 13.852391][ T1] Btrfs loaded, crc32c=crc32c-intel, assert=on, zoned=yes, fsverity=yes
[ 13.861737][ T1] Key type big_key registered
[ 13.870272][ T1] Key type encrypted registered
[ 13.875671][ T1] ima: No TPM chip found, activating TPM-bypass!
[ 13.882332][ T1] Loading compiled-in module X.509 certificates
[ 13.891979][ T1] Loaded X.509 cert 'Build time autogenerated kernel key: 16fa2afaa7210bc2438dbf8447100de29a0bc70d'
[ 13.904089][ T1] ima: Allocated hash algorithm: sha256
[ 13.909975][ T1] ima: No architecture policies found
[ 13.916111][ T1] evm: Initialising EVM extended attributes:
[ 13.922235][ T1] evm: security.selinux
[ 13.926735][ T1] evm: security.SMACK64 (disabled)
[ 13.931859][ T1] evm: security.SMACK64EXEC (disabled)
[ 13.937528][ T1] evm: security.SMACK64TRANSMUTE (disabled)
[ 13.943548][ T1] evm: security.SMACK64MMAP (disabled)
[ 13.949022][ T1] evm: security.apparmor (disabled)
[ 13.954231][ T1] evm: security.ima
[ 13.958042][ T1] evm: security.capability
[ 13.962504][ T1] evm: HMAC attrs: 0x1
[ 14.046064][ T1] PM: Magic number: 2:748:401
[ 14.051323][ T1] usb usb38-port7: hash matches
[ 14.058853][ T1] printk: console [netcon0] enabled
[ 14.064737][ T1] netconsole: network logging started
[ 14.070553][ T1] gtp: GTP module loaded (pdp ctx size 104 bytes)
[ 14.080150][ T1] rdma_rxe: loaded
[ 14.084458][ T1] cfg80211: Loading compiled-in X.509 certificates for regulatory database
[ 14.096699][ T1] cfg80211: Loaded X.509 cert 'sforshee: 00b28ddf47aef9cea7'
[ 14.105001][ T1] ALSA device list:
[ 14.106011][ T14] platform regulatory.0: Direct firmware load for regulatory.db failed with error -2
[ 14.108906][ T1] #0: Dummy 1
[ 14.108920][ T1] #1: Loopback 1
[ 14.118716][ T14] platform regulatory.0: Falling back to sysfs fallback for: regulatory.db
[ 14.124736][ T1] #2: Virtual MIDI Card 1
[ 14.142829][ T1] md: Waiting for all devices to be available before autodetect
[ 14.150560][ T1] md: If you don't use raid, use raid=noautodetect
[ 14.157094][ T1] md: Autodetecting RAID arrays.
[ 14.162108][ T1] md: autorun ...
[ 14.165854][ T1] md: ... autorun DONE.
[ 14.276553][ T1] EXT4-fs (sda1): mounted filesystem with ordered data mode. Quota mode: none.
[ 14.285966][ T1] VFS: Mounted root (ext4 filesystem) readonly on device 8:1.
[ 14.323634][ T1] devtmpfs: mounted
[ 14.339508][ T1] Freeing unused kernel image (initmem) memory: 2744K
[ 14.352992][ T1] Write protecting the kernel read-only data: 176128k
[ 14.364738][ T1] Freeing unused kernel image (text/rodata gap) memory: 2016K
[ 14.372956][ T1] Freeing unused kernel image (rodata/data gap) memory: 392K
[ 14.386193][ T1] Failed to set sysctl parameter 'max_rcu_stall_to_panic=1': parameter not found
[ 14.396254][ T1] Run /sbin/init as init process
[ 14.748233][ T1] SELinux: Class mctp_socket not defined in policy.
[ 14.755142][ T1] SELinux: Class anon_inode not defined in policy.
[ 14.761846][ T1] SELinux: Class io_uring not defined in policy.
[ 14.768317][ T1] SELinux: the above unknown classes and permissions will be denied
[ 14.827412][ T1] SELinux: policy capability network_peer_controls=1
[ 14.834281][ T1] SELinux: policy capability open_perms=1
[ 14.840096][ T1] SELinux: policy capability extended_socket_class=1
[ 14.847142][ T1] SELinux: policy capability always_check_network=0
[ 14.853909][ T1] SELinux: policy capability cgroup_seclabel=1
[ 14.860245][ T1] SELinux: policy capability nnp_nosuid_transition=1
[ 14.867038][ T1] SELinux: policy capability genfs_seclabel_symlinks=0
[ 14.874017][ T1] SELinux: policy capability ioctl_skip_cloexec=0
[ 15.372859][ T27] audit: type=1403 audit(1661120546.300:2): auid=4294967295 ses=4294967295 lsm=selinux res=1
[ 15.419548][ T2939] mount (2939) used greatest stack depth: 25104 bytes left
[ 15.445064][ T2940] EXT4-fs (sda1): re-mounted. Quota mode: none.
mount: mounting smackfs on /sys/fs/smackfs failed: No such file or directory
mount: mounting mqueue on /dev/mqueue failed: No such file or directory
mount: mounting hugetlbfs on /dev/hugepages failed: No such file or directory
mount: mounting fuse.lxcfs on /var/lib/lxcfs fai[ 15.591258][ T2942] mount (2942) used greatest stack depth: 24232 bytes left
led: No such file or directory
Starting syslogd: [ 15.765727][ T27] audit: type=1400 audit(1661120546.690:3): avc: denied { read write } for pid=2954 comm="syslogd" path="/dev/null" dev="devtmpfs" ino=5 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:device_t tclass=chr_file permissive=1
OK
[ 15.814376][ T27] audit: type=1400 audit(1661120546.740:4): avc: denied { read } for pid=2954 comm="syslogd" name="log" dev="sda1" ino=1125 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:var_t tclass=lnk_file permissive=1
Starting acpid: [ 15.839700][ T27] audit: type=1400 audit(1661120546.760:5): avc: denied { search } for pid=2954 comm="syslogd" name="/" dev="tmpfs" ino=1 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
[ 15.861223][ T27] audit: type=1400 audit(1661120546.760:6): avc: denied { write } for pid=2954 comm="syslogd" name="/" dev="tmpfs" ino=1 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
[ 15.882689][ T27] audit: type=1400 audit(1661120546.760:7): avc: denied { add_name } for pid=2954 comm="syslogd" name="messages" scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
OK
[ 15.923298][ T27] audit: type=1400 audit(1661120546.760:8): avc: denied { create } for pid=2954 comm="syslogd" name="messages" scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
[ 15.944279][ T27] audit: type=1400 audit(1661120546.760:9): avc: denied { append open } for pid=2954 comm="syslogd" path="/tmp/messages" dev="tmpfs" ino=2 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
Starting klogd: [ 15.967402][ T27] audit: type=1400 audit(1661120546.760:10): avc: denied { getattr } for pid=2954 comm="syslogd" path="/tmp/messages" dev="tmpfs" ino=2 scontext=system_u:system_r:syslogd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
[ 15.992369][ T27] audit: type=1400 audit(1661120546.800:11): avc: denied { use } for pid=2956 comm="acpid" path="/dev/console" dev="rootfs" ino=1076 scontext=system_u:system_r:acpid_t tcontext=system_u:system_r:kernel_t tclass=fd permissive=1
OK
Running sysctl: OK
[ 16.225258][ T2967] logger (2967) used greatest stack depth: 22976 bytes left
Populating /dev using udev: [ 16.435978][ T2971] udevd[2971]: starting version 3.2.10
[ 16.686715][ T2972] udevd[2972]: starting eudev-3.2.10
done
Starting system message bus: [ 28.569901][ T27] kauditd_printk_skb: 14 callbacks suppressed
[ 28.569911][ T27] audit: type=1400 audit(1661120559.490:26): avc: denied { use } for pid=3171 comm="dbus-daemon" path="/dev/console" dev="rootfs" ino=1076 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:system_r:kernel_t tclass=fd permissive=1
[ 28.599571][ T27] audit: type=1400 audit(1661120559.490:27): avc: denied { read write } for pid=3171 comm="dbus-daemon" path="/dev/console" dev="rootfs" ino=1076 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:root_t tclass=chr_file permissive=1
[ 28.642390][ T27] audit: type=1400 audit(1661120559.560:28): avc: denied { search } for pid=3171 comm="dbus-daemon" name="/" dev="tmpfs" ino=1 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
[ 28.665649][ T27] audit: type=1400 audit(1661120559.580:29): avc: denied { write } for pid=3171 comm="dbus-daemon" name="dbus" dev="tmpfs" ino=1393 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
done
[ 28.689039][ T27] audit: type=1400 audit(1661120559.580:30): avc: denied { add_name } for pid=3171 comm="dbus-daemon" name="system_bus_socket" scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=dir permissive=1
[ 28.711654][ T27] audit: type=1400 audit(1661120559.580:31): avc: denied { create } for pid=3171 comm="dbus-daemon" name="system_bus_socket" scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=sock_file permissive=1
[ 28.734633][ T27] audit: type=1400 audit(1661120559.580:32): avc: denied { setattr } for pid=3171 comm="dbus-daemon" name="system_bus_socket" dev="tmpfs" ino=1394 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=sock_file permissive=1
[ 28.759424][ T27] audit: type=1400 audit(1661120559.590:33): avc: denied { create } for pid=3171 comm="dbus-daemon" name="messagebus.pid" scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
[ 28.782144][ T27] audit: type=1400 audit(1661120559.590:34): avc: denied { write open } for pid=3171 comm="dbus-daemon" path="/run/messagebus.pid" dev="tmpfs" ino=1395 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
[ 28.807197][ T27] audit: type=1400 audit(1661120559.590:35): avc: denied { getattr } for pid=3171 comm="dbus-daemon" path="/run/messagebus.pid" dev="tmpfs" ino=1395 scontext=system_u:system_r:system_dbusd_t tcontext=system_u:object_r:tmpfs_t tclass=file permissive=1
Starting network: OK
Starting dhcpcd...
dhcpcd-9.4.0 starting
dev: loaded udev
DUID 00:04:18:1b:51:b6:f4:39:97:fb:3c:5d:6f:3f:70:18:1a:62
forked to background, child pid 3185
[ 30.625552][ T3186] 8021q: adding VLAN 0 to HW filter on device bond0
[ 30.638978][ T3186] eql: remember to turn off Van-Jacobson compression on your slave devices
Starting sshd: OK

syzkaller

syzkaller login: [ 76.294776][ T14] cfg80211: failed to load regulatory.db

syzkaller build log:
go env (err=<nil>)
GO111MODULE="auto"
GOARCH="amd64"
GOBIN=""
GOCACHE="/syzkaller/.cache/go-build"
GOENV="/syzkaller/.config/go/env"
GOEXE=""
GOEXPERIMENT=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOINSECURE=""
GOMODCACHE="/syzkaller/jobs/linux/gopath/pkg/mod"
GONOPROXY=""
GONOSUMDB=""
GOOS="linux"
GOPATH="/syzkaller/jobs/linux/gopath"
GOPRIVATE=""
GOPROXY="https://proxy.golang.org,direct"
GOROOT="/usr/local/go"
GOSUMDB="sum.golang.org"
GOTMPDIR=""
GOTOOLDIR="/usr/local/go/pkg/tool/linux_amd64"
GOVCS=""
GOVERSION="go1.17"
GCCGO="gccgo"
AR="ar"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
GOMOD="/syzkaller/jobs/linux/gopath/src/github.com/google/syzkaller/go.mod"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build994994397=/tmp/go-build -gno-record-gcc-switches"

git status (err=<nil>)
HEAD detached at 1434eec0b
nothing to commit, working tree clean

go list -f '{{.Stale}}' ./sys/syz-sysgen | grep -q false || go install ./sys/syz-sysgen
make .descriptions
bin/syz-sysgen
touch .descriptions
GOOS=linux GOARCH=amd64 go build "-ldflags=-s -w -X github.com/google/syzkaller/prog.GitRevision=1434eec0b84075b7246560cfa89f20cdb3d8077f -X 'github.com/google/syzkaller/prog.gitRevisionDate=20220629-111539'" "-tags=syz_target syz_os_linux syz_arch_amd64 " -o ./bin/linux_amd64/syz-fuzzer github.com/google/syzkaller/syz-fuzzer
GOOS=linux GOARCH=amd64 go build "-ldflags=-s -w -X github.com/google/syzkaller/prog.GitRevision=1434eec0b84075b7246560cfa89f20cdb3d8077f -X 'github.com/google/syzkaller/prog.gitRevisionDate=20220629-111539'" "-tags=syz_target syz_os_linux syz_arch_amd64 " -o ./bin/linux_amd64/syz-execprog github.com/google/syzkaller/tools/syz-execprog
GOOS=linux GOARCH=amd64 go build "-ldflags=-s -w -X github.com/google/syzkaller/prog.GitRevision=1434eec0b84075b7246560cfa89f20cdb3d8077f -X 'github.com/google/syzkaller/prog.gitRevisionDate=20220629-111539'" "-tags=syz_target syz_os_linux syz_arch_amd64 " -o ./bin/linux_amd64/syz-stress github.com/google/syzkaller/tools/syz-stress
mkdir -p ./bin/linux_amd64
gcc -o ./bin/linux_amd64/syz-executor executor/executor.cc \
-m64 -O2 -pthread -Wall -Werror -Wparentheses -Wunused-const-variable -Wframe-larger-than=16384 -Wno-stringop-overflow -Wno-array-bounds -Wno-format-overflow -static-pie -fpermissive -w -DGOOS_linux=1 -DGOARCH_amd64=1 \
-DHOSTGOOS_linux=1 -DGIT_REVISION=\"1434eec0b84075b7246560cfa89f20cdb3d8077f\"

Error text is too large and was truncated, full error text is at:
https://syzkaller.appspot.com/x/error.txt?x=14b8bffd080000

Tested on:

commit: 568035b0 Linux 6.0-rc1
git tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel config: https://syzkaller.appspot.com/x/.config?x=930fc801ca92783a
dashboard link: https://syzkaller.appspot.com/bug?extid=7f0483225d0c94cb3441
compiler: gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for Debian) 2.35.2
patch: https://syzkaller.appspot.com/x/patch.diff?x=17e2730d080000

2022-08-22 05:35:21

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, 22 Aug 2022 at 00:42, Khalid Masum <[email protected]> wrote:
>
> On Sun, Aug 21, 2022 at 9:58 PM Khalid Masum <[email protected]> wrote:
> >
> > On Sun, Aug 21, 2022 at 6:58 PM Hawkins Jiawei <[email protected]> wrote:
> > >
> > The interruptible version fails to acquire the lock. So why is it okay to
> > force it to acquire the mutex_lock since we are in the interrupt context?
>
> Sorry, I mean, won't the function lose its ability of being interruptible?
> Since we are forcing it to acquire the lock.
> > > return sock_intr_errno(*timeo);
> > > + }
> > > }
> > > }
> >
> > thanks,
> > -- Khalid Masum
Hi, Khalid

In my opinion, _intr in rxrpc_wait_for_tx_window_intr() seems referring
that, the loop in function should be interrupted when a signal
arrives(Please correct me if I am wrong):
> /*
> * Wait for space to appear in the Tx queue or a signal to occur.
> */
> static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
> struct rxrpc_call *call,
> long *timeo)
> {
> for (;;) {
> set_current_state(TASK_INTERRUPTIBLE);
> if (rxrpc_check_tx_space(call, NULL))
> return 0;
>
> if (call->state >= RXRPC_CALL_COMPLETE)
> return call->error;
>
> if (signal_pending(current))
> return sock_intr_errno(*timeo);
>
> trace_rxrpc_transmit(call, rxrpc_transmit_wait);
> mutex_unlock(&call->user_mutex);
> *timeo = schedule_timeout(*timeo);
> if (mutex_lock_interruptible(&call->user_mutex) < 0)
> return sock_intr_errno(*timeo);
> }
> }

To be more specific, when a signal arrives,
rxrpc_wait_for_tx_window_intr() should know when executing
mutex_lock_interruptible() and get a non-zero value. Then
rxrpc_wait_for_tx_window_intr() should be interrupted, which means
function should be returned.

So I think, acquiring mutex_lock() seems won't effect its ability
of being interruptible.(Please correct me if I am wrong).

What's more, when the kernel return from
rxrpc_wait_for_tx_window_intr(), it will only handles the error case
before unlocking the call->user_mutex, which won't cost a long time.
So I think it seems Ok to acquire the call->user_mutex when
rxrpc_wait_for_tx_window_intr() is interrupted by a signal.

On Mon, 22 Aug 2022 at 03:18, Khalid Masum <[email protected]> wrote:
>
> Maybe we do not need to lock since no other timer_schedule needs
> it.
>
> Test if this fixes the issue.
> ---
> diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
> index 1d38e279e2ef..640e2ab2cc35 100644
> --- a/net/rxrpc/sendmsg.c
> +++ b/net/rxrpc/sendmsg.c
> @@ -51,10 +51,8 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
> return sock_intr_errno(*timeo);
>
> trace_rxrpc_transmit(call, rxrpc_transmit_wait);
> - mutex_unlock(&call->user_mutex);
> *timeo = schedule_timeout(*timeo);
> - if (mutex_lock_interruptible(&call->user_mutex) < 0)
> - return sock_intr_errno(*timeo);
> + return sock_intr_errno(*timeo);
> }
> }
>
> --
> 2.37.1
>

If it is still improper to patch this bug by acquiring the
call->user_mutex, I wonder if it is better to check before unlocking the lock
in rxrpc_do_sendmsg(), because kernel will always unlocking the call->user_mutex
in the end of the rxrpc_do_sendmsg():
> int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
> __releases(&rx->sk.sk_lock.slock)
> __releases(&call->user_mutex)
> {
> ...
> out_put_unlock:
> mutex_unlock(&call->user_mutex);
> error_put:
> rxrpc_put_call(call, rxrpc_call_put);
> _leave(" = %d", ret);
> return ret;
>
> error_release_sock:
> release_sock(&rx->sk);
> return ret;
> }

2022-08-22 09:05:31

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

Hawkins Jiawei <[email protected]> wrote:

> - if (mutex_lock_interruptible(&call->user_mutex) < 0)
> + if (mutex_lock_interruptible(&call->user_mutex) < 0) {
> + mutex_lock(&call->user_mutex);

Yeah, as Khalid points out that kind of makes the interruptible lock
pointless. Either rxrpc_send_data() needs to return a separate indication
that we returned without the lock held or it needs to always drop the lock on
error (at least for ERESTARTSYS/EINTR) which can be checked for higher up.

David

2022-08-22 09:58:09

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

Actually, there's another bug here too: if rxrpc_wait_for_tx_window() drops
the call mutex then it needs to reload the pending packet state in
rxrpc_send_data() as it may have raced with another sendmsg().

David

2022-08-22 11:33:31

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, 22 Aug 2022 at 16:48, David Howells <[email protected]> wrote:
>
> Hawkins Jiawei <[email protected]> wrote:
>
> > - if (mutex_lock_interruptible(&call->user_mutex) < 0)
> > + if (mutex_lock_interruptible(&call->user_mutex) < 0) {
> > + mutex_lock(&call->user_mutex);
>
> Yeah, as Khalid points out that kind of makes the interruptible lock
> pointless. Either rxrpc_send_data() needs to return a separate indication
> that we returned without the lock held or it needs to always drop the lock on
> error (at least for ERESTARTSYS/EINTR) which can be checked for higher up.
Hi David,

For second option, I think it may meet some difficulty, because we
cannot figure out whether rxrpc_send_data() meets lock error.
To be more specific, rxrpc_send_data() may still returns the number
it has copied even rxrpc_send_data() meets lock error, if
rxrpc_send_data() has successfully dealed some data.(Please correct me
if I am wrong)

So I think the first option seems better. I wonder if we can add an
argument in rxrpc_send_data() as an indication you pointed out? Maybe:

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..0801325a7c7f 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -284,13 +284,18 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,

/*
* send data through a socket
+ * @holding_mutex: rxrpc_send_data() will assign this pointer with True
+ * if functions still holds the call user access mutex when returned to caller.
+ * This argument can be NULL, which will effect nothing.
+ *
* - must be called in process context
* - The caller holds the call user access mutex, but not the socket lock.
*/
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *holding_mutex)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
@@ -299,6 +304,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
bool more;
int ret, copied;

+ if (holding_mutex)
+ *holding_mutex = true;
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

/* this should be in poll */
@@ -338,8 +346,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
ret = rxrpc_wait_for_tx_window(rx, call,
&timeo,
msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
+ if (ret < 0) {
+ if (holding_mutex)
+ *holding_mutex = false;
goto maybe_error;
+ }
}

/* Work out the maximum size of a packet. Assume that
@@ -630,6 +641,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
struct rxrpc_call *call;
unsigned long now, j;
int ret;
+ bool holding_user_mutex;

struct rxrpc_send_params p = {
.call.tx_total_len = -1,
@@ -747,7 +759,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &holding_user_mutex);
+ if (!holding_user_mutex)
+ goto error_put;
}

out_put_unlock:
@@ -796,7 +810,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, NULL);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);

On Mon, 22 Aug 2022 at 17:21, David Howells <[email protected]> wrote:
>
> Actually, there's another bug here too: if rxrpc_wait_for_tx_window() drops
> the call mutex then it needs to reload the pending packet state in
> rxrpc_send_data() as it may have raced with another sendmsg().
>
> David
>
After applying the above patch, kernel still panic, but seems not the
bad unlock balance bug before. yet I am not sure if this is the same bug you
mentioned. Kernel output as below:
[ 39.115966][ T6508] ====================================
[ 39.116940][ T6508] WARNING: syz/6508 still has locks held!
[ 39.117978][ T6508] 6.0.0-rc1-00066-g3b06a2755758-dirty #186 Not tainted
[ 39.119353][ T6508] ------------------------------------
[ 39.120321][ T6508] 1 lock held by syz/6508:
[ 39.121122][ T6508] #0: ffff88801f472b20 (&call->user_mutex){....}-{3:3}0
[ 39.123014][ T6508]
[ 39.123014][ T6508] stack backtrace:
[ 39.123925][ T6508] CPU: 0 PID: 6508 Comm: syz Not tainted 6.0.0-rc1-00066
[ 39.125304][ T6508] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)4
[ 39.125304][ T6508] Call Trace:
[ 39.125304][ T6508] <TASK>
[ 39.125304][ T6508] dump_stack_lvl+0x8e/0xdd
[ 39.125304][ T6508] get_signal+0x1866/0x24d0
[ 39.125304][ T6508] ? lock_acquire+0x172/0x310
[ 39.125304][ T6508] ? exit_signals+0x7b0/0x7b0
[ 39.125304][ T6508] arch_do_signal_or_restart+0x82/0x23f0
[ 39.125304][ T6508] ? __sanitizer_cov_trace_pc+0x1a/0x40
[ 39.125304][ T6508] ? __fget_light+0x20d/0x270
[ 39.125304][ T6508] ? get_sigframe_size+0x10/0x10
[ 39.125304][ T6508] ? __sanitizer_cov_trace_pc+0x1a/0x40
[ 39.125304][ T6508] ? __sys_sendmsg+0x11a/0x1c0
[ 39.125304][ T6508] ? __sys_sendmsg_sock+0x30/0x30
[ 39.125304][ T6508] exit_to_user_mode_prepare+0x146/0x1b0
[ 39.125304][ T6508] syscall_exit_to_user_mode+0x12/0x30
[ 39.125304][ T6508] do_syscall_64+0x42/0xb0
[ 39.125304][ T6508] entry_SYSCALL_64_after_hwframe+0x63/0xcd
[ 39.125304][ T6508] RIP: 0033:0x44fbad
[ 39.125304][ T6508] Code: c3 e8 97 29 00 00 0f 1f 80 00 00 00 00 f3 0f 1e8
[ 39.125304][ T6508] RSP: 002b:00007f4b8ae22d48 EFLAGS: 00000246 ORIG_RAX:e
[ 39.125304][ T6508] RAX: fffffffffffffffc RBX: 0000000000000000 RCX: 0000d
[ 39.125304][ T6508] RDX: 0000000000000186 RSI: 0000000020000000 RDI: 00003
[ 39.125304][ T6508] RBP: 00007f4b8ae22d80 R08: 00007f4b8ae23700 R09: 00000
[ 39.125304][ T6508] R10: 00007f4b8ae23700 R11: 0000000000000246 R12: 0000e
[ 39.125304][ T6508] R13: 00007ffe483304af R14: 00007ffe48330550 R15: 00000
[ 39.125304][ T6508] </TASK>
====================================

I will make a deeper look and try to patch it.

2022-08-22 11:41:55

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

2022-08-22 13:23:06

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, 22 Aug 2022 at 19:30, Hawkins Jiawei <[email protected]> wrote:
>
> On Mon, 22 Aug 2022 at 16:48, David Howells <[email protected]> wrote:
> >
> > Hawkins Jiawei <[email protected]> wrote:
> >
> > > - if (mutex_lock_interruptible(&call->user_mutex) < 0)
> > > + if (mutex_lock_interruptible(&call->user_mutex) < 0) {
> > > + mutex_lock(&call->user_mutex);
> >
> > Yeah, as Khalid points out that kind of makes the interruptible lock
> > pointless. Either rxrpc_send_data() needs to return a separate indication
> > that we returned without the lock held or it needs to always drop the lock on
> > error (at least for ERESTARTSYS/EINTR) which can be checked for higher up.
> Hi David,
>
> For second option, I think it may meet some difficulty, because we
> cannot figure out whether rxrpc_send_data() meets lock error.
> To be more specific, rxrpc_send_data() may still returns the number
> it has copied even rxrpc_send_data() meets lock error, if
> rxrpc_send_data() has successfully dealed some data.(Please correct me
> if I am wrong)
>
> So I think the first option seems better. I wonder if we can add an
> argument in rxrpc_send_data() as an indication you pointed out? Maybe:
>
> diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
> index 1d38e279e2ef..0801325a7c7f 100644
> --- a/net/rxrpc/sendmsg.c
> +++ b/net/rxrpc/sendmsg.c
> @@ -284,13 +284,18 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
>
> /*
> * send data through a socket
> + * @holding_mutex: rxrpc_send_data() will assign this pointer with True
> + * if functions still holds the call user access mutex when returned to caller.
> + * This argument can be NULL, which will effect nothing.
> + *
> * - must be called in process context
> * - The caller holds the call user access mutex, but not the socket lock.
> */
> static int rxrpc_send_data(struct rxrpc_sock *rx,
> struct rxrpc_call *call,
> struct msghdr *msg, size_t len,
> - rxrpc_notify_end_tx_t notify_end_tx)
> + rxrpc_notify_end_tx_t notify_end_tx,
> + bool *holding_mutex)
> {
> struct rxrpc_skb_priv *sp;
> struct sk_buff *skb;
> @@ -299,6 +304,9 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
> bool more;
> int ret, copied;
>
> + if (holding_mutex)
> + *holding_mutex = true;
> +
> timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
>
> /* this should be in poll */
> @@ -338,8 +346,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
> ret = rxrpc_wait_for_tx_window(rx, call,
> &timeo,
> msg->msg_flags & MSG_WAITALL);
> - if (ret < 0)
> + if (ret < 0) {
> + if (holding_mutex)
> + *holding_mutex = false;
> goto maybe_error;
> + }
> }
>
> /* Work out the maximum size of a packet. Assume that
> @@ -630,6 +641,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
> struct rxrpc_call *call;
> unsigned long now, j;
> int ret;
> + bool holding_user_mutex;
>
> struct rxrpc_send_params p = {
> .call.tx_total_len = -1,
> @@ -747,7 +759,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
> /* Reply phase not begun or not complete for service call. */
> ret = -EPROTO;
> } else {
> - ret = rxrpc_send_data(rx, call, msg, len, NULL);
> + ret = rxrpc_send_data(rx, call, msg, len, NULL, &holding_user_mutex);
> + if (!holding_user_mutex)
> + goto error_put;
> }
>
> out_put_unlock:
> @@ -796,7 +810,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
> case RXRPC_CALL_SERVER_ACK_REQUEST:
> case RXRPC_CALL_SERVER_SEND_REPLY:
> ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
> - notify_end_tx);
> + notify_end_tx, NULL);
> break;
> case RXRPC_CALL_COMPLETE:
> read_lock_bh(&call->state_lock);
>
> After applying the above patch, kernel still panic, but seems not the
> bad unlock balance bug before. yet I am not sure if this is the same bug you
> mentioned. Kernel output as below:
> [ 39.115966][ T6508] ====================================
> [ 39.116940][ T6508] WARNING: syz/6508 still has locks held!
> [ 39.117978][ T6508] 6.0.0-rc1-00066-g3b06a2755758-dirty #186 Not tainted
> [ 39.119353][ T6508] ------------------------------------
> [ 39.120321][ T6508] 1 lock held by syz/6508:
> [ 39.121122][ T6508] #0: ffff88801f472b20 (&call->user_mutex){....}-{3:3}0
> [ 39.123014][ T6508]
> [ 39.123014][ T6508] stack backtrace:
> [ 39.123925][ T6508] CPU: 0 PID: 6508 Comm: syz Not tainted 6.0.0-rc1-00066
> [ 39.125304][ T6508] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)4
> [ 39.125304][ T6508] Call Trace:
> [ 39.125304][ T6508] <TASK>
> [ 39.125304][ T6508] dump_stack_lvl+0x8e/0xdd
> [ 39.125304][ T6508] get_signal+0x1866/0x24d0
> [ 39.125304][ T6508] ? lock_acquire+0x172/0x310
> [ 39.125304][ T6508] ? exit_signals+0x7b0/0x7b0
> [ 39.125304][ T6508] arch_do_signal_or_restart+0x82/0x23f0
> [ 39.125304][ T6508] ? __sanitizer_cov_trace_pc+0x1a/0x40
> [ 39.125304][ T6508] ? __fget_light+0x20d/0x270
> [ 39.125304][ T6508] ? get_sigframe_size+0x10/0x10
> [ 39.125304][ T6508] ? __sanitizer_cov_trace_pc+0x1a/0x40
> [ 39.125304][ T6508] ? __sys_sendmsg+0x11a/0x1c0
> [ 39.125304][ T6508] ? __sys_sendmsg_sock+0x30/0x30
> [ 39.125304][ T6508] exit_to_user_mode_prepare+0x146/0x1b0
> [ 39.125304][ T6508] syscall_exit_to_user_mode+0x12/0x30
> [ 39.125304][ T6508] do_syscall_64+0x42/0xb0
> [ 39.125304][ T6508] entry_SYSCALL_64_after_hwframe+0x63/0xcd
> [ 39.125304][ T6508] RIP: 0033:0x44fbad
> [ 39.125304][ T6508] Code: c3 e8 97 29 00 00 0f 1f 80 00 00 00 00 f3 0f 1e8
> [ 39.125304][ T6508] RSP: 002b:00007f4b8ae22d48 EFLAGS: 00000246 ORIG_RAX:e
> [ 39.125304][ T6508] RAX: fffffffffffffffc RBX: 0000000000000000 RCX: 0000d
> [ 39.125304][ T6508] RDX: 0000000000000186 RSI: 0000000020000000 RDI: 00003
> [ 39.125304][ T6508] RBP: 00007f4b8ae22d80 R08: 00007f4b8ae23700 R09: 00000
> [ 39.125304][ T6508] R10: 00007f4b8ae23700 R11: 0000000000000246 R12: 0000e
> [ 39.125304][ T6508] R13: 00007ffe483304af R14: 00007ffe48330550 R15: 00000
> [ 39.125304][ T6508] </TASK>
> ====================================
>
> I will make a deeper look and try to patch it.
The cause is that patch assigns False to pointer holding_mutex in
rxrpc_send_data() by only juding whether the return value from
rxrpc_wait_for_tx_window() is less than 0, yet this is not correct.

So we should just only assign False to holding_mutex when
unlocking the call->user_mutex in rxrpc_wait_for_tx_window_intr().
Maybe:
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..1050cc2f5c89 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -33,11 +33,16 @@ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
}

/*
+ * @holding_mutex: An indication whether caller can still holds
+ * the call->user_mutex when returned to caller.
+ * This argument can be NULL, which will effect nothing.
+ *
* Wait for space to appear in the Tx queue or a signal to occur.
*/
static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- long *timeo)
+ long *timeo,
+ bool *holding_mutex)
{
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
@@ -53,8 +58,11 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ if (holding_mutex)
+ *holding_mutex = false;
return sock_intr_errno(*timeo);
+ }
}
}

@@ -121,13 +129,18 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
}

/*
+ * @holding_mutex: An indication whether caller can still holds
+ * the call->user_mutex when returned to caller.
+ * This argument can be NULL, which will effect nothing.
+ *
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
struct rxrpc_call *call,
long *timeo,
- bool waitall)
+ bool waitall,
+ bool *holding_mutex)
{
DECLARE_WAITQUEUE(myself, current);
int ret;
@@ -142,7 +155,7 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
if (waitall)
ret = rxrpc_wait_for_tx_window_waitall(rx, call);
else
- ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo, holding_mutex);
break;
case RXRPC_PREINTERRUPTIBLE:
case RXRPC_UNINTERRUPTIBLE:
@@ -284,13 +297,19 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,

/*
* send data through a socket
+ *
+ * @holding_mutex: An indication whether caller can still holds
+ * the call->user_mutex when returned to caller.
+ * This argument can be NULL, which will effect nothing.
+ *
* - must be called in process context
* - The caller holds the call user access mutex, but not the socket lock.
*/
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *holding_mutex)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
@@ -299,6 +318,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
bool more;
int ret, copied;

+ /*
+ * The caller holds the call->user_mutex when calls
+ * rxrpc_send_data(), so initial it with True
+ */
+ if (holding_mutex)
+ *holding_mutex = true;
+
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

/* this should be in poll */
@@ -337,7 +363,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
goto maybe_error;
ret = rxrpc_wait_for_tx_window(rx, call,
&timeo,
- msg->msg_flags & MSG_WAITALL);
+ msg->msg_flags & MSG_WAITALL,
+ holding_mutex);
if (ret < 0)
goto maybe_error;
}
@@ -630,6 +657,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
struct rxrpc_call *call;
unsigned long now, j;
int ret;
+ bool holding_user_mutex;

struct rxrpc_send_params p = {
.call.tx_total_len = -1,
@@ -747,7 +775,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
/* Reply phase not begun or not complete for service call. */
ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &holding_user_mutex);
+ if (!holding_user_mutex)
+ goto error_put;
}

out_put_unlock:
@@ -796,7 +826,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, NULL);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);

Reproducer did not trigger any issue locally. I will propose a test by
syzbot. Maybe I will send v2 patch if patch can pass the syzbot tesing.

2022-08-22 14:04:51

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

>
> /*
> + * @holding_mutex: An indication whether caller can still holds
> + * the call->user_mutex when returned to caller.

Maybe we can use mutex_is_locked instead of using the holding_mutex
parameter to get whether call->user_mutex is still held.

https://www.kernel.org/doc/htmldocs/kernel-locking/API-mutex-is-locked.html
> + * This argument can be NULL, which will effect nothing.
> + *
> * Wait for space to appear in the Tx queue or a signal to occur.
> */
>

thanks,
-- Khalid Masum

2022-08-22 14:16:40

by Dan Carpenter

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, Aug 22, 2022 at 09:04:14PM +0800, Hawkins Jiawei wrote:
> static int rxrpc_send_data(struct rxrpc_sock *rx,
> struct rxrpc_call *call,
> struct msghdr *msg, size_t len,
> - rxrpc_notify_end_tx_t notify_end_tx)
> + rxrpc_notify_end_tx_t notify_end_tx,
> + bool *holding_mutex)
> {
> struct rxrpc_skb_priv *sp;
> struct sk_buff *skb;
> @@ -299,6 +318,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
> bool more;
> int ret, copied;
>
> + /*
> + * The caller holds the call->user_mutex when calls
> + * rxrpc_send_data(), so initial it with True
> + */
> + if (holding_mutex)
> + *holding_mutex = true;

Don't make this optional. All callers have to care if we dropped the
lock.

regards,
dan carpenter

2022-08-22 14:29:46

by Dan Carpenter

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, Aug 22, 2022 at 07:55:27PM +0600, Khalid Masum wrote:
> >
> > /*
> > + * @holding_mutex: An indication whether caller can still holds
> > + * the call->user_mutex when returned to caller.
>
> Maybe we can use mutex_is_locked instead of using the holding_mutex
> parameter to get whether call->user_mutex is still held.

That doesn't work. What if there is contention for the lock and someone
else took it. Locks under contention are sort of the whole point of
locking so it's highly likely.

I do kind of feel the code has a layering violation. I'd prefer instead
of "I am setting this variable to true to reflect the caller blah blah",
the variable could just be set in the caller. I'd probably flip it
around and call it "dropped_lock" instead of "holding_lock".

bool dropped_lock = false;

...

if (!dropped_lock)
mutex_unlock();

That way only the callers and the function which drops the lock need to
be involved with setting the variable.

regards,
dan carpenter

2022-08-22 15:03:34

[permalink] [raw]

Subject: Re: [syzbot] WARNING: bad unlock balance in rxrpc_do_sendmsg

#syzbot git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git master

rxrpc: Fix locking in rxrpc's sendmsg

Fix three bugs in the rxrpc's sendmsg implementation:

(1) rxrpc_new_client_call() should release the socket lock when returning
an error from rxrpc_get_call_slot().

(2) rxrpc_wait_for_tx_window_intr() will return without the call mutex
held in the event that we're interrupted by a signal whilst waiting
for tx space on the socket or relocking the call mutex afterwards.

Fix this by: (a) moving the unlock/lock of the call mutex up to
rxrpc_send_data() such that the lock is not held around all of
rxrpc_wait_for_tx_window*() and (b) indicating to higher callers
whether we're return with the lock dropped. Note that this means
recvmsg() will not block on this call whilst we're waiting.

(3) After dropping and regaining the call mutex, rxrpc_send_data() needs
to go and recheck the state of the tx_pending buffer and the
tx_total_len check in case we raced with another sendmsg() on the same
call.

Thinking on this some more, it might make sense to have different locks for
sendmsg() and recvmsg(). There's probably no need to make recvmsg() wait
for sendmsg(). It does mean that recvmsg() can return MSG_EOR indicating
that a call is dead before a sendmsg() to that call returns - but that can
currently happen anyway.

Without fix (2), something like the following can be induced:

WARNING: bad unlock balance detected!
5.16.0-rc6-syzkaller #0 Not tainted
-------------------------------------
syz-executor011/3597 is trying to release lock (&call->user_mutex) at:
[<ffffffff885163a3>] rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor011/3597.
...
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_unlock_imbalance_bug include/trace/events/lock.h:58 [inline]
__lock_release kernel/locking/lockdep.c:5306 [inline]
lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5657
__mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:900
rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:561
sock_sendmsg_nosec net/socket.c:704 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:724
____sys_sendmsg+0x6e8/0x810 net/socket.c:2409
___sys_sendmsg+0xf3/0x170 net/socket.c:2463
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2492
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

[Thanks to Hawkins Jiawei and Khalid Masum for their attempts to fix this]

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Reported-by: [email protected]
cc: Hawkins Jiawei <[email protected]>
cc: Khalid Masum <[email protected]>
cc: Dan Carpenter <[email protected]>
cc: Marc Dionne <[email protected]>
cc: [email protected]
---
net/rxrpc/call_object.c | 4 +-
net/rxrpc/sendmsg.c | 92 ++++++++++++++++++++++++++++--------------------
2 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 84d0a4109645..6401cdf7a624 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);

limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }

call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..3c3a626459de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);

trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}

@@ -290,37 +287,48 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;

timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
+
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}

skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);

- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,

_debug("alloc");

- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;

/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
@@ -468,6 +468,27 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}

/*
@@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;

struct rxrpc_send_params p = {
@@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}

out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;

_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
break;
}

- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}

2022-08-22 15:44:53

[permalink] [raw]

Subject: Re: [PATCH] rxrpc: fix bad unlock balance in rxrpc_do_sendmsg

On Mon, 22 Aug 2022 at 22:06, Dan Carpenter <[email protected]> wrote:
>
> On Mon, Aug 22, 2022 at 07:55:27PM +0600, Khalid Masum wrote:
> > >
> > > /*
> > > + * @holding_mutex: An indication whether caller can still holds
> > > + * the call->user_mutex when returned to caller.
> >
> > Maybe we can use mutex_is_locked instead of using the holding_mutex
> > parameter to get whether call->user_mutex is still held.
>
> That doesn't work. What if there is contention for the lock and someone
> else took it. Locks under contention are sort of the whole point of
> locking so it's highly likely.
I trid this before, it doesn't work as Dan points out. I think
it seems that mutex_is_locked() only checks whether there is a task
holding the mutex, but do not check whether it is current task holding
mutex. I also tried lockdep_is_held(), lockdep_is_held() seems can detect
call->user_mutex is still held accurately, although this cannot be the patch.

2022-08-22 16:04:47

[permalink] [raw]

Subject: Re: [syzbot] WARNING: bad unlock balance in rxrpc_do_sendmsg

#syz test: git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git master

rxrpc: Fix locking in rxrpc's sendmsg

Fix three bugs in the rxrpc's sendmsg implementation:

(1) rxrpc_new_client_call() should release the socket lock when returning
an error from rxrpc_get_call_slot().

(2) rxrpc_wait_for_tx_window_intr() will return without the call mutex
held in the event that we're interrupted by a signal whilst waiting
for tx space on the socket or relocking the call mutex afterwards.

Fix this by: (a) moving the unlock/lock of the call mutex up to
rxrpc_send_data() such that the lock is not held around all of
rxrpc_wait_for_tx_window*() and (b) indicating to higher callers
whether we're return with the lock dropped. Note that this means
recvmsg() will not block on this call whilst we're waiting.

(3) After dropping and regaining the call mutex, rxrpc_send_data() needs
to go and recheck the state of the tx_pending buffer and the
tx_total_len check in case we raced with another sendmsg() on the same
call.

Thinking on this some more, it might make sense to have different locks for
sendmsg() and recvmsg(). There's probably no need to make recvmsg() wait
for sendmsg(). It does mean that recvmsg() can return MSG_EOR indicating
that a call is dead before a sendmsg() to that call returns - but that can
currently happen anyway.

Without fix (2), something like the following can be induced:

WARNING: bad unlock balance detected!
5.16.0-rc6-syzkaller #0 Not tainted
-------------------------------------
syz-executor011/3597 is trying to release lock (&call->user_mutex) at:
[<ffffffff885163a3>] rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor011/3597.
...
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_unlock_imbalance_bug include/trace/events/lock.h:58 [inline]
__lock_release kernel/locking/lockdep.c:5306 [inline]
lock_release.cold+0x49/0x4e kernel/locking/lockdep.c:5657
__mutex_unlock_slowpath+0x99/0x5e0 kernel/locking/mutex.c:900
rxrpc_do_sendmsg+0xc13/0x1350 net/rxrpc/sendmsg.c:748
rxrpc_sendmsg+0x420/0x630 net/rxrpc/af_rxrpc.c:561
sock_sendmsg_nosec net/socket.c:704 [inline]
sock_sendmsg+0xcf/0x120 net/socket.c:724
____sys_sendmsg+0x6e8/0x810 net/socket.c:2409
___sys_sendmsg+0xf3/0x170 net/socket.c:2463
__sys_sendmsg+0xe5/0x1b0 net/socket.c:2492
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae

[Thanks to Hawkins Jiawei and Khalid Masum for their attempts to fix this]

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Reported-by: [email protected]
cc: Hawkins Jiawei <[email protected]>
cc: Khalid Masum <[email protected]>
cc: Dan Carpenter <[email protected]>
cc: Marc Dionne <[email protected]>
cc: [email protected]
---
net/rxrpc/call_object.c | 4 +-
net/rxrpc/sendmsg.c | 92 ++++++++++++++++++++++++++++--------------------
2 files changed, 57 insertions(+), 39 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 84d0a4109645..6401cdf7a624 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);

limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }

call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..3c3a626459de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);

trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}

@@ -290,37 +287,48 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;

timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
+
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}

skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);

- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,

_debug("alloc");

- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;

/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
@@ -468,6 +468,27 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}

/*
@@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;

struct rxrpc_send_params p = {
@@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}

out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;

_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
break;
}

- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}

2022-08-22 16:45:39