Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755062AbbKXVaK (ORCPT ); Tue, 24 Nov 2015 16:30:10 -0500 Received: from prod-mail-xrelay05.akamai.com ([23.79.238.179]:61567 "EHLO prod-mail-xrelay05.akamai.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755032AbbKXVaE (ORCPT ); Tue, 24 Nov 2015 16:30:04 -0500 Subject: Re: use-after-free in sock_wake_async To: Eric Dumazet , Dmitry Vyukov , Benjamin LaHaise References: Cc: "David S. Miller" , Hannes Frederic Sowa , Al Viro , David Howells , Ying Xue , "Eric W. Biederman" , Rainer Weikusat , netdev , LKML , syzkaller , Kostya Serebryany , Alexander Potapenko , Sasha Levin From: Jason Baron X-Enigmail-Draft-Status: N1110 Message-ID: <5654D6D9.1050108@akamai.com> Date: Tue, 24 Nov 2015 16:30:01 -0500 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 9335 Lines: 253 On 11/24/2015 10:21 AM, Eric Dumazet wrote: > On Tue, Nov 24, 2015 at 6:18 AM, Dmitry Vyukov wrote: >> Hello, >> >> The following program triggers use-after-free in sock_wake_async: >> >> // autogenerated by syzkaller (http://github.com/google/syzkaller) >> #include >> #include >> #include >> #include >> >> long r2 = -1; >> long r3 = -1; >> long r7 = -1; >> >> void *thr0(void *arg) >> { >> syscall(SYS_splice, r2, 0x0ul, r7, 0x0ul, 0x4ul, 0x8ul); >> return 0; >> } >> >> void *thr1(void *arg) >> { >> syscall(SYS_close, r2, 0, 0, 0, 0, 0); >> return 0; >> } >> >> void *thr2(void *arg) >> { >> syscall(SYS_write, r3, 0x20003000ul, 0xe7ul, 0, 0, 0); >> return 0; >> } >> >> int main() >> { >> long r0 = syscall(SYS_mmap, 0x20000000ul, 0x10000ul, 0x3ul, >> 0x32ul, 0xfffffffffffffffful, 0x0ul); >> long r1 = syscall(SYS_socketpair, 0x1ul, 0x1ul, 0x0ul, >> 
0x20000000ul, 0, 0); >> r2 = *(uint32_t*)0x20000000; >> r3 = *(uint32_t*)0x20000004; >> >> *(uint64_t*)0x20001000 = 0x4; >> long r5 = syscall(SYS_ioctl, r2, 0x5452ul, 0x20001000ul, 0, 0, 0); >> >> long r6 = syscall(SYS_pipe2, 0x20002000ul, 0x80800ul, 0, 0, 0, 0); >> r7 = *(uint32_t*)0x20002004; >> >> pthread_t th[3]; >> pthread_create(&th[0], 0, thr0, 0); >> pthread_create(&th[1], 0, thr1, 0); >> pthread_create(&th[2], 0, thr2, 0); >> pthread_join(th[0], 0); >> pthread_join(th[1], 0); >> pthread_join(th[2], 0); >> return 0; >> } >> >> >> The use-after-free fires after a minute of running it in a tight >> parallel loop. I use the stress utility for this: >> >> $ go get golang.org/x/tools/cmd/stress >> $ stress -p 128 -failure "ignore" ./a.out >> >> >> ================================================================== >> BUG: KASAN: use-after-free in sock_wake_async+0x325/0x340 at addr >> ffff880061d1ad10 >> Read of size 8 by task a.out/23178 >> ============================================================================= >> BUG sock_inode_cache (Not tainted): kasan: bad access detected >> ----------------------------------------------------------------------------- >> >> Disabling lock debugging due to kernel taint >> INFO: Allocated in sock_alloc_inode+0x1d/0x220 age=0 cpu=2 pid=23183 >> [< none >] kmem_cache_alloc+0x1a6/0x1f0 mm/slub.c:2514 >> [< none >] sock_alloc_inode+0x1d/0x220 net/socket.c:250 >> [< none >] alloc_inode+0x61/0x180 fs/inode.c:198 >> [< none >] new_inode_pseudo+0x17/0xe0 fs/inode.c:878 >> [< none >] sock_alloc+0x3d/0x260 net/socket.c:540 >> [< none >] __sock_create+0xa7/0x620 net/socket.c:1133 >> [< inline >] sock_create net/socket.c:1209 >> [< inline >] SYSC_socketpair net/socket.c:1281 >> [< none >] SyS_socketpair+0x112/0x4e0 net/socket.c:1260 >> [< none >] entry_SYSCALL_64_fastpath+0x16/0x7a >> arch/x86/entry/entry_64.S:185 >> >> INFO: Freed in sock_destroy_inode+0x56/0x70 age=0 cpu=2 pid=23185 >> [< none >] kmem_cache_free+0x24e/0x260 
mm/slub.c:2742 >> [< none >] sock_destroy_inode+0x56/0x70 net/socket.c:279 >> [< none >] destroy_inode+0xc4/0x120 fs/inode.c:255 >> [< none >] evict+0x36b/0x580 fs/inode.c:559 >> [< inline >] iput_final fs/inode.c:1477 >> [< none >] iput+0x4a0/0x790 fs/inode.c:1504 >> [< inline >] dentry_iput fs/dcache.c:358 >> [< none >] __dentry_kill+0x4fe/0x700 fs/dcache.c:543 >> [< inline >] dentry_kill fs/dcache.c:587 >> [< none >] dput+0x6ab/0x7a0 fs/dcache.c:796 >> [< none >] __fput+0x3fb/0x6e0 fs/file_table.c:226 >> [< none >] ____fput+0x15/0x20 fs/file_table.c:244 >> [< none >] task_work_run+0x163/0x1f0 kernel/task_work.c:115 >> (discriminator 1) >> [< inline >] tracehook_notify_resume include/linux/tracehook.h:191 >> [< none >] exit_to_usermode_loop+0x180/0x1a0 >> arch/x86/entry/common.c:251 >> [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:282 >> [< none >] syscall_return_slowpath+0x19f/0x210 >> arch/x86/entry/common.c:344 >> [< none >] int_ret_from_sys_call+0x25/0x9f >> arch/x86/entry/entry_64.S:281 >> >> INFO: Slab 0xffffea0001874600 objects=25 used=2 fp=0xffff880061d1c100 >> flags=0x500000000004080 >> INFO: Object 0xffff880061d1ad00 @offset=11520 fp=0xffff880061d1a300 >> CPU: 3 PID: 23178 Comm: a.out Tainted: G B 4.4.0-rc1+ #84 >> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 >> 00000000ffffffff ffff880061baf8f0 ffffffff825d3336 ffff88003e0dc280 >> ffff880061d1ad00 ffff880061d18000 ffff880061baf920 ffffffff81618784 >> ffff88003e0dc280 ffffea0001874600 ffff880061d1ad00 00000000000000e7 >> >> Call Trace: >> [] __asan_report_load8_noabort+0x3e/0x40 >> mm/kasan/report.c:280 >> [< inline >] __read_once_size include/linux/compiler.h:218 >> [] sock_wake_async+0x325/0x340 net/socket.c:1068 >> [< inline >] sk_wake_async include/net/sock.h:2011 >> [] sock_def_readable+0x1e4/0x290 net/core/sock.c:2312 >> [] unix_stream_sendmsg+0x4db/0x930 net/unix/af_unix.c:1864 >> [< inline >] sock_sendmsg_nosec net/socket.c:610 >> [] 
sock_sendmsg+0xca/0x110 net/socket.c:620 >> [] sock_write_iter+0x216/0x3a0 net/socket.c:819 >> [< inline >] new_sync_write fs/read_write.c:478 >> [] __vfs_write+0x300/0x470 fs/read_write.c:491 >> [] vfs_write+0x16e/0x490 fs/read_write.c:538 >> [< inline >] SYSC_write fs/read_write.c:585 >> [] SyS_write+0x111/0x220 fs/read_write.c:577 >> [] entry_SYSCALL_64_fastpath+0x16/0x7a >> arch/x86/entry/entry_64.S:185 >> ================================================================== >> >> >> I am on commit 8005c49d9aea74d382f474ce11afbbc7d7130bec (Nov 15) but >> also merged in 7d267278a9ece963d77eefec61630223fce08c6c (unix: avoid >> use-after-free in ep_remove_wait_queue) from net repo. >> >> Thanks > > Looks like commit 830a1e5c212fb3fdc83b66359c780c3b3a294897 should be reverted ? > > commit 830a1e5c212fb3fdc83b66359c780c3b3a294897 > Author: Benjamin LaHaise > Date: Tue Dec 13 23:22:32 2005 -0800 > > [AF_UNIX]: Remove superfluous reference counting in unix_stream_sendmsg > > AF_UNIX stream socket performance on P4 CPUs tends to suffer due to a > lot of pipeline flushes from atomic operations. The patch below > removes the sock_hold() and sock_put() in unix_stream_sendmsg(). This > should be safe as the socket still holds a reference to its peer which > is only released after the file descriptor's final user invokes > unix_release_sock(). The only consideration is that we must add a > memory barrier before setting the peer initially. > > Signed-off-by: Benjamin LaHaise > Signed-off-by: David S. Miller > -- So looking at this trace, I think it's the other->sk_socket that gets freed, and then we call sk_wake_async() on it. I think we could grab the socket reference there under unix_state_lock(), since that lock is held by unix_release_sock() before the final iput() is called. 
So something like below might work (compile tested only): diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index aaa0b58..2b014f1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -196,6 +196,19 @@ static inline int unix_recvq_full(struct sock const *sk) return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } +struct socket *unix_peer_get_socket(struct sock *s) +{ + struct socket *peer; + + unix_state_lock(s); + peer = s->sk_socket; + if (peer) + __iget(SOCK_INODE(s->sk_socket)); + unix_state_unlock(s); + + return peer; +} + struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -1639,6 +1652,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct sock *other = NULL; + struct socket *other_socket = NULL; int err, size; struct sk_buff *skb; int sent = 0; @@ -1662,7 +1676,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } else { err = -ENOTCONN; other = unix_peer(sk); - if (!other) + if (other) + other_socket = unix_peer_get_socket(other); + + if (!other_socket) goto out_err; } @@ -1721,6 +1738,9 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } + if (other_socket) + iput(SOCK_INODE(other_socket)); + scm_destroy(&scm); return sent; @@ -1733,6 +1753,8 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: + if (other_socket) + iput(SOCK_INODE(other_socket)); scm_destroy(&scm); return sent ? : err; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/