Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S968436AbXFHIDm (ORCPT ); Fri, 8 Jun 2007 04:03:42 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S968113AbXFHH3n (ORCPT ); Fri, 8 Jun 2007 03:29:43 -0400 Received: from 216-99-217-87.dsl.aracnet.com ([216.99.217.87]:60666 "EHLO sous-sol.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S968110AbXFHH3l (ORCPT ); Fri, 8 Jun 2007 03:29:41 -0400 Message-Id: <20070608072225.346346000@sous-sol.org> References: <20070608072127.352723000@sous-sol.org> User-Agent: quilt/0.46-1 Date: Fri, 08 Jun 2007 00:22:07 -0700 From: Chris Wright To: linux-kernel@vger.kernel.org, stable@kernel.org Cc: Justin Forbes , Zwane Mwaikambo , "Theodore Ts'o" , Randy Dunlap , Dave Jones , Chuck Wolber , Chris Wedgwood , Michael Krufky , Chuck Ebbert , Domenico Andreoli , torvalds@linux-foundation.org, akpm@linux-foundation.org, alan@lxorguk.ukuu.org.uk, David Miller , bunk@stusta.de, Greg Kroah-Hartman Subject: [patch 40/54] Fix AF_UNIX OOPS Content-Disposition: inline; filename=fix-af_unix-oops.patch Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 13083 Lines: 486 -stable review patch. If anyone has any objections, please let us know. --------------------- From: David Miller This combines two upstream commits to fix an OOPS with AF_UNIX and SELINUX. Basically, sk->sk_socket can become NULL because we access a peer socket without any locking, so it can be shut down and released in another thread. Commit: d410b81b4eef2e4409f9c38ef201253fbbcc7d94 [AF_UNIX]: Make socket locking much less confusing. The unix_state_*() locking macros imply that there is some rwlock kind of thing going on, but the implementation is actually a spinlock which makes the code more confusing than it needs to be. So use plain unix_state_lock and unix_state_unlock. Signed-off-by: David S. Miller Commit: 19fec3e807a487415e77113cb9dbdaa2da739836 [AF_UNIX]: Fix datagram connect race causing an OOPS. Based upon an excellent bug report and initial patch by Frederik Deweerdt. The UNIX datagram connect code blindly dereferences other->sk_socket via the call down to the security_unix_may_send() function. Without locking 'other' that pointer can go NULL via unix_release_sock() which does sock_orphan() which also marks the socket SOCK_DEAD. So we have to lock both 'sk' and 'other' yet avoid all kinds of potential deadlocks (connect to self is OK for datagram sockets and it is possible for two datagram sockets to perform a simultaneous connect to each other). So what we do is have a "double lock" function similar to how we handle this situation in other areas of the kernel. We take the lock of the socket pointer with the smallest address first in order to avoid ABBA style deadlocks. Once we have them both locked, we check to see if SOCK_DEAD is set for 'other' and if so, drop everything and retry the lookup. Signed-off-by: David S. Miller Signed-off-by: Chris Wright Signed-off-by: Greg Kroah-Hartman --- include/net/af_unix.h | 8 +-- net/unix/af_unix.c | 127 +++++++++++++++++++++++++++++++------------------- 2 files changed, 83 insertions(+), 52 deletions(-) --- linux-2.6.21.4.orig/include/net/af_unix.h +++ linux-2.6.21.4/include/net/af_unix.h @@ -62,13 +62,11 @@ struct unix_skb_parms { #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) #define UNIXSID(skb) (&UNIXCB((skb)).secid) -#define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_wlock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_wlock_nested(s) \ +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +#define unix_state_lock_nested(s) \ spin_lock_nested(&unix_sk(s)->lock, \ SINGLE_DEPTH_NESTING) -#define unix_state_wunlock(s) spin_unlock(&unix_sk(s)->lock) #ifdef __KERNEL__ /* The AF_UNIX socket */ --- linux-2.6.21.4.orig/net/unix/af_unix.c +++ linux-2.6.21.4/net/unix/af_unix.c @@ -175,11 +175,11 @@ static struct sock *unix_peer_get(struct { struct sock *peer; - unix_state_rlock(s); + unix_state_lock(s); peer = unix_peer(s); if (peer) sock_hold(peer); - unix_state_runlock(s); + unix_state_unlock(s); return peer; } @@ -370,7 +370,7 @@ static int unix_release_sock (struct soc unix_remove_socket(sk); /* Clear state */ - unix_state_wlock(sk); + unix_state_lock(sk); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; dentry = u->dentry; @@ -379,7 +379,7 @@ static int unix_release_sock (struct soc u->mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; - unix_state_wunlock(sk); + unix_state_unlock(sk); wake_up_interruptible_all(&u->peer_wait); @@ -387,12 +387,12 @@ static int unix_release_sock (struct soc if (skpair!=NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { - unix_state_wlock(skpair); + unix_state_lock(skpair); /* No more writes */ skpair->sk_shutdown = SHUTDOWN_MASK; if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) skpair->sk_err = ECONNRESET; - unix_state_wunlock(skpair); + unix_state_unlock(skpair); skpair->sk_state_change(skpair); read_lock(&skpair->sk_callback_lock); sk_wake_async(skpair,1,POLL_HUP); @@ -449,7 +449,7 @@ static int unix_listen(struct socket *so err = -EINVAL; if (!u->addr) goto out; /* No listens on an unbound socket */ - unix_state_wlock(sk); + unix_state_lock(sk); if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN) goto out_unlock; if (backlog > sk->sk_max_ack_backlog) @@ -463,7 +463,7 @@ static int unix_listen(struct socket *so err = 0; out_unlock: - unix_state_wunlock(sk); + unix_state_unlock(sk); out: return err; } @@ -859,6 +859,31 @@ out_mknod_parent: goto out_up; } +static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_lock(sk1); + return; + } + if (sk1 < sk2) { + unix_state_lock(sk1); + unix_state_lock_nested(sk2); + } else { + unix_state_lock(sk2); + unix_state_lock_nested(sk1); + } +} + +static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) +{ + if (unlikely(sk1 == sk2) || !sk2) { + unix_state_unlock(sk1); + return; + } + unix_state_unlock(sk1); + unix_state_unlock(sk2); +} + static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { @@ -878,11 +903,19 @@ static int unix_dgram_connect(struct soc !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0) goto out; +restart: other=unix_find_other(sunaddr, alen, sock->type, hash, &err); if (!other) goto out; - unix_state_wlock(sk); + unix_state_double_lock(sk, other); + + /* Apparently VFS overslept socket death. Retry. */ + if (sock_flag(other, SOCK_DEAD)) { + unix_state_double_unlock(sk, other); + sock_put(other); + goto restart; + } err = -EPERM; if (!unix_may_send(sk, other)) @@ -897,7 +930,7 @@ static int unix_dgram_connect(struct soc * 1003.1g breaking connected state with AF_UNSPEC */ other = NULL; - unix_state_wlock(sk); + unix_state_double_lock(sk, other); } /* @@ -906,19 +939,19 @@ static int unix_dgram_connect(struct soc if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk)=other; - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); if (other != old_peer) unix_dgram_disconnected(sk, old_peer); sock_put(old_peer); } else { unix_peer(sk)=other; - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); } return 0; out_unlock: - unix_state_wunlock(sk); + unix_state_double_unlock(sk, other); sock_put(other); out: return err; @@ -937,7 +970,7 @@ static long unix_wait_for_peer(struct so (skb_queue_len(&other->sk_receive_queue) > other->sk_max_ack_backlog); - unix_state_runlock(other); + unix_state_unlock(other); if (sched) timeo = schedule_timeout(timeo); @@ -995,11 +1028,11 @@ restart: goto out; /* Latch state of peer */ - unix_state_rlock(other); + unix_state_lock(other); /* Apparently VFS overslept socket death. Retry. */ if (sock_flag(other, SOCK_DEAD)) { - unix_state_runlock(other); + unix_state_unlock(other); sock_put(other); goto restart; } @@ -1049,18 +1082,18 @@ restart: goto out_unlock; } - unix_state_wlock_nested(sk); + unix_state_lock_nested(sk); if (sk->sk_state != st) { - unix_state_wunlock(sk); - unix_state_runlock(other); + unix_state_unlock(sk); + unix_state_unlock(other); sock_put(other); goto restart; } err = security_unix_stream_connect(sock, other->sk_socket, newsk); if (err) { - unix_state_wunlock(sk); + unix_state_unlock(sk); goto out_unlock; } @@ -1097,7 +1130,7 @@ restart: smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */ unix_peer(sk) = newsk; - unix_state_wunlock(sk); + unix_state_unlock(sk); /* take ten and and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); @@ -1106,14 +1139,14 @@ restart: * is installed to listening socket. */ atomic_inc(&newu->inflight); spin_unlock(&other->sk_receive_queue.lock); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, 0); sock_put(other); return 0; out_unlock: if (other) - unix_state_runlock(other); + unix_state_unlock(other); out: if (skb) @@ -1179,10 +1212,10 @@ static int unix_accept(struct socket *so wake_up_interruptible(&unix_sk(sk)->peer_wait); /* attach accepted sock to socket */ - unix_state_wlock(tsk); + unix_state_lock(tsk); newsock->state = SS_CONNECTED; sock_graft(tsk, newsock); - unix_state_wunlock(tsk); + unix_state_unlock(tsk); return 0; out: @@ -1209,7 +1242,7 @@ static int unix_getname(struct socket *s } u = unix_sk(sk); - unix_state_rlock(sk); + unix_state_lock(sk); if (!u->addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; @@ -1220,7 +1253,7 @@ static int unix_getname(struct socket *s *uaddr_len = addr->len; memcpy(sunaddr, addr->name, *uaddr_len); } - unix_state_runlock(sk); + unix_state_unlock(sk); sock_put(sk); out: return err; @@ -1338,7 +1371,7 @@ restart: goto out_free; } - unix_state_rlock(other); + unix_state_lock(other); err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; @@ -1348,20 +1381,20 @@ restart: * Check with 1003.1g - what should * datagram error */ - unix_state_runlock(other); + unix_state_unlock(other); sock_put(other); err = 0; - unix_state_wlock(sk); + unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk)=NULL; - unix_state_wunlock(sk); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; } else { - unix_state_wunlock(sk); + unix_state_unlock(sk); } other = NULL; @@ -1398,14 +1431,14 @@ restart: } skb_queue_tail(&other->sk_receive_queue, skb); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); scm_destroy(siocb->scm); return len; out_unlock: - unix_state_runlock(other); + unix_state_unlock(other); out_free: kfree_skb(skb); out: @@ -1495,14 +1528,14 @@ static int unix_stream_sendmsg(struct ki goto out_err; } - unix_state_rlock(other); + unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); - unix_state_runlock(other); + unix_state_unlock(other); other->sk_data_ready(other, size); sent+=size; } @@ -1513,7 +1546,7 @@ static int unix_stream_sendmsg(struct ki return sent; pipe_err_free: - unix_state_runlock(other); + unix_state_unlock(other); kfree_skb(skb); pipe_err: if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL)) @@ -1642,7 +1675,7 @@ static long unix_stream_data_wait(struct { DEFINE_WAIT(wait); - unix_state_rlock(sk); + unix_state_lock(sk); for (;;) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); @@ -1655,14 +1688,14 @@ static long unix_stream_data_wait(struct break; set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - unix_state_runlock(sk); + unix_state_unlock(sk); timeo = schedule_timeout(timeo); - unix_state_rlock(sk); + unix_state_lock(sk); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); } finish_wait(sk->sk_sleep, &wait); - unix_state_runlock(sk); + unix_state_unlock(sk); return timeo; } @@ -1817,12 +1850,12 @@ static int unix_shutdown(struct socket * mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN); if (mode) { - unix_state_wlock(sk); + unix_state_lock(sk); sk->sk_shutdown |= mode; other=unix_peer(sk); if (other) sock_hold(other); - unix_state_wunlock(sk); + unix_state_unlock(sk); sk->sk_state_change(sk); if (other && @@ -1834,9 +1867,9 @@ static int unix_shutdown(struct socket * peer_mode |= SEND_SHUTDOWN; if (mode&SEND_SHUTDOWN) peer_mode |= RCV_SHUTDOWN; - unix_state_wlock(other); + unix_state_lock(other); other->sk_shutdown |= peer_mode; - unix_state_wunlock(other); + unix_state_unlock(other); other->sk_state_change(other); read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) @@ -1974,7 +2007,7 @@ static int unix_seq_show(struct seq_file else { struct sock *s = v; struct unix_sock *u = unix_sk(s); - unix_state_rlock(s); + unix_state_lock(s); seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu", s, @@ -2002,7 +2035,7 @@ static int unix_seq_show(struct seq_file for ( ; i < len; i++) seq_putc(seq, u->addr->name->sun_path[i]); } - unix_state_runlock(s); + unix_state_unlock(s); seq_putc(seq, '\n'); } -- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/