Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1031079AbWKUQai (ORCPT ); Tue, 21 Nov 2006 11:30:38 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1031092AbWKUQai (ORCPT ); Tue, 21 Nov 2006 11:30:38 -0500 Received: from dea.vocord.ru ([217.67.177.50]:20906 "EHLO kano.factory.vocord.ru") by vger.kernel.org with ESMTP id S1031079AbWKUQag convert rfc822-to-8bit (ORCPT ); Tue, 21 Nov 2006 11:30:36 -0500 Cc: David Miller , Ulrich Drepper , Andrew Morton , Evgeniy Polyakov , netdev , Zach Brown , Christoph Hellwig , Chase Venters , Johann Borck , linux-kernel@vger.kernel.org, Jeff Garzik Subject: [take25 4/6] kevent: Socket notifications. In-Reply-To: <11641265982927@2ka.mipt.ru> X-Mailer: gregkh_patchbomb Date: Tue, 21 Nov 2006 19:29:58 +0300 Message-Id: <11641265981667@2ka.mipt.ru> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Reply-To: Evgeniy Polyakov To: Evgeniy Polyakov Content-Transfer-Encoding: 7BIT From: Evgeniy Polyakov Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11160 Lines: 394 Socket notifications. This patch includes socket send/recv/accept notifications. Using a trivial web server based on kevent and these features instead of epoll, its performance increased more than noticeably. More details about various benchmarks and the server itself (evserver_kevent.c) can be found on the project's homepage. 
Signed-off-by: Evgeniy Polyakov diff --git a/fs/inode.c b/fs/inode.c index ada7643..2740617 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -21,6 +21,7 @@ #include #include #include +#include #include /* @@ -164,12 +165,18 @@ static struct inode *alloc_inode(struct } inode->i_private = 0; inode->i_mapping = mapping; +#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE + kevent_storage_init(inode, &inode->st); +#endif } return inode; } void destroy_inode(struct inode *inode) { +#if defined CONFIG_KEVENT_SOCKET || defined CONFIG_KEVENT_PIPE + kevent_storage_fini(&inode->st); +#endif BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) diff --git a/include/net/sock.h b/include/net/sock.h index edd4d73..d48ded8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -48,6 +48,7 @@ #include #include /* struct sk_buff */ #include +#include #include @@ -450,6 +451,21 @@ static inline int sk_stream_memory_free( extern void sk_stream_rfree(struct sk_buff *skb); +struct socket_alloc { + struct socket socket; + struct inode vfs_inode; +}; + +static inline struct socket *SOCKET_I(struct inode *inode) +{ + return &container_of(inode, struct socket_alloc, vfs_inode)->socket; +} + +static inline struct inode *SOCK_INODE(struct socket *socket) +{ + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb->sk = sk; @@ -477,6 +493,7 @@ static inline void sk_add_backlog(struct sk->sk_backlog.tail = skb; } skb->next = NULL; + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); } #define sk_wait_event(__sk, __timeo, __condition) \ @@ -679,21 +696,6 @@ static inline struct kiocb *siocb_to_kio return si->kiocb; } -struct socket_alloc { - struct socket socket; - struct inode vfs_inode; -}; - -static inline struct socket *SOCKET_I(struct inode *inode) -{ - return &container_of(inode, struct socket_alloc, vfs_inode)->socket; -} - -static 
inline struct inode *SOCK_INODE(struct socket *socket) -{ - return &container_of(socket, struct socket_alloc, socket)->vfs_inode; -} - extern void __sk_stream_mem_reclaim(struct sock *sk); extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a093d0..69f4ad2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -857,6 +857,7 @@ static inline int tcp_prequeue(struct so tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, (3 * TCP_RTO_MIN) / 4, diff --git a/kernel/kevent/kevent_socket.c b/kernel/kevent/kevent_socket.c new file mode 100644 index 0000000..9c24b5b --- /dev/null +++ b/kernel/kevent/kevent_socket.c @@ -0,0 +1,142 @@ +/* + * kevent_socket.c + * + * 2006 Copyright (c) Evgeniy Polyakov + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int kevent_socket_callback(struct kevent *k) +{ + struct inode *inode = k->st->origin; + unsigned int events = SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, SOCKET_I(inode), NULL); + + if ((events & (POLLIN | POLLRDNORM)) && (k->event.event & (KEVENT_SOCKET_RECV | KEVENT_SOCKET_ACCEPT))) + return 1; + if ((events & (POLLOUT | POLLWRNORM)) && (k->event.event & KEVENT_SOCKET_SEND)) + return 1; + if (events & (POLLERR | POLLHUP)) + return -1; + return 0; +} + +int kevent_socket_enqueue(struct kevent *k) +{ + struct inode *inode; + struct socket *sock; + int err = -EBADF; + + sock = sockfd_lookup(k->event.id.raw[0], &err); + if (!sock) + goto err_out_exit; + + inode = igrab(SOCK_INODE(sock)); + if (!inode) + goto err_out_fput; + + err = kevent_storage_enqueue(&inode->st, k); + if (err) + goto err_out_iput; + + if (k->event.req_flags & KEVENT_REQ_ALWAYS_QUEUE) { + kevent_requeue(k); + err = 0; + } else { + err = k->callbacks.callback(k); + if (err) + goto err_out_dequeue; + } + + return err; + +err_out_dequeue: + kevent_storage_dequeue(k->st, k); +err_out_iput: + iput(inode); +err_out_fput: + sockfd_put(sock); +err_out_exit: + return err; +} + +int kevent_socket_dequeue(struct kevent *k) +{ + struct inode *inode = k->st->origin; + struct socket *sock; + + kevent_storage_dequeue(k->st, k); + + sock = SOCKET_I(inode); + iput(inode); + sockfd_put(sock); + + return 0; +} + +void kevent_socket_notify(struct sock *sk, u32 event) +{ + if (sk->sk_socket) + kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, event); +} + +/* + * It is required for network protocols compiled as modules, like IPv6. 
+ */ +EXPORT_SYMBOL_GPL(kevent_socket_notify); + +#ifdef CONFIG_LOCKDEP +static struct lock_class_key kevent_sock_key; + +void kevent_socket_reinit(struct socket *sock) +{ + struct inode *inode = SOCK_INODE(sock); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); +} + +void kevent_sk_reinit(struct sock *sk) +{ + if (sk->sk_socket) { + struct inode *inode = SOCK_INODE(sk->sk_socket); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); + } +} +#endif +static int __init kevent_init_socket(void) +{ + struct kevent_callbacks sc = { + .callback = &kevent_socket_callback, + .enqueue = &kevent_socket_enqueue, + .dequeue = &kevent_socket_dequeue}; + + return kevent_add_callbacks(&sc, KEVENT_SOCKET); +} +module_init(kevent_init_socket); diff --git a/net/core/sock.c b/net/core/sock.c index b77e155..7d5fa3e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1402,6 +1402,7 @@ static void sock_def_wakeup(struct sock if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_error_report(struct sock *sk) @@ -1411,6 +1412,7 @@ static void sock_def_error_report(struct wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,0,POLL_ERR); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_readable(struct sock *sk, int len) @@ -1420,6 +1422,7 @@ static void sock_def_readable(struct soc wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,1,POLL_IN); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_write_space(struct sock *sk) @@ -1439,6 +1442,7 @@ static void sock_def_write_space(struct } read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } static void sock_def_destruct(struct sock *sk) @@ -1489,6 +1493,8 @@ 
void sock_init_data(struct socket *sock, sk->sk_state = TCP_CLOSE; sk->sk_socket = sock; + kevent_sk_reinit(sk); + sock_set_flag(sk, SOCK_ZAPPED); if(sock) @@ -1555,8 +1561,10 @@ void fastcall release_sock(struct sock * if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&sk->sk_lock.wq)) + if (waitqueue_active(&sk->sk_lock.wq)) { wake_up(&sk->sk_lock.wq); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); + } spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/core/stream.c b/net/core/stream.c index d1d7dec..2878c2a 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock * wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, 2, POLL_OUT); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f884ce..e7dd989 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3119,6 +3119,7 @@ static void tcp_ofo_queue(struct sock *s __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) tcp_fin(skb, sk, skb->h.th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c83938b..b0dd70d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -870,6 +871,7 @@ int tcp_v4_conn_request(struct sock *sk, reqsk_free(req); } else { inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT); } return 0; diff --git a/net/socket.c b/net/socket.c index 1bc4167..5582b4a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -85,6 +85,7 @@ #include #include #include +#include #include #include @@ -490,6 +491,8 @@ static struct 
socket *sock_alloc(void) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; + kevent_socket_reinit(sock); + get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); return sock; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/