Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1750848AbWITJLM (ORCPT ); Wed, 20 Sep 2006 05:11:12 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1750841AbWITJLL (ORCPT ); Wed, 20 Sep 2006 05:11:11 -0400 Received: from dea.vocord.ru ([217.67.177.50]:3054 "EHLO uganda.factory.vocord.ru") by vger.kernel.org with ESMTP id S1750823AbWITJKq convert rfc822-to-8bit (ORCPT ); Wed, 20 Sep 2006 05:10:46 -0400 Cc: David Miller , Ulrich Drepper , Andrew Morton , Evgeniy Polyakov , netdev , Zach Brown , Christoph Hellwig , Chase Venters , Johann Borck Subject: [take19 3/4] kevent: Socket notifications. In-Reply-To: <11587449512515@2ka.mipt.ru> X-Mailer: gregkh_patchbomb Date: Wed, 20 Sep 2006 13:35:51 +0400 Message-Id: <11587449512479@2ka.mipt.ru> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Reply-To: Evgeniy Polyakov To: lkml Content-Transfer-Encoding: 7BIT From: Evgeniy Polyakov Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11543 Lines: 411 Socket notifications. This patch includes socket send/recv/accept notifications. With a trivial web server based on kevent and these features instead of epoll, its performance increased more than noticeably. More details about the benchmark and the server itself (evserver_kevent.c) can be found on the project's homepage. 
Signed-off-by: Evgeniy Polyakov diff --git a/fs/inode.c b/fs/inode.c index 0bf9f04..181521d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -21,6 +21,7 @@ #include #include #include #include +#include #include /* @@ -165,12 +166,18 @@ #endif } memset(&inode->u, 0, sizeof(inode->u)); inode->i_mapping = mapping; +#if defined CONFIG_KEVENT_SOCKET + kevent_storage_init(inode, &inode->st); +#endif } return inode; } void destroy_inode(struct inode *inode) { +#if defined CONFIG_KEVENT_SOCKET + kevent_storage_fini(&inode->st); +#endif BUG_ON(inode_has_buffers(inode)); security_inode_free(inode); if (inode->i_sb->s_op->destroy_inode) diff --git a/include/linux/fs.h b/include/linux/fs.h index 2561020..a697930 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -236,6 +236,7 @@ #include #include #include #include +#include #include #include @@ -546,6 +547,10 @@ #ifdef CONFIG_INOTIFY struct mutex inotify_mutex; /* protects the watches list */ #endif +#ifdef CONFIG_KEVENT_SOCKET + struct kevent_storage st; +#endif + unsigned long i_state; unsigned long dirtied_when; /* jiffies of first dirtying */ @@ -698,6 +703,9 @@ #ifdef CONFIG_EPOLL struct list_head f_ep_links; spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ +#ifdef CONFIG_KEVENT_POLL + struct kevent_storage st; +#endif struct address_space *f_mapping; }; extern spinlock_t files_lock; diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea..5d71ed7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -48,6 +48,7 @@ #include #include #include /* struct sk_buff */ #include +#include #include @@ -450,6 +451,21 @@ static inline int sk_stream_memory_free( extern void sk_stream_rfree(struct sk_buff *skb); +struct socket_alloc { + struct socket socket; + struct inode vfs_inode; +}; + +static inline struct socket *SOCKET_I(struct inode *inode) +{ + return &container_of(inode, struct socket_alloc, vfs_inode)->socket; +} + +static inline struct inode *SOCK_INODE(struct socket *socket) +{ + return 
&container_of(socket, struct socket_alloc, socket)->vfs_inode; +} + static inline void sk_stream_set_owner_r(struct sk_buff *skb, struct sock *sk) { skb->sk = sk; @@ -477,6 +493,7 @@ static inline void sk_add_backlog(struct sk->sk_backlog.tail = skb; } skb->next = NULL; + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); } #define sk_wait_event(__sk, __timeo, __condition) \ @@ -679,21 +696,6 @@ static inline struct kiocb *siocb_to_kio return si->kiocb; } -struct socket_alloc { - struct socket socket; - struct inode vfs_inode; -}; - -static inline struct socket *SOCKET_I(struct inode *inode) -{ - return &container_of(inode, struct socket_alloc, vfs_inode)->socket; -} - -static inline struct inode *SOCK_INODE(struct socket *socket) -{ - return &container_of(socket, struct socket_alloc, socket)->vfs_inode; -} - extern void __sk_stream_mem_reclaim(struct sock *sk); extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind); diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a093d0..69f4ad2 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -857,6 +857,7 @@ static inline int tcp_prequeue(struct so tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, (3 * TCP_RTO_MIN) / 4, diff --git a/kernel/kevent/kevent_socket.c b/kernel/kevent/kevent_socket.c new file mode 100644 index 0000000..1ddd2a1 --- /dev/null +++ b/kernel/kevent/kevent_socket.c @@ -0,0 +1,126 @@ +/* + * kevent_socket.c + * + * 2006 Copyright (c) Evgeniy Polyakov + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int kevent_socket_callback(struct kevent *k) +{ + struct inode *inode = k->st->origin; + return SOCKET_I(inode)->ops->poll(SOCKET_I(inode)->file, SOCKET_I(inode), NULL); +} + +int kevent_socket_enqueue(struct kevent *k) +{ + struct inode *inode; + struct socket *sock; + int err = -ENODEV; + + sock = sockfd_lookup(k->event.id.raw[0], &err); + if (!sock) + goto err_out_exit; + + inode = igrab(SOCK_INODE(sock)); + if (!inode) + goto err_out_fput; + + err = kevent_storage_enqueue(&inode->st, k); + if (err) + goto err_out_iput; + + err = k->callbacks.callback(k); + if (err) + goto err_out_dequeue; + + sockfd_put(sock); + return err; + +err_out_dequeue: + kevent_storage_dequeue(k->st, k); +err_out_iput: + iput(inode); +err_out_fput: + sockfd_put(sock); +err_out_exit: + return err; +} + +int kevent_socket_dequeue(struct kevent *k) +{ + struct inode *inode = k->st->origin; + + kevent_storage_dequeue(k->st, k); + iput(inode); + + return 0; +} + +void kevent_socket_notify(struct sock *sk, u32 event) +{ + if (sk->sk_socket) + kevent_storage_ready(&SOCK_INODE(sk->sk_socket)->st, NULL, event); +} + +/* + * It is required for network protocols compiled as modules, like IPv6. 
+ */ +EXPORT_SYMBOL_GPL(kevent_socket_notify); + +#ifdef CONFIG_LOCKDEP +static struct lock_class_key kevent_sock_key; + +void kevent_socket_reinit(struct socket *sock) +{ + struct inode *inode = SOCK_INODE(sock); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); +} + +void kevent_sk_reinit(struct sock *sk) +{ + if (sk->sk_socket) { + struct inode *inode = SOCK_INODE(sk->sk_socket); + + lockdep_set_class(&inode->st.lock, &kevent_sock_key); + } +} +#endif +static int __init kevent_init_socket(void) +{ + struct kevent_callbacks sc = { + .callback = &kevent_socket_callback, + .enqueue = &kevent_socket_enqueue, + .dequeue = &kevent_socket_dequeue}; + + return kevent_add_callbacks(&sc, KEVENT_SOCKET); +} +module_init(kevent_init_socket); diff --git a/net/core/sock.c b/net/core/sock.c index 51fcfbc..4f91615 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1406,6 +1406,7 @@ static void sock_def_wakeup(struct sock if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_error_report(struct sock *sk) @@ -1415,6 +1416,7 @@ static void sock_def_error_report(struct wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,0,POLL_ERR); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_readable(struct sock *sk, int len) @@ -1424,6 +1426,7 @@ static void sock_def_readable(struct soc wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk,1,POLL_IN); read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); } static void sock_def_write_space(struct sock *sk) @@ -1443,6 +1446,7 @@ static void sock_def_write_space(struct } read_unlock(&sk->sk_callback_lock); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } static void sock_def_destruct(struct sock *sk) @@ -1493,6 +1497,8 @@ 
#endif sk->sk_state = TCP_CLOSE; sk->sk_socket = sock; + kevent_sk_reinit(sk); + sock_set_flag(sk, SOCK_ZAPPED); if(sock) @@ -1559,8 +1565,10 @@ void fastcall release_sock(struct sock * if (sk->sk_backlog.tail) __release_sock(sk); sk->sk_lock.owner = NULL; - if (waitqueue_active(&sk->sk_lock.wq)) + if (waitqueue_active(&sk->sk_lock.wq)) { wake_up(&sk->sk_lock.wq); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV|KEVENT_SOCKET_SEND); + } spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); diff --git a/net/core/stream.c b/net/core/stream.c index d1d7dec..2878c2a 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -36,6 +36,7 @@ void sk_stream_write_space(struct sock * wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) sock_wake_async(sock, 2, POLL_OUT); + kevent_socket_notify(sk, KEVENT_SOCKET_SEND|KEVENT_SOCKET_RECV); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 104af5d..14cee12 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3112,6 +3112,7 @@ static void tcp_ofo_queue(struct sock *s __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); + kevent_socket_notify(sk, KEVENT_SOCKET_RECV); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) tcp_fin(skb, sk, skb->h.th); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3e..cda1500 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -61,6 +61,7 @@ #include #include #include #include +#include #include #include @@ -867,6 +868,7 @@ #endif reqsk_free(req); } else { inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + kevent_socket_notify(sk, KEVENT_SOCKET_ACCEPT); } return 0; diff --git a/net/socket.c b/net/socket.c index b4848ce..42e19e2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -85,6 +85,7 @@ #include #include #include #include +#include #include #include @@ -526,6 +527,8 @@ static struct socket *sock_alloc(void) inode->i_uid = 
current->fsuid; inode->i_gid = current->fsgid; + kevent_socket_reinit(sock); + get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); return sock; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/