2002-10-30 23:53:23

by NeilBrown

[permalink] [raw]
Subject: Re: Re: [PATCH] zerocopy NFS for 2.5.43

On Thursday October 31, [email protected] wrote:
> Hello,
>
> How is it going?

I've just sent some patches to Linus and nfs@lists....

The rest of the zero copy stuff should fit in quite easily, with the
possible exception of single-copy writes: I haven't looked very hard
at that yet.

NeilBrown

>
> neilb> I would make a special 'fast-path' for that case which didn't copy any
> neilb> data but passed a skbuf up, and code in nfs*xdr.c would convert that
> neilb> into an iovec[];
> neilb>
> neilb> I am working on a patch which changes rpcsvc to use xdr_buf. Some of
> neilb> it works. Some doesn't. I include it below for your reference. I
> neilb> repeat: it doesn't work yet.
> neilb> Once it is done, adding the rest of zero-copy should be fairly easy.


-------------------------------------------------------
This sf.net email is sponsored by: Influence the future
of Java(TM) technology. Join the Java Community
Process(SM) (JCP(SM)) program now.
http://ads.sourceforge.net/cgi-bin/redirect.pl?sunm0004en
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs


2002-10-31 02:15:13

by Hirokazu Takahashi

[permalink] [raw]
Subject: Re: Re: [PATCH] zerocopy NFS for 2.5.43

Hello,

> > How is it going?
>
> I've just sent some patches to Linus and nfs@lists....

Thanks. I've seen them in linux-2.5.45.

> The rest of the zero copy stuff should fit in quite easily, with the
> possible exception of single-copy writes: I haven't looked very hard
> at that yet.

Ok,
I'll try to port the zero copy stuff on it.


-------------------------------------------------------
This sf.net email is sponsored by: Influence the future
of Java(TM) technology. Join the Java Community
Process(SM) (JCP(SM)) program now.
http://ads.sourceforge.net/cgi-bin/redirect.pl?sunm0004en
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs

2002-10-31 15:48:30

by Hirokazu Takahashi

[permalink] [raw]
Subject: Re: Re: [PATCH] zerocopy NFS for 2.5.43

Hello,

> The rest of the zero copy stuff should fit in quite easily, with the
> possible exception of single-copy writes: I haven't looked very hard
> at that yet.

I just ported part of the zero copy stuff against linux-2.5.45.
Single-copy writes and per-cpu sockets are not included yet.
And I fixed a problem that NFS over TCP wouldn't work.


va-nfsd-sendpage.patch ....use sendpage instead of sock_sendmsg.
va-sunrpc-zeropage.patch ....zero filled page for padding.
va-nfsd-vfsread.patch ....zero-copy nfsd_read/nfsd_readdir.


Attachments:
zerocopy-2.5.45.taz (6.12 kB)

2002-10-31 17:04:00

by Hirokazu Takahashi

[permalink] [raw]
Subject: Re: Re: [PATCH] zerocopy NFS for 2.5.43

Hello,

> > The rest of the zero copy stuff should fit in quite easily, with the
> > possible exception of single-copy writes: I haven't looked very hard
> > at that yet.
>
> I just ported part of the zero copy stuff against linux-2.5.45.
> single-copy writes and per-cpu sockets are not included yet.
> And I fixed a problem that NFS over TCP wouldn't work.

I also ported the per-cpu socket patch against linux-2.5.45.


--- include/linux/sunrpc/svcsock.h.ORG3 Fri Nov 1 01:29:52 2030
+++ include/linux/sunrpc/svcsock.h Fri Nov 1 01:31:28 2030
@@ -51,6 +51,7 @@ struct svc_sock {
int sk_reclen; /* length of record */
int sk_tcplen; /* current read length */
time_t sk_lastrecv; /* time of last received request */
+ struct svc_sock **sk_shadow; /* shadow sockets for sending */
};

/*
--- net/sunrpc/svcsock.c.ORG3 Fri Nov 1 01:30:14 2030
+++ net/sunrpc/svcsock.c Fri Nov 1 01:51:34 2030
@@ -64,7 +64,9 @@


static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
- int *errp, int pmap_reg);
+ int *errp, int type);
+#define SVSK_PMAP_REGISTER 1
+#define SVSK_SHADOW 2
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
@@ -259,6 +261,8 @@ svc_sock_put(struct svc_sock *svsk)
if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
spin_unlock_bh(&serv->sv_lock);
dprintk("svc: releasing dead socket\n");
+ if (svsk->sk_shadow)
+ kfree(svsk->sk_shadow);
sock_release(svsk->sk_sock);
kfree(svsk);
}
@@ -326,6 +330,27 @@ svc_wake_up(struct svc_serv *serv)
spin_unlock_bh(&serv->sv_lock);
}

+static inline struct svc_sock * /* socket to send on: this CPU's shadow of rqstp->rq_sock if one exists, else rq_sock itself */
+svc_get_svsk(struct svc_rqst *rqstp)
+{
+ struct svc_sock *svsk = rqstp->rq_sock;
+#ifdef CONFIG_SMP
+ if (svsk->sk_shadow) { /* NULL unless a shadow array was allocated for this socket */
+ struct svc_sock *shadow = svsk->sk_shadow[smp_processor_id()]; /* entry for the current CPU; may be NULL */
+ if (shadow) {
+ struct svc_serv *serv = svsk->sk_server; /* taken from the original socket before svsk is redirected */
+ svsk = shadow;
+ if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* flag is tested on the shadow; both sizes below are (sv_nrthreads+3) * sv_bufsz */
+ svc_sock_setbufsize(svsk->sk_sock,
+ (serv->sv_nrthreads+3) * serv->sv_bufsz,
+ (serv->sv_nrthreads+3) * serv->sv_bufsz);
+ }
+
+ }
+#endif
+ return svsk; /* without CONFIG_SMP this is always rqstp->rq_sock */
+}
+
/*
* Generic sendto routine
*/
@@ -333,7 +358,7 @@ static int
svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
{
mm_segment_t oldfs;
- struct svc_sock *svsk = rqstp->rq_sock;
+ struct svc_sock *svsk = svc_get_svsk(rqstp);
struct socket *sock = svsk->sk_sock;
struct msghdr msg;
int slen;
@@ -1228,7 +1253,7 @@ svc_send(struct svc_rqst *rqstp)
*/
static struct svc_sock *
svc_setup_socket(struct svc_serv *serv, struct socket *sock,
- int *errp, int pmap_register)
+ int *errp, int type)
{
struct svc_sock *svsk;
struct sock *inet;
@@ -1249,6 +1274,7 @@ svc_setup_socket(struct svc_serv *serv,
svsk->sk_owspace = inet->write_space;
svsk->sk_server = serv;
svsk->sk_lastrecv = CURRENT_TIME;
+ svsk->sk_shadow = NULL;
INIT_LIST_HEAD(&svsk->sk_deferred);
sema_init(&svsk->sk_sem, 1);

@@ -1261,7 +1287,7 @@ if (svsk->sk_sk == NULL)
printk(KERN_WARNING "svsk->sk_sk == NULL after svc_prot_init!\n");

/* Register socket with portmapper */
- if (*errp >= 0 && pmap_register)
+ if (*errp >= 0 && type == SVSK_PMAP_REGISTER)
*errp = svc_register(serv, inet->protocol,
ntohs(inet_sk(inet)->sport));

@@ -1273,13 +1299,13 @@ if (svsk->sk_sk == NULL)


spin_lock_bh(&serv->sv_lock);
- if (!pmap_register) {
+ if (type == SVSK_PMAP_REGISTER || type == SVSK_SHADOW) {
+ clear_bit(SK_TEMP, &svsk->sk_flags);
+ list_add(&svsk->sk_list, &serv->sv_permsocks);
+ } else {
set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++;
- } else {
- clear_bit(SK_TEMP, &svsk->sk_flags);
- list_add(&svsk->sk_list, &serv->sv_permsocks);
}
spin_unlock_bh(&serv->sv_lock);

@@ -1288,6 +1314,61 @@ if (svsk->sk_sk == NULL)
return svsk;
}

+
+/*
+ * Give svsk one extra UDP socket per online CPU, bound to the same address (sin), used for sending.
+ * Returns 0 on success or when nothing is done (!CONFIG_SMP, one CPU online), else -ENOMEM or the failing call's error.
+ */
+static int
+svc_create_shadow_socket(struct svc_serv *serv, struct svc_sock *svsk,
+ int protocol, struct sockaddr_in *sin)
+{
+#ifdef CONFIG_SMP
+ int error;
+ struct socket *newsock;
+ struct svc_sock *newsvsk;
+ int i;
+
+ if (num_online_cpus() == 1) /* single CPU online: no shadows are created */
+ return 0;
+
+ svsk->sk_shadow = kmalloc(sizeof(struct svc_sock*)*NR_CPUS, GFP_KERNEL); /* NR_CPUS slots, indexed by CPU number */
+ if (!svsk->sk_shadow)
+ return -ENOMEM;
+
+ memset(svsk->sk_shadow, 0, sizeof(struct svc_sock*)*NR_CPUS); /* slots of CPUs skipped below stay NULL */
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ continue;
+
+ if ((error = sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &newsock)) < 0)
+ return error;
+ if ((newsvsk = svc_setup_socket(serv, newsock, &error, SVSK_SHADOW)) == NULL) {
+ sock_release(newsock);
+ return error;
+ }
+ /* Make newsvsk a shadow of svsk: bind it to sin too (reuse permits it).
+ * NOTE(review): on bind failure newsvsk is kfree'd, yet svc_setup_socket()
+ * list_add()s SVSK_SHADOW sockets to sv_permsocks - confirm it is unlinked. */
+ newsock->sk->reuse = 1; /* allow address reuse */
+ error = newsock->ops->bind(newsock, (struct sockaddr *) sin,
+ sizeof(*sin));
+ if (error < 0) {
+ sock_release(newsock);
+ kfree(newsvsk);
+ return error;
+ }
+ /*
+ * Unhash the new socket so that it does not receive packets.
+ */
+ newsock->sk->prot->unhash(newsock->sk);
+ svsk->sk_shadow[i] = newsvsk; /* slot i belongs to CPU i */
+ }
+#endif
+ return 0;
+}
+
/*
* Create socket for RPC service.
*/
@@ -1327,8 +1408,13 @@ svc_create_socket(struct svc_serv *serv,
goto bummer;
}

- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
- return 0;
+ if ((svsk = svc_setup_socket(serv, sock, &error, SVSK_PMAP_REGISTER)) == NULL)
+ goto bummer;
+
+ if (protocol == IPPROTO_UDP && sin != NULL)
+ svc_create_shadow_socket(serv, svsk, protocol, sin);
+
+ return 0;

bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
@@ -1367,6 +1453,8 @@ svc_delete_socket(struct svc_sock *svsk)

if (!svsk->sk_inuse) {
spin_unlock_bh(&serv->sv_lock);
+ if (svsk->sk_shadow)
+ kfree(svsk->sk_shadow);
sock_release(svsk->sk_sock);
kfree(svsk);
} else {


-------------------------------------------------------
This sf.net email is sponsored by: Influence the future
of Java(TM) technology. Join the Java Community
Process(SM) (JCP(SM)) program now.
http://ads.sourceforge.net/cgi-bin/redirect.pl?sunm0004en
_______________________________________________
NFS maillist - [email protected]
https://lists.sourceforge.net/lists/listinfo/nfs