2008-05-19 23:52:14

by NeilBrown

Subject: Re: [PATCH 0/3] [RFC] knfsd: convert to kthread API and remove signaling for shutdown

On Monday May 19, [email protected] wrote:
> I think we also need to protect those against svc_create_pooled(). I
> tried this a few weeks back in an attempt to deal with a customer's
> problem, but gave up in disgust when the obvious changes resulted in the
> nfsds self-deadlocking during startup. The svc_serv was being created
> as a side effect of the first write to a /proc/fs/nfsd/ file, and the
> locking was very very confused. I think it would be great if you gave
> that code some care and attention.

What was confused???

Here is a quick-and-dirty conversion to mutexes. It starts and stops
threads without any deadlocking or lockdep warnings. I haven't tried
to synthesise any races so I cannot promise it is perfect, but it
feels close at least.

You are correct of course, svc_create_pooled is one of the bits that
needs protection.
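
The shape of the conversion is the same everywhere: the real work moves
into a helper that assumes the mutex is held, and a thin wrapper takes
and drops the lock around it. A distilled sketch of the pattern (the
write_foo names are made up for illustration; the real conversions are
in the patch below):

static ssize_t __write_foo(struct file *file, char *buf, size_t size)
{
	/* callers must hold nfsd_mutex */
	WARN_ON(!mutex_is_locked(&nfsd_mutex));
	/* safe to look at nfsd_serv and the thread counts here */
	return 0;
}

static ssize_t write_foo(struct file *file, char *buf, size_t size)
{
	ssize_t rv;

	mutex_lock(&nfsd_mutex);
	rv = __write_foo(file, buf, size);
	mutex_unlock(&nfsd_mutex);
	return rv;
}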

NeilBrown


Replace lock_kernel with a mutex for nfsd thread startup/shutdown locking.


Signed-off-by: Neil Brown <[email protected]>

### Diffstat output
./fs/nfsd/nfsctl.c | 39 +++++++++++++++++++++++++--------------
./fs/nfsd/nfssvc.c | 28 ++++++++++++++++------------
2 files changed, 41 insertions(+), 26 deletions(-)

diff .prev/fs/nfsd/nfsctl.c ./fs/nfsd/nfsctl.c
--- .prev/fs/nfsd/nfsctl.c 2008-05-20 09:49:52.000000000 +1000
+++ ./fs/nfsd/nfsctl.c 2008-05-20 09:50:25.000000000 +1000
@@ -441,6 +441,8 @@ static ssize_t write_threads(struct file
return strlen(buf);
}

+extern struct mutex nfsd_mutex;
+
static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
{
/* if size > 0, look for an array of number of threads per node
@@ -450,22 +452,26 @@ static ssize_t write_pool_threads(struct
int i;
int rv;
int len;
- int npools = nfsd_nrpools();
+ int npools;
int *nthreads;

+ mutex_lock(&nfsd_mutex);
+ npools = nfsd_nrpools();
if (npools == 0) {
/*
* NFS is shut down. The admin can start it by
* writing to the threads file but NOT the pool_threads
* file, sorry. Report zero threads.
*/
+ mutex_unlock(&nfsd_mutex);
strcpy(buf, "0\n");
return strlen(buf);
}

nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ rv = -ENOMEM;
if (nthreads == NULL)
- return -ENOMEM;
+ goto out_free;

if (size > 0) {
for (i = 0; i < npools; i++) {
@@ -496,10 +502,12 @@ static ssize_t write_pool_threads(struct
mesg += len;
}

+ mutex_unlock(&nfsd_mutex);
return (mesg-buf);

out_free:
kfree(nthreads);
+ mutex_unlock(&nfsd_mutex);
return rv;
}

@@ -566,14 +574,13 @@ static ssize_t write_versions(struct fil
return len;
}

-static ssize_t write_ports(struct file *file, char *buf, size_t size)
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
int len = 0;
- lock_kernel();
+
if (nfsd_serv)
len = svc_xprt_names(nfsd_serv, buf, 0);
- unlock_kernel();
return len;
}
/* Either a single 'fd' number is written, in which
@@ -603,9 +610,7 @@ static ssize_t write_ports(struct file *
/* Decrease the count, but don't shutdown the
* the service
*/
- lock_kernel();
nfsd_serv->sv_nrthreads--;
- unlock_kernel();
}
return err < 0 ? err : 0;
}
@@ -614,10 +619,8 @@ static ssize_t write_ports(struct file *
int len = 0;
if (!toclose)
return -ENOMEM;
- lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
- unlock_kernel();
if (len >= 0)
lockd_down();
kfree(toclose);
@@ -655,7 +658,6 @@ static ssize_t write_ports(struct file *
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
- lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
@@ -666,13 +668,22 @@ static ssize_t write_ports(struct file *
} else
err = -ENOTCONN;
}
- unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}

+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+ mutex_lock(&nfsd_mutex);
+ rv = __write_ports(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+
int nfsd_max_blksize;

static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +702,13 @@ static ssize_t write_maxblksize(struct f
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize &= ~(1024-1);
- lock_kernel();
+ mutex_lock(&nfsd_mutex);
if (nfsd_serv && nfsd_serv->sv_nrthreads) {
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return -EBUSY;
}
nfsd_max_blksize = bsize;
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
}
return sprintf(buf, "%d\n", nfsd_max_blksize);
}

diff .prev/fs/nfsd/nfssvc.c ./fs/nfsd/nfssvc.c
--- .prev/fs/nfsd/nfssvc.c 2008-05-20 09:49:52.000000000 +1000
+++ ./fs/nfsd/nfssvc.c 2008-05-20 09:47:59.000000000 +1000
@@ -190,13 +190,15 @@ void nfsd_reset_versions(void)
}
}

+DEFINE_MUTEX(nfsd_mutex);
+
int nfsd_create_serv(void)
{
int err = 0;
- lock_kernel();
+
+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
- unlock_kernel();
return 0;
}
if (nfsd_max_blksize == 0) {
@@ -223,7 +225,7 @@ int nfsd_create_serv(void)
nfsd, SIG_NOCLEAN, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
- unlock_kernel();
+
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
}
@@ -282,6 +284,8 @@ int nfsd_set_nrthreads(int n, int *nthre
int tot = 0;
int err = 0;

+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
if (nfsd_serv == NULL || n <= 0)
return 0;

@@ -316,7 +320,6 @@ int nfsd_set_nrthreads(int n, int *nthre
nthreads[0] = 1;

/* apply the new numbers */
- lock_kernel();
svc_get(nfsd_serv);
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +328,6 @@ int nfsd_set_nrthreads(int n, int *nthre
break;
}
svc_destroy(nfsd_serv);
- unlock_kernel();

return err;
}
@@ -334,8 +336,8 @@ int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
-
- lock_kernel();
+
+ mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
@@ -363,7 +365,7 @@ nfsd_svc(unsigned short port, int nrserv
failure:
svc_destroy(nfsd_serv); /* Release server */
out:
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return error;
}

@@ -399,7 +401,7 @@ nfsd(struct svc_rqst *rqstp)
sigset_t shutdown_mask, allowed_mask;

/* Lock module and set up kernel thread */
- lock_kernel();
+ mutex_lock(&nfsd_mutex);
daemonize("nfsd");

/* After daemonize() this kernel thread shares current->fs
@@ -417,11 +419,13 @@ nfsd(struct svc_rqst *rqstp)
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);

+
nfsdstats.th_cnt++;

rqstp->rq_task = current;

- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
+

/*
* We want less throttling in balance_dirty_pages() so that nfs to
@@ -477,7 +481,7 @@ nfsd(struct svc_rqst *rqstp)
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);

- lock_kernel();
+ mutex_lock(&nfsd_mutex);

nfsdstats.th_cnt --;

@@ -486,7 +490,7 @@ out:
svc_exit_thread(rqstp);

/* Release module */
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
}


2008-05-20 02:24:57

by Jeff Layton

Subject: Re: [PATCH 0/3] [RFC] knfsd: convert to kthread API and remove signaling for shutdown

On Tue, 20 May 2008 09:52:14 +1000
Neil Brown <[email protected]> wrote:

> On Monday May 19, [email protected] wrote:
> > I think we also need to protect those against svc_create_pooled(). I
> > tried this a few weeks back in an attempt to deal with a customer's
> > problem, but gave up in disgust when the obvious changes resulted in the
> > nfsds self-deadlocking during startup. The svc_serv was being created
> > as a side effect of the first write to a /proc/fs/nfsd/ file, and the
> > locking was very very confused. I think it would be great if you gave
> > that code some care and attention.
>
> What was confused???
>

> Here is a quick-and-dirty conversion to mutexes. It starts and stops
> threads without any deadlocking or lockdep warnings. I haven't tried
> to synthesise any races so I cannot promise it is perfect, but it
> feels close at least.
>
> You are correct of course, svc_create_pooled is one of the bits that
> needs protection.
>
> NeilBrown
>
>

I find all of the locking in rpc/nlm/nfs/nfsd *very* confusing. It's
always been explained to me that locks are best used to protect data
structures rather than sections of code. That makes it much easier to
get the locking right when the code changes.

This is the problem we have now with the BKL. So much of
rpc/nlm/nfs/nfsd runs under it that it's nearly impossible to tell what
it's actually intended to protect. If we're going to start a push
toward BKL removal, my humble request is that we try to be as explicit
as possible about what locks protect what data structures.
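
To make that concrete, I'd like every new lock to carry a comment
naming exactly the data it covers, along the lines of this hypothetical
annotation (not a claim about what Neil's mutex actually covers --
that's my question below):

/*
 * nfsd_mutex protects nfsd_serv: its creation and destruction, and
 * the sv_nrthreads count that governs its lifetime.
 */
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv *nfsd_serv;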

So that's my question about this patch -- exactly what data is this new
mutex intended to protect?


--
Jeff Layton <[email protected]>