2008-06-06 15:12:46

by Jeff Layton

Subject: [PATCH 0/5] Convert knfsd to kthread API and fix startup/shutdown races (try #3)

This patchset is the third attempt to convert nfsd to the kthread
API. Most of the changes here were suggested by Greg Banks. The main
changes from the second patchset are:

- add more stringent locking around some of the /proc/fs/nfsd
interfaces, particularly those that set parameters that are only
consulted when nfsd is starting.

- I've also changed more of these interfaces to return -EBUSY when
someone attempts to set them while nfsd is up.

- fixed /proc/fs/nfsd/nfsv4recoverydir to return the recoverydir
pathname when read. It previously always returned -EINVAL.

- removed the sv_kill_signal parameter from the svc_serv struct, and
changed svc_set_num_threads to send SIGINT to bring down a thread.

As always, comments and suggestions are appreciated...

Signed-off-by: Jeff Layton <[email protected]>


2008-06-06 15:12:57

by Jeff Layton

Subject: [PATCH 2/5] knfsd: clean up nfsd filesystem interfaces

Several of the nfsd filesystem interfaces allow changes to parameters
that don't have any effect on a running nfsd service; they are only ever
checked when nfsd is started. This patch makes those procfiles return
-EBUSY if nfsd is already running, to make it clear that on-the-fly
changes don't work.

The patch should also close some relatively harmless races between
changing the info in those interfaces and starting nfsd, since these
variables are being moved under the protection of the nfsd_mutex.
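
All of the converted handlers below follow the same basic shape. A rough
sketch (write_example()/__write_example() are illustrative names only,
not functions from this patch):

static ssize_t __write_example(struct file *file, char *buf, size_t size)
{
        /* this parameter is only consulted at nfsd startup, so refuse
         * changes while the server is up */
        if (size > 0 && nfsd_serv)
                return -EBUSY;

        /* ... parse buf and update the start-time-only parameter ... */
        return size;
}

static ssize_t write_example(struct file *file, char *buf, size_t size)
{
        ssize_t rv;

        mutex_lock(&nfsd_mutex);
        rv = __write_example(file, buf, size);
        mutex_unlock(&nfsd_mutex);
        return rv;
}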

Finally, the nfsv4recoverydir file always returns -EINVAL if read. This
patch fixes it to return the recoverydir path as expected.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 17 ++++++++++---
fs/nfsd/nfsctl.c | 66 +++++++++++++++++++++++++++++++++++++++++----------
fs/nfsd/nfssvc.c | 8 ++++++
3 files changed, 74 insertions(+), 17 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8799b87..c602aba 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3249,12 +3249,14 @@ nfs4_state_shutdown(void)
nfs4_unlock_state();
}

+/*
+ * user_recovery_dirname is protected by the nfsd_mutex since it's only
+ * accessed when nfsd is starting.
+ */
static void
nfs4_set_recdir(char *recdir)
{
- nfs4_lock_state();
strcpy(user_recovery_dirname, recdir);
- nfs4_unlock_state();
}

/*
@@ -3278,6 +3280,12 @@ nfs4_reset_recoverydir(char *recdir)
return status;
}

+char *
+nfs4_recoverydir(void)
+{
+ return user_recovery_dirname;
+}
+
/*
* Called when leasetime is changed.
*
@@ -3286,11 +3294,12 @@ nfs4_reset_recoverydir(char *recdir)
* we start to register any changes in lease time. If the administrator
* really wants to change the lease time *now*, they can go ahead and bring
* nfsd down and then back up again after changing the lease time.
+ *
+ * user_lease_time is protected by nfsd_mutex since it's only really accessed
+ * when nfsd is starting
*/
void
nfs4_reset_lease(time_t leasetime)
{
- lock_kernel();
user_lease_time = leasetime;
- unlock_kernel();
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 049d2a9..2c2eb87 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -509,7 +509,7 @@ out_free:
return rv;
}

-static ssize_t write_versions(struct file *file, char *buf, size_t size)
+static ssize_t __write_versions(struct file *file, char *buf, size_t size)
{
/*
* Format:
@@ -572,6 +572,16 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}

+static ssize_t write_versions(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_versions(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
@@ -675,6 +685,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
static ssize_t write_ports(struct file *file, char *buf, size_t size)
{
ssize_t rv;
+
mutex_lock(&nfsd_mutex);
rv = __write_ports(file, buf, size);
mutex_unlock(&nfsd_mutex);
@@ -714,16 +725,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
#ifdef CONFIG_NFSD_V4
extern time_t nfs4_leasetime(void);

-static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
{
/* if size > 10 seconds, call
* nfs4_reset_lease() then write out the new lease (seconds) as reply
*/
char *mesg = buf;
- int rv;
+ int rv, lease;

if (size > 0) {
- int lease;
+ if (nfsd_serv)
+ return -EBUSY;
rv = get_int(&mesg, &lease);
if (rv)
return rv;
@@ -735,24 +747,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
return strlen(buf);
}

-static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_leasetime(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+extern char *nfs4_recoverydir(void);
+
+static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
{
char *mesg = buf;
char *recdir;
int len, status;

- if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
- return -EINVAL;
- buf[size-1] = 0;
+ if (size > 0) {
+ if (nfsd_serv)
+ return -EBUSY;
+ if (size > PATH_MAX || buf[size-1] != '\n')
+ return -EINVAL;
+ buf[size-1] = 0;

- recdir = mesg;
- len = qword_get(&mesg, recdir, size);
- if (len <= 0)
- return -EINVAL;
+ recdir = mesg;
+ len = qword_get(&mesg, recdir, size);
+ if (len <= 0)
+ return -EINVAL;

- status = nfs4_reset_recoverydir(recdir);
+ status = nfs4_reset_recoverydir(recdir);
+ }
+ sprintf(buf, "%s\n", nfs4_recoverydir());
return strlen(buf);
}
+
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+
+ mutex_lock(&nfsd_mutex);
+ rv = __write_recoverydir(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
#endif

/*----------------------------------------------------------------------------*/
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 512bd04..929af23 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -70,6 +70,14 @@ static DEFINE_SPINLOCK(nfsd_call_lock);
* Transitions of the thread count between zero and non-zero are of particular
* interest since the svc_serv needs to be created and initialized at that
* point, or freed.
+ *
+ * Finally, the nfsd_mutex also protects some of the global variables that are
+ * accessed when nfsd starts and that are settable via the write_* routines in
+ * nfsctl.c. In particular:
+ *
+ * user_recovery_dirname
+ * user_lease_time
+ * nfsd_versions
*/
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv *nfsd_serv;
--
1.5.3.6


2008-06-06 15:12:47

by Jeff Layton

Subject: [PATCH 1/5] knfsd: Replace lock_kernel with a mutex for nfsd thread startup/shutdown locking.

From: Neil Brown <[email protected]>

This removes the BKL from the RPC service creation codepath. The BKL
really isn't adequate for this job since some of this info needs
protection across sleeps.

Also, add some comments to try to clarify how the locking should work
and to make it clear that the BKL isn't necessary as long as there is
adequate locking between tasks when touching the svc_serv fields.
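
To illustrate the problem: the BKL is implicitly released whenever its
holder sleeps, so a blocking allocation inside the "critical section"
leaves nfsd_serv unprotected. A rough sketch (example_create_serv() is a
hypothetical stand-in for the real, possibly-sleeping allocation path):

static int racy_startup(void)
{
        lock_kernel();
        if (nfsd_serv == NULL)
                nfsd_serv = example_create_serv(); /* may sleep, BKL dropped */
        unlock_kernel();
        return 0;
}

/* A mutex stays held across sleeps, so the same sequence is safe: */
static int safe_startup(void)
{
        mutex_lock(&nfsd_mutex);
        if (nfsd_serv == NULL)
                nfsd_serv = example_create_serv();
        mutex_unlock(&nfsd_mutex);
        return 0;
}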

Signed-off-by: Neil Brown <[email protected]>
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfsctl.c | 37 +++++++++++++++++++++++--------------
fs/nfsd/nfssvc.c | 45 ++++++++++++++++++++++++++++++++-------------
include/linux/nfsd/nfsd.h | 1 +
net/sunrpc/svc.c | 15 +++++++++------
4 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4..049d2a9 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -450,22 +450,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
int i;
int rv;
int len;
- int npools = nfsd_nrpools();
+ int npools;
int *nthreads;

+ mutex_lock(&nfsd_mutex);
+ npools = nfsd_nrpools();
if (npools == 0) {
/*
* NFS is shut down. The admin can start it by
* writing to the threads file but NOT the pool_threads
* file, sorry. Report zero threads.
*/
+ mutex_unlock(&nfsd_mutex);
strcpy(buf, "0\n");
return strlen(buf);
}

nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+ rv = -ENOMEM;
if (nthreads == NULL)
- return -ENOMEM;
+ goto out_free;

if (size > 0) {
for (i = 0; i < npools; i++) {
@@ -496,10 +500,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
mesg += len;
}

+ mutex_unlock(&nfsd_mutex);
return (mesg-buf);

out_free:
kfree(nthreads);
+ mutex_unlock(&nfsd_mutex);
return rv;
}

@@ -566,14 +572,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
return len;
}

-static ssize_t write_ports(struct file *file, char *buf, size_t size)
+static ssize_t __write_ports(struct file *file, char *buf, size_t size)
{
if (size == 0) {
int len = 0;
- lock_kernel();
+
if (nfsd_serv)
len = svc_xprt_names(nfsd_serv, buf, 0);
- unlock_kernel();
return len;
}
/* Either a single 'fd' number is written, in which
@@ -603,9 +608,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
/* Decrease the count, but don't shutdown the
* the service
*/
- lock_kernel();
nfsd_serv->sv_nrthreads--;
- unlock_kernel();
}
return err < 0 ? err : 0;
}
@@ -614,10 +617,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
int len = 0;
if (!toclose)
return -ENOMEM;
- lock_kernel();
if (nfsd_serv)
len = svc_sock_names(buf, nfsd_serv, toclose);
- unlock_kernel();
if (len >= 0)
lockd_down();
kfree(toclose);
@@ -655,7 +656,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
return -EINVAL;
- lock_kernel();
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
AF_UNSPEC, port);
@@ -666,13 +666,22 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
} else
err = -ENOTCONN;
}
- unlock_kernel();
return err < 0 ? err : 0;
}
}
return -EINVAL;
}

+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{
+ ssize_t rv;
+ mutex_lock(&nfsd_mutex);
+ rv = __write_ports(file, buf, size);
+ mutex_unlock(&nfsd_mutex);
+ return rv;
+}
+
+
int nfsd_max_blksize;

static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +700,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
if (bsize > NFSSVC_MAXBLKSIZE)
bsize = NFSSVC_MAXBLKSIZE;
bsize &= ~(1024-1);
- lock_kernel();
+ mutex_lock(&nfsd_mutex);
if (nfsd_serv && nfsd_serv->sv_nrthreads) {
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return -EBUSY;
}
nfsd_max_blksize = bsize;
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
}
return sprintf(buf, "%d\n", nfsd_max_blksize);
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f..512bd04 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -53,11 +53,27 @@
extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
struct timeval nfssvc_boot;
- struct svc_serv *nfsd_serv;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
static DEFINE_SPINLOCK(nfsd_call_lock);

+/*
+ * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
+ * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
+ * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
+ *
+ * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
+ * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
+ * of nfsd threads must exist and each must be listed in ->sp_all_threads in each
+ * entry of ->sv_pools[].
+ *
+ * Transitions of the thread count between zero and non-zero are of particular
+ * interest since the svc_serv needs to be created and initialized at that
+ * point, or freed.
+ */
+DEFINE_MUTEX(nfsd_mutex);
+struct svc_serv *nfsd_serv;
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat nfsd_acl_svcstats;
static struct svc_version * nfsd_acl_version[] = {
@@ -190,13 +206,14 @@ void nfsd_reset_versions(void)
}
}

+
int nfsd_create_serv(void)
{
int err = 0;
- lock_kernel();
+
+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
svc_get(nfsd_serv);
- unlock_kernel();
return 0;
}
if (nfsd_max_blksize == 0) {
@@ -223,7 +240,7 @@ int nfsd_create_serv(void)
nfsd, SIG_NOCLEAN, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
- unlock_kernel();
+
do_gettimeofday(&nfssvc_boot); /* record boot time */
return err;
}
@@ -282,6 +299,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
int tot = 0;
int err = 0;

+ WARN_ON(!mutex_is_locked(&nfsd_mutex));
+
if (nfsd_serv == NULL || n <= 0)
return 0;

@@ -316,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
nthreads[0] = 1;

/* apply the new numbers */
- lock_kernel();
svc_get(nfsd_serv);
for (i = 0; i < n; i++) {
err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +343,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
break;
}
svc_destroy(nfsd_serv);
- unlock_kernel();

return err;
}
@@ -334,8 +351,8 @@ int
nfsd_svc(unsigned short port, int nrservs)
{
int error;
-
- lock_kernel();
+
+ mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
error = -EINVAL;
if (nrservs <= 0)
@@ -363,7 +380,7 @@ nfsd_svc(unsigned short port, int nrservs)
failure:
svc_destroy(nfsd_serv); /* Release server */
out:
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
return error;
}

@@ -399,7 +416,7 @@ nfsd(struct svc_rqst *rqstp)
sigset_t shutdown_mask, allowed_mask;

/* Lock module and set up kernel thread */
- lock_kernel();
+ mutex_lock(&nfsd_mutex);
daemonize("nfsd");

/* After daemonize() this kernel thread shares current->fs
@@ -417,11 +434,13 @@ nfsd(struct svc_rqst *rqstp)
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);

+
nfsdstats.th_cnt++;

rqstp->rq_task = current;

- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
+

/*
* We want less throttling in balance_dirty_pages() so that nfs to
@@ -477,7 +496,7 @@ nfsd(struct svc_rqst *rqstp)
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);

- lock_kernel();
+ mutex_lock(&nfsd_mutex);

nfsdstats.th_cnt --;

@@ -486,7 +505,7 @@ out:
svc_exit_thread(rqstp);

/* Release module */
- unlock_kernel();
+ mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
}

diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h
index 41d30c9..88d85b9 100644
--- a/include/linux/nfsd/nfsd.h
+++ b/include/linux/nfsd/nfsd.h
@@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
extern struct svc_program nfsd_program;
extern struct svc_version nfsd_version2, nfsd_version3,
nfsd_version4;
+extern struct mutex nfsd_mutex;
extern struct svc_serv *nfsd_serv;

extern struct seq_operations nfs_exports_op;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 01c7e31..7bffaff 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -461,7 +461,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
EXPORT_SYMBOL(svc_create_pooled);

/*
- * Destroy an RPC service. Should be called with the BKL held
+ * Destroy an RPC service. Should be called with appropriate locking to
+ * protect the sv_nrthreads, sv_permsocks and sv_tempsocks.
*/
void
svc_destroy(struct svc_serv *serv)
@@ -578,9 +579,10 @@ out_enomem:
EXPORT_SYMBOL(svc_prepare_thread);

/*
- * Create a thread in the given pool. Caller must hold BKL.
- * On a NUMA or SMP machine, with a multi-pool serv, the thread
- * will be restricted to run on the cpus belonging to the pool.
+ * Create a thread in the given pool. Caller must hold BKL or another lock to
+ * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
+ * multi-pool serv, the thread will be restricted to run on the cpus belonging
+ * to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
@@ -674,7 +676,7 @@ found_pool:
* of threads the given number. If `pool' is non-NULL, applies
* only to threads in that pool, otherwise round-robins between
* all pools. Must be called with a svc_get() reference and
- * the BKL held.
+ * the BKL or another lock to protect access to svc_serv fields.
*
* Destroying threads relies on the service threads filling in
* rqstp->rq_task, which only the nfs ones do. Assumes the serv
@@ -722,7 +724,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
EXPORT_SYMBOL(svc_set_num_threads);

/*
- * Called from a server thread as it's exiting. Caller must hold BKL.
+ * Called from a server thread as it's exiting. Caller must hold the BKL or
+ * the "service mutex", whichever is appropriate for the service.
*/
void
svc_exit_thread(struct svc_rqst *rqstp)
--
1.5.3.6

2008-06-06 15:12:56

by Jeff Layton

Subject: [PATCH 3/5] knfsd: remove special handling for SIGHUP

The special handling for SIGHUP in knfsd is a holdover from much
earlier versions of Linux, where reloading the export table was
more expensive. That facility is not really needed anymore and,
to my knowledge, is seldom used.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfssvc.c | 33 ++++++++-------------------------
1 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 929af23..6339cb7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -44,11 +44,6 @@
* when not handling a request. i.e. when waiting
*/
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
-/* if the last thread dies with SIGHUP, then the exports table is
- * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
- * the exports table (like 2.2).
- */
-#define SIG_NOCLEAN SIGHUP

extern struct svc_program nfsd_program;
static void nfsd(struct svc_rqst *rqstp);
@@ -175,7 +170,6 @@ int nfsd_nrthreads(void)
return nfsd_serv->sv_nrthreads;
}

-static int killsig; /* signal that was used to kill last nfsd */
static void nfsd_last_thread(struct svc_serv *serv)
{
/* When last nfsd thread exits we need to do some clean-up */
@@ -186,11 +180,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
nfsd_racache_shutdown();
nfs4_state_shutdown();

- printk(KERN_WARNING "nfsd: last server has exited\n");
- if (killsig != SIG_NOCLEAN) {
- printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
- nfsd_export_flush();
- }
+ printk(KERN_WARNING "nfsd: last server has exited, flushing export "
+ "cache\n");
+ nfsd_export_flush();
}

void nfsd_reset_versions(void)
@@ -242,10 +234,9 @@ int nfsd_create_serv(void)
}

atomic_set(&nfsd_busy, 0);
- nfsd_serv = svc_create_pooled(&nfsd_program,
- nfsd_max_blksize,
- nfsd_last_thread,
- nfsd, SIG_NOCLEAN, THIS_MODULE);
+ nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
+ nfsd_last_thread, nfsd, SIGINT,
+ THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;

@@ -490,17 +481,9 @@ nfsd(struct svc_rqst *rqstp)
atomic_dec(&nfsd_busy);
}

- if (err != -EINTR) {
+ if (err != -EINTR)
printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
- } else {
- unsigned int signo;
-
- for (signo = 1; signo <= _NSIG; signo++)
- if (sigismember(&current->pending.signal, signo) &&
- !sigismember(&current->blocked, signo))
- break;
- killsig = signo;
- }
+
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);

--
1.5.3.6


2008-06-06 15:12:57

by Jeff Layton

Subject: [PATCH 4/5] knfsd: convert knfsd to kthread API

This patch is rather large, but I couldn't figure out a way to break it
up that would remain bisectable. It does several things:

- change svc_thread_fn typedef to better match what kthread_create expects
- change svc_pool_map_set_cpumask to be more kthread friendly. Make it
take a task arg and get rid of the "oldmask"
- have svc_set_num_threads call kthread_create directly
- eliminate __svc_create_thread
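
For reference, the bare kthread API pattern that svc_set_num_threads()
switches to looks roughly like this (a standalone sketch; example_fn()
and start_example() are illustrative names only):

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static int example_fn(void *data)
{
        while (!kthread_should_stop())
                msleep_interruptible(1000);
        return 0;
}

static struct task_struct *start_example(void)
{
        struct task_struct *task;

        task = kthread_create(example_fn, NULL, "example");
        if (IS_ERR(task))
                return task;
        /* the new task is created stopped, so cpu affinity can be set
         * here before it ever runs */
        wake_up_process(task);
        return task;
}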

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfssvc.c | 52 ++++++++++++++--------
include/linux/sunrpc/svc.h | 2 +-
net/sunrpc/svc.c | 102 +++++++++++++++-----------------------------
3 files changed, 68 insertions(+), 88 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6339cb7..fe62ec8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
#include <linux/smp_lock.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
@@ -46,7 +47,7 @@
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))

extern struct svc_program nfsd_program;
-static void nfsd(struct svc_rqst *rqstp);
+static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
@@ -407,18 +408,19 @@ update_thread_usage(int busy_threads)
/*
* This is the NFS server kernel thread
*/
-static void
-nfsd(struct svc_rqst *rqstp)
+static int
+nfsd(void *vrqstp)
{
+ struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct fs_struct *fsp;
- int err;
sigset_t shutdown_mask, allowed_mask;
+ int err, preverr = 0;
+ unsigned int signo;

/* Lock module and set up kernel thread */
mutex_lock(&nfsd_mutex);
- daemonize("nfsd");

- /* After daemonize() this kernel thread shares current->fs
+ /* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
fsp = copy_fs_struct(current->fs);
@@ -433,14 +435,18 @@ nfsd(struct svc_rqst *rqstp)
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);

+ /*
+ * thread is spawned with all signals set to SIG_IGN, re-enable
+ * the ones that matter
+ */
+ for (signo = 1; signo <= _NSIG; signo++) {
+ if (!sigismember(&shutdown_mask, signo))
+ allow_signal(signo);
+ }

nfsdstats.th_cnt++;
-
- rqstp->rq_task = current;
-
mutex_unlock(&nfsd_mutex);

-
/*
* We want less throttling in balance_dirty_pages() so that nfs to
* localhost doesn't cause nfsd to lock up due to all the client's
@@ -462,15 +468,25 @@ nfsd(struct svc_rqst *rqstp)
*/
while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
- if (err < 0)
+ if (err == -EINTR)
break;
+ else if (err < 0) {
+ if (err != preverr) {
+ printk(KERN_WARNING "%s: unexpected error "
+ "from svc_recv (%d)\n", __func__, -err);
+ preverr = err;
+ }
+ schedule_timeout_uninterruptible(HZ);
+ continue;
+ }
+
update_thread_usage(atomic_read(&nfsd_busy));
atomic_inc(&nfsd_busy);

/* Lock the export hash tables for reading. */
exp_readlock();

- /* Process request with signals blocked. */
+ /* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);

svc_process(rqstp);
@@ -479,25 +495,23 @@ nfsd(struct svc_rqst *rqstp)
exp_readunlock();
update_thread_usage(atomic_read(&nfsd_busy));
atomic_dec(&nfsd_busy);
- }

- if (err != -EINTR)
- printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
+ /* reset err in case kthread_stop is called */
+ err = 0;
+ }

/* Clear signals before calling svc_exit_thread() */
flush_signals(current);

mutex_lock(&nfsd_mutex);
-
nfsdstats.th_cnt --;

out:
/* Release the thread */
svc_exit_thread(rqstp);
-
- /* Release module */
mutex_unlock(&nfsd_mutex);
- module_put_and_exit(0);
+
+ return 0;
}

static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4b54c5f..011d6d8 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -22,7 +22,7 @@
/*
* This is the RPC server thread function prototype
*/
-typedef void (*svc_thread_fn)(struct svc_rqst *);
+typedef int (*svc_thread_fn)(void *);

/*
*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7bffaff..f461da2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/kthread.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
@@ -291,15 +292,14 @@ svc_pool_map_put(void)


/*
- * Set the current thread's cpus_allowed mask so that it
+ * Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
- *
- * Returns 1 and fills in oldmask iff a cpumask was applied.
*/
-static inline int
-svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+static inline void
+svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node = m->pool_to[pidx];

/*
* The caller checks for sv_nrpools > 1, which
@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
*/
BUG_ON(m->count == 0);

- switch (m->mode)
- {
- default:
- return 0;
+ switch (m->mode) {
case SVC_POOL_PERCPU:
{
- unsigned int cpu = m->pool_to[pidx];
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- return 1;
+ set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+ break;
}
case SVC_POOL_PERNODE:
{
- unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, nodecpumask);
- return 1;
+ set_cpus_allowed_ptr(task, nodecpumask);
+ break;
}
}
}
@@ -579,47 +570,6 @@ out_enomem:
EXPORT_SYMBOL(svc_prepare_thread);

/*
- * Create a thread in the given pool. Caller must hold BKL or another lock to
- * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
- * multi-pool serv, the thread will be restricted to run on the cpus belonging
- * to the pool.
- */
-static int
-__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
- struct svc_pool *pool)
-{
- struct svc_rqst *rqstp;
- int error = -ENOMEM;
- int have_oldmask = 0;
- cpumask_t uninitialized_var(oldmask);
-
- rqstp = svc_prepare_thread(serv, pool);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- goto out;
- }
-
- if (serv->sv_nrpools > 1)
- have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
-
- error = kernel_thread((int (*)(void *)) func, rqstp, 0);
-
- if (have_oldmask)
- set_cpus_allowed(current, oldmask);
-
- if (error < 0)
- goto out_thread;
- svc_sock_update_bufs(serv);
- error = 0;
-out:
- return error;
-
-out_thread:
- svc_exit_thread(rqstp);
- goto out;
-}
-
-/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
static inline struct svc_pool *
@@ -688,7 +638,9 @@ found_pool:
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct task_struct *victim;
+ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;

@@ -704,18 +656,32 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* create new threads */
while (nrservs > 0) {
nrservs--;
- __module_get(serv->sv_module);
- error = __svc_create_thread(serv->sv_function, serv,
- choose_pool(serv, pool, &state));
- if (error < 0) {
- module_put(serv->sv_module);
+ chosen_pool = choose_pool(serv, pool, &state);
+
+ rqstp = svc_prepare_thread(serv, chosen_pool);
+ if (IS_ERR(rqstp)) {
+ error = PTR_ERR(rqstp);
break;
}
+
+ task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ svc_exit_thread(rqstp);
+ break;
+ }
+
+ rqstp->rq_task = task;
+ if (serv->sv_nrpools > 1)
+ svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+
+ svc_sock_update_bufs(serv);
+ wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
- (victim = choose_victim(serv, pool, &state)) != NULL) {
- send_sig(serv->sv_kill_signal, victim, 1);
+ (task = choose_victim(serv, pool, &state)) != NULL) {
+ send_sig(serv->sv_kill_signal, task, 1);
nrservs++;
}

--
1.5.3.6


2008-06-06 15:12:57

by Jeff Layton

Subject: [PATCH 5/5] sunrpc: remove unneeded fields from svc_serv struct

Remove the sv_module and sv_kill_signal fields from the svc_serv struct.
svc_set_num_threads doesn't bother with module reference counts
anymore, and since we no longer have a special shutdown signal, we
might as well standardize on a single signal (SIGINT) that the kernel
always uses to take down the threads.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfssvc.c | 3 +--
include/linux/sunrpc/svc.h | 10 ++--------
net/sunrpc/svc.c | 10 +++-------
3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fe62ec8..f4b8592 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -236,8 +236,7 @@ int nfsd_create_serv(void)

atomic_set(&nfsd_busy, 0);
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
- nfsd_last_thread, nfsd, SIGINT,
- THIS_MODULE);
+ nfsd_last_thread, nfsd);
if (nfsd_serv == NULL)
err = -ENOMEM;

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 011d6d8..cd304c4 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -72,15 +72,10 @@ struct svc_serv {
unsigned int sv_nrpools; /* number of thread pools */
struct svc_pool * sv_pools; /* array of thread pools */

+ /* Callback to use when last thread exits */
void (*sv_shutdown)(struct svc_serv *serv);
- /* Callback to use when last thread
- * exits.
- */

- struct module * sv_module; /* optional module to count when
- * adding threads */
svc_thread_fn sv_function; /* main function for threads */
- int sv_kill_signal; /* signal to kill threads */
};

/*
@@ -388,8 +383,7 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
- void (*shutdown)(struct svc_serv*),
- svc_thread_fn, int sig, struct module *);
+ void (*shutdown)(struct svc_serv*), svc_thread_fn);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f461da2..a6794a8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -433,19 +433,15 @@ EXPORT_SYMBOL(svc_create);

struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- void (*shutdown)(struct svc_serv *serv),
- svc_thread_fn func, int sig, struct module *mod)
+ void (*shutdown)(struct svc_serv *serv), svc_thread_fn func)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();

serv = __svc_create(prog, bufsize, npools, shutdown);

- if (serv != NULL) {
+ if (serv != NULL)
serv->sv_function = func;
- serv->sv_kill_signal = sig;
- serv->sv_module = mod;
- }

return serv;
}
@@ -681,7 +677,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* destroy old threads */
while (nrservs < 0 &&
(task = choose_victim(serv, pool, &state)) != NULL) {
- send_sig(serv->sv_kill_signal, task, 1);
+ send_sig(SIGINT, task, 1);
nrservs++;
}

--
1.5.3.6


2008-06-06 17:24:31

by J. Bruce Fields

Subject: Re: [PATCH 4/5] knfsd: convert knfsd to kthread API

On Fri, Jun 06, 2008 at 11:12:50AM -0400, Jeff Layton wrote:
> This patch is rather large, but I couldn't figure out a way to break it
> up that would remain bisectable. It does several things:
>
> - change svc_thread_fn typedef to better match what kthread_create expects
> - change svc_pool_map_set_cpumask to be more kthread friendly. Make it
> take a task arg and and get rid of the "oldmask"
> - have svc_set_num_threads call kthread_create directly
> - eliminate __svc_create_thread
>
> Signed-off-by: Jeff Layton <[email protected]>
> ---
> fs/nfsd/nfssvc.c | 52 ++++++++++++++--------
> include/linux/sunrpc/svc.h | 2 +-
> net/sunrpc/svc.c | 102 +++++++++++++++-----------------------------
> 3 files changed, 68 insertions(+), 88 deletions(-)
>
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 6339cb7..fe62ec8 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -21,6 +21,7 @@
> #include <linux/smp_lock.h>
> #include <linux/freezer.h>
> #include <linux/fs_struct.h>
> +#include <linux/kthread.h>
>
> #include <linux/sunrpc/types.h>
> #include <linux/sunrpc/stats.h>
> @@ -46,7 +47,7 @@
> #define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
>
> extern struct svc_program nfsd_program;
> -static void nfsd(struct svc_rqst *rqstp);
> +static int nfsd(void *vrqstp);
> struct timeval nfssvc_boot;
> static atomic_t nfsd_busy;
> static unsigned long nfsd_last_call;
> @@ -407,18 +408,19 @@ update_thread_usage(int busy_threads)
> /*
> * This is the NFS server kernel thread
> */
> -static void
> -nfsd(struct svc_rqst *rqstp)
> +static int
> +nfsd(void *vrqstp)
> {
> + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
> struct fs_struct *fsp;
> - int err;
> sigset_t shutdown_mask, allowed_mask;
> + int err, preverr = 0;
> + unsigned int signo;
>
> /* Lock module and set up kernel thread */
> mutex_lock(&nfsd_mutex);
> - daemonize("nfsd");
>
> - /* After daemonize() this kernel thread shares current->fs
> + /* At this point, the thread shares current->fs
> * with the init process. We need to create files with a
> * umask of 0 instead of init's umask. */
> fsp = copy_fs_struct(current->fs);
> @@ -433,14 +435,18 @@ nfsd(struct svc_rqst *rqstp)
> siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
> siginitsetinv(&allowed_mask, ALLOWED_SIGS);
>
> + /*
> + * thread is spawned with all signals set to SIG_IGN, re-enable
> + * the ones that matter
> + */
> + for (signo = 1; signo <= _NSIG; signo++) {
> + if (!sigismember(&shutdown_mask, signo))
> + allow_signal(signo);
> + }
>
> nfsdstats.th_cnt++;
> -
> - rqstp->rq_task = current;
> -
> mutex_unlock(&nfsd_mutex);
>
> -
> /*
> * We want less throttling in balance_dirty_pages() so that nfs to
> * localhost doesn't cause nfsd to lock up due to all the client's
> @@ -462,15 +468,25 @@ nfsd(struct svc_rqst *rqstp)
> */
> while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
> ;
> - if (err < 0)
> + if (err == -EINTR)
> break;
> + else if (err < 0) {
> + if (err != preverr) {
> + printk(KERN_WARNING "%s: unexpected error "
> + "from svc_recv (%d)\n", __func__, -err);
> + preverr = err;
> + }
> + schedule_timeout_uninterruptible(HZ);
> + continue;
> + }
> +
> update_thread_usage(atomic_read(&nfsd_busy));
> atomic_inc(&nfsd_busy);
>
> /* Lock the export hash tables for reading. */
> exp_readlock();
>
> - /* Process request with signals blocked. */
> + /* Process request with signals blocked. */
> sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
>
> svc_process(rqstp);
> @@ -479,25 +495,23 @@ nfsd(struct svc_rqst *rqstp)
> exp_readunlock();
> update_thread_usage(atomic_read(&nfsd_busy));
> atomic_dec(&nfsd_busy);
> - }
>
> - if (err != -EINTR)
> - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
> + /* reset err in case kthread_stop is called */
> + err = 0;
> + }

There's no use of err here until it's next set in

while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)

so I assume the comment and this "err = 0" are artifacts of a previous
implementation.

>
> /* Clear signals before calling svc_exit_thread() */
> flush_signals(current);
>
> mutex_lock(&nfsd_mutex);
> -
> nfsdstats.th_cnt --;
>
> out:
> /* Release the thread */
> svc_exit_thread(rqstp);
> -
> - /* Release module */
> mutex_unlock(&nfsd_mutex);
> - module_put_and_exit(0);

How does the module refcounting work after this patch?

--b.

> +
> + return 0;
> }
>
> static __be32 map_new_errors(u32 vers, __be32 nfserr)
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 4b54c5f..011d6d8 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -22,7 +22,7 @@
> /*
> * This is the RPC server thread function prototype
> */
> -typedef void (*svc_thread_fn)(struct svc_rqst *);
> +typedef int (*svc_thread_fn)(void *);
>
> /*
> *
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 7bffaff..f461da2 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -18,6 +18,7 @@
> #include <linux/mm.h>
> #include <linux/interrupt.h>
> #include <linux/module.h>
> +#include <linux/kthread.h>
>
> #include <linux/sunrpc/types.h>
> #include <linux/sunrpc/xdr.h>
> @@ -291,15 +292,14 @@ svc_pool_map_put(void)
>
>
> /*
> - * Set the current thread's cpus_allowed mask so that it
> + * Set the given thread's cpus_allowed mask so that it
> * will only run on cpus in the given pool.
> - *
> - * Returns 1 and fills in oldmask iff a cpumask was applied.
> */
> -static inline int
> -svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
> +static inline void
> +svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
> {
> struct svc_pool_map *m = &svc_pool_map;
> + unsigned int node = m->pool_to[pidx];
>
> /*
> * The caller checks for sv_nrpools > 1, which
> @@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
> */
> BUG_ON(m->count == 0);
>
> - switch (m->mode)
> - {
> - default:
> - return 0;
> + switch (m->mode) {
> case SVC_POOL_PERCPU:
> {
> - unsigned int cpu = m->pool_to[pidx];
> -
> - *oldmask = current->cpus_allowed;
> - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
> - return 1;
> + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
> + break;
> }
> case SVC_POOL_PERNODE:
> {
> - unsigned int node = m->pool_to[pidx];
> node_to_cpumask_ptr(nodecpumask, node);
> -
> - *oldmask = current->cpus_allowed;
> - set_cpus_allowed_ptr(current, nodecpumask);
> - return 1;
> + set_cpus_allowed_ptr(task, nodecpumask);
> + break;
> }
> }
> }
> @@ -579,47 +570,6 @@ out_enomem:
> EXPORT_SYMBOL(svc_prepare_thread);
>
> /*
> - * Create a thread in the given pool. Caller must hold BKL or another lock to
> - * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
> - * multi-pool serv, the thread will be restricted to run on the cpus belonging
> - * to the pool.
> - */
> -static int
> -__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
> - struct svc_pool *pool)
> -{
> - struct svc_rqst *rqstp;
> - int error = -ENOMEM;
> - int have_oldmask = 0;
> - cpumask_t uninitialized_var(oldmask);
> -
> - rqstp = svc_prepare_thread(serv, pool);
> - if (IS_ERR(rqstp)) {
> - error = PTR_ERR(rqstp);
> - goto out;
> - }
> -
> - if (serv->sv_nrpools > 1)
> - have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
> -
> - error = kernel_thread((int (*)(void *)) func, rqstp, 0);
> -
> - if (have_oldmask)
> - set_cpus_allowed(current, oldmask);
> -
> - if (error < 0)
> - goto out_thread;
> - svc_sock_update_bufs(serv);
> - error = 0;
> -out:
> - return error;
> -
> -out_thread:
> - svc_exit_thread(rqstp);
> - goto out;
> -}
> -
> -/*
> * Choose a pool in which to create a new thread, for svc_set_num_threads
> */
> static inline struct svc_pool *
> @@ -688,7 +638,9 @@ found_pool:
> int
> svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> {
> - struct task_struct *victim;
> + struct svc_rqst *rqstp;
> + struct task_struct *task;
> + struct svc_pool *chosen_pool;
> int error = 0;
> unsigned int state = serv->sv_nrthreads-1;
>
> @@ -704,18 +656,32 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> /* create new threads */
> while (nrservs > 0) {
> nrservs--;
> - __module_get(serv->sv_module);
> - error = __svc_create_thread(serv->sv_function, serv,
> - choose_pool(serv, pool, &state));
> - if (error < 0) {
> - module_put(serv->sv_module);
> + chosen_pool = choose_pool(serv, pool, &state);
> +
> + rqstp = svc_prepare_thread(serv, chosen_pool);
> + if (IS_ERR(rqstp)) {
> + error = PTR_ERR(rqstp);
> break;
> }
> +
> + task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
> + if (IS_ERR(task)) {
> + error = PTR_ERR(task);
> + svc_exit_thread(rqstp);
> + break;
> + }
> +
> + rqstp->rq_task = task;
> + if (serv->sv_nrpools > 1)
> + svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
> +
> + svc_sock_update_bufs(serv);
> + wake_up_process(task);
> }
> /* destroy old threads */
> while (nrservs < 0 &&
> - (victim = choose_victim(serv, pool, &state)) != NULL) {
> - send_sig(serv->sv_kill_signal, victim, 1);
> + (task = choose_victim(serv, pool, &state)) != NULL) {
> + send_sig(serv->sv_kill_signal, task, 1);
> nrservs++;
> }
>
> --
> 1.5.3.6
>

2008-06-06 18:11:16

by Jeff Layton

Subject: Re: [PATCH 4/5] knfsd: convert knfsd to kthread API

On Fri, 6 Jun 2008 13:24:31 -0400
"J. Bruce Fields" <[email protected]> wrote:

> On Fri, Jun 06, 2008 at 11:12:50AM -0400, Jeff Layton wrote:
> > This patch is rather large, but I couldn't figure out a way to break it
> > up that would remain bisectable. It does several things:
> >
> > - change svc_thread_fn typedef to better match what kthread_create expects
> > - change svc_pool_map_set_cpumask to be more kthread friendly. Make it
> > take a task arg and and get rid of the "oldmask"
> > - have svc_set_num_threads call kthread_create directly
> > - eliminate __svc_create_thread
> >
> > Signed-off-by: Jeff Layton <[email protected]>
> > ---
> > fs/nfsd/nfssvc.c | 52 ++++++++++++++--------
> > include/linux/sunrpc/svc.h | 2 +-
> > net/sunrpc/svc.c | 102 +++++++++++++++-----------------------------
> > 3 files changed, 68 insertions(+), 88 deletions(-)
> >
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 6339cb7..fe62ec8 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -21,6 +21,7 @@
> > #include <linux/smp_lock.h>
> > #include <linux/freezer.h>
> > #include <linux/fs_struct.h>
> > +#include <linux/kthread.h>
> >
> > #include <linux/sunrpc/types.h>
> > #include <linux/sunrpc/stats.h>
> > @@ -46,7 +47,7 @@
> > #define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
> >
> > extern struct svc_program nfsd_program;
> > -static void nfsd(struct svc_rqst *rqstp);
> > +static int nfsd(void *vrqstp);
> > struct timeval nfssvc_boot;
> > static atomic_t nfsd_busy;
> > static unsigned long nfsd_last_call;
> > @@ -407,18 +408,19 @@ update_thread_usage(int busy_threads)
> > /*
> > * This is the NFS server kernel thread
> > */
> > -static void
> > -nfsd(struct svc_rqst *rqstp)
> > +static int
> > +nfsd(void *vrqstp)
> > {
> > + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
> > struct fs_struct *fsp;
> > - int err;
> > sigset_t shutdown_mask, allowed_mask;
> > + int err, preverr = 0;
> > + unsigned int signo;
> >
> > /* Lock module and set up kernel thread */
> > mutex_lock(&nfsd_mutex);
> > - daemonize("nfsd");
> >
> > - /* After daemonize() this kernel thread shares current->fs
> > + /* At this point, the thread shares current->fs
> > * with the init process. We need to create files with a
> > * umask of 0 instead of init's umask. */
> > fsp = copy_fs_struct(current->fs);
> > @@ -433,14 +435,18 @@ nfsd(struct svc_rqst *rqstp)
> > siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
> > siginitsetinv(&allowed_mask, ALLOWED_SIGS);
> >
> > + /*
> > + * thread is spawned with all signals set to SIG_IGN, re-enable
> > + * the ones that matter
> > + */
> > + for (signo = 1; signo <= _NSIG; signo++) {
> > + if (!sigismember(&shutdown_mask, signo))
> > + allow_signal(signo);
> > + }
> >
> > nfsdstats.th_cnt++;
> > -
> > - rqstp->rq_task = current;
> > -
> > mutex_unlock(&nfsd_mutex);
> >
> > -
> > /*
> > * We want less throttling in balance_dirty_pages() so that nfs to
> > * localhost doesn't cause nfsd to lock up due to all the client's
> > @@ -462,15 +468,25 @@ nfsd(struct svc_rqst *rqstp)
> > */
> > while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
> > ;
> > - if (err < 0)
> > + if (err == -EINTR)
> > break;
> > + else if (err < 0) {
> > + if (err != preverr) {
> > + printk(KERN_WARNING "%s: unexpected error "
> > + "from svc_recv (%d)\n", __func__, -err);
> > + preverr = err;
> > + }
> > + schedule_timeout_uninterruptible(HZ);
> > + continue;
> > + }
> > +
> > update_thread_usage(atomic_read(&nfsd_busy));
> > atomic_inc(&nfsd_busy);
> >
> > /* Lock the export hash tables for reading. */
> > exp_readlock();
> >
> > - /* Process request with signals blocked. */
> > + /* Process request with signals blocked. */
> > sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
> >
> > svc_process(rqstp);
> > @@ -479,25 +495,23 @@ nfsd(struct svc_rqst *rqstp)
> > exp_readunlock();
> > update_thread_usage(atomic_read(&nfsd_busy));
> > atomic_dec(&nfsd_busy);
> > - }
> >
> > - if (err != -EINTR)
> > - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
> > + /* reset err in case kthread_stop is called */
> > + err = 0;
> > + }
>
> There's no use of err here until it's next set in
>
> while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
>
> so I assume the comment and this "err = 0" are artifacts of a previous
> implementation.
>

That's correct. An earlier version of the patch had the main loop as
"while (!kthread_should_stop())". Since we're using signals to take the
thread down though, there's not much point in checking
kthread_should_stop(). I can remove the "err = 0;" there.
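
Roughly, the two loop shapes being compared (a sketch only, not code
from the series):

/* kthread_stop()-driven shutdown: the loop polls the stop flag */
while (!kthread_should_stop()) {
        err = svc_recv(rqstp, 60*60*HZ);
        /* handle err, process the request */
}

/* signal-driven shutdown (what nfsd keeps): the blocking receive
 * returns -EINTR when a shutdown signal arrives and the loop breaks.
 * Nothing looks at err after the loop, so resetting it buys nothing. */
for (;;) {
        err = svc_recv(rqstp, 60*60*HZ);
        if (err == -EINTR)
                break;
        /* handle other errors, process the request */
}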

> >
> > /* Clear signals before calling svc_exit_thread() */
> > flush_signals(current);
> >
> > mutex_lock(&nfsd_mutex);
> > -
> > nfsdstats.th_cnt --;
> >
> > out:
> > /* Release the thread */
> > svc_exit_thread(rqstp);
> > -
> > - /* Release module */
> > mutex_unlock(&nfsd_mutex);
> > - module_put_and_exit(0);
>
> How does the module refcounting work after this patch?
>
> --b.
>

I think I've goofed this part, actually. I was thinking that we didn't
need to bump the refcount here, and that the kernel would realize that
nfsd() hadn't returned and would prevent unloading until it had. This
doesn't seem to be the case. I'll need to go back and add refcounting
back in.

--
Jeff Layton <[email protected]>

2008-06-06 18:16:12

by J. Bruce Fields

Subject: Re: [PATCH 4/5] knfsd: convert knfsd to kthread API

On Fri, Jun 06, 2008 at 02:11:16PM -0400, Jeff Layton wrote:
> I think I've goofed this part, actually. I was thinking that we didn't
> need to bump the refcount here, and that the kernel would realize that
> nfsd() hadn't returned and would prevent unloading until it had. This
> doesn't seem to be the case. I'll need to go back and add refcounting
> back in.

OK. If you decide it is needed here, could you double-check the lockd
conversion as well? Looks like some refcounting logic might have gotten
lost there too.

--b.

2008-06-06 19:05:37

by Jeff Layton

Subject: Re: [PATCH 4/5] knfsd: convert knfsd to kthread API

On Fri, 6 Jun 2008 14:16:12 -0400
"J. Bruce Fields" <[email protected]> wrote:

> On Fri, Jun 06, 2008 at 02:11:16PM -0400, Jeff Layton wrote:
> > I think I've goofed this part, actually. I was thinking that we didn't
> > need to bump the refcount here, and that the kernel would realize that
> > nfsd() hadn't returned and would prevent unloading until it had. This
> > doesn't seem to be the case. I'll need to go back and add refcounting
> > back in.
>
> OK. If you decide it is needed here, could you double-check the lockd
> conversion as well? Looks like some refcounting logic might have gotten
> lost there too.
>
> --b.

Full disclosure:

I don't completely understand module refcounts and when we need to take
a reference. So feel free to set me straight if my comments below are
wrong :-)

The change to lockd was deliberate and was suggested by Neil Brown, when
I was working on an earlier version of the lockd-kthread patch:

--------------[snip]------------------

> - module_put_and_exit(0);
> + module_put(THIS_MODULE);
> + return 0;

This change bothers me. Putting the last ref to a module in code
inside that module is not safe, which is why module_put_and_exit
exists.

So this module_put is either unsafe or not needed. I think the
latter.

As you say in the comment, lockd_down now blocks until lockd actually
exits. As every caller of lockd_down will own a reference to the
lockd module, the lockd thread no longer needs to own a reference too.
So I think it is safe to remove the module_put, and also remove the
__module_get at the top of the lockd function.

--------------[snip]------------------

So I followed his advice and everything seems to be OK. I don't see a way
to yank out the lockd module while lockd is actually up, since the
callers of lockd_up() have to hold a reference to the lockd module, and
if those calling modules go away, then lockd should already be down anyway.

This is what led me to think that we didn't need this for nfsd either,
but that seems to be incorrect. I think nfsd is different because it's
started directly from userspace. We don't have any persistent module
references there, so we need to take one explicitly.
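
To make the contrast concrete, a minimal sketch of the nfsd-style
pattern (example_nfsd_style_thread() is illustrative, not the real
nfsd() function):

static int example_nfsd_style_thread(void *data)
{
        /* nothing else pins the module for us, so do it ourselves */
        __module_get(THIS_MODULE);

        while (!kthread_should_stop())
                schedule_timeout_interruptible(HZ);

        /* drop the reference and exit without returning into code that
         * might already be unloaded */
        module_put_and_exit(0);
        return 0;       /* not reached */
}

With lockd, by contrast, every caller of lockd_up()/lockd_down() already
holds a reference on the lockd module, so the thread itself can skip
both calls.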

--
Jeff Layton <[email protected]>

2008-06-06 19:49:48

by Jeff Layton

Subject: Re: [PATCH 4/5] knfsd: convert knfsd to kthread API

On Fri, 6 Jun 2008 14:11:16 -0400
Jeff Layton <[email protected]> wrote:

> On Fri, 6 Jun 2008 13:24:31 -0400
> "J. Bruce Fields" <[email protected]> wrote:
>
...
> > How does the module refcounting work after this patch?
> >
> > --b.
> >
>
> I think I've goofed this part, actually. I was thinking that we didn't
> need to bump the refcount here, and that the kernel would realize that
> nfsd() hadn't returned and would prevent unloading until it had. This
> doesn't seem to be the case. I'll need to go back and add refcounting
> back in.
>

Here's a respun patch that adds back in the module refcounts and also
removes the unneeded "err = 0;" at the bottom of the loop. Thoughts?

-----------[snip]--------------

From 794f3137ec5a5a8720b091f00b22807dab8f1be2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <[email protected]>
Date: Fri, 6 Jun 2008 14:44:23 -0400
Subject: [PATCH 4/5] knfsd: convert knfsd to kthread API

This patch is rather large, but I couldn't figure out a way to break it
up that would remain bisectable. It does several things:

- change svc_thread_fn typedef to better match what kthread_create expects
- change svc_pool_map_set_cpumask to be more kthread friendly. Make it
take a task arg and get rid of the "oldmask"
- have svc_set_num_threads call kthread_create directly
- eliminate __svc_create_thread

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfssvc.c | 49 +++++++++++++--------
include/linux/sunrpc/svc.h | 2 +-
net/sunrpc/svc.c | 102 +++++++++++++++-----------------------------
3 files changed, 66 insertions(+), 87 deletions(-)

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6339cb7..7fdfa23 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
#include <linux/smp_lock.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
+#include <linux/kthread.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
@@ -46,7 +47,7 @@
#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))

extern struct svc_program nfsd_program;
-static void nfsd(struct svc_rqst *rqstp);
+static int nfsd(void *vrqstp);
struct timeval nfssvc_boot;
static atomic_t nfsd_busy;
static unsigned long nfsd_last_call;
@@ -407,18 +408,20 @@ update_thread_usage(int busy_threads)
/*
* This is the NFS server kernel thread
*/
-static void
-nfsd(struct svc_rqst *rqstp)
+static int
+nfsd(void *vrqstp)
{
+ struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
struct fs_struct *fsp;
- int err;
sigset_t shutdown_mask, allowed_mask;
+ int err, preverr = 0;
+ unsigned int signo;

/* Lock module and set up kernel thread */
+ __module_get(THIS_MODULE);
mutex_lock(&nfsd_mutex);
- daemonize("nfsd");

- /* After daemonize() this kernel thread shares current->fs
+ /* At this point, the thread shares current->fs
* with the init process. We need to create files with a
* umask of 0 instead of init's umask. */
fsp = copy_fs_struct(current->fs);
@@ -433,14 +436,18 @@ nfsd(struct svc_rqst *rqstp)
siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
siginitsetinv(&allowed_mask, ALLOWED_SIGS);

+ /*
+ * thread is spawned with all signals set to SIG_IGN, re-enable
+ * the ones that matter
+ */
+ for (signo = 1; signo <= _NSIG; signo++) {
+ if (!sigismember(&shutdown_mask, signo))
+ allow_signal(signo);
+ }

nfsdstats.th_cnt++;
-
- rqstp->rq_task = current;
-
mutex_unlock(&nfsd_mutex);

-
/*
* We want less throttling in balance_dirty_pages() so that nfs to
* localhost doesn't cause nfsd to lock up due to all the client's
@@ -462,15 +469,25 @@ nfsd(struct svc_rqst *rqstp)
*/
while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
;
- if (err < 0)
+ if (err == -EINTR)
break;
+ else if (err < 0) {
+ if (err != preverr) {
+ printk(KERN_WARNING "%s: unexpected error "
+ "from svc_recv (%d)\n", __func__, -err);
+ preverr = err;
+ }
+ schedule_timeout_uninterruptible(HZ);
+ continue;
+ }
+
update_thread_usage(atomic_read(&nfsd_busy));
atomic_inc(&nfsd_busy);

/* Lock the export hash tables for reading. */
exp_readlock();

- /* Process request with signals blocked. */
+ /* Process request with signals blocked. */
sigprocmask(SIG_SETMASK, &allowed_mask, NULL);

svc_process(rqstp);
@@ -481,23 +498,19 @@ nfsd(struct svc_rqst *rqstp)
atomic_dec(&nfsd_busy);
}

- if (err != -EINTR)
- printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
-
/* Clear signals before calling svc_exit_thread() */
flush_signals(current);

mutex_lock(&nfsd_mutex);
-
nfsdstats.th_cnt --;

out:
/* Release the thread */
svc_exit_thread(rqstp);
-
- /* Release module */
mutex_unlock(&nfsd_mutex);
+
module_put_and_exit(0);
+ return 0;
}

static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4b54c5f..011d6d8 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -22,7 +22,7 @@
/*
* This is the RPC server thread function prototype
*/
-typedef void (*svc_thread_fn)(struct svc_rqst *);
+typedef int (*svc_thread_fn)(void *);

/*
*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7bffaff..f461da2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/kthread.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
@@ -291,15 +292,14 @@ svc_pool_map_put(void)


/*
- * Set the current thread's cpus_allowed mask so that it
+ * Set the given thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
- *
- * Returns 1 and fills in oldmask iff a cpumask was applied.
*/
-static inline int
-svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+static inline void
+svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node = m->pool_to[pidx];

/*
* The caller checks for sv_nrpools > 1, which
@@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
*/
BUG_ON(m->count == 0);

- switch (m->mode)
- {
- default:
- return 0;
+ switch (m->mode) {
case SVC_POOL_PERCPU:
{
- unsigned int cpu = m->pool_to[pidx];
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
- return 1;
+ set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
+ break;
}
case SVC_POOL_PERNODE:
{
- unsigned int node = m->pool_to[pidx];
node_to_cpumask_ptr(nodecpumask, node);
-
- *oldmask = current->cpus_allowed;
- set_cpus_allowed_ptr(current, nodecpumask);
- return 1;
+ set_cpus_allowed_ptr(task, nodecpumask);
+ break;
}
}
}
@@ -579,47 +570,6 @@ out_enomem:
EXPORT_SYMBOL(svc_prepare_thread);

/*
- * Create a thread in the given pool. Caller must hold BKL or another lock to
- * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a
- * multi-pool serv, the thread will be restricted to run on the cpus belonging
- * to the pool.
- */
-static int
-__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
- struct svc_pool *pool)
-{
- struct svc_rqst *rqstp;
- int error = -ENOMEM;
- int have_oldmask = 0;
- cpumask_t uninitialized_var(oldmask);
-
- rqstp = svc_prepare_thread(serv, pool);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- goto out;
- }
-
- if (serv->sv_nrpools > 1)
- have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
-
- error = kernel_thread((int (*)(void *)) func, rqstp, 0);
-
- if (have_oldmask)
- set_cpus_allowed(current, oldmask);
-
- if (error < 0)
- goto out_thread;
- svc_sock_update_bufs(serv);
- error = 0;
-out:
- return error;
-
-out_thread:
- svc_exit_thread(rqstp);
- goto out;
-}
-
-/*
* Choose a pool in which to create a new thread, for svc_set_num_threads
*/
static inline struct svc_pool *
@@ -688,7 +638,9 @@ found_pool:
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct task_struct *victim;
+ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ struct svc_pool *chosen_pool;
int error = 0;
unsigned int state = serv->sv_nrthreads-1;

@@ -704,18 +656,32 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
/* create new threads */
while (nrservs > 0) {
nrservs--;
- __module_get(serv->sv_module);
- error = __svc_create_thread(serv->sv_function, serv,
- choose_pool(serv, pool, &state));
- if (error < 0) {
- module_put(serv->sv_module);
+ chosen_pool = choose_pool(serv, pool, &state);
+
+ rqstp = svc_prepare_thread(serv, chosen_pool);
+ if (IS_ERR(rqstp)) {
+ error = PTR_ERR(rqstp);
break;
}
+
+ task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+ svc_exit_thread(rqstp);
+ break;
+ }
+
+ rqstp->rq_task = task;
+ if (serv->sv_nrpools > 1)
+ svc_pool_map_set_cpumask(task, chosen_pool->sp_id);
+
+ svc_sock_update_bufs(serv);
+ wake_up_process(task);
}
/* destroy old threads */
while (nrservs < 0 &&
- (victim = choose_victim(serv, pool, &state)) != NULL) {
- send_sig(serv->sv_kill_signal, victim, 1);
+ (task = choose_victim(serv, pool, &state)) != NULL) {
+ send_sig(serv->sv_kill_signal, task, 1);
nrservs++;
}

--
1.5.3.6