This code has grown a bit since my previous pencil-sketch code.
The goal is really the final patch: using a llist without spinlocks to
handle dispatch of idle threads. To get there I found it necessary - or
at least helpful - to do a lot of refactoring.
This code passes some basic tests, but I haven't pushed it hard yet.
Even if others aren't convinced that llists are the best solution, I
think a lot of this refactoring is still valuable.
Comments welcome,
Thanks,
NeilBrown
---
NeilBrown (14):
lockd: remove SIGKILL handling.
nfsd: don't allow nfsd threads to be signalled.
SUNRPC: call svc_process() from svc_recv().
SUNRPC: change svc_recv() to return void.
SUNRPC: remove timeout arg from svc_recv()
SUNRPC: change various server-side #defines to enum
SUNRPC: refactor svc_recv()
SUNRPC: integrate back-channel processing with svc_recv() and svc_process()
SUNRPC: change how svc threads are asked to exit.
SUNRPC: change svc_pool_wake_idle_thread() to return nothing.
SUNRPC: add list of idle threads
SUNRPC: discard SP_CONGESTED
SUNRPC: change service idle list to be an llist
SUNRPC: only have one thread waking up at a time
fs/lockd/svc.c | 49 ++-----
fs/lockd/svclock.c | 9 +-
fs/nfs/callback.c | 59 +-------
fs/nfsd/nfs4proc.c | 10 +-
fs/nfsd/nfssvc.c | 22 +--
include/linux/llist.h | 2 +
include/linux/lockd/lockd.h | 4 +-
include/linux/sunrpc/cache.h | 11 +-
include/linux/sunrpc/svc.h | 87 +++++++++---
include/linux/sunrpc/svc_xprt.h | 39 +++---
include/linux/sunrpc/svcauth.h | 29 ++--
include/linux/sunrpc/svcsock.h | 2 +-
include/linux/sunrpc/xprtsock.h | 25 ++--
include/trace/events/sunrpc.h | 5 +-
lib/llist.c | 27 ++++
net/sunrpc/backchannel_rqst.c | 8 +-
net/sunrpc/svc.c | 71 ++++------
net/sunrpc/svc_xprt.c | 226 ++++++++++++++++--------------
net/sunrpc/xprtrdma/backchannel.c | 2 +-
19 files changed, 347 insertions(+), 340 deletions(-)
--
Signature
Rather than searching a list of threads to find an idle one, having a
list of idle threads allows an idle thread to be found immediately.
This adds some spin_lock calls, which is not ideal, but as the hold time
is tiny it is still faster than searching a list. A future patch will
remove them using llist.h; that involves some subtlety and so is left
to a separate patch.
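To make the convention concrete: a thread is "busy" exactly when its
rq_idle entry is self-linked, which is what list_del_init() leaves
behind. Here is a minimal userspace sketch of just that convention -
the list helpers are simplified stand-ins for the kernel's
<linux/list.h>, not the real implementations:

#include <assert.h>
#include <stdbool.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *l) { l->next = l->prev = l; }
static bool list_empty(const struct list_head *l) { return l->next == l; }

static void list_add(struct list_head *new, struct list_head *head)
{
	/* insert 'new' immediately after 'head' */
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del_init(struct list_head *entry)
{
	/* unlink, then re-point at self so list_empty() becomes true */
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	INIT_LIST_HEAD(entry);
}

int main(void)
{
	struct list_head sp_idle_threads, rq_idle;

	INIT_LIST_HEAD(&sp_idle_threads);
	INIT_LIST_HEAD(&rq_idle);		/* svc_thread_set_busy() */
	assert(list_empty(&rq_idle));		/* busy: not on the idle list */

	list_add(&rq_idle, &sp_idle_threads);	/* thread goes idle */
	assert(!list_empty(&rq_idle));

	list_del_init(&rq_idle);		/* woken: dequeued, busy again */
	assert(list_empty(&rq_idle));
	return 0;
}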
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/sunrpc/svc.h | 25 ++++++++++++++++++++++++-
include/trace/events/sunrpc.h | 1 -
net/sunrpc/svc.c | 13 ++++++++-----
net/sunrpc/svc_xprt.c | 17 ++++++++++++-----
4 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index b39c613fbe06..ec8088d5b57f 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -36,6 +36,7 @@ struct svc_pool {
struct list_head sp_sockets; /* pending sockets */
unsigned int sp_nrthreads; /* # of threads in pool */
struct list_head sp_all_threads; /* all server threads */
+ struct list_head sp_idle_threads; /* idle server threads */
/* statistics on pool operation */
struct percpu_counter sp_messages_arrived;
@@ -199,6 +200,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
*/
struct svc_rqst {
struct list_head rq_all; /* all threads list */
+ struct list_head rq_idle; /* On the idle list */
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
@@ -278,10 +280,31 @@ enum {
RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
* cache pages */
RQ_VICTIM, /* Have agreed to shut down */
- RQ_BUSY, /* request is busy */
RQ_DATA, /* request has data */
};
+/**
+ * svc_thread_set_busy - mark a thread as busy
+ * @rqstp: the thread which is now busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
+{
+ INIT_LIST_HEAD(&rqstp->rq_idle);
+}
+
+/**
+ * svc_thread_busy - check if a thread is busy
+ * @rqstp: the thread which might be busy
+ *
+ * If rq_idle is "empty", the thread must be busy.
+ */
+static inline bool svc_thread_busy(struct svc_rqst *rqstp)
+{
+ return list_empty(&rqstp->rq_idle);
+}
+
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
/*
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index c79375e37dc2..dd0323c40ef5 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1677,7 +1677,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto);
svc_rqst_flag(DROPME) \
svc_rqst_flag(SPLICE_OK) \
svc_rqst_flag(VICTIM) \
- svc_rqst_flag(BUSY) \
svc_rqst_flag_end(DATA)
#undef svc_rqst_flag
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index fd49e7b12c94..028de8f662a8 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -644,7 +644,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
folio_batch_init(&rqstp->rq_fbatch);
- __set_bit(RQ_BUSY, &rqstp->rq_flags);
+ svc_thread_set_busy(rqstp);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
@@ -704,10 +704,13 @@ void svc_pool_wake_idle_thread(struct svc_serv *serv,
struct svc_rqst *rqstp;
rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
-
+ spin_lock_bh(&pool->sp_lock);
+ rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
+ struct svc_rqst, rq_idle);
+ if (rqstp)
+ list_del_init(&rqstp->rq_idle);
+ spin_unlock_bh(&pool->sp_lock);
+ if (rqstp) {
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
wake_up_process(rqstp->rq_task);
rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 964c97dbb36c..1d134415ae3f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -736,18 +736,25 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
- clear_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
+ spin_lock_bh(&pool->sp_lock);
+ list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+ spin_unlock_bh(&pool->sp_lock);
if (likely(rqst_should_sleep(rqstp)))
schedule();
else
__set_current_state(TASK_RUNNING);
- try_to_freeze();
+ /* We *must* be removed from the list before we can continue.
+ * If we were woken, this is already done
+ */
+ if (!svc_thread_busy(rqstp)) {
+ spin_lock_bh(&pool->sp_lock);
+ list_del_init(&rqstp->rq_idle);
+ spin_unlock_bh(&pool->sp_lock);
+ }
- set_bit(RQ_BUSY, &rqstp->rq_flags);
- smp_mb__after_atomic();
+ try_to_freeze();
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt) {
With an llist we don't need to take a lock to add a thread to the list,
though we still need a lock to remove it. That will go in the next
patch.
Unlike with doubly-linked lists, a thread cannot remove itself from the list.
Only the first thread can be removed, and that can change
asynchronously. So some care is needed.
Firstly, we don't add ourselves to the list if there is more work to do
- we just go ahead and do it, and so remain in the busy state. That
already applied to xprts being ready; it now applies to all possible
triggers. We DON'T keep the "likely" annotation on this test for "more
work to do": it isn't particularly likely that there is no work to do,
and there is definitely no need to optimise for that case.
Secondly, if we do find something that needs to be done after adding ourselves
to the list, we simply wake up the first thread on the list. If that
was us, we have been removed and can continue. If it was some other
thread, they will do the work that needs to be done. We can safely
sleep until woken.
With this change a thread can call rqst_should_sleep() without actually
proceeding to do the work even when told not to sleep. So that function
mustn't clear SP_TASK_PENDING: the flag must stay set until some thread
commits to not sleeping.
We also remove the test on freezing() from rqst_should_sleep(). Instead
we always try_to_freeze() before scheduling, which is needed as we now
schedule() in a loop waiting to be removed from the idle queue.
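The busy test is the subtle part: an llist node has no "empty list" to
test against, so a self-pointing rq_idle.next is used as the sentinel -
a node that is actually on the llist points to the next node or to
NULL, never to itself. A rough userspace model of just the busy
convention, with C11 atomics standing in for
smp_store_release()/smp_load_acquire() (the real llist_node has no
_Atomic member, so this is only a model):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

struct llist_node { _Atomic(struct llist_node *) next; };

/* Model of svc_thread_set_busy(): a self-pointing ->next marks the
 * thread busy, and guarantees it is not on the idle llist.
 */
static void thread_set_busy(struct llist_node *rq_idle)
{
	atomic_store_explicit(&rq_idle->next, rq_idle, memory_order_release);
}

/* Model of svc_thread_busy() */
static bool thread_busy(struct llist_node *rq_idle)
{
	return atomic_load_explicit(&rq_idle->next,
				    memory_order_acquire) == rq_idle;
}

int main(void)
{
	struct llist_node rq_idle;

	thread_set_busy(&rq_idle);
	assert(thread_busy(&rq_idle));
	return 0;
}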
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/sunrpc/svc.h | 14 +++++++-----
net/sunrpc/svc.c | 11 +++++----
net/sunrpc/svc_xprt.c | 51 ++++++++++++++++++++++++++------------------
3 files changed, 44 insertions(+), 32 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f5b69e6da0f8..5941b8a8e754 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -36,7 +36,7 @@ struct svc_pool {
struct list_head sp_sockets; /* pending sockets */
unsigned int sp_nrthreads; /* # of threads in pool */
struct list_head sp_all_threads; /* all server threads */
- struct list_head sp_idle_threads; /* idle server threads */
+ struct llist_head sp_idle_threads; /* idle server threads */
/* statistics on pool operation */
struct percpu_counter sp_messages_arrived;
@@ -199,7 +199,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
*/
struct svc_rqst {
struct list_head rq_all; /* all threads list */
- struct list_head rq_idle; /* On the idle list */
+ struct llist_node rq_idle; /* On the idle list */
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
@@ -286,22 +286,24 @@ enum {
* svc_thread_set_busy - mark a thread as busy
* @rqstp: the thread which is now busy
*
- * If rq_idle is "empty", the thread must be busy.
+ * By convention a thread is busy if rq_idle.next points to rq_idle.
+ * This ensures it is not on the idle list.
*/
static inline void svc_thread_set_busy(struct svc_rqst *rqstp)
{
- INIT_LIST_HEAD(&rqstp->rq_idle);
+ smp_store_release(&rqstp->rq_idle.next, &rqstp->rq_idle);
}
/**
* svc_thread_busy - check if a thread is busy
* @rqstp: the thread which might be busy
*
- * If rq_idle is "empty", the thread must be busy.
+ * By convention a thread is busy if rq_idle.next points to rq_idle.
+ * This ensures it is not on the idle list.
*/
static inline bool svc_thread_busy(struct svc_rqst *rqstp)
{
- return list_empty(&rqstp->rq_idle);
+ return smp_load_acquire(&rqstp->rq_idle.next) == &rqstp->rq_idle;
}
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f6da390932cd..9287a08250b2 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -702,15 +702,16 @@ void svc_pool_wake_idle_thread(struct svc_serv *serv,
struct svc_pool *pool)
{
struct svc_rqst *rqstp;
+ struct llist_node *ln;
rcu_read_lock();
spin_lock_bh(&pool->sp_lock);
- rqstp = list_first_entry_or_null(&pool->sp_idle_threads,
- struct svc_rqst, rq_idle);
- if (rqstp)
- list_del_init(&rqstp->rq_idle);
+ ln = llist_del_first(&pool->sp_idle_threads);
spin_unlock_bh(&pool->sp_lock);
- if (rqstp) {
+ if (ln) {
+ rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
+ svc_thread_set_busy(rqstp);
+
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
wake_up_process(rqstp->rq_task);
rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index b82a66f68f17..eb8d345641b2 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -704,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
struct svc_pool *pool = rqstp->rq_pool;
/* did someone call svc_wake_up? */
- if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
return false;
/* was a socket queued? */
@@ -715,10 +715,6 @@ rqst_should_sleep(struct svc_rqst *rqstp)
if (svc_thread_should_stop(rqstp))
return false;
- /* are we freezing? */
- if (freezing(current))
- return false;
-
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (svc_is_backchannel(rqstp)) {
if (!list_empty(&rqstp->rq_server->sv_cb_list))
@@ -733,27 +729,40 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- set_current_state(TASK_IDLE);
- spin_lock_bh(&pool->sp_lock);
- list_add(&rqstp->rq_idle, &pool->sp_idle_threads);
- spin_unlock_bh(&pool->sp_lock);
+ if (rqst_should_sleep(rqstp)) {
+ set_current_state(TASK_IDLE);
+ llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+
+ if (unlikely(!rqst_should_sleep(rqstp)))
+ /* maybe there were no idle threads when some work
+ * became ready and so nothing was woken. We've just
+ * become idle so someone can do the work - maybe us.
+ * But we cannot reliably remove ourselves from the
+ * idle list - we can only remove the first task which
+ * might be us, and might not.
+ * So remove and wake it, then schedule(). If it was
+ * us, we won't sleep. If it is some other thread, they
+ * will do the work.
+ */
+ svc_pool_wake_idle_thread(rqstp->rq_server, pool);
- if (likely(rqst_should_sleep(rqstp)))
- schedule();
- else
+ /* We mustn't continue while on the idle list, and we
+ * cannot remove ourselves reliably. The only "work"
+ * we can do while on the idle list is to freeze.
+ * So loop until someone removes us
+ */
+ while (!svc_thread_busy(rqstp)) {
+ try_to_freeze();
+ schedule();
+ set_current_state(TASK_IDLE);
+ }
__set_current_state(TASK_RUNNING);
-
- /* We *must* be removed from the list before we can continue.
- * If we were woken, this is already done
- */
- if (!svc_thread_busy(rqstp)) {
- spin_lock_bh(&pool->sp_lock);
- list_del_init(&rqstp->rq_idle);
- spin_unlock_bh(&pool->sp_lock);
}
try_to_freeze();
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
+
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt) {
trace_svc_pool_awoken(rqstp);
@@ -785,7 +794,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
- if (!list_empty(&pool->sp_idle_threads))
+ if (pool->sp_idle_threads.first != NULL)
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
svc_recv() currently returns 0 on success or one of two errors:
- -EAGAIN means no message was successfully received
- -EINTR means the thread has been told to stop
Previously nfsd would stop as the result of a signal as well as
following kthread_stop(). In that case the difference was useful: EINTR
means stop unconditionally. EAGAIN means stop if kthread_should_stop(),
continue otherwise.
Now threads only exit when kthread_should_stop() so we don't need the
distinction.
So have all threads test kthread_should_stop() (most do), and return
simple success/failure from svc_recv().
Also change some helpers that svc_recv() uses to not bother with an
error code, returning a bool in one case, and NULL for failure in
another.
Signed-off-by: NeilBrown <[email protected]>
---
fs/lockd/svc.c | 9 +++------
fs/nfs/callback.c | 5 +----
fs/nfsd/nfssvc.c | 8 ++------
include/linux/sunrpc/svcsock.h | 2 +-
net/sunrpc/svc_xprt.c | 27 +++++++++++----------------
5 files changed, 18 insertions(+), 33 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 91ef139a7757..a43b63e46127 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -116,7 +116,6 @@ static void set_grace_period(struct net *net)
static int
lockd(void *vrqstp)
{
- int err = 0;
struct svc_rqst *rqstp = vrqstp;
struct net *net = &init_net;
struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -139,12 +138,10 @@ lockd(void *vrqstp)
timeout = nlmsvc_retry_blocked();
/*
- * Find a socket with data available and call its
- * recvfrom routine.
+ * Find any work to do, such as a socket with data available,
+ * and do the work.
*/
- err = svc_recv(rqstp, timeout);
- if (err == -EAGAIN || err == -EINTR)
- continue;
+ svc_recv(rqstp, timeout);
}
if (nlmsvc_ops)
nlmsvc_invalidate_all();
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 2d94384bd6a9..914d2402ca98 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -74,7 +74,6 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
static int
nfs4_callback_svc(void *vrqstp)
{
- int err;
struct svc_rqst *rqstp = vrqstp;
set_freezable();
@@ -83,9 +82,7 @@ nfs4_callback_svc(void *vrqstp)
/*
* Listen for a request on the socket
*/
- err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
- if (err == -EAGAIN || err == -EINTR)
- continue;
+ svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
}
svc_exit_thread(rqstp);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 3e08cc746870..5bf48c33986e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -953,7 +953,6 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int err;
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
@@ -972,7 +971,7 @@ nfsd(void *vrqstp)
/*
* The main request loop
*/
- for (;;) {
+ while (!kthread_should_stop()) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
@@ -980,10 +979,7 @@ nfsd(void *vrqstp)
* Find a socket with data available and call its
* recvfrom routine.
*/
- while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
- ;
- if (err == -EINTR)
- break;
+ svc_recv(rqstp, 60*60*HZ);
validate_process_creds();
}
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 55446136499f..fb5c98069356 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -64,7 +64,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
* Function prototypes.
*/
void svc_close_net(struct svc_serv *, struct net *);
-int svc_recv(struct svc_rqst *, long);
+void svc_recv(struct svc_rqst *, long);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c808f6d60c99..67825eef8646 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -664,7 +664,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static int svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
@@ -689,7 +689,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
set_current_state(TASK_IDLE);
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
- return -EINTR;
+ return false;
}
trace_svc_alloc_arg_err(pages, ret);
memalloc_retry_wait(GFP_KERNEL);
@@ -708,7 +708,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->tail[0].iov_len = 0;
rqstp->rq_xid = xdr_zero;
- return 0;
+ return true;
}
static bool
@@ -773,9 +773,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (!time_left)
percpu_counter_inc(&pool->sp_threads_timedout);
if (kthread_should_stop())
- return ERR_PTR(-EINTR);
+ return NULL;
percpu_counter_inc(&pool->sp_threads_no_work);
- return ERR_PTR(-EAGAIN);
+ return NULL;
out_found:
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
@@ -856,32 +856,27 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
* organised not to touch any cachelines in the shared svc_serv
* structure, only cachelines in the local svc_pool.
*/
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt = NULL;
struct svc_serv *serv = rqstp->rq_server;
- int len, err;
+ int len;
- err = svc_alloc_arg(rqstp);
- if (err)
+ if (!svc_alloc_arg(rqstp))
goto out;
try_to_freeze();
cond_resched();
- err = -EINTR;
if (kthread_should_stop())
goto out;
xprt = svc_get_next_xprt(rqstp, timeout);
- if (IS_ERR(xprt)) {
- err = PTR_ERR(xprt);
+ if (!xprt)
goto out;
- }
len = svc_handle_xprt(rqstp, xprt);
/* No data, incomplete (TCP) read, or accept() */
- err = -EAGAIN;
if (len <= 0)
goto out_release;
@@ -896,12 +891,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
rqstp->rq_stime = ktime_get();
svc_process(rqstp);
- return 0;
+ return;
out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
out:
- return err;
+ return;
}
EXPORT_SYMBOL_GPL(svc_recv);
Using svc_recv() and svc_process() for (NFSv4.1) back-channel handling
means we have just one mechanism for waking threads.
Also change kthread_freezable_should_stop() in nfs4_callback_svc() to
kthread_should_stop() as used elsewhere.
kthread_freezable_should_stop() effectively adds a try_to_freeze() call,
and svc_recv() already contains that at an appropriate place.
Signed-off-by: NeilBrown <[email protected]>
---
fs/nfs/callback.c | 46 ++---------------------------------
include/linux/sunrpc/svc.h | 6 +++--
net/sunrpc/backchannel_rqst.c | 8 ++----
net/sunrpc/svc.c | 2 +-
net/sunrpc/svc_xprt.c | 48 ++++++++++++++++++++++++++++++++++++-
net/sunrpc/xprtrdma/backchannel.c | 2 +-
6 files changed, 58 insertions(+), 54 deletions(-)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index c47834970224..660cec36c45c 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
set_freezable();
- while (!kthread_freezable_should_stop(NULL)) {
+ while (!kthread_should_stop()) {
/*
* Listen for a request on the socket
*/
@@ -90,45 +90,6 @@ nfs4_callback_svc(void *vrqstp)
}
#if defined(CONFIG_NFS_V4_1)
-/*
- * The callback service for NFSv4.1 callbacks
- */
-static int
-nfs41_callback_svc(void *vrqstp)
-{
- struct svc_rqst *rqstp = vrqstp;
- struct svc_serv *serv = rqstp->rq_server;
- struct rpc_rqst *req;
- int error;
- DEFINE_WAIT(wq);
-
- set_freezable();
-
- while (!kthread_freezable_should_stop(NULL)) {
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
- spin_lock_bh(&serv->sv_cb_lock);
- if (!list_empty(&serv->sv_cb_list)) {
- req = list_first_entry(&serv->sv_cb_list,
- struct rpc_rqst, rq_bc_list);
- list_del(&req->rq_bc_list);
- spin_unlock_bh(&serv->sv_cb_lock);
- finish_wait(&serv->sv_cb_waitq, &wq);
- dprintk("Invoking bc_svc_process()\n");
- error = bc_svc_process(serv, req, rqstp);
- dprintk("bc_svc_process() returned w/ error code= %d\n",
- error);
- } else {
- spin_unlock_bh(&serv->sv_cb_lock);
- if (!kthread_should_stop())
- schedule();
- finish_wait(&serv->sv_cb_waitq, &wq);
- }
- }
-
- svc_exit_thread(rqstp);
- return 0;
-}
-
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
struct svc_serv *serv)
{
@@ -241,10 +202,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
cb_info->users);
threadfn = nfs4_callback_svc;
-#if defined(CONFIG_NFS_V4_1)
- if (minorversion)
- threadfn = nfs41_callback_svc;
-#else
+#if !defined(CONFIG_NFS_V4_1)
if (minorversion)
return ERR_PTR(-ENOTSUPP);
#endif
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 83f31a09c853..15d468d852b5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -92,8 +92,6 @@ struct svc_serv {
* that arrive over the same
* connection */
spinlock_t sv_cb_lock; /* protects the svc_cb_list */
- wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
- * entries in the svc_cb_list */
bool sv_bc_enabled; /* service uses backchannel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
@@ -202,6 +200,10 @@ struct svc_rqst {
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ struct rpc_rqst *rq_cb; /* callback to be handled */
+#endif
+
struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen;
struct sockaddr_storage rq_daddr; /* dest addr of request
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 65a6c6429a53..60b8d310bb27 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -349,10 +349,8 @@ struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
}
/*
- * Add callback request to callback list. The callback
- * service sleeps on the sv_cb_waitq waiting for new
- * requests. Wake it up after adding enqueing the
- * request.
+ * Add callback request to callback list. Wake a thread
+ * on the first pool (usually the only pool) to handle it.
*/
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
{
@@ -371,6 +369,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
xprt_get(xprt);
spin_lock(&bc_serv->sv_cb_lock);
list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
- wake_up(&bc_serv->sv_cb_waitq);
spin_unlock(&bc_serv->sv_cb_lock);
+ svc_pool_wake_idle_thread(bc_serv, &bc_serv->sv_pools[0]);
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 170eabc03988..56b4a88776d5 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -440,7 +440,6 @@ __svc_init_bc(struct svc_serv *serv)
{
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
- init_waitqueue_head(&serv->sv_cb_waitq);
}
#else
static void
@@ -724,6 +723,7 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
percpu_counter_inc(&pool->sp_threads_starved);
return NULL;
}
+EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
static struct svc_pool *
svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c7095ff7d5fd..4fdf1aaa514b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -17,6 +17,7 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/bc_xprt.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <trace/events/sunrpc.h>
@@ -732,6 +733,13 @@ rqst_should_sleep(struct svc_rqst *rqstp)
if (freezing(current))
return false;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ if (!list_empty(&rqstp->rq_server->sv_cb_list))
+ return false;
+ }
+#endif
+
return true;
}
@@ -754,12 +762,31 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
set_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
+
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt) {
trace_svc_pool_awoken(rqstp);
goto out_found;
}
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ struct svc_serv *serv = rqstp->rq_server;
+ struct rpc_rqst *req;
+
+ spin_lock_bh(&serv->sv_cb_lock);
+ req = list_first_entry_or_null(&serv->sv_cb_list,
+ struct rpc_rqst, rq_bc_list);
+ if (req) {
+ list_del(&req->rq_bc_list);
+ rqstp->rq_cb = req;
+ }
+ spin_unlock_bh(&serv->sv_cb_lock);
+ if (rqstp->rq_cb)
+ goto out_found;
+ }
+#endif
+
if (!kthread_should_stop())
percpu_counter_inc(&pool->sp_threads_no_work);
return;
@@ -853,7 +880,7 @@ void svc_recv(struct svc_rqst *rqstp)
svc_wait_for_work(rqstp);
if (rqstp->rq_xprt) {
- struct svc_serv *serv = rqstp->rq_server;
+ struct svc_serv *serv = rqstp->rq_server;
struct svc_xprt *xprt = rqstp->rq_xprt;
int len;
@@ -878,6 +905,25 @@ void svc_recv(struct svc_rqst *rqstp)
svc_process(rqstp);
}
}
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ if (svc_is_backchannel(rqstp)) {
+ struct rpc_rqst *cb;
+ int error;
+
+ if (!rqstp->rq_cb)
+ return;
+
+ cb = rqstp->rq_cb;
+ rqstp->rq_cb = NULL;
+
+ dprintk("Invoking bc_svc_process()\n");
+ error = bc_svc_process(rqstp->rq_server, cb, rqstp);
+ dprintk("bc_svc_process() returned w/ error code= %d\n",
+ error);
+ return;
+ }
+#endif
+
}
EXPORT_SYMBOL_GPL(svc_recv);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e4d84a13c566..f1e1d4909434 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -267,7 +267,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
spin_unlock(&bc_serv->sv_cb_lock);
- wake_up(&bc_serv->sv_cb_waitq);
+ svc_pool_wake_idle_thread(bc_serv, &bc_serv->sv_pools[0]);
r_xprt->rx_stats.bcall_count++;
return;
svc_get_next_xprt() does a lot more than get an xprt. It mostly waits.
So rename to svc_wait_for_work() and don't bother returning a value.
The xprt can be found in ->rq_xprt.
Also move all the code to handle ->rq_xprt into a single if branch, so
that other handlers can be added there if other work is found.
Remove the call to svc_xprt_dequeue() that is before we set TASK_IDLE.
If there is still something to dequeue we will still get it after a few
more checks - no sleeping. This was an unnecessary optimisation which
muddles the code.
Drop a call to kthread_should_stop(). There are enough of those in
svc_wait_for_work().
(This patch is best viewed with "-b")
Signed-off-by: NeilBrown <[email protected]>
---
net/sunrpc/svc_xprt.c | 70 +++++++++++++++++++------------------------------
1 file changed, 27 insertions(+), 43 deletions(-)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 44a33b1f542f..c7095ff7d5fd 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -735,19 +735,10 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
+static void svc_wait_for_work(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- /* rq_xprt should be clear on entry */
- WARN_ON_ONCE(rqstp->rq_xprt);
-
- rqstp->rq_xprt = svc_xprt_dequeue(pool);
- if (rqstp->rq_xprt) {
- trace_svc_pool_polled(rqstp);
- goto out_found;
- }
-
set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
@@ -769,10 +760,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
goto out_found;
}
- if (kthread_should_stop())
- return NULL;
- percpu_counter_inc(&pool->sp_threads_no_work);
- return NULL;
+ if (!kthread_should_stop())
+ percpu_counter_inc(&pool->sp_threads_no_work);
+ return;
out_found:
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
@@ -781,7 +771,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
- return rqstp->rq_xprt;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
@@ -855,45 +844,40 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
*/
void svc_recv(struct svc_rqst *rqstp)
{
- struct svc_xprt *xprt = NULL;
- struct svc_serv *serv = rqstp->rq_server;
- int len;
-
if (!svc_alloc_arg(rqstp))
- goto out;
+ return;
try_to_freeze();
cond_resched();
- if (kthread_should_stop())
- goto out;
- xprt = svc_get_next_xprt(rqstp);
- if (!xprt)
- goto out;
+ svc_wait_for_work(rqstp);
- len = svc_handle_xprt(rqstp, xprt);
+ if (rqstp->rq_xprt) {
+ struct svc_serv *serv = rqstp->rq_server;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ int len;
- /* No data, incomplete (TCP) read, or accept() */
- if (len <= 0)
- goto out_release;
+ len = svc_handle_xprt(rqstp, xprt);
- trace_svc_xdr_recvfrom(&rqstp->rq_arg);
+ /* No data, incomplete (TCP) read, or accept() */
+ if (len <= 0) {
+ rqstp->rq_res.len = 0;
+ svc_xprt_release(rqstp);
+ } else {
- clear_bit(XPT_OLD, &xprt->xpt_flags);
+ trace_svc_xdr_recvfrom(&rqstp->rq_arg);
- rqstp->rq_chandle.defer = svc_defer;
+ clear_bit(XPT_OLD, &xprt->xpt_flags);
- if (serv->sv_stats)
- serv->sv_stats->netcnt++;
- percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
- rqstp->rq_stime = ktime_get();
- svc_process(rqstp);
- return;
-out_release:
- rqstp->rq_res.len = 0;
- svc_xprt_release(rqstp);
-out:
- return;
+ rqstp->rq_chandle.defer = svc_defer;
+
+ if (serv->sv_stats)
+ serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
+ rqstp->rq_stime = ktime_get();
+ svc_process(rqstp);
+ }
+ }
}
EXPORT_SYMBOL_GPL(svc_recv);
When a sequence of numbers is needed for internal use only, an enum is
typically best. The sequence will inevitably need to be changed one
day, and having an enum means the developer doesn't need to think about
renumbering after an insertion or deletion, and the resulting patch
will be easier to review.
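A before/after sketch of that argument (the names here are invented for
illustration, not taken from the code below):

/* #define form: inserting a value here means hand-renumbering every
 * later line, so the patch touches all of them.
 */
#define OLD_STYLE_A	0
#define OLD_STYLE_B	1
#define OLD_STYLE_C	2

/* enum form: the same insertion is a one-line change and the compiler
 * renumbers the rest.
 */
enum {
	NEW_STYLE_A,		/* 0 */
	NEW_STYLE_INSERTED,	/* hypothetical new value: 1 */
	NEW_STYLE_B,		/* was 1, now 2 - renumbered automatically */
	NEW_STYLE_C,		/* was 2, now 3 */
};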
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/sunrpc/cache.h | 11 +++++++----
include/linux/sunrpc/svc.h | 34 ++++++++++++++++++++--------------
include/linux/sunrpc/svc_xprt.h | 39 +++++++++++++++++++++------------------
include/linux/sunrpc/svcauth.h | 29 ++++++++++++++++-------------
include/linux/sunrpc/xprtsock.h | 25 +++++++++++++------------
5 files changed, 77 insertions(+), 61 deletions(-)
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 518bd28f5ab8..3cc4f4f0c764 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -56,10 +56,13 @@ struct cache_head {
struct kref ref;
unsigned long flags;
};
-#define CACHE_VALID 0 /* Entry contains valid data */
-#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
-#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
-#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
+/* cache_head.flags */
+enum {
+ CACHE_VALID, /* Entry contains valid data */
+ CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
+ CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
+ CACHE_CLEANED, /* Entry has been cleaned from cache */
+};
#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f3df7f963653..83f31a09c853 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -31,7 +31,7 @@
* node traffic on multi-node NUMA NFS servers.
*/
struct svc_pool {
- unsigned int sp_id; /* pool id; also node id on NUMA */
+ unsigned int sp_id; /* pool id; also node id on NUMA */
spinlock_t sp_lock; /* protects all fields */
struct list_head sp_sockets; /* pending sockets */
unsigned int sp_nrthreads; /* # of threads in pool */
@@ -44,12 +44,15 @@ struct svc_pool {
struct percpu_counter sp_threads_starved;
struct percpu_counter sp_threads_no_work;
-#define SP_TASK_PENDING (0) /* still work to do even if no
- * xprt is queued. */
-#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
+/* bits for sp_flags */
+enum {
+ SP_TASK_PENDING, /* still work to do even if no xprt is queued */
+ SP_CONGESTED, /* all threads are busy, none idle */
+};
+
/*
* RPC service.
*
@@ -232,16 +235,6 @@ struct svc_rqst {
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
int rq_cachetype; /* catering to nfsd */
-#define RQ_SECURE (0) /* secure port */
-#define RQ_LOCAL (1) /* local request */
-#define RQ_USEDEFERRAL (2) /* use deferral */
-#define RQ_DROPME (3) /* drop current reply */
-#define RQ_SPLICE_OK (4) /* turned off in gss privacy
- * to prevent encrypting page
- * cache pages */
-#define RQ_VICTIM (5) /* about to be shut down */
-#define RQ_BUSY (6) /* request is busy */
-#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
@@ -272,6 +265,19 @@ struct svc_rqst {
void ** rq_lease_breaker; /* The v4 client breaking a lease */
};
+/* bits for rq_flags */
+enum {
+ RQ_SECURE, /* secure port */
+ RQ_LOCAL, /* local request */
+ RQ_USEDEFERRAL, /* use deferral */
+ RQ_DROPME, /* drop current reply */
+ RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
+ * cache pages */
+ RQ_VICTIM, /* about to be shut down */
+ RQ_BUSY, /* request is busy */
+ RQ_DATA, /* request has data */
+};
+
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
/*
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a6b12631db21..af383d0349b3 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -56,26 +56,9 @@ struct svc_xprt {
struct list_head xpt_list;
struct list_head xpt_ready;
unsigned long xpt_flags;
-#define XPT_BUSY 0 /* enqueued/receiving */
-#define XPT_CONN 1 /* conn pending */
-#define XPT_CLOSE 2 /* dead or dying */
-#define XPT_DATA 3 /* data pending */
-#define XPT_TEMP 4 /* connected transport */
-#define XPT_DEAD 6 /* transport closed */
-#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
-#define XPT_DEFERRED 8 /* deferred request pending */
-#define XPT_OLD 9 /* used for xprt aging mark+sweep */
-#define XPT_LISTENER 10 /* listening endpoint */
-#define XPT_CACHE_AUTH 11 /* cache auth info */
-#define XPT_LOCAL 12 /* connection from loopback interface */
-#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
-#define XPT_CONG_CTRL 14 /* has congestion control */
-#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
-#define XPT_TLS_SESSION 16 /* transport-layer security established */
-#define XPT_PEER_AUTH 17 /* peer has been authenticated */
struct svc_serv *xpt_server; /* service for transport */
- atomic_t xpt_reserved; /* space on outq that is rsvd */
+ atomic_t xpt_reserved; /* space on outq that is rsvd */
atomic_t xpt_nr_rqsts; /* Number of requests */
struct mutex xpt_mutex; /* to serialize sending data */
spinlock_t xpt_lock; /* protects sk_deferred
@@ -96,6 +79,26 @@ struct svc_xprt {
struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
};
+/* flag bits for xpt_flags */
+enum {
+ XPT_BUSY, /* enqueued/receiving */
+ XPT_CONN, /* conn pending */
+ XPT_CLOSE, /* dead or dying */
+ XPT_DATA, /* data pending */
+ XPT_TEMP, /* connected transport */
+ XPT_DEAD, /* transport closed */
+ XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
+ XPT_DEFERRED, /* deferred request pending */
+ XPT_OLD, /* used for xprt aging mark+sweep */
+ XPT_LISTENER, /* listening endpoint */
+ XPT_CACHE_AUTH, /* cache auth info */
+ XPT_LOCAL, /* connection from loopback interface */
+ XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
+ XPT_CONG_CTRL, /* has congestion control */
+ XPT_HANDSHAKE, /* xprt requests a handshake */
+ XPT_TLS_SESSION, /* transport-layer security established */
+ XPT_PEER_AUTH, /* peer has been authenticated */
+};
static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
{
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 6d9cc9080aca..8d1d0d0721d2 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -133,19 +133,22 @@ struct auth_ops {
int (*set_client)(struct svc_rqst *rq);
};
-#define SVC_GARBAGE 1
-#define SVC_SYSERR 2
-#define SVC_VALID 3
-#define SVC_NEGATIVE 4
-#define SVC_OK 5
-#define SVC_DROP 6
-#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
- * lost so if there is a tcp connection, it
- * should be closed
- */
-#define SVC_DENIED 8
-#define SVC_PENDING 9
-#define SVC_COMPLETE 10
+/* return values for svc functions that analyse requests */
+enum {
+ SVC_GARBAGE,
+ SVC_SYSERR,
+ SVC_VALID,
+ SVC_NEGATIVE,
+ SVC_OK,
+ SVC_DROP,
+ SVC_CLOSE, /* Like SVC_DROP, but request is definitely
+ * lost so if there is a tcp connection, it
+ * should be closed
+ */
+ SVC_DENIED,
+ SVC_PENDING,
+ SVC_COMPLETE,
+};
struct svc_xprt;
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 700a1e6c047c..1ed2f446010b 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -81,17 +81,18 @@ struct sock_xprt {
};
/*
- * TCP RPC flags
+ * TCP RPC flags in ->sock_state
*/
-#define XPRT_SOCK_CONNECTING 1U
-#define XPRT_SOCK_DATA_READY (2)
-#define XPRT_SOCK_UPD_TIMEOUT (3)
-#define XPRT_SOCK_WAKE_ERROR (4)
-#define XPRT_SOCK_WAKE_WRITE (5)
-#define XPRT_SOCK_WAKE_PENDING (6)
-#define XPRT_SOCK_WAKE_DISCONNECT (7)
-#define XPRT_SOCK_CONNECT_SENT (8)
-#define XPRT_SOCK_NOSPACE (9)
-#define XPRT_SOCK_IGNORE_RECV (10)
-
+enum {
+ XPRT_SOCK_CONNECTING,
+ XPRT_SOCK_DATA_READY,
+ XPRT_SOCK_UPD_TIMEOUT,
+ XPRT_SOCK_WAKE_ERROR,
+ XPRT_SOCK_WAKE_WRITE,
+ XPRT_SOCK_WAKE_PENDING,
+ XPRT_SOCK_WAKE_DISCONNECT,
+ XPRT_SOCK_CONNECT_SENT,
+ XPRT_SOCK_NOSPACE,
+ XPRT_SOCK_IGNORE_RECV,
+};
#endif /* _LINUX_SUNRPC_XPRTSOCK_H */
svc threads are currently stopped using kthread_stop(). This requires
identifying a specific thread. However we don't care which thread
stops, just as long as one does.
So instead, set a flag in the svc_pool to say that a thread needs to
die, and have each thread check this flag instead of calling
kthread_should_stop(). The first to find it clears the flag and moves
towards shutting down.
This removes an explicit dependency on sp_all_threads which will make a
future patch simpler.
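A rough userspace model of the handshake, with atomic_fetch_and()
standing in for test_and_clear_bit() (the names mirror the patch, but
this is only a sketch): however many threads race here, exactly one of
them can observe SP_NEED_VICTIM set, so exactly one becomes the victim.

#include <stdatomic.h>
#include <stdbool.h>

#define SP_NEED_VICTIM		(1UL << 0)	/* one thread should agree to exit */
#define SP_VICTIM_REMAINS	(1UL << 1)	/* the victim has not exited yet */

struct pool { _Atomic unsigned long sp_flags; };
struct thread { struct pool *pool; bool rq_victim; };

/* Model of svc_thread_should_stop(): the fetch-and atomically clears
 * SP_NEED_VICTIM, and only the single caller that saw the bit set
 * marks itself as the victim.
 */
static bool thread_should_stop(struct thread *t)
{
	unsigned long old;

	old = atomic_fetch_and(&t->pool->sp_flags, ~SP_NEED_VICTIM);
	if (old & SP_NEED_VICTIM)
		t->rq_victim = true;
	return t->rq_victim;
}

/* Model of the exit path: the victim clears SP_VICTIM_REMAINS last;
 * that is the bit the task shutting threads down waits on.
 */
static void thread_exit(struct thread *t)
{
	if (t->rq_victim)
		atomic_fetch_and(&t->pool->sp_flags, ~SP_VICTIM_REMAINS);
}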
Signed-off-by: NeilBrown <[email protected]>
---
fs/lockd/svc.c | 4 ++--
fs/lockd/svclock.c | 4 ++--
fs/nfs/callback.c | 2 +-
fs/nfsd/nfs4proc.c | 8 +++++---
fs/nfsd/nfssvc.c | 2 +-
include/linux/lockd/lockd.h | 2 +-
include/linux/sunrpc/svc.h | 22 +++++++++++++++++++++-
include/trace/events/sunrpc.h | 5 +++--
net/sunrpc/svc.c | 39 +++++++++++++++++----------------------
net/sunrpc/svc_xprt.c | 8 +++-----
10 files changed, 56 insertions(+), 40 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 4f55cd42c2e6..30543edd2309 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -134,12 +134,12 @@ lockd(void *vrqstp)
* The main request loop. We don't terminate until the last
* NFS mount or NFS daemon has gone away.
*/
- while (!kthread_should_stop()) {
+ while (!svc_thread_should_stop(rqstp)) {
/* update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nlm_max_connections;
- nlmsvc_retry_blocked();
+ nlmsvc_retry_blocked(rqstp);
/*
* Find any work to do, such as a socket with data available,
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 3d7bd5c04b36..fd399c9bea5c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -1009,13 +1009,13 @@ retry_deferred_block(struct nlm_block *block)
* be retransmitted.
*/
void
-nlmsvc_retry_blocked(void)
+nlmsvc_retry_blocked(struct svc_rqst *rqstp)
{
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
struct nlm_block *block;
spin_lock(&nlm_blocked_lock);
- while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
+ while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
if (block->b_when == NLM_NEVER)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 660cec36c45c..c58ec2e66aaf 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
set_freezable();
- while (!kthread_should_stop()) {
+ while (!svc_thread_should_stop(rqstp)) {
/*
* Listen for a request on the socket
*/
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 157488290676..66024ed06181 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1306,7 +1306,8 @@ extern void nfs_sb_deactive(struct super_block *sb);
* setup a work entry in the ssc delayed unmount list.
*/
static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
- struct nfsd4_ssc_umount_item **nsui)
+ struct nfsd4_ssc_umount_item **nsui,
+ struct svc_rqst *rqstp)
{
struct nfsd4_ssc_umount_item *ni = NULL;
struct nfsd4_ssc_umount_item *work = NULL;
@@ -1329,7 +1330,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
spin_unlock(&nn->nfsd_ssc_lock);
/* allow 20secs for mount/unmount for now - revisit */
- if (kthread_should_stop() ||
+ if (svc_thread_should_stop(rqstp) ||
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
@@ -1445,7 +1446,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
goto out_free_rawdata;
snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
- status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
+ status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
if (status)
goto out_free_devname;
if ((*nsui)->nsui_vfsmount)
@@ -1620,6 +1621,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
if (bytes_total == 0)
bytes_total = ULLONG_MAX;
do {
+ /* Only async copies can be stopped here */
if (kthread_should_stop())
break;
bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index b536b254c59e..9b282c89ea87 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -971,7 +971,7 @@ nfsd(void *vrqstp)
/*
* The main request loop
*/
- while (!kthread_should_stop()) {
+ while (!svc_thread_should_stop(rqstp)) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0f016d69c996..9f565416d186 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -282,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-void nlmsvc_retry_blocked(void);
+void nlmsvc_retry_blocked(struct svc_rqst *rqstp);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 15d468d852b5..ea3ce1315416 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -51,6 +51,8 @@ struct svc_pool {
enum {
SP_TASK_PENDING, /* still work to do even if no xprt is queued */
SP_CONGESTED, /* all threads are busy, none idle */
+ SP_NEED_VICTIM, /* One thread needs to agree to exit */
+ SP_VICTIM_REMAINS, /* One thread needs to actually exit */
};
/*
@@ -275,7 +277,7 @@ enum {
RQ_DROPME, /* drop current reply */
RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
* cache pages */
- RQ_VICTIM, /* about to be shut down */
+ RQ_VICTIM, /* Have agreed to shut down */
RQ_BUSY, /* request is busy */
RQ_DATA, /* request has data */
};
@@ -315,6 +317,24 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
return (struct sockaddr *) &rqst->rq_daddr;
}
+/**
+ * svc_thread_should_stop - check if this thread should stop
+ * @rqstp: the thread that might need to stop
+ *
+ * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
+ * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
+ * the victim, using this function. It should then promptly call
+ * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
+ * so the task waiting for a thread to exit can wake and continue.
+ */
+static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
+{
+ if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
+ set_bit(RQ_VICTIM, &rqstp->rq_flags);
+
+ return test_bit(RQ_VICTIM, &rqstp->rq_flags);
+}
+
struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */
struct svc_xprt *xprt;
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 60c8e03268d4..c79375e37dc2 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -2041,8 +2041,9 @@ TRACE_EVENT(svc_xprt_enqueue,
#define show_svc_pool_flags(x) \
__print_flags(x, "|", \
{ BIT(SP_TASK_PENDING), "TASK_PENDING" }, \
- { BIT(SP_CONGESTED), "CONGESTED" })
-
+ { BIT(SP_CONGESTED), "CONGESTED" }, \
+ { BIT(SP_NEED_VICTIM), "NEED_VICTIM" }, \
+ { BIT(SP_VICTIM_REMAINS), "VICTIM_REMAINS" })
DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
TP_PROTO(
const struct svc_rqst *rqstp
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 56b4a88776d5..b18175ef74ec 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -731,19 +731,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
}
-static struct task_struct *
+static struct svc_pool *
svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
unsigned int i;
- struct task_struct *task = NULL;
if (pool != NULL) {
spin_lock_bh(&pool->sp_lock);
+ if (pool->sp_nrthreads)
+ goto found_pool;
+ spin_unlock_bh(&pool->sp_lock);
+ return NULL;
} else {
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
spin_lock_bh(&pool->sp_lock);
- if (!list_empty(&pool->sp_all_threads))
+ if (pool->sp_nrthreads)
goto found_pool;
spin_unlock_bh(&pool->sp_lock);
}
@@ -751,16 +754,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
}
found_pool:
- if (!list_empty(&pool->sp_all_threads)) {
- struct svc_rqst *rqstp;
-
- rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
- set_bit(RQ_VICTIM, &rqstp->rq_flags);
- list_del_rcu(&rqstp->rq_all);
- task = rqstp->rq_task;
- }
+ set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+ set_bit(SP_NEED_VICTIM, &pool->sp_flags);
spin_unlock_bh(&pool->sp_lock);
- return task;
+ return pool;
}
static int
@@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
- struct svc_rqst *rqstp;
- struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
+ struct svc_pool *vpool;
do {
- task = svc_pool_victim(serv, pool, &state);
- if (task == NULL)
+ vpool = svc_pool_victim(serv, pool, &state);
+ if (!vpool)
break;
- rqstp = kthread_data(task);
- /* Did we lose a race to svo_function threadfn? */
- if (kthread_stop(task) == -EINTR)
- svc_exit_thread(rqstp);
+ svc_pool_wake_idle_thread(serv, vpool);
+ wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
+ TASK_IDLE);
nrservs++;
} while (nrservs < 0);
return 0;
@@ -932,8 +927,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads--;
- if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
- list_del_rcu(&rqstp->rq_all);
spin_unlock_bh(&pool->sp_lock);
spin_lock_bh(&serv->sv_lock);
@@ -941,6 +934,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
spin_unlock_bh(&serv->sv_lock);
svc_sock_update_bufs(serv);
+ if (svc_thread_should_stop(rqstp))
+ clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
svc_rqst_free(rqstp);
svc_put(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4fdf1aaa514b..948605e7043b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -595,8 +595,6 @@ void svc_wake_up(struct svc_serv *serv)
smp_wmb();
return;
}
-
- trace_svc_wake_up(rqstp);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -688,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
continue;
set_current_state(TASK_IDLE);
- if (kthread_should_stop()) {
+ if (svc_thread_should_stop(rqstp)) {
set_current_state(TASK_RUNNING);
return false;
}
@@ -726,7 +724,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (kthread_should_stop())
+ if (svc_thread_should_stop(rqstp))
return false;
/* are we freezing? */
@@ -787,7 +785,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
}
#endif
- if (!kthread_should_stop())
+ if (!svc_thread_should_stop(rqstp))
percpu_counter_inc(&pool->sp_threads_no_work);
return;
out_found:
No callers of svc_pool_wake_idle_thread() care which thread was woken -
except one that wants to trace the wakeup. For now we drop that
tracepoint.
One caller wants to know whether anything was woken so that it can set
SP_CONGESTED; set that inside the function instead.
Now svc_pool_wake_idle_thread() can "return" void.
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/sunrpc/svc.h | 2 +-
net/sunrpc/svc.c | 13 +++++--------
net/sunrpc/svc_xprt.c | 18 +++---------------
3 files changed, 9 insertions(+), 24 deletions(-)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index ea3ce1315416..b39c613fbe06 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -454,7 +454,7 @@ int svc_register(const struct svc_serv *, struct net *, const int,
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
-struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
+void svc_pool_wake_idle_thread(struct svc_serv *serv,
struct svc_pool *pool);
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b18175ef74ec..fd49e7b12c94 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -696,13 +696,10 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
* @serv: RPC service
* @pool: service thread pool
*
- * Returns an idle service thread (now marked BUSY), or NULL
- * if no service threads are available. Finding an idle service
- * thread and marking it BUSY is atomic with respect to other
- * calls to svc_pool_wake_idle_thread().
+ * Wake an idle thread if there is one, else mark the pool as congested.
*/
-struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
- struct svc_pool *pool)
+void svc_pool_wake_idle_thread(struct svc_serv *serv,
+ struct svc_pool *pool)
{
struct svc_rqst *rqstp;
@@ -715,13 +712,13 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
wake_up_process(rqstp->rq_task);
rcu_read_unlock();
percpu_counter_inc(&pool->sp_threads_woken);
- return rqstp;
+ return;
}
rcu_read_unlock();
trace_svc_pool_starved(serv, pool);
percpu_counter_inc(&pool->sp_threads_starved);
- return NULL;
+ set_bit(SP_CONGESTED, &pool->sp_flags);
}
EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 948605e7043b..964c97dbb36c 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -456,7 +456,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
*/
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
- struct svc_rqst *rqstp;
struct svc_pool *pool;
if (!svc_xprt_ready(xprt))
@@ -477,13 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
spin_unlock_bh(&pool->sp_lock);
- rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool);
- if (!rqstp) {
- set_bit(SP_CONGESTED, &pool->sp_flags);
- return;
- }
-
- trace_svc_xprt_enqueue(xprt, rqstp);
+ svc_pool_wake_idle_thread(xprt->xpt_server, pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -587,14 +580,9 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
void svc_wake_up(struct svc_serv *serv)
{
struct svc_pool *pool = &serv->sv_pools[0];
- struct svc_rqst *rqstp;
- rqstp = svc_pool_wake_idle_thread(serv, pool);
- if (!rqstp) {
- set_bit(SP_TASK_PENDING, &pool->sp_flags);
- smp_wmb();
- return;
- }
+ set_bit(SP_TASK_PENDING, &pool->sp_flags);
+ svc_pool_wake_idle_thread(serv, pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
Currently if several items of work become available in quick succession,
that number of threads (if available) will be woken. By the time some
of them wake up another thread that was already cache-warm might have
come along and completed the work. Anecdotal evidence suggests as many
as 15% of wakes find nothing to do once they get to the point of
looking.
This patch changes svc_pool_wake_idle_thread() to wake the first thread
on the queue but NOT remove it. Subsequent calls will wake the same
thread. Once that thread starts running, it will dequeue itself and,
after dequeuing some work to do, will wake the next thread if there is
more work ready. This results in a more orderly increase in the number of
busy threads.
As a bonus, this allows us to reduce locking around the idle queue.
svc_pool_wake_idle_thread() no longer needs to take a lock (beyond
rcu_read_lock()) as it doesn't manipulate the queue, just looks at the
first item.
The thread itself can avoid locking by using the new
llist_del_first_this() interface. This will safely remove the thread
itself if it is the head. If it isn't the head, it will do nothing.
If multiple threads call this concurrently only one will succeed. The
others will do nothing, so no corruption can result.
If a thread wakes up and finds that it cannot dequeue itself, that means
one of two things:
- it wasn't woken because it was the head of the queue - maybe the
freezer woke it. In that case it can go back to sleep (after trying
to freeze, of course).
- some other thread found there was nothing to do very recently, and
placed itself on the head of the queue in front of this thread.
It must check again after placing itself there, so it can be deemed to
be responsible for any pending work, and this thread can go back to
sleep until woken.
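For reference, a userspace model of the new helper's semantics, using a
C11 compare-exchange loop where the in-kernel version below uses
try_cmpxchg(). The property that matters is that a caller can only
remove its own node, and only while that node is the head:

#include <stdatomic.h>
#include <stddef.h>

struct lnode { struct lnode *next; };
struct lhead { _Atomic(struct lnode *) first; };

/* Model of llist_del_first_this(): succeed only if @me is currently
 * the head.  Each caller owns its node exclusively, so concurrent
 * callers always pass distinct nodes: at most one compare-exchange
 * can win, and the rest see a different head and back off.
 */
static struct lnode *del_first_this(struct lhead *head, struct lnode *me)
{
	struct lnode *entry = atomic_load_explicit(&head->first,
						   memory_order_acquire);
	do {
		if (entry != me)
			return NULL;	/* not the head: leave the list alone */
	} while (!atomic_compare_exchange_weak(&head->first, &entry,
					       me->next));
	return entry;
}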
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/llist.h | 2 ++
lib/llist.c | 27 +++++++++++++++++++++++++++
net/sunrpc/svc.c | 12 +++++-------
net/sunrpc/svc_xprt.c | 39 +++++++++++++++++++++------------------
4 files changed, 55 insertions(+), 25 deletions(-)
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 85bda2d02d65..d8b1b73f3df0 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -248,6 +248,8 @@ static inline struct llist_node *__llist_del_all(struct llist_head *head)
}
extern struct llist_node *llist_del_first(struct llist_head *head);
+extern struct llist_node *llist_del_first_this(struct llist_head *head,
+ struct llist_node *this);
struct llist_node *llist_reverse_order(struct llist_node *head);
diff --git a/lib/llist.c b/lib/llist.c
index 6e668fa5a2c6..7e8a13a13586 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -65,6 +65,33 @@ struct llist_node *llist_del_first(struct llist_head *head)
}
EXPORT_SYMBOL_GPL(llist_del_first);
+/**
+ * llist_del_first_this - delete given entry of lock-less list if it is first
+ * @head: the head for your lock-less list
+ * @this: a list entry.
+ *
+ * If the head of the list is the given entry, delete and return it, else
+ * return %NULL.
+ *
+ * Providing the caller has exclusive access to @this, multiple callers can
+ * safely call this concurrently with multiple llist_add() callers.
+ */
+struct llist_node *llist_del_first_this(struct llist_head *head,
+ struct llist_node *this)
+{
+ struct llist_node *entry, *next;
+
+ entry = smp_load_acquire(&head->first);
+ do {
+ if (entry != this)
+ return NULL;
+ next = READ_ONCE(entry->next);
+ } while (!try_cmpxchg(&head->first, &entry, next));
+
+ return entry;
+}
+EXPORT_SYMBOL_GPL(llist_del_first_this);
+
/**
* llist_reverse_order - reverse order of a llist chain
* @head: first item of the list to be reversed
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9287a08250b2..43f29f7380db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -705,17 +705,15 @@ void svc_pool_wake_idle_thread(struct svc_serv *serv,
struct llist_node *ln;
rcu_read_lock();
- spin_lock_bh(&pool->sp_lock);
- ln = llist_del_first(&pool->sp_idle_threads);
- spin_unlock_bh(&pool->sp_lock);
+ ln = READ_ONCE(pool->sp_idle_threads.first);
if (ln) {
rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
- svc_thread_set_busy(rqstp);
-
WRITE_ONCE(rqstp->rq_qtime, ktime_get());
- wake_up_process(rqstp->rq_task);
+ if (!task_is_running(rqstp->rq_task)) {
+ wake_up_process(rqstp->rq_task);
+ percpu_counter_inc(&pool->sp_threads_woken);
+ }
rcu_read_unlock();
- percpu_counter_inc(&pool->sp_threads_woken);
return;
}
rcu_read_unlock();
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index eb8d345641b2..fa945cbf174a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -732,31 +732,29 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
if (rqst_should_sleep(rqstp)) {
set_current_state(TASK_IDLE);
llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);
+ /* maybe there were no idle threads when some work
+ * became ready and so nothing was woken. We've just
+ * become idle so someone can do the work - maybe us.
+ * So check again for work to do.
+ */
+ if (likely(rqst_should_sleep(rqstp))) {
+ try_to_freeze();
+ schedule();
+ }
- if (unlikely(!rqst_should_sleep(rqstp)))
- /* maybe there were no idle threads when some work
- * became ready and so nothing was woken. We've just
- * become idle so someone can to the work - maybe us.
- * But we cannot reliably remove ourselves from the
- * idle list - we can only remove the first task which
- * might be us, and might not.
- * So remove and wake it, then schedule(). If it was
- * us, we won't sleep. If it is some other thread, they
- * will do the work.
+ while (llist_del_first_this(&pool->sp_idle_threads,
+ &rqstp->rq_idle) == NULL) {
+ /* Cannot remove myself, some other thread
+ * must have queued themselves after finding
+ * no work to do, so they have taken responsibility
+ * for any outstanding work.
*/
- svc_pool_wake_idle_thread(rqstp->rq_server, pool);
-
- /* We mustn't continue while on the idle list, and we
- * cannot remove outselves reliably. The only "work"
- * we can do while on the idle list is to freeze.
- * So loop until someone removes us
- */
- while (!svc_thread_busy(rqstp)) {
try_to_freeze();
schedule();
set_current_state(TASK_IDLE);
}
__set_current_state(TASK_RUNNING);
+ svc_thread_set_busy(rqstp);
}
try_to_freeze();
@@ -798,6 +796,11 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
+
+ if (!rqst_should_sleep(rqstp))
+ /* More work pending after I dequeued some,
+ * wake another worker */
+ svc_pool_wake_idle_thread(rqstp->rq_server, pool);
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
The original implementation of nfsd used signals to stop threads during
shutdown.
In Linux 2.3.46pre5 nfsd gained the ability to shut down threads
internally if it was asked to run "0" threads. After this, user-space
transitioned to using "rpc.nfsd 0" to stop nfsd, and sending signals to
threads was no longer an important part of the API.
In Commit 3ebdbe5203a8 ("SUNRPC: discard svo_setup and rename
svc_set_num_threads_sync()") (v5.17-rc1~75^2~41) we finally removed the
use of signals for stopping threads, using kthread_stop() instead.
This patch takes the "obvious" next step and removes the ability to
signal nfsd threads - or any svc threads. nfsd stops allowing signals
and we don't check for their delivery any more.
This will allow for some simplification in later patches.
A change worth noting is in nfsd4_ssc_setup_dul(). There was previously
a signal_pending() check which would only succeed when the thread was
being shut down. It should really have tested kthread_should_stop() as
well. Now it just does the latter, not the former.
Signed-off-by: NeilBrown <[email protected]>
---
fs/nfs/callback.c | 9 +--------
fs/nfsd/nfs4proc.c | 4 ++--
fs/nfsd/nfssvc.c | 12 ------------
net/sunrpc/svc_xprt.c | 16 ++++++----------
4 files changed, 9 insertions(+), 32 deletions(-)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 456af7d230cf..46a0a2d6962e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
/*
* Listen for a request on the socket
*/
@@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
-
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
req = list_first_entry(&serv->sv_cb_list,
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d8e7a533f9d2..157488290676 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1325,11 +1325,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
if (ni->nsui_busy) {
/* wait - and try again */
prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
- TASK_INTERRUPTIBLE);
+ TASK_IDLE);
spin_unlock(&nn->nfsd_ssc_lock);
/* allow 20secs for mount/unmount for now - revisit */
- if (signal_pending(current) ||
+ if (kthread_should_stop() ||
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 97830e28c140..439fca195925 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -965,15 +965,6 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- /*
- * thread is spawned with all signals set to SIG_IGN, re-enable
- * the ones that will bring down the thread
- */
- allow_signal(SIGKILL);
- allow_signal(SIGHUP);
- allow_signal(SIGINT);
- allow_signal(SIGQUIT);
-
atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -998,9 +989,6 @@ nfsd(void *vrqstp)
validate_process_creds();
}
- /* Clear signals before calling svc_exit_thread() */
- flush_signals(current);
-
atomic_dec(&nfsdstats.th_cnt);
out:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 71b19d0ed642..93395606a0ba 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -686,8 +686,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
return -EINTR;
}
@@ -725,7 +725,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return false;
/* are we freezing? */
@@ -749,11 +749,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
goto out_found;
}
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
@@ -776,7 +772,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (!time_left)
percpu_counter_inc(&pool->sp_threads_timedout);
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return ERR_PTR(-EINTR);
percpu_counter_inc(&pool->sp_threads_no_work);
return ERR_PTR(-EAGAIN);
@@ -873,7 +869,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
try_to_freeze();
cond_resched();
err = -EINTR;
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
goto out;
xprt = svc_get_next_xprt(rqstp, timeout);
Most svc threads have no interest in a timeout.
nfsd sets it to 1 hour, but this is a wart of no significance.
lockd uses the timeout so that it can call nlmsvc_retry_blocked().
It also sometimes calls svc_wake_up() to ensure this is called.
So change lockd to be consistent and always use svc_wake_up() to trigger
nlmsvc_retry_blocked() - using a timer instead of a timeout to
svc_recv().
And change svc_recv() to not take a timeout arg.
This makes the sp_threads_timedout counter always zero.
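Gathered in one place, the new arrangement looks roughly like this (a
sketch only - the pieces land in separate files in the diff below):

	static void nlmsvc_request_retry(struct timer_list *tl)
	{
		/* Timer fired: poke a lockd thread; its main loop
		 * calls nlmsvc_retry_blocked().
		 */
		svc_wake_up(nlmsvc_serv);
	}
	DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);

	void nlmsvc_retry_blocked(void)
	{
		unsigned long timeout = MAX_SCHEDULE_TIMEOUT;

		/* ... retry blocked locks, computing the earliest time
		 * at which another retry will be needed ...
		 */

		/* Re-arm the timer rather than returning a timeout
		 * for svc_recv() to sleep on.
		 */
		if (timeout < MAX_SCHEDULE_TIMEOUT)
			mod_timer(&nlmsvc_retry, jiffies + timeout);
	}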
Signed-off-by: NeilBrown <[email protected]>
---
fs/lockd/svc.c | 11 ++++++++---
fs/lockd/svclock.c | 5 +++--
fs/nfs/callback.c | 2 +-
fs/nfsd/nfssvc.c | 2 +-
include/linux/lockd/lockd.h | 4 +++-
include/linux/sunrpc/svc.h | 1 -
include/linux/sunrpc/svcsock.h | 2 +-
net/sunrpc/svc.c | 2 --
net/sunrpc/svc_xprt.c | 27 ++++++++++++---------------
9 files changed, 29 insertions(+), 27 deletions(-)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a43b63e46127..4f55cd42c2e6 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -55,6 +55,11 @@ static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
static struct svc_serv *nlmsvc_serv;
unsigned long nlmsvc_timeout;
+static void nlmsvc_request_retry(struct timer_list *tl)
+{
+ svc_wake_up(nlmsvc_serv);
+}
+DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);
unsigned int lockd_net_id;
@@ -130,18 +135,17 @@ lockd(void *vrqstp)
* NFS mount or NFS daemon has gone away.
*/
while (!kthread_should_stop()) {
- long timeout = MAX_SCHEDULE_TIMEOUT;
/* update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nlm_max_connections;
- timeout = nlmsvc_retry_blocked();
+ nlmsvc_retry_blocked();
/*
* Find any work to do, such as a socket with data available,
* and do the work.
*/
- svc_recv(rqstp, timeout);
+ svc_recv(rqstp);
}
if (nlmsvc_ops)
nlmsvc_invalidate_all();
@@ -375,6 +379,7 @@ static void lockd_put(void)
#endif
svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ timer_delete_sync(&nlmsvc_retry);
nlmsvc_serv = NULL;
dprintk("lockd_down: service destroyed\n");
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c43ccdf28ed9..3d7bd5c04b36 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -1008,7 +1008,7 @@ retry_deferred_block(struct nlm_block *block)
* picks up locks that can be granted, or grant notifications that must
* be retransmitted.
*/
-unsigned long
+void
nlmsvc_retry_blocked(void)
{
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -1038,5 +1038,6 @@ nlmsvc_retry_blocked(void)
}
spin_unlock(&nlm_blocked_lock);
- return timeout;
+ if (timeout < MAX_SCHEDULE_TIMEOUT)
+ mod_timer(&nlmsvc_retry, jiffies + timeout);
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 914d2402ca98..c47834970224 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -82,7 +82,7 @@ nfs4_callback_svc(void *vrqstp)
/*
* Listen for a request on the socket
*/
- svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
+ svc_recv(rqstp);
}
svc_exit_thread(rqstp);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 5bf48c33986e..b536b254c59e 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -979,7 +979,7 @@ nfsd(void *vrqstp)
* Find a socket with data available and call its
* recvfrom routine.
*/
- svc_recv(rqstp, 60*60*HZ);
+ svc_recv(rqstp);
validate_process_creds();
}
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..0f016d69c996 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout;
extern bool nsm_use_hostnames;
extern u32 nsm_local_state;
+extern struct timer_list nlmsvc_retry;
+
/*
* Lockd client functions
*/
@@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-unsigned long nlmsvc_retry_blocked(void);
+void nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d51ae1e109b6..f3df7f963653 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -41,7 +41,6 @@ struct svc_pool {
struct percpu_counter sp_messages_arrived;
struct percpu_counter sp_sockets_queued;
struct percpu_counter sp_threads_woken;
- struct percpu_counter sp_threads_timedout;
struct percpu_counter sp_threads_starved;
struct percpu_counter sp_threads_no_work;
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index fb5c98069356..8da31799affe 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -64,7 +64,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
* Function prototypes.
*/
void svc_close_net(struct svc_serv *, struct net *);
-void svc_recv(struct svc_rqst *, long);
+void svc_recv(struct svc_rqst *);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f09b0cce041c..170eabc03988 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -513,7 +513,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
- percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_no_work, 0, GFP_KERNEL);
@@ -593,7 +592,6 @@ svc_destroy(struct kref *ref)
percpu_counter_destroy(&pool->sp_messages_arrived);
percpu_counter_destroy(&pool->sp_sockets_queued);
percpu_counter_destroy(&pool->sp_threads_woken);
- percpu_counter_destroy(&pool->sp_threads_timedout);
percpu_counter_destroy(&pool->sp_threads_starved);
percpu_counter_destroy(&pool->sp_threads_no_work);
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 67825eef8646..44a33b1f542f 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -735,10 +735,9 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
@@ -756,7 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
- time_left = schedule_timeout(timeout);
+ schedule();
else
__set_current_state(TASK_RUNNING);
@@ -770,8 +769,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
goto out_found;
}
- if (!time_left)
- percpu_counter_inc(&pool->sp_threads_timedout);
if (kthread_should_stop())
return NULL;
percpu_counter_inc(&pool->sp_threads_no_work);
@@ -856,7 +853,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
* organised not to touch any cachelines in the shared svc_serv
* structure, only cachelines in the local svc_pool.
*/
-void svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = NULL;
struct svc_serv *serv = rqstp->rq_server;
@@ -870,7 +867,7 @@ void svc_recv(struct svc_rqst *rqstp, long timeout)
if (kthread_should_stop())
goto out;
- xprt = svc_get_next_xprt(rqstp, timeout);
+ xprt = svc_get_next_xprt(rqstp);
if (!xprt)
goto out;
@@ -1437,14 +1434,14 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %llu %llu %llu %llu %llu %llu\n",
- pool->sp_id,
- percpu_counter_sum_positive(&pool->sp_messages_arrived),
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_threads_woken),
- percpu_counter_sum_positive(&pool->sp_threads_timedout),
- percpu_counter_sum_positive(&pool->sp_threads_starved),
- percpu_counter_sum_positive(&pool->sp_threads_no_work));
+ seq_printf(m, "%u %llu %llu %llu 0 %llu %llu\n",
+ pool->sp_id,
+ percpu_counter_sum_positive(&pool->sp_messages_arrived),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken),
+ /* previously pool->sp_threads_timedout */
+ percpu_counter_sum_positive(&pool->sp_threads_starved),
+ percpu_counter_sum_positive(&pool->sp_threads_no_work));
return 0;
}
On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> svc_recv() currently returns a 0 on success or one of two errors:
> - -EAGAIN means no message was successfully received
> - -EINTR means the thread has been told to stop
>
> Previously nfsd would stop as the result of a signal as well as
> following kthread_stop(). In that case the difference was useful: EINTR
> means stop unconditionally. EAGAIN means stop if kthread_should_stop(),
> continue otherwise.
>
> Now threads only exit when kthread_should_stop() so we don't need the
> distinction.
>
> So have all threads test kthread_should_stop() (most do), and return
> simple success/failure from svc_recv().
The above sentence doesn't make sense. svc_recv() now returns void,
not success/failure.
> Also change some helpers that svc_recv() uses to not bother with an
> error code, returning a bool in one case, and NULL for failure in
> another.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/lockd/svc.c | 9 +++------
> fs/nfs/callback.c | 5 +----
> fs/nfsd/nfssvc.c | 8 ++------
> include/linux/sunrpc/svcsock.h | 2 +-
> net/sunrpc/svc_xprt.c | 27 +++++++++++----------------
> 5 files changed, 18 insertions(+), 33 deletions(-)
>
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index 91ef139a7757..a43b63e46127 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -116,7 +116,6 @@ static void set_grace_period(struct net *net)
> static int
> lockd(void *vrqstp)
> {
> - int err = 0;
> struct svc_rqst *rqstp = vrqstp;
> struct net *net = &init_net;
> struct lockd_net *ln = net_generic(net, lockd_net_id);
> @@ -139,12 +138,10 @@ lockd(void *vrqstp)
> timeout = nlmsvc_retry_blocked();
>
> /*
> - * Find a socket with data available and call its
> - * recvfrom routine.
> + * Find any work to do, such as a socket with data available,
> + * and do the work.
> */
> - err = svc_recv(rqstp, timeout);
> - if (err == -EAGAIN || err == -EINTR)
> - continue;
> + svc_recv(rqstp, timeout);
> }
> if (nlmsvc_ops)
> nlmsvc_invalidate_all();
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 2d94384bd6a9..914d2402ca98 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -74,7 +74,6 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
> static int
> nfs4_callback_svc(void *vrqstp)
> {
> - int err;
> struct svc_rqst *rqstp = vrqstp;
>
> set_freezable();
> @@ -83,9 +82,7 @@ nfs4_callback_svc(void *vrqstp)
> /*
> * Listen for a request on the socket
> */
> - err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
> - if (err == -EAGAIN || err == -EINTR)
> - continue;
> + svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
> }
>
> svc_exit_thread(rqstp);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 3e08cc746870..5bf48c33986e 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -953,7 +953,6 @@ nfsd(void *vrqstp)
> struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
> struct net *net = perm_sock->xpt_net;
> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> - int err;
>
> /* At this point, the thread shares current->fs
> * with the init process. We need to create files with the
> @@ -972,7 +971,7 @@ nfsd(void *vrqstp)
> /*
> * The main request loop
> */
> - for (;;) {
> + while (!kthread_should_stop()) {
> /* Update sv_maxconn if it has changed */
> rqstp->rq_server->sv_maxconn = nn->max_connections;
>
> @@ -980,10 +979,7 @@ nfsd(void *vrqstp)
> * Find a socket with data available and call its
> * recvfrom routine.
> */
> - while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
> - ;
> - if (err == -EINTR)
> - break;
> + svc_recv(rqstp, 60*60*HZ);
> validate_process_creds();
> }
>
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 55446136499f..fb5c98069356 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -64,7 +64,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
> * Function prototypes.
> */
> void svc_close_net(struct svc_serv *, struct net *);
> -int svc_recv(struct svc_rqst *, long);
> +void svc_recv(struct svc_rqst *, long);
> void svc_send(struct svc_rqst *rqstp);
> void svc_drop(struct svc_rqst *);
> void svc_sock_update_bufs(struct svc_serv *serv);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index c808f6d60c99..67825eef8646 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -664,7 +664,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
> }
> }
>
> -static int svc_alloc_arg(struct svc_rqst *rqstp)
> +static bool svc_alloc_arg(struct svc_rqst *rqstp)
> {
> struct svc_serv *serv = rqstp->rq_server;
> struct xdr_buf *arg = &rqstp->rq_arg;
> @@ -689,7 +689,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
> set_current_state(TASK_IDLE);
> if (kthread_should_stop()) {
> set_current_state(TASK_RUNNING);
> - return -EINTR;
> + return false;
> }
> trace_svc_alloc_arg_err(pages, ret);
> memalloc_retry_wait(GFP_KERNEL);
> @@ -708,7 +708,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
> arg->tail[0].iov_len = 0;
>
> rqstp->rq_xid = xdr_zero;
> - return 0;
> + return true;
> }
>
> static bool
> @@ -773,9 +773,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> if (!time_left)
> percpu_counter_inc(&pool->sp_threads_timedout);
> if (kthread_should_stop())
> - return ERR_PTR(-EINTR);
> + return NULL;
> percpu_counter_inc(&pool->sp_threads_no_work);
> - return ERR_PTR(-EAGAIN);
> + return NULL;
> out_found:
> /* Normally we will wait up to 5 seconds for any required
> * cache information to be provided.
> @@ -856,32 +856,27 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> * organised not to touch any cachelines in the shared svc_serv
> * structure, only cachelines in the local svc_pool.
> */
> -int svc_recv(struct svc_rqst *rqstp, long timeout)
> +void svc_recv(struct svc_rqst *rqstp, long timeout)
> {
> struct svc_xprt *xprt = NULL;
> struct svc_serv *serv = rqstp->rq_server;
> - int len, err;
> + int len;
>
> - err = svc_alloc_arg(rqstp);
> - if (err)
> + if (!svc_alloc_arg(rqstp))
> goto out;
>
> try_to_freeze();
> cond_resched();
> - err = -EINTR;
> if (kthread_should_stop())
> goto out;
>
> xprt = svc_get_next_xprt(rqstp, timeout);
> - if (IS_ERR(xprt)) {
> - err = PTR_ERR(xprt);
> + if (!xprt)
> goto out;
> - }
>
> len = svc_handle_xprt(rqstp, xprt);
>
> /* No data, incomplete (TCP) read, or accept() */
> - err = -EAGAIN;
> if (len <= 0)
> goto out_release;
>
> @@ -896,12 +891,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
> percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
> rqstp->rq_stime = ktime_get();
> svc_process(rqstp);
> - return 0;
> + return;
> out_release:
> rqstp->rq_res.len = 0;
> svc_xprt_release(rqstp);
> out:
> - return err;
> + return;
> }
> EXPORT_SYMBOL_GPL(svc_recv);
>
>
>
On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> When a sequence of numbers are needed for internal-use only, an enum is
> typically best. The sequence will inevitably need to be changed one
> day, and having an enum means the developer doesn't need to think about
> renumbering after insertion or deletion. The patch will be easier to
> review.
Last sentence needs to define the antecedent of "The patch".
> Signed-off-by: NeilBrown <[email protected]>
> ---
> include/linux/sunrpc/cache.h | 11 +++++++----
> include/linux/sunrpc/svc.h | 34 ++++++++++++++++++++--------------
> include/linux/sunrpc/svc_xprt.h | 39 +++++++++++++++++++++------------------
> include/linux/sunrpc/svcauth.h | 29 ++++++++++++++++-------------
> include/linux/sunrpc/xprtsock.h | 25 +++++++++++++------------
> 5 files changed, 77 insertions(+), 61 deletions(-)
>
> diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
> index 518bd28f5ab8..3cc4f4f0c764 100644
> --- a/include/linux/sunrpc/cache.h
> +++ b/include/linux/sunrpc/cache.h
> @@ -56,10 +56,13 @@ struct cache_head {
> struct kref ref;
> unsigned long flags;
> };
> -#define CACHE_VALID 0 /* Entry contains valid data */
> -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
> -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
> -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
> +/* cache_head.flags */
> +enum {
> + CACHE_VALID, /* Entry contains valid data */
> + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
> + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
> + CACHE_CLEANED, /* Entry has been cleaned from cache */
> +};
Weird comment of the day: Please use a double-tab before the comments
to leave room for larger flag names in the future.
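Something like this, say (spacing is illustrative only):

	enum {
		CACHE_VALID,		/* Entry contains valid data */
		CACHE_NEGATIVE,		/* Negative entry - there is no match for the key */
		CACHE_PENDING,		/* An upcall has been sent but no reply received yet */
		CACHE_CLEANED,		/* Entry has been cleaned from cache */
	};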
> #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
>
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index f3df7f963653..83f31a09c853 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -31,7 +31,7 @@
> * node traffic on multi-node NUMA NFS servers.
> */
> struct svc_pool {
> - unsigned int sp_id; /* pool id; also node id on NUMA */
> + unsigned int sp_id; /* pool id; also node id on NUMA */
> spinlock_t sp_lock; /* protects all fields */
> struct list_head sp_sockets; /* pending sockets */
> unsigned int sp_nrthreads; /* # of threads in pool */
> @@ -44,12 +44,15 @@ struct svc_pool {
> struct percpu_counter sp_threads_starved;
> struct percpu_counter sp_threads_no_work;
>
> -#define SP_TASK_PENDING (0) /* still work to do even if no
> - * xprt is queued. */
> -#define SP_CONGESTED (1)
> unsigned long sp_flags;
> } ____cacheline_aligned_in_smp;
>
> +/* bits for sp_flags */
> +enum {
> + SP_TASK_PENDING, /* still work to do even if no xprt is queued */
> + SP_CONGESTED, /* all threads are busy, none idle */
> +};
> +
> /*
> * RPC service.
> *
> @@ -232,16 +235,6 @@ struct svc_rqst {
> u32 rq_proc; /* procedure number */
> u32 rq_prot; /* IP protocol */
> int rq_cachetype; /* catering to nfsd */
> -#define RQ_SECURE (0) /* secure port */
> -#define RQ_LOCAL (1) /* local request */
> -#define RQ_USEDEFERRAL (2) /* use deferral */
> -#define RQ_DROPME (3) /* drop current reply */
> -#define RQ_SPLICE_OK (4) /* turned off in gss privacy
> - * to prevent encrypting page
> - * cache pages */
> -#define RQ_VICTIM (5) /* about to be shut down */
> -#define RQ_BUSY (6) /* request is busy */
> -#define RQ_DATA (7) /* request has data */
> unsigned long rq_flags; /* flags field */
> ktime_t rq_qtime; /* enqueue time */
>
> @@ -272,6 +265,19 @@ struct svc_rqst {
> void ** rq_lease_breaker; /* The v4 client breaking a lease */
> };
>
> +/* bits for rq_flags */
> +enum {
> + RQ_SECURE, /* secure port */
> + RQ_LOCAL, /* local request */
> + RQ_USEDEFERRAL, /* use deferral */
> + RQ_DROPME, /* drop current reply */
> + RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
> + * cache pages */
> + RQ_VICTIM, /* about to be shut down */
> + RQ_BUSY, /* request is busy */
> + RQ_DATA, /* request has data */
> +};
> +
Also here -- two tab stops instead of one.
> #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
>
> /*
> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> index a6b12631db21..af383d0349b3 100644
> --- a/include/linux/sunrpc/svc_xprt.h
> +++ b/include/linux/sunrpc/svc_xprt.h
> @@ -56,26 +56,9 @@ struct svc_xprt {
> struct list_head xpt_list;
> struct list_head xpt_ready;
> unsigned long xpt_flags;
> -#define XPT_BUSY 0 /* enqueued/receiving */
> -#define XPT_CONN 1 /* conn pending */
> -#define XPT_CLOSE 2 /* dead or dying */
> -#define XPT_DATA 3 /* data pending */
> -#define XPT_TEMP 4 /* connected transport */
> -#define XPT_DEAD 6 /* transport closed */
> -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
> -#define XPT_DEFERRED 8 /* deferred request pending */
> -#define XPT_OLD 9 /* used for xprt aging mark+sweep */
> -#define XPT_LISTENER 10 /* listening endpoint */
> -#define XPT_CACHE_AUTH 11 /* cache auth info */
> -#define XPT_LOCAL 12 /* connection from loopback interface */
> -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
> -#define XPT_CONG_CTRL 14 /* has congestion control */
> -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
> -#define XPT_TLS_SESSION 16 /* transport-layer security established */
> -#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>
> struct svc_serv *xpt_server; /* service for transport */
> - atomic_t xpt_reserved; /* space on outq that is rsvd */
> + atomic_t xpt_reserved; /* space on outq that is rsvd */
> atomic_t xpt_nr_rqsts; /* Number of requests */
> struct mutex xpt_mutex; /* to serialize sending data */
> spinlock_t xpt_lock; /* protects sk_deferred
> @@ -96,6 +79,26 @@ struct svc_xprt {
> struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
> struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
> };
> +/* flag bits for xpt_flags */
> +enum {
> + XPT_BUSY, /* enqueued/receiving */
> + XPT_CONN, /* conn pending */
> + XPT_CLOSE, /* dead or dying */
> + XPT_DATA, /* data pending */
> + XPT_TEMP, /* connected transport */
> + XPT_DEAD, /* transport closed */
> + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
> + XPT_DEFERRED, /* deferred request pending */
> + XPT_OLD, /* used for xprt aging mark+sweep */
> + XPT_LISTENER, /* listening endpoint */
> + XPT_CACHE_AUTH, /* cache auth info */
> + XPT_LOCAL, /* connection from loopback interface */
> + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
> + XPT_CONG_CTRL, /* has congestion control */
> + XPT_HANDSHAKE, /* xprt requests a handshake */
> + XPT_TLS_SESSION, /* transport-layer security established */
> + XPT_PEER_AUTH, /* peer has been authenticated */
> +};
>
> static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
> {
> diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
> index 6d9cc9080aca..8d1d0d0721d2 100644
> --- a/include/linux/sunrpc/svcauth.h
> +++ b/include/linux/sunrpc/svcauth.h
> @@ -133,19 +133,22 @@ struct auth_ops {
> int (*set_client)(struct svc_rqst *rq);
> };
>
> -#define SVC_GARBAGE 1
> -#define SVC_SYSERR 2
> -#define SVC_VALID 3
> -#define SVC_NEGATIVE 4
> -#define SVC_OK 5
> -#define SVC_DROP 6
> -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
> - * lost so if there is a tcp connection, it
> - * should be closed
> - */
> -#define SVC_DENIED 8
> -#define SVC_PENDING 9
> -#define SVC_COMPLETE 10
> +/* return values for svc functions that analyse requests */
> +enum {
> + SVC_GARBAGE,
> + SVC_SYSERR,
> + SVC_VALID,
> + SVC_NEGATIVE,
> + SVC_OK,
> + SVC_DROP,
> + SVC_CLOSE, /* Like SVC_DROP, but request is definitely
> + * lost so if there is a tcp connection, it
> + * should be closed
> + */
> + SVC_DENIED,
> + SVC_PENDING,
> + SVC_COMPLETE,
> +};
>
> struct svc_xprt;
>
> diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
> index 700a1e6c047c..1ed2f446010b 100644
> --- a/include/linux/sunrpc/xprtsock.h
> +++ b/include/linux/sunrpc/xprtsock.h
> @@ -81,17 +81,18 @@ struct sock_xprt {
> };
>
> /*
> - * TCP RPC flags
> + * TCP RPC flags in ->sock_state
> */
> -#define XPRT_SOCK_CONNECTING 1U
> -#define XPRT_SOCK_DATA_READY (2)
> -#define XPRT_SOCK_UPD_TIMEOUT (3)
> -#define XPRT_SOCK_WAKE_ERROR (4)
> -#define XPRT_SOCK_WAKE_WRITE (5)
> -#define XPRT_SOCK_WAKE_PENDING (6)
> -#define XPRT_SOCK_WAKE_DISCONNECT (7)
> -#define XPRT_SOCK_CONNECT_SENT (8)
> -#define XPRT_SOCK_NOSPACE (9)
> -#define XPRT_SOCK_IGNORE_RECV (10)
> -
> +enum {
> + XPRT_SOCK_CONNECTING,
> + XPRT_SOCK_DATA_READY,
> + XPRT_SOCK_UPD_TIMEOUT,
> + XPRT_SOCK_WAKE_ERROR,
> + XPRT_SOCK_WAKE_WRITE,
> + XPRT_SOCK_WAKE_PENDING,
> + XPRT_SOCK_WAKE_DISCONNECT,
> + XPRT_SOCK_CONNECT_SENT,
> + XPRT_SOCK_NOSPACE,
> + XPRT_SOCK_IGNORE_RECV,
> +};
> #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
>
>
Let's not change client-side code in this patch. Please split this
hunk out and send it to Trond and Anna separately.
On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> svc_get_next_xprt() does a lot more than get an xprt. It mostly waits.
>
> So rename to svc_wait_for_work() and don't bother returning a value.
Or svc_rqst_wait_for_work() ?
> The xprt can be found in ->rq_xprt.
>
> Also move all the code to handle ->rq_xprt into a single if branch, so
> that other handlers can be added there if other work is found.
>
> Remove the call to svc_xprt_dequeue() that is before we set TASK_IDLE.
> If there is still something to dequeue we will still get it after a few
> more checks - no sleeping. This was an unnecessary optimisation which
> muddles the code.
I think "This was an unnecessary optimisation" needs to be
demonstrated, and the removal needs to be a separate patch.
I would also move it before the patch adding
trace_svc_pool_polled() so we don't have a series with a
patch that adds a tracepoint followed by another patch
that removes the same tracepoint.
> Drop a call to kthread_should_stop(). There are enough of those in
> svc_wait_for_work().
A bit of this clean-up could be moved back to 2/14.
> (This patch is best viewed with "-b")
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> net/sunrpc/svc_xprt.c | 70 +++++++++++++++++++------------------------------
> 1 file changed, 27 insertions(+), 43 deletions(-)
>
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 44a33b1f542f..c7095ff7d5fd 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -735,19 +735,10 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> return true;
> }
>
> -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> +static void svc_wait_for_work(struct svc_rqst *rqstp)
> {
> struct svc_pool *pool = rqstp->rq_pool;
>
> - /* rq_xprt should be clear on entry */
> - WARN_ON_ONCE(rqstp->rq_xprt);
> -
> - rqstp->rq_xprt = svc_xprt_dequeue(pool);
> - if (rqstp->rq_xprt) {
> - trace_svc_pool_polled(rqstp);
> - goto out_found;
> - }
> -
> set_current_state(TASK_IDLE);
> smp_mb__before_atomic();
> clear_bit(SP_CONGESTED, &pool->sp_flags);
> @@ -769,10 +760,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> goto out_found;
> }
>
> - if (kthread_should_stop())
> - return NULL;
> - percpu_counter_inc(&pool->sp_threads_no_work);
> - return NULL;
> + if (!kthread_should_stop())
> + percpu_counter_inc(&pool->sp_threads_no_work);
> + return;
> out_found:
> /* Normally we will wait up to 5 seconds for any required
> * cache information to be provided.
> @@ -781,7 +771,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> rqstp->rq_chandle.thread_wait = 5*HZ;
> else
> rqstp->rq_chandle.thread_wait = 1*HZ;
> - return rqstp->rq_xprt;
> }
>
> static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
> @@ -855,45 +844,40 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> */
> void svc_recv(struct svc_rqst *rqstp)
> {
> - struct svc_xprt *xprt = NULL;
> - struct svc_serv *serv = rqstp->rq_server;
> - int len;
> -
> if (!svc_alloc_arg(rqstp))
> - goto out;
> + return;
>
> try_to_freeze();
> cond_resched();
> - if (kthread_should_stop())
> - goto out;
>
> - xprt = svc_get_next_xprt(rqstp);
> - if (!xprt)
> - goto out;
> + svc_wait_for_work(rqstp);
>
> - len = svc_handle_xprt(rqstp, xprt);
> + if (rqstp->rq_xprt) {
> + struct svc_serv *serv = rqstp->rq_server;
> + struct svc_xprt *xprt = rqstp->rq_xprt;
> + int len;
>
> - /* No data, incomplete (TCP) read, or accept() */
> - if (len <= 0)
> - goto out_release;
> + len = svc_handle_xprt(rqstp, xprt);
>
> - trace_svc_xdr_recvfrom(&rqstp->rq_arg);
> + /* No data, incomplete (TCP) read, or accept() */
> + if (len <= 0) {
> + rqstp->rq_res.len = 0;
> + svc_xprt_release(rqstp);
> + } else {
>
> - clear_bit(XPT_OLD, &xprt->xpt_flags);
> + trace_svc_xdr_recvfrom(&rqstp->rq_arg);
>
> - rqstp->rq_chandle.defer = svc_defer;
> + clear_bit(XPT_OLD, &xprt->xpt_flags);
>
> - if (serv->sv_stats)
> - serv->sv_stats->netcnt++;
> - percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
> - rqstp->rq_stime = ktime_get();
> - svc_process(rqstp);
> - return;
> -out_release:
> - rqstp->rq_res.len = 0;
> - svc_xprt_release(rqstp);
> -out:
> - return;
> + rqstp->rq_chandle.defer = svc_defer;
> +
> + if (serv->sv_stats)
> + serv->sv_stats->netcnt++;
> + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
> + rqstp->rq_stime = ktime_get();
> + svc_process(rqstp);
> + }
> + }
> }
> EXPORT_SYMBOL_GPL(svc_recv);
>
>
>
On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> No callers of svc_pool_wake_idle_thread() care which thread was woken -
> except one that wants to trace the wakeup. For now we drop that
> tracepoint.
That's an important tracepoint, IMO.
It might be better to have svc_pool_wake_idle_thread() return void
right from its introduction, and move the tracepoint into that
function. I can do that and respin if you agree.
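For example (a rough sketch - the tracepoint name here is hypothetical):

	void svc_pool_wake_idle_thread(struct svc_serv *serv,
				       struct svc_pool *pool)
	{
		struct svc_rqst *rqstp;

		rcu_read_lock();
		list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
			if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
				continue;
			WRITE_ONCE(rqstp->rq_qtime, ktime_get());
			wake_up_process(rqstp->rq_task);
			/* The tracepoint fires here, where both the
			 * pool and the woken rqstp are in scope.
			 */
			trace_svc_pool_thread_woken(pool, rqstp);
			percpu_counter_inc(&pool->sp_threads_woken);
			rcu_read_unlock();
			return;
		}
		rcu_read_unlock();
		trace_svc_pool_starved(serv, pool);
		percpu_counter_inc(&pool->sp_threads_starved);
	}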
> One caller wants to know if anything was woken to set SP_CONGESTED, so
> set that inside the function instead.
>
> Now svc_pool_wake_idle_thread() can "return" void.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> include/linux/sunrpc/svc.h | 2 +-
> net/sunrpc/svc.c | 13 +++++--------
> net/sunrpc/svc_xprt.c | 18 +++---------------
> 3 files changed, 9 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index ea3ce1315416..b39c613fbe06 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -454,7 +454,7 @@ int svc_register(const struct svc_serv *, struct net *, const int,
>
> void svc_wake_up(struct svc_serv *);
> void svc_reserve(struct svc_rqst *rqstp, int space);
> -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
> +void svc_pool_wake_idle_thread(struct svc_serv *serv,
> struct svc_pool *pool);
> struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
> char * svc_print_addr(struct svc_rqst *, char *, size_t);
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index b18175ef74ec..fd49e7b12c94 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -696,13 +696,10 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
> * @serv: RPC service
> * @pool: service thread pool
> *
> - * Returns an idle service thread (now marked BUSY), or NULL
> - * if no service threads are available. Finding an idle service
> - * thread and marking it BUSY is atomic with respect to other
> - * calls to svc_pool_wake_idle_thread().
> + * Wake an idle thread if there is one, else mark the pool as congested.
> */
> -struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
> - struct svc_pool *pool)
> +void svc_pool_wake_idle_thread(struct svc_serv *serv,
> + struct svc_pool *pool)
> {
> struct svc_rqst *rqstp;
>
> @@ -715,13 +712,13 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
> wake_up_process(rqstp->rq_task);
> rcu_read_unlock();
> percpu_counter_inc(&pool->sp_threads_woken);
> - return rqstp;
> + return;
> }
> rcu_read_unlock();
>
> trace_svc_pool_starved(serv, pool);
> percpu_counter_inc(&pool->sp_threads_starved);
> - return NULL;
> + set_bit(SP_CONGESTED, &pool->sp_flags);
> }
> EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
>
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 948605e7043b..964c97dbb36c 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -456,7 +456,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
> */
> void svc_xprt_enqueue(struct svc_xprt *xprt)
> {
> - struct svc_rqst *rqstp;
> struct svc_pool *pool;
>
> if (!svc_xprt_ready(xprt))
> @@ -477,13 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
> list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
> spin_unlock_bh(&pool->sp_lock);
>
> - rqstp = svc_pool_wake_idle_thread(xprt->xpt_server, pool);
> - if (!rqstp) {
> - set_bit(SP_CONGESTED, &pool->sp_flags);
> - return;
> - }
> -
> - trace_svc_xprt_enqueue(xprt, rqstp);
> + svc_pool_wake_idle_thread(xprt->xpt_server, pool);
> }
> EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
>
> @@ -587,14 +580,9 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
> void svc_wake_up(struct svc_serv *serv)
> {
> struct svc_pool *pool = &serv->sv_pools[0];
> - struct svc_rqst *rqstp;
>
> - rqstp = svc_pool_wake_idle_thread(serv, pool);
> - if (!rqstp) {
> - set_bit(SP_TASK_PENDING, &pool->sp_flags);
> - smp_wmb();
> - return;
> - }
> + set_bit(SP_TASK_PENDING, &pool->sp_flags);
> + svc_pool_wake_idle_thread(serv, pool);
> }
> EXPORT_SYMBOL_GPL(svc_wake_up);
>
>
>
> On Jul 18, 2023, at 2:38 AM, NeilBrown <[email protected]> wrote:
>
> This code .... grew a bit since my previous pencil-sketch code.
>
> The goal is really the final patch: using a llist without spinlocks to
> handle dispatch of idle threads. To get there I found it necessary - or
> at least helpful - to do a lot of refactoring.
>
> This code passes some basic tests, but I haven't push it hard yet.
>
> Even if other aren't convinced that llists are the best solution, I
> think a lot of the refactoring is this valuable.
Some of these are indeed immediately appealing. Let's work on getting
those into the code base before we decide on the scheduler changes.
> Comments welcome,
> Thanks,
> NeilBrown
>
> ---
>
> NeilBr own (14):
> lockd: remove SIGKILL handling.
> nfsd: don't allow nfsd threads to be signalled.
> SUNRPC: call svc_process() from svc_recv().
> SUNRPC: change svc_recv() to return void.
> SUNRPC: remove timeout arg from svc_recv()
> SUNRPC: change various server-side #defines to enum
> SUNRPC: refactor svc_recv()
> SUNRPC: integrate back-channel processing with svc_recv() and svc_process()
> SUNRPC: change how svc threads are asked to exit.
> SUNRPC: change svc_pool_wake_idle_thread() to return nothing.
> SUNRPC: add list of idle threads
> SUNRPC: discard SP_CONGESTED
> SUNRPC: change service idle list to be an llist
> SUNRPC: only have one thread waking up at a time
>
>
> fs/lockd/svc.c | 49 ++-----
> fs/lockd/svclock.c | 9 +-
> fs/nfs/callback.c | 59 +-------
> fs/nfsd/nfs4proc.c | 10 +-
> fs/nfsd/nfssvc.c | 22 +--
> include/linux/llist.h | 2 +
> include/linux/lockd/lockd.h | 4 +-
> include/linux/sunrpc/cache.h | 11 +-
> include/linux/sunrpc/svc.h | 87 +++++++++---
> include/linux/sunrpc/svc_xprt.h | 39 +++---
> include/linux/sunrpc/svcauth.h | 29 ++--
> include/linux/sunrpc/svcsock.h | 2 +-
> include/linux/sunrpc/xprtsock.h | 25 ++--
> include/trace/events/sunrpc.h | 5 +-
> lib/llist.c | 27 ++++
> net/sunrpc/backchannel_rqst.c | 8 +-
> net/sunrpc/svc.c | 71 ++++------
> net/sunrpc/svc_xprt.c | 226 ++++++++++++++++--------------
> net/sunrpc/xprtrdma/backchannel.c | 2 +-
> 19 files changed, 347 insertions(+), 340 deletions(-)
>
> --
> Signature
>
--
Chuck Lever
On Tue, 18 Jul 2023, Chuck Lever wrote:
> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> > No callers of svc_pool_wake_idle_thread() care which thread was woken -
> > except one that wants to trace the wakeup. For now we drop that
> > tracepoint.
>
> That's an important tracepoint, IMO.
>
> It might be better to have svc_pool_wake_idle_thread() return void
> right from its introduction, and move the tracepoint into that
> function. I can do that and respin if you agree.
Mostly I agree.
It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
as there would be no code that can see both the trigger xprt, and the
woken rqst.
I also wonder if having the trace point when the wake-up is requested
makes any sense, as there is no guarantee that the woken thread will
handle that xprt.
Maybe the trace point should report when the xprt is dequeued. i.e.
maybe trace_svc_pool_awoken() should report the pid, and we could have
trace_svc_xprt_enqueue() only report the xprt, not the rqst.
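Roughly like this, perhaps (a sketch only - the field choice is just a
guess at what would be useful):

	TRACE_EVENT(svc_pool_awoken,
		TP_PROTO(const struct svc_rqst *rqstp),
		TP_ARGS(rqstp),
		TP_STRUCT__entry(
			__field(unsigned int, pool_id)
			__field(int, pid)
		),
		TP_fast_assign(
			__entry->pool_id = rqstp->rq_pool->sp_id;
			__entry->pid = rqstp->rq_task->pid;
		),
		TP_printk("pool=%u pid=%d",
			  __entry->pool_id, __entry->pid)
	);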
Thanks,
NeilBrown
> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
>
> On Tue, 18 Jul 2023, Chuck Lever wrote:
>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
>>> except one that wants to trace the wakeup. For now we drop that
>>> tracepoint.
>>
>> That's an important tracepoint, IMO.
>>
>> It might be better to have svc_pool_wake_idle_thread() return void
>> right from its introduction, and move the tracepoint into that
>> function. I can do that and respin if you agree.
>
> Mostly I agree.
>
> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
> as there would be no code that can see both the trigger xprt, and the
> woken rqst.
>
> I also wonder if having the trace point when the wake-up is requested
> makes any sense, as there is no guarantee that the woken thread will
> handle that xprt.
>
> Maybe the trace point should report when the xprt is dequeued. i.e.
> maybe trace_svc_pool_awoken() should report the pid, and we could have
> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
I'll come up with something that rearranges the tracepoints so that
svc_pool_wake_idle_thread() can return void.
svc_pool_wake_idle_thread() can save the waker's PID in svc_rqst
somewhere, for example. The dequeue tracepoint can then report that
(if it's still interesting when we're all done with this work).
--
Chuck Lever
On Wed, 19 Jul 2023, Chuck Lever III wrote:
>
> > On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
> >
> > On Tue, 18 Jul 2023, Chuck Lever wrote:
> >> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> >>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
> >>> except one that wants to trace the wakeup. For now we drop that
> >>> tracepoint.
> >>
> >> That's an important tracepoint, IMO.
> >>
> >> It might be better to have svc_pool_wake_idle_thread() return void
> >> right from its introduction, and move the tracepoint into that
> >> function. I can do that and respin if you agree.
> >
> > Mostly I agree.
> >
> > It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
> > as there would be no code that can see both the trigger xprt, and the
> > woken rqst.
> >
> > I also wonder if having the trace point when the wake-up is requested
> > makes any sense, as there is no guarantee that the woken thread will
> > handle that xprt.
> >
> > Maybe the trace point should report when the xprt is dequeued. i.e.
> > maybe trace_svc_pool_awoken() should report the pid, and we could have
> > trace_svc_xprt_enqueue() only report the xprt, not the rqst.
>
> I'll come up with something that rearranges the tracepoints so that
> svc_pool_wake_idle_thread() can return void.
My current draft code has svc_pool_wake_idle_thread() returning bool -
if it found something to wake up - purely for logging.
I think it is worth logging whether an event triggered a wake up or not,
and which event did that. I'm less sure that the pid is relevant. But
as you say - this will probably become clearer as the code settles down.
Thanks,
NeilBrown
>
> svc_pool_wake_idle_thread() can save the waker's PID in svc_rqst
> somewhere, for example. The dequeue tracepoint can then report that
> (if it's still interesting when we're all done with this work).
>
>
> --
> Chuck Lever
>
>
>
> On Jul 19, 2023, at 7:20 PM, NeilBrown <[email protected]> wrote:
>
> On Wed, 19 Jul 2023, Chuck Lever III wrote:
>>
>>> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
>>>
>>> On Tue, 18 Jul 2023, Chuck Lever wrote:
>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
>>>>> except one that wants to trace the wakeup. For now we drop that
>>>>> tracepoint.
>>>>
>>>> That's an important tracepoint, IMO.
>>>>
>>>> It might be better to have svc_pool_wake_idle_thread() return void
>>>> right from its introduction, and move the tracepoint into that
>>>> function. I can do that and respin if you agree.
>>>
>>> Mostly I agree.
>>>
>>> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
>>> as there would be no code that can see both the trigger xprt, and the
>>> woken rqst.
>>>
>>> I also wonder if having the trace point when the wake-up is requested
>>> makes any sense, as there is no guarantee that the woken thread will
>>> handle that xprt.
>>>
>>> Maybe the trace point should report when the xprt is dequeued. i.e.
>>> maybe trace_svc_pool_awoken() should report the pid, and we could have
>>> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
>>
>> I'll come up with something that rearranges the tracepoints so that
>> svc_pool_wake_idle_thread() can return void.
>
> My current draft code has svc_pool_wake_idle_thread() returning bool -
> if it found something to wake up - purely for logging.
This is also where I have ended up. I'll post an update probably tomorrow
my time. Too much other stuff going on to finish it today.
> I think it is worth logging whether an event triggered a wake up or not,
> and which event did that.
Agreed. I have some experimental code that records _RET_IP_ of the caller
of svc_xprt_enqueue(), but again it's questionable whether that is of
long term value.
> I'm less sure that the pid is relevant. But
> as you say - this will probably become clearer as the code settles down.
>
> Thanks,
> NeilBrown
>
>
>>
>> svc_pool_wake_idle_thread() can save the waker's PID in svc_rqst
>> somewhere, for example. The dequeue tracepoint can then report that
>> (if it's still interesting when we're all done with this work).
>>
>>
>> --
>> Chuck Lever
--
Chuck Lever
> On Jul 19, 2023, at 7:44 PM, Chuck Lever III <[email protected]> wrote:
>
>
>
>> On Jul 19, 2023, at 7:20 PM, NeilBrown <[email protected]> wrote:
>>
>> On Wed, 19 Jul 2023, Chuck Lever III wrote:
>>>
>>>> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
>>>>
>>>> On Tue, 18 Jul 2023, Chuck Lever wrote:
>>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>>>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
>>>>>> except one that wants to trace the wakeup. For now we drop that
>>>>>> tracepoint.
>>>>>
>>>>> That's an important tracepoint, IMO.
>>>>>
>>>>> It might be better to have svc_pool_wake_idle_thread() return void
>>>>> right from its introduction, and move the tracepoint into that
>>>>> function. I can do that and respin if you agree.
>>>>
>>>> Mostly I agree.
>>>>
>>>> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
>>>> as there would be no code that can see both the trigger xprt, and the
>>>> woken rqst.
>>>>
>>>> I also wonder if having the trace point when the wake-up is requested
>>>> makes any sense, as there is no guarantee that the woken thread will
>>>> handle that xprt.
>>>>
>>>> Maybe the trace point should report when the xprt is dequeued. i.e.
>>>> maybe trace_svc_pool_awoken() should report the pid, and we could have
>>>> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
>>>
>>> I'll come up with something that rearranges the tracepoints so that
>>> svc_pool_wake_idle_thread() can return void.
>>
>> My current draft code has svc_pool_wake_idle_thread() returning bool -
>> if it found something to wake up - purely for logging.
>
> This is also where I have ended up. I'll post an update probably tomorrow
> my time. Too much other stuff going on to finish it today.
Pushed to https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
in branch topic-sunrpc-thread-scheduling
>> I think it is worth logging whether an event triggered a wake up or not,
>> and which event did that.
>
> Agreed. I have some experimental code that records _RET_IP_ of the caller
> of svc_xprt_enqueue(), but again it's questionable whether that is of
> long term value.
>
>
>> I'm less sure that the pid is relevant. But
>> as you say - this will probably become clearer as the code settles down.
>>
>> Thanks,
>> NeilBrown
>>
>>
>>>
>>> svc_pool_wake_idle_thread() can save the waker's PID in svc_rqst
>>> somewhere, for example. The dequeue tracepoint can then report that
>>> (if it's still interesting when we're all done with this work).
>>>
>>>
>>> --
>>> Chuck Lever
>
>
> --
> Chuck Lever
--
Chuck Lever
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> The original implementation of nfsd used signals to stop threads during
> shutdown.
> In Linux 2.3.46pre5 nfsd gained the ability to shut down threads
> internally if it was asked to run "0" threads. After this, user-space
> transitioned to using "rpc.nfsd 0" to stop nfsd, and sending signals to
> threads was no longer an important part of the API.
>
> In Commit 3ebdbe5203a8 ("SUNRPC: discard svo_setup and rename
> svc_set_num_threads_sync()") (v5.17-rc1~75^2~41) we finally removed the
> use of signals for stopping threads, using kthread_stop() instead.
>
> This patch takes the "obvious" next step and removes the ability to
> signal nfsd threads - or any svc threads. nfsd stops allowing signals
> and we don't check for their delivery any more.
>
> This will allow for some simplification in later patches.
>
> A change worth noting is in nfsd4_ssc_setup_dul(). There was previously
> a signal_pending() check which would only succeed when the thread was
> being shut down. It should really have tested kthread_should_stop() as
> well. Now it just does the latter, not the former.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfs/callback.c | 9 +--------
> fs/nfsd/nfs4proc.c | 4 ++--
> fs/nfsd/nfssvc.c | 12 ------------
> net/sunrpc/svc_xprt.c | 16 ++++++----------
> 4 files changed, 9 insertions(+), 32 deletions(-)
>
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 456af7d230cf..46a0a2d6962e 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp)
> set_freezable();
>
> while (!kthread_freezable_should_stop(NULL)) {
> -
> - if (signal_pending(current))
> - flush_signals(current);
> /*
> * Listen for a request on the socket
> */
> @@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp)
> set_freezable();
>
> while (!kthread_freezable_should_stop(NULL)) {
> -
> - if (signal_pending(current))
> - flush_signals(current);
> -
> - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
> + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
> spin_lock_bh(&serv->sv_cb_lock);
> if (!list_empty(&serv->sv_cb_list)) {
> req = list_first_entry(&serv->sv_cb_list,
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index d8e7a533f9d2..157488290676 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1325,11 +1325,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> if (ni->nsui_busy) {
> /* wait - and try again */
> prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
> - TASK_INTERRUPTIBLE);
> + TASK_IDLE);
> spin_unlock(&nn->nfsd_ssc_lock);
>
> /* allow 20secs for mount/unmount for now - revisit */
> - if (signal_pending(current) ||
> + if (kthread_should_stop() ||
> (schedule_timeout(20*HZ) == 0)) {
> finish_wait(&nn->nfsd_ssc_waitq, &wait);
> kfree(work);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 97830e28c140..439fca195925 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -965,15 +965,6 @@ nfsd(void *vrqstp)
>
> current->fs->umask = 0;
>
> - /*
> - * thread is spawned with all signals set to SIG_IGN, re-enable
> - * the ones that will bring down the thread
> - */
> - allow_signal(SIGKILL);
> - allow_signal(SIGHUP);
> - allow_signal(SIGINT);
> - allow_signal(SIGQUIT);
> -
> atomic_inc(&nfsdstats.th_cnt);
>
> set_freezable();
> @@ -998,9 +989,6 @@ nfsd(void *vrqstp)
> validate_process_creds();
> }
>
> - /* Clear signals before calling svc_exit_thread() */
> - flush_signals(current);
> -
> atomic_dec(&nfsdstats.th_cnt);
>
> out:
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 71b19d0ed642..93395606a0ba 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -686,8 +686,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
> /* Made progress, don't sleep yet */
> continue;
>
> - set_current_state(TASK_INTERRUPTIBLE);
> - if (signalled() || kthread_should_stop()) {
> + set_current_state(TASK_IDLE);
> + if (kthread_should_stop()) {
> set_current_state(TASK_RUNNING);
> return -EINTR;
> }
> @@ -725,7 +725,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> return false;
>
> /* are we shutting down? */
> - if (signalled() || kthread_should_stop())
> + if (kthread_should_stop())
> return false;
>
> /* are we freezing? */
> @@ -749,11 +749,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> goto out_found;
> }
>
> - /*
> - * We have to be able to interrupt this wait
> - * to bring down the daemons ...
> - */
> - set_current_state(TASK_INTERRUPTIBLE);
> + set_current_state(TASK_IDLE);
> smp_mb__before_atomic();
> clear_bit(SP_CONGESTED, &pool->sp_flags);
> clear_bit(RQ_BUSY, &rqstp->rq_flags);
> @@ -776,7 +772,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
>
> if (!time_left)
> percpu_counter_inc(&pool->sp_threads_timedout);
> - if (signalled() || kthread_should_stop())
> + if (kthread_should_stop())
> return ERR_PTR(-EINTR);
> percpu_counter_inc(&pool->sp_threads_no_work);
> return ERR_PTR(-EAGAIN);
> @@ -873,7 +869,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
> try_to_freeze();
> cond_resched();
> err = -EINTR;
> - if (signalled() || kthread_should_stop())
> + if (kthread_should_stop())
> goto out;
>
> xprt = svc_get_next_xprt(rqstp, timeout);
>
>
LGTM
Reviewed-by: Jeff Layton <[email protected]>
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> svc_recv() currently returns a 0 on success or one of two errors:
> - -EAGAIN means no message was successfully received
> - -EINTR means the thread has been told to stop
>
> Previously nfsd would stop as the result of a signal as well as
> following kthread_stop(). In that case the difference was useful: EINTR
> means stop unconditionally. EAGAIN means stop if kthread_should_stop(),
> continue otherwise.
>
> Now threads only exit when kthread_should_stop() so we don't need the
> distinction.
>
> So have all threads test kthread_should_stop() (most do), and return
> simple success/failure from svc_recv().
The patch makes sense, but the above sentence doesn't. You're making
svc_recv() return void, but you're returning simple success/failure?
> Also change some helpers that svc_recv() uses to not bother with an
> error code, returning a bool in once case, and NULL for failure in
> another.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/lockd/svc.c | 9 +++------
> fs/nfs/callback.c | 5 +----
> fs/nfsd/nfssvc.c | 8 ++------
> include/linux/sunrpc/svcsock.h | 2 +-
> net/sunrpc/svc_xprt.c | 27 +++++++++++----------------
> 5 files changed, 18 insertions(+), 33 deletions(-)
>
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index 91ef139a7757..a43b63e46127 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -116,7 +116,6 @@ static void set_grace_period(struct net *net)
> static int
> lockd(void *vrqstp)
> {
> - int err = 0;
> struct svc_rqst *rqstp = vrqstp;
> struct net *net = &init_net;
> struct lockd_net *ln = net_generic(net, lockd_net_id);
> @@ -139,12 +138,10 @@ lockd(void *vrqstp)
> timeout = nlmsvc_retry_blocked();
>
> /*
> - * Find a socket with data available and call its
> - * recvfrom routine.
> + * Find any work to do, such as a socket with data available,
> + * and do the work.
> */
> - err = svc_recv(rqstp, timeout);
> - if (err == -EAGAIN || err == -EINTR)
> - continue;
> + svc_recv(rqstp, timeout);
> }
> if (nlmsvc_ops)
> nlmsvc_invalidate_all();
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 2d94384bd6a9..914d2402ca98 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -74,7 +74,6 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
> static int
> nfs4_callback_svc(void *vrqstp)
> {
> - int err;
> struct svc_rqst *rqstp = vrqstp;
>
> set_freezable();
> @@ -83,9 +82,7 @@ nfs4_callback_svc(void *vrqstp)
> /*
> * Listen for a request on the socket
> */
> - err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
> - if (err == -EAGAIN || err == -EINTR)
> - continue;
> + svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
> }
>
> svc_exit_thread(rqstp);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 3e08cc746870..5bf48c33986e 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -953,7 +953,6 @@ nfsd(void *vrqstp)
> struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
> struct net *net = perm_sock->xpt_net;
> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> - int err;
>
> /* At this point, the thread shares current->fs
> * with the init process. We need to create files with the
> @@ -972,7 +971,7 @@ nfsd(void *vrqstp)
> /*
> * The main request loop
> */
> - for (;;) {
> + while (!kthread_should_stop()) {
> /* Update sv_maxconn if it has changed */
> rqstp->rq_server->sv_maxconn = nn->max_connections;
>
> @@ -980,10 +979,7 @@ nfsd(void *vrqstp)
> * Find a socket with data available and call its
> * recvfrom routine.
> */
> - while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
> - ;
> - if (err == -EINTR)
> - break;
> + svc_recv(rqstp, 60*60*HZ);
> validate_process_creds();
> }
>
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index 55446136499f..fb5c98069356 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -64,7 +64,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
> * Function prototypes.
> */
> void svc_close_net(struct svc_serv *, struct net *);
> -int svc_recv(struct svc_rqst *, long);
> +void svc_recv(struct svc_rqst *, long);
> void svc_send(struct svc_rqst *rqstp);
> void svc_drop(struct svc_rqst *);
> void svc_sock_update_bufs(struct svc_serv *serv);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index c808f6d60c99..67825eef8646 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -664,7 +664,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
> }
> }
>
> -static int svc_alloc_arg(struct svc_rqst *rqstp)
> +static bool svc_alloc_arg(struct svc_rqst *rqstp)
> {
> struct svc_serv *serv = rqstp->rq_server;
> struct xdr_buf *arg = &rqstp->rq_arg;
> @@ -689,7 +689,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
> set_current_state(TASK_IDLE);
> if (kthread_should_stop()) {
> set_current_state(TASK_RUNNING);
> - return -EINTR;
> + return false;
> }
> trace_svc_alloc_arg_err(pages, ret);
> memalloc_retry_wait(GFP_KERNEL);
> @@ -708,7 +708,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
> arg->tail[0].iov_len = 0;
>
> rqstp->rq_xid = xdr_zero;
> - return 0;
> + return true;
> }
>
> static bool
> @@ -773,9 +773,9 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> if (!time_left)
> percpu_counter_inc(&pool->sp_threads_timedout);
> if (kthread_should_stop())
> - return ERR_PTR(-EINTR);
> + return NULL;
> percpu_counter_inc(&pool->sp_threads_no_work);
> - return ERR_PTR(-EAGAIN);
> + return NULL;
> out_found:
> /* Normally we will wait up to 5 seconds for any required
> * cache information to be provided.
> @@ -856,32 +856,27 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> * organised not to touch any cachelines in the shared svc_serv
> * structure, only cachelines in the local svc_pool.
> */
> -int svc_recv(struct svc_rqst *rqstp, long timeout)
> +void svc_recv(struct svc_rqst *rqstp, long timeout)
> {
> struct svc_xprt *xprt = NULL;
> struct svc_serv *serv = rqstp->rq_server;
> - int len, err;
> + int len;
>
> - err = svc_alloc_arg(rqstp);
> - if (err)
> + if (!svc_alloc_arg(rqstp))
> goto out;
>
> try_to_freeze();
> cond_resched();
> - err = -EINTR;
> if (kthread_should_stop())
> goto out;
>
> xprt = svc_get_next_xprt(rqstp, timeout);
> - if (IS_ERR(xprt)) {
> - err = PTR_ERR(xprt);
> + if (!xprt)
> goto out;
> - }
>
> len = svc_handle_xprt(rqstp, xprt);
>
> /* No data, incomplete (TCP) read, or accept() */
> - err = -EAGAIN;
> if (len <= 0)
> goto out_release;
>
> @@ -896,12 +891,12 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
> percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
> rqstp->rq_stime = ktime_get();
> svc_process(rqstp);
> - return 0;
> + return;
> out_release:
> rqstp->rq_res.len = 0;
> svc_xprt_release(rqstp);
> out:
> - return err;
> + return;
> }
> EXPORT_SYMBOL_GPL(svc_recv);
>
>
>
--
Jeff Layton <[email protected]>
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> Most svc threads have no interest in a timeout.
> nfsd sets it to 1 hour, but this is a wart of no significance.
>
> lockd uses the timeout so that it can call nlmsvc_retry_blocked().
> It also sometimes calls svc_wake_up() to ensure this is called.
>
> So change lockd to be consistent and always use svc_wake_up() to trigger
> nlmsvc_retry_blocked() - using a timer instead of a timeout passed to
> svc_recv().
>
> And change svc_recv() to not take a timeout arg.
>
> This makes the sp_threads_timedout counter always zero.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/lockd/svc.c | 11 ++++++++---
> fs/lockd/svclock.c | 5 +++--
> fs/nfs/callback.c | 2 +-
> fs/nfsd/nfssvc.c | 2 +-
> include/linux/lockd/lockd.h | 4 +++-
> include/linux/sunrpc/svc.h | 1 -
> include/linux/sunrpc/svcsock.h | 2 +-
> net/sunrpc/svc.c | 2 --
> net/sunrpc/svc_xprt.c | 27 ++++++++++++---------------
> 9 files changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index a43b63e46127..4f55cd42c2e6 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -55,6 +55,11 @@ static DEFINE_MUTEX(nlmsvc_mutex);
> static unsigned int nlmsvc_users;
> static struct svc_serv *nlmsvc_serv;
> unsigned long nlmsvc_timeout;
> +static void nlmsvc_request_retry(struct timer_list *tl)
> +{
> + svc_wake_up(nlmsvc_serv);
> +}
> +DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);
>
> unsigned int lockd_net_id;
>
> @@ -130,18 +135,17 @@ lockd(void *vrqstp)
> * NFS mount or NFS daemon has gone away.
> */
> while (!kthread_should_stop()) {
> - long timeout = MAX_SCHEDULE_TIMEOUT;
>
> /* update sv_maxconn if it has changed */
> rqstp->rq_server->sv_maxconn = nlm_max_connections;
>
> - timeout = nlmsvc_retry_blocked();
> + nlmsvc_retry_blocked();
>
> /*
> * Find any work to do, such as a socket with data available,
> * and do the work.
> */
> - svc_recv(rqstp, timeout);
> + svc_recv(rqstp);
> }
> if (nlmsvc_ops)
> nlmsvc_invalidate_all();
> @@ -375,6 +379,7 @@ static void lockd_put(void)
> #endif
>
> svc_set_num_threads(nlmsvc_serv, NULL, 0);
> + timer_delete_sync(&nlmsvc_retry);
> nlmsvc_serv = NULL;
> dprintk("lockd_down: service destroyed\n");
> }
> diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
> index c43ccdf28ed9..3d7bd5c04b36 100644
> --- a/fs/lockd/svclock.c
> +++ b/fs/lockd/svclock.c
> @@ -1008,7 +1008,7 @@ retry_deferred_block(struct nlm_block *block)
> * picks up locks that can be granted, or grant notifications that must
> * be retransmitted.
> */
> -unsigned long
> +void
> nlmsvc_retry_blocked(void)
> {
> unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
> @@ -1038,5 +1038,6 @@ nlmsvc_retry_blocked(void)
> }
> spin_unlock(&nlm_blocked_lock);
>
> - return timeout;
> + if (timeout < MAX_SCHEDULE_TIMEOUT)
> + mod_timer(&nlmsvc_retry, jiffies + timeout);
> }
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 914d2402ca98..c47834970224 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -82,7 +82,7 @@ nfs4_callback_svc(void *vrqstp)
> /*
> * Listen for a request on the socket
> */
> - svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
> + svc_recv(rqstp);
> }
>
> svc_exit_thread(rqstp);
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 5bf48c33986e..b536b254c59e 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -979,7 +979,7 @@ nfsd(void *vrqstp)
> * Find a socket with data available and call its
> * recvfrom routine.
> */
> - svc_recv(rqstp, 60*60*HZ);
> + svc_recv(rqstp);
> validate_process_creds();
> }
>
> diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> index f42594a9efe0..0f016d69c996 100644
> --- a/include/linux/lockd/lockd.h
> +++ b/include/linux/lockd/lockd.h
> @@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout;
> extern bool nsm_use_hostnames;
> extern u32 nsm_local_state;
>
> +extern struct timer_list nlmsvc_retry;
> +
> /*
> * Lockd client functions
> */
> @@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
> struct nlm_host *, struct nlm_lock *,
> struct nlm_lock *, struct nlm_cookie *);
> __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
> -unsigned long nlmsvc_retry_blocked(void);
> +void nlmsvc_retry_blocked(void);
> void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
> nlm_host_match_fn_t match);
> void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index d51ae1e109b6..f3df7f963653 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -41,7 +41,6 @@ struct svc_pool {
> struct percpu_counter sp_messages_arrived;
> struct percpu_counter sp_sockets_queued;
> struct percpu_counter sp_threads_woken;
> - struct percpu_counter sp_threads_timedout;
> struct percpu_counter sp_threads_starved;
> struct percpu_counter sp_threads_no_work;
>
> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
> index fb5c98069356..8da31799affe 100644
> --- a/include/linux/sunrpc/svcsock.h
> +++ b/include/linux/sunrpc/svcsock.h
> @@ -64,7 +64,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
> * Function prototypes.
> */
> void svc_close_net(struct svc_serv *, struct net *);
> -void svc_recv(struct svc_rqst *, long);
> +void svc_recv(struct svc_rqst *);
> void svc_send(struct svc_rqst *rqstp);
> void svc_drop(struct svc_rqst *);
> void svc_sock_update_bufs(struct svc_serv *serv);
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index f09b0cce041c..170eabc03988 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -513,7 +513,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
> percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
> percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
> percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
> - percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
> percpu_counter_init(&pool->sp_threads_starved, 0, GFP_KERNEL);
> percpu_counter_init(&pool->sp_threads_no_work, 0, GFP_KERNEL);
>
> @@ -593,7 +592,6 @@ svc_destroy(struct kref *ref)
> percpu_counter_destroy(&pool->sp_messages_arrived);
> percpu_counter_destroy(&pool->sp_sockets_queued);
> percpu_counter_destroy(&pool->sp_threads_woken);
> - percpu_counter_destroy(&pool->sp_threads_timedout);
> percpu_counter_destroy(&pool->sp_threads_starved);
> percpu_counter_destroy(&pool->sp_threads_no_work);
> }
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 67825eef8646..44a33b1f542f 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -735,10 +735,9 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> return true;
> }
>
> -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> +static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
> {
> struct svc_pool *pool = rqstp->rq_pool;
> - long time_left = 0;
>
> /* rq_xprt should be clear on entry */
> WARN_ON_ONCE(rqstp->rq_xprt);
> @@ -756,7 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> smp_mb__after_atomic();
>
> if (likely(rqst_should_sleep(rqstp)))
> - time_left = schedule_timeout(timeout);
> + schedule();
> else
> __set_current_state(TASK_RUNNING);
>
> @@ -770,8 +769,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
> goto out_found;
> }
>
> - if (!time_left)
> - percpu_counter_inc(&pool->sp_threads_timedout);
> if (kthread_should_stop())
> return NULL;
> percpu_counter_inc(&pool->sp_threads_no_work);
> @@ -856,7 +853,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
> * organised not to touch any cachelines in the shared svc_serv
> * structure, only cachelines in the local svc_pool.
> */
> -void svc_recv(struct svc_rqst *rqstp, long timeout)
> +void svc_recv(struct svc_rqst *rqstp)
> {
> struct svc_xprt *xprt = NULL;
> struct svc_serv *serv = rqstp->rq_server;
> @@ -870,7 +867,7 @@ void svc_recv(struct svc_rqst *rqstp, long timeout)
> if (kthread_should_stop())
> goto out;
>
> - xprt = svc_get_next_xprt(rqstp, timeout);
> + xprt = svc_get_next_xprt(rqstp);
> if (!xprt)
> goto out;
>
> @@ -1437,14 +1434,14 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
> return 0;
> }
>
> - seq_printf(m, "%u %llu %llu %llu %llu %llu %llu\n",
> - pool->sp_id,
> - percpu_counter_sum_positive(&pool->sp_messages_arrived),
> - percpu_counter_sum_positive(&pool->sp_sockets_queued),
> - percpu_counter_sum_positive(&pool->sp_threads_woken),
> - percpu_counter_sum_positive(&pool->sp_threads_timedout),
> - percpu_counter_sum_positive(&pool->sp_threads_starved),
> - percpu_counter_sum_positive(&pool->sp_threads_no_work));
> + seq_printf(m, "%u %llu %llu %llu 0 %llu %llu\n",
> + pool->sp_id,
> + percpu_counter_sum_positive(&pool->sp_messages_arrived),
> + percpu_counter_sum_positive(&pool->sp_sockets_queued),
> + percpu_counter_sum_positive(&pool->sp_threads_woken),
> +		   /* previously pool->sp_threads_timedout */
> + percpu_counter_sum_positive(&pool->sp_threads_starved),
> + percpu_counter_sum_positive(&pool->sp_threads_no_work));
>
> return 0;
> }
>
>
More simplifications. I like it!
Reviewed-by: Jeff Layton <[email protected]>
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> Using svc_recv() and svc_process() for (NFSv4.1) back-channel handling
> means we have just one mechanism for waking threads.
>
> Also change kthread_freezable_should_stop() in nfs4_callback_svc() to
> kthread_should_stop() as used elsewhere.
> kthread_freezable_should_stop() effectively adds a try_to_freeze() call,
> and svc_recv() already contains that at an appropriate place.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/nfs/callback.c | 46 ++---------------------------------
> include/linux/sunrpc/svc.h | 6 +++--
> net/sunrpc/backchannel_rqst.c | 8 ++----
> net/sunrpc/svc.c | 2 +-
> net/sunrpc/svc_xprt.c | 48 ++++++++++++++++++++++++++++++++++++-
> net/sunrpc/xprtrdma/backchannel.c | 2 +-
> 6 files changed, 58 insertions(+), 54 deletions(-)
>
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index c47834970224..660cec36c45c 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
>
> set_freezable();
>
> - while (!kthread_freezable_should_stop(NULL)) {
> + while (!kthread_should_stop()) {
> /*
> * Listen for a request on the socket
> */
> @@ -90,45 +90,6 @@ nfs4_callback_svc(void *vrqstp)
> }
>
> #if defined(CONFIG_NFS_V4_1)
> -/*
> - * The callback service for NFSv4.1 callbacks
> - */
> -static int
> -nfs41_callback_svc(void *vrqstp)
> -{
> - struct svc_rqst *rqstp = vrqstp;
> - struct svc_serv *serv = rqstp->rq_server;
> - struct rpc_rqst *req;
> - int error;
> - DEFINE_WAIT(wq);
> -
> - set_freezable();
> -
> - while (!kthread_freezable_should_stop(NULL)) {
> - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
> - spin_lock_bh(&serv->sv_cb_lock);
> - if (!list_empty(&serv->sv_cb_list)) {
> - req = list_first_entry(&serv->sv_cb_list,
> - struct rpc_rqst, rq_bc_list);
> - list_del(&req->rq_bc_list);
> - spin_unlock_bh(&serv->sv_cb_lock);
> - finish_wait(&serv->sv_cb_waitq, &wq);
> - dprintk("Invoking bc_svc_process()\n");
> - error = bc_svc_process(serv, req, rqstp);
> - dprintk("bc_svc_process() returned w/ error code= %d\n",
> - error);
> - } else {
> - spin_unlock_bh(&serv->sv_cb_lock);
> - if (!kthread_should_stop())
> - schedule();
> - finish_wait(&serv->sv_cb_waitq, &wq);
> - }
> - }
> -
> - svc_exit_thread(rqstp);
> - return 0;
> -}
> -
> static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
> struct svc_serv *serv)
> {
> @@ -241,10 +202,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion)
> cb_info->users);
>
> threadfn = nfs4_callback_svc;
> -#if defined(CONFIG_NFS_V4_1)
> - if (minorversion)
> - threadfn = nfs41_callback_svc;
> -#else
> +#if !defined(CONFIG_NFS_V4_1)
> if (minorversion)
> return ERR_PTR(-ENOTSUPP);
> #endif
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 83f31a09c853..15d468d852b5 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -92,8 +92,6 @@ struct svc_serv {
> * that arrive over the same
> * connection */
> spinlock_t sv_cb_lock; /* protects the svc_cb_list */
> - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
> - * entries in the svc_cb_list */
> bool sv_bc_enabled; /* service uses backchannel */
> #endif /* CONFIG_SUNRPC_BACKCHANNEL */
> };
> @@ -202,6 +200,10 @@ struct svc_rqst {
> struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
> struct svc_xprt * rq_xprt; /* transport ptr */
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + struct rpc_rqst *rq_cb; /* callback to be handled */
> +#endif
> +
> struct sockaddr_storage rq_addr; /* peer address */
> size_t rq_addrlen;
> struct sockaddr_storage rq_daddr; /* dest addr of request
> diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
> index 65a6c6429a53..60b8d310bb27 100644
> --- a/net/sunrpc/backchannel_rqst.c
> +++ b/net/sunrpc/backchannel_rqst.c
> @@ -349,10 +349,8 @@ struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
> }
>
> /*
> - * Add callback request to callback list. The callback
> - * service sleeps on the sv_cb_waitq waiting for new
> - * requests. Wake it up after adding enqueing the
> - * request.
> + * Add callback request to callback list. Wake a thread
> + * on the first pool (usually the only pool) to handle it.
> */
> void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
> {
> @@ -371,6 +369,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
> xprt_get(xprt);
> spin_lock(&bc_serv->sv_cb_lock);
> list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
> - wake_up(&bc_serv->sv_cb_waitq);
> spin_unlock(&bc_serv->sv_cb_lock);
> + svc_pool_wake_idle_thread(bc_serv, &bc_serv->sv_pools[0]);
> }
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 170eabc03988..56b4a88776d5 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -440,7 +440,6 @@ __svc_init_bc(struct svc_serv *serv)
> {
> INIT_LIST_HEAD(&serv->sv_cb_list);
> spin_lock_init(&serv->sv_cb_lock);
> - init_waitqueue_head(&serv->sv_cb_waitq);
> }
> #else
> static void
> @@ -724,6 +723,7 @@ struct svc_rqst *svc_pool_wake_idle_thread(struct svc_serv *serv,
> percpu_counter_inc(&pool->sp_threads_starved);
> return NULL;
> }
> +EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread);
>
> static struct svc_pool *
> svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index c7095ff7d5fd..4fdf1aaa514b 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -17,6 +17,7 @@
> #include <linux/sunrpc/svc_xprt.h>
> #include <linux/sunrpc/svcsock.h>
> #include <linux/sunrpc/xprt.h>
> +#include <linux/sunrpc/bc_xprt.h>
> #include <linux/module.h>
> #include <linux/netdevice.h>
> #include <trace/events/sunrpc.h>
> @@ -732,6 +733,13 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> if (freezing(current))
> return false;
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + if (svc_is_backchannel(rqstp)) {
> + if (!list_empty(&rqstp->rq_server->sv_cb_list))
> + return false;
> + }
> +#endif
> +
> return true;
> }
>
> @@ -754,12 +762,31 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
>
> set_bit(RQ_BUSY, &rqstp->rq_flags);
> smp_mb__after_atomic();
> +
> rqstp->rq_xprt = svc_xprt_dequeue(pool);
> if (rqstp->rq_xprt) {
> trace_svc_pool_awoken(rqstp);
> goto out_found;
> }
>
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + if (svc_is_backchannel(rqstp)) {
> + struct svc_serv *serv = rqstp->rq_server;
> + struct rpc_rqst *req;
> +
> + spin_lock_bh(&serv->sv_cb_lock);
> + req = list_first_entry_or_null(&serv->sv_cb_list,
> + struct rpc_rqst, rq_bc_list);
> + if (req) {
> + list_del(&req->rq_bc_list);
> + rqstp->rq_cb = req;
> + }
> + spin_unlock_bh(&serv->sv_cb_lock);
> + if (rqstp->rq_cb)
> + goto out_found;
> + }
> +#endif
> +
> if (!kthread_should_stop())
> percpu_counter_inc(&pool->sp_threads_no_work);
> return;
> @@ -853,7 +880,7 @@ void svc_recv(struct svc_rqst *rqstp)
> svc_wait_for_work(rqstp);
>
> if (rqstp->rq_xprt) {
> - struct svc_serv *serv = rqstp->rq_server;
> + struct svc_serv *serv = rqstp->rq_server;
> struct svc_xprt *xprt = rqstp->rq_xprt;
> int len;
>
> @@ -878,6 +905,25 @@ void svc_recv(struct svc_rqst *rqstp)
> svc_process(rqstp);
> }
> }
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + if (svc_is_backchannel(rqstp)) {
> + struct rpc_rqst *cb;
> + int error;
> +
> + if (!rqstp->rq_cb)
> + return;
> +
> + cb = rqstp->rq_cb;
> + rqstp->rq_cb = NULL;
> +
> + dprintk("Invoking bc_svc_process()\n");
> + error = bc_svc_process(rqstp->rq_server, cb, rqstp);
> + dprintk("bc_svc_process() returned w/ error code= %d\n",
> + error);
> + return;
> + }
> +#endif
> +
> }
> EXPORT_SYMBOL_GPL(svc_recv);
>
> diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
> index e4d84a13c566..f1e1d4909434 100644
> --- a/net/sunrpc/xprtrdma/backchannel.c
> +++ b/net/sunrpc/xprtrdma/backchannel.c
> @@ -267,7 +267,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
> list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
> spin_unlock(&bc_serv->sv_cb_lock);
>
> - wake_up(&bc_serv->sv_cb_waitq);
> + svc_pool_wake_idle_thread(bc_serv, &bc_serv->sv_pools[0]);
>
> r_xprt->rx_stats.bcall_count++;
> return;
>
>
Nice cleanup.
Reviewed-by: Jeff Layton <[email protected]>
On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> svc threads are currently stopped using kthread_stop(). This requires
> identifying a specific thread. However we don't care which thread
> stops, just as long as one does.
>
> So instead, set a flag in the svc_pool to say that a thread needs to
> die, and have each thread check this flag instead of calling
> kthread_should_stop(). The first to find it clears the flag and moves
> towards shutting down.
>
> This removes an explicit dependency on sp_all_threads which will make a
> future patch simpler.
>
> Signed-off-by: NeilBrown <[email protected]>
> ---
> fs/lockd/svc.c | 4 ++--
> fs/lockd/svclock.c | 4 ++--
> fs/nfs/callback.c | 2 +-
> fs/nfsd/nfs4proc.c | 8 +++++---
> fs/nfsd/nfssvc.c | 2 +-
> include/linux/lockd/lockd.h | 2 +-
> include/linux/sunrpc/svc.h | 22 +++++++++++++++++++++-
> include/trace/events/sunrpc.h | 5 +++--
> net/sunrpc/svc.c | 39 +++++++++++++++++----------------------
> net/sunrpc/svc_xprt.c | 8 +++-----
> 10 files changed, 56 insertions(+), 40 deletions(-)
>
> diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> index 4f55cd42c2e6..30543edd2309 100644
> --- a/fs/lockd/svc.c
> +++ b/fs/lockd/svc.c
> @@ -134,12 +134,12 @@ lockd(void *vrqstp)
> * The main request loop. We don't terminate until the last
> * NFS mount or NFS daemon has gone away.
> */
> - while (!kthread_should_stop()) {
> + while (!svc_thread_should_stop(rqstp)) {
>
> /* update sv_maxconn if it has changed */
> rqstp->rq_server->sv_maxconn = nlm_max_connections;
>
> - nlmsvc_retry_blocked();
> + nlmsvc_retry_blocked(rqstp);
>
> /*
> * Find any work to do, such as a socket with data available,
> diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
> index 3d7bd5c04b36..fd399c9bea5c 100644
> --- a/fs/lockd/svclock.c
> +++ b/fs/lockd/svclock.c
> @@ -1009,13 +1009,13 @@ retry_deferred_block(struct nlm_block *block)
> * be retransmitted.
> */
> void
> -nlmsvc_retry_blocked(void)
> +nlmsvc_retry_blocked(struct svc_rqst *rqstp)
> {
> unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
> struct nlm_block *block;
>
> spin_lock(&nlm_blocked_lock);
> - while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
> + while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
> block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
>
> if (block->b_when == NLM_NEVER)
> diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> index 660cec36c45c..c58ec2e66aaf 100644
> --- a/fs/nfs/callback.c
> +++ b/fs/nfs/callback.c
> @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
>
> set_freezable();
>
> - while (!kthread_should_stop()) {
> + while (!svc_thread_should_stop(rqstp)) {
> /*
> * Listen for a request on the socket
> */
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 157488290676..66024ed06181 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1306,7 +1306,8 @@ extern void nfs_sb_deactive(struct super_block *sb);
> * setup a work entry in the ssc delayed unmount list.
> */
> static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> - struct nfsd4_ssc_umount_item **nsui)
> + struct nfsd4_ssc_umount_item **nsui,
> + struct svc_rqst *rqstp)
> {
> struct nfsd4_ssc_umount_item *ni = NULL;
> struct nfsd4_ssc_umount_item *work = NULL;
> @@ -1329,7 +1330,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> spin_unlock(&nn->nfsd_ssc_lock);
>
> /* allow 20secs for mount/unmount for now - revisit */
> - if (kthread_should_stop() ||
> + if (svc_thread_should_stop(rqstp) ||
> (schedule_timeout(20*HZ) == 0)) {
> finish_wait(&nn->nfsd_ssc_waitq, &wait);
> kfree(work);
> @@ -1445,7 +1446,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
> goto out_free_rawdata;
> snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
>
> - status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
> + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
> if (status)
> goto out_free_devname;
> if ((*nsui)->nsui_vfsmount)
> @@ -1620,6 +1621,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
> if (bytes_total == 0)
> bytes_total = ULLONG_MAX;
> do {
> + /* Only async copies can be stopped here */
> if (kthread_should_stop())
> break;
> bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index b536b254c59e..9b282c89ea87 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -971,7 +971,7 @@ nfsd(void *vrqstp)
> /*
> * The main request loop
> */
> - while (!kthread_should_stop()) {
> + while (!svc_thread_should_stop(rqstp)) {
> /* Update sv_maxconn if it has changed */
> rqstp->rq_server->sv_maxconn = nn->max_connections;
>
> diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> index 0f016d69c996..9f565416d186 100644
> --- a/include/linux/lockd/lockd.h
> +++ b/include/linux/lockd/lockd.h
> @@ -282,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
> struct nlm_host *, struct nlm_lock *,
> struct nlm_lock *, struct nlm_cookie *);
> __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
> -void nlmsvc_retry_blocked(void);
> +void nlmsvc_retry_blocked(struct svc_rqst *rqstp);
> void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
> nlm_host_match_fn_t match);
> void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 15d468d852b5..ea3ce1315416 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -51,6 +51,8 @@ struct svc_pool {
> enum {
> SP_TASK_PENDING, /* still work to do even if no xprt is queued */
> SP_CONGESTED, /* all threads are busy, none idle */
> + SP_NEED_VICTIM, /* One thread needs to agree to exit */
> + SP_VICTIM_REMAINS, /* One thread needs to actually exit */
> };
>
> /*
> @@ -275,7 +277,7 @@ enum {
> RQ_DROPME, /* drop current reply */
> RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
> * cache pages */
> - RQ_VICTIM, /* about to be shut down */
> + RQ_VICTIM, /* Have agreed to shut down */
> RQ_BUSY, /* request is busy */
> RQ_DATA, /* request has data */
> };
> @@ -315,6 +317,24 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
> return (struct sockaddr *) &rqst->rq_daddr;
> }
>
> +/**
> + * svc_thread_should_stop - check if this thread should stop
> + * @rqstp: the thread that might need to stop
> + *
> + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
> + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
> + * the victim using this function. It should then promptly call
> + * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
> + * so the task waiting for a thread to exit can wake and continue.
> + */
> +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
> +{
> + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
> + set_bit(RQ_VICTIM, &rqstp->rq_flags);
> +
> + return test_bit(RQ_VICTIM, &rqstp->rq_flags);
> +}
> +
> struct svc_deferred_req {
> u32 prot; /* protocol (UDP or TCP) */
> struct svc_xprt *xprt;
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index 60c8e03268d4..c79375e37dc2 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -2041,8 +2041,9 @@ TRACE_EVENT(svc_xprt_enqueue,
> #define show_svc_pool_flags(x) \
> __print_flags(x, "|", \
> { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \
> - { BIT(SP_CONGESTED), "CONGESTED" })
> -
> + { BIT(SP_CONGESTED), "CONGESTED" }, \
> + { BIT(SP_NEED_VICTIM), "NEED_VICTIM" }, \
> + { BIT(SP_VICTIM_REMAINS), "VICTIM_REMAINS" })
> DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
> TP_PROTO(
> const struct svc_rqst *rqstp
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 56b4a88776d5..b18175ef74ec 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -731,19 +731,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
> }
>
> -static struct task_struct *
> +static struct svc_pool *
> svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> {
> unsigned int i;
> - struct task_struct *task = NULL;
>
> if (pool != NULL) {
> spin_lock_bh(&pool->sp_lock);
> + if (pool->sp_nrthreads)
> + goto found_pool;
> + spin_unlock_bh(&pool->sp_lock);
> + return NULL;
> } else {
> for (i = 0; i < serv->sv_nrpools; i++) {
> pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> spin_lock_bh(&pool->sp_lock);
> - if (!list_empty(&pool->sp_all_threads))
> + if (pool->sp_nrthreads)
> goto found_pool;
> spin_unlock_bh(&pool->sp_lock);
> }
> @@ -751,16 +754,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
> }
>
> found_pool:
> - if (!list_empty(&pool->sp_all_threads)) {
> - struct svc_rqst *rqstp;
> -
> - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
> - set_bit(RQ_VICTIM, &rqstp->rq_flags);
> - list_del_rcu(&rqstp->rq_all);
> - task = rqstp->rq_task;
> - }
> + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> + set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> spin_unlock_bh(&pool->sp_lock);
> - return task;
> + return pool;
> }
>
> static int
> @@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> static int
> svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> {
> - struct svc_rqst *rqstp;
> - struct task_struct *task;
> unsigned int state = serv->sv_nrthreads-1;
> + struct svc_pool *vpool;
>
> do {
> - task = svc_pool_victim(serv, pool, &state);
> - if (task == NULL)
> + vpool = svc_pool_victim(serv, pool, &state);
> + if (!vpool)
> break;
> - rqstp = kthread_data(task);
> - /* Did we lose a race to svo_function threadfn? */
> - if (kthread_stop(task) == -EINTR)
> - svc_exit_thread(rqstp);
> + svc_pool_wake_idle_thread(serv, vpool);
> + wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
> + TASK_IDLE);
I'm not sure about this bit. Previously (AFAICT) we didn't shut down the
threads serially. With this change, we will be. Granted we only have to
wait until SP_VICTIM_REMAINS is clear. Does that happen early enough in
the shutdown process that you aren't worried about this?
> nrservs++;
> } while (nrservs < 0);
> return 0;
> @@ -932,8 +927,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
>
> spin_lock_bh(&pool->sp_lock);
> pool->sp_nrthreads--;
> - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
> - list_del_rcu(&rqstp->rq_all);
> spin_unlock_bh(&pool->sp_lock);
>
> spin_lock_bh(&serv->sv_lock);
> @@ -941,6 +934,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
> spin_unlock_bh(&serv->sv_lock);
> svc_sock_update_bufs(serv);
>
> + if (svc_thread_should_stop(rqstp))
> + clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> svc_rqst_free(rqstp);
>
> svc_put(serv);
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 4fdf1aaa514b..948605e7043b 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -595,8 +595,6 @@ void svc_wake_up(struct svc_serv *serv)
> smp_wmb();
> return;
> }
> -
> - trace_svc_wake_up(rqstp);
> }
> EXPORT_SYMBOL_GPL(svc_wake_up);
>
> @@ -688,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
> continue;
>
> set_current_state(TASK_IDLE);
> - if (kthread_should_stop()) {
> + if (svc_thread_should_stop(rqstp)) {
> set_current_state(TASK_RUNNING);
> return false;
> }
> @@ -726,7 +724,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> return false;
>
> /* are we shutting down? */
> - if (kthread_should_stop())
> + if (svc_thread_should_stop(rqstp))
> return false;
>
> /* are we freezing? */
> @@ -787,7 +785,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
> }
> #endif
>
> - if (!kthread_should_stop())
> + if (!svc_thread_should_stop(rqstp))
> percpu_counter_inc(&pool->sp_threads_no_work);
> return;
> out_found:
>
>
--
Jeff Layton <[email protected]>
On Fri, 21 Jul 2023, Jeff Layton wrote:
> On Tue, 2023-07-18 at 16:38 +1000, NeilBrown wrote:
> > svc threads are currently stopped using kthread_stop(). This requires
> > identifying a specific thread. However we don't care which thread
> > stops, just as long as one does.
> >
> > So instead, set a flag in the svc_pool to say that a thread needs to
> > die, and have each thread check this flag instead of calling
> > kthread_should_stop(). The first to find it clears the flag and moves
> > towards shutting down.
> >
> > This removes an explicit dependency on sp_all_threads which will make a
> > future patch simpler.
> >
> > Signed-off-by: NeilBrown <[email protected]>
> > ---
> > fs/lockd/svc.c | 4 ++--
> > fs/lockd/svclock.c | 4 ++--
> > fs/nfs/callback.c | 2 +-
> > fs/nfsd/nfs4proc.c | 8 +++++---
> > fs/nfsd/nfssvc.c | 2 +-
> > include/linux/lockd/lockd.h | 2 +-
> > include/linux/sunrpc/svc.h | 22 +++++++++++++++++++++-
> > include/trace/events/sunrpc.h | 5 +++--
> > net/sunrpc/svc.c | 39 +++++++++++++++++----------------------
> > net/sunrpc/svc_xprt.c | 8 +++-----
> > 10 files changed, 56 insertions(+), 40 deletions(-)
> >
> > diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
> > index 4f55cd42c2e6..30543edd2309 100644
> > --- a/fs/lockd/svc.c
> > +++ b/fs/lockd/svc.c
> > @@ -134,12 +134,12 @@ lockd(void *vrqstp)
> > * The main request loop. We don't terminate until the last
> > * NFS mount or NFS daemon has gone away.
> > */
> > - while (!kthread_should_stop()) {
> > + while (!svc_thread_should_stop(rqstp)) {
> >
> > /* update sv_maxconn if it has changed */
> > rqstp->rq_server->sv_maxconn = nlm_max_connections;
> >
> > - nlmsvc_retry_blocked();
> > + nlmsvc_retry_blocked(rqstp);
> >
> > /*
> > * Find any work to do, such as a socket with data available,
> > diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
> > index 3d7bd5c04b36..fd399c9bea5c 100644
> > --- a/fs/lockd/svclock.c
> > +++ b/fs/lockd/svclock.c
> > @@ -1009,13 +1009,13 @@ retry_deferred_block(struct nlm_block *block)
> > * be retransmitted.
> > */
> > void
> > -nlmsvc_retry_blocked(void)
> > +nlmsvc_retry_blocked(struct svc_rqst *rqstp)
> > {
> > unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
> > struct nlm_block *block;
> >
> > spin_lock(&nlm_blocked_lock);
> > - while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
> > + while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) {
> > block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
> >
> > if (block->b_when == NLM_NEVER)
> > diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
> > index 660cec36c45c..c58ec2e66aaf 100644
> > --- a/fs/nfs/callback.c
> > +++ b/fs/nfs/callback.c
> > @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp)
> >
> > set_freezable();
> >
> > - while (!kthread_should_stop()) {
> > + while (!svc_thread_should_stop(rqstp)) {
> > /*
> > * Listen for a request on the socket
> > */
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 157488290676..66024ed06181 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -1306,7 +1306,8 @@ extern void nfs_sb_deactive(struct super_block *sb);
> > * setup a work entry in the ssc delayed unmount list.
> > */
> > static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> > - struct nfsd4_ssc_umount_item **nsui)
> > + struct nfsd4_ssc_umount_item **nsui,
> > + struct svc_rqst *rqstp)
> > {
> > struct nfsd4_ssc_umount_item *ni = NULL;
> > struct nfsd4_ssc_umount_item *work = NULL;
> > @@ -1329,7 +1330,7 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr,
> > spin_unlock(&nn->nfsd_ssc_lock);
> >
> > /* allow 20secs for mount/unmount for now - revisit */
> > - if (kthread_should_stop() ||
> > + if (svc_thread_should_stop(rqstp) ||
> > (schedule_timeout(20*HZ) == 0)) {
> > finish_wait(&nn->nfsd_ssc_waitq, &wait);
> > kfree(work);
> > @@ -1445,7 +1446,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp,
> > goto out_free_rawdata;
> > snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep);
> >
> > - status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui);
> > + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp);
> > if (status)
> > goto out_free_devname;
> > if ((*nsui)->nsui_vfsmount)
> > @@ -1620,6 +1621,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
> > if (bytes_total == 0)
> > bytes_total = ULLONG_MAX;
> > do {
> > + /* Only async copies can be stopped here */
> > if (kthread_should_stop())
> > break;
> > bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos,
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index b536b254c59e..9b282c89ea87 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -971,7 +971,7 @@ nfsd(void *vrqstp)
> > /*
> > * The main request loop
> > */
> > - while (!kthread_should_stop()) {
> > + while (!svc_thread_should_stop(rqstp)) {
> > /* Update sv_maxconn if it has changed */
> > rqstp->rq_server->sv_maxconn = nn->max_connections;
> >
> > diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
> > index 0f016d69c996..9f565416d186 100644
> > --- a/include/linux/lockd/lockd.h
> > +++ b/include/linux/lockd/lockd.h
> > @@ -282,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
> > struct nlm_host *, struct nlm_lock *,
> > struct nlm_lock *, struct nlm_cookie *);
> > __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
> > -void nlmsvc_retry_blocked(void);
> > +void nlmsvc_retry_blocked(struct svc_rqst *rqstp);
> > void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
> > nlm_host_match_fn_t match);
> > void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index 15d468d852b5..ea3ce1315416 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -51,6 +51,8 @@ struct svc_pool {
> > enum {
> > SP_TASK_PENDING, /* still work to do even if no xprt is queued */
> > SP_CONGESTED, /* all threads are busy, none idle */
> > + SP_NEED_VICTIM, /* One thread needs to agree to exit */
> > + SP_VICTIM_REMAINS, /* One thread needs to actually exit */
> > };
> >
> > /*
> > @@ -275,7 +277,7 @@ enum {
> > RQ_DROPME, /* drop current reply */
> > RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
> > * cache pages */
> > - RQ_VICTIM, /* about to be shut down */
> > + RQ_VICTIM, /* Have agreed to shut down */
> > RQ_BUSY, /* request is busy */
> > RQ_DATA, /* request has data */
> > };
> > @@ -315,6 +317,24 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
> > return (struct sockaddr *) &rqst->rq_daddr;
> > }
> >
> > +/**
> > + * svc_thread_should_stop - check if this thread should stop
> > + * @rqstp: the thread that might need to stop
> > + *
> > + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS
> > + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming
> > + * the victim using this function. It should then promptly call
> > + * svc_exit_thread() which completes the process, clearing SP_VICTIM_REMAINS
> > + * so the task waiting for a thread to exit can wake and continue.
> > + */
> > +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp)
> > +{
> > + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags))
> > + set_bit(RQ_VICTIM, &rqstp->rq_flags);
> > +
> > + return test_bit(RQ_VICTIM, &rqstp->rq_flags);
> > +}
> > +
> > struct svc_deferred_req {
> > u32 prot; /* protocol (UDP or TCP) */
> > struct svc_xprt *xprt;
> > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> > index 60c8e03268d4..c79375e37dc2 100644
> > --- a/include/trace/events/sunrpc.h
> > +++ b/include/trace/events/sunrpc.h
> > @@ -2041,8 +2041,9 @@ TRACE_EVENT(svc_xprt_enqueue,
> > #define show_svc_pool_flags(x) \
> > __print_flags(x, "|", \
> > { BIT(SP_TASK_PENDING), "TASK_PENDING" }, \
> > - { BIT(SP_CONGESTED), "CONGESTED" })
> > -
> > + { BIT(SP_CONGESTED), "CONGESTED" }, \
> > + { BIT(SP_NEED_VICTIM), "NEED_VICTIM" }, \
> > + { BIT(SP_VICTIM_REMAINS), "VICTIM_REMAINS" })
> > DECLARE_EVENT_CLASS(svc_pool_scheduler_class,
> > TP_PROTO(
> > const struct svc_rqst *rqstp
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index 56b4a88776d5..b18175ef74ec 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -731,19 +731,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> > return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
> > }
> >
> > -static struct task_struct *
> > +static struct svc_pool *
> > svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
> > {
> > unsigned int i;
> > - struct task_struct *task = NULL;
> >
> > if (pool != NULL) {
> > spin_lock_bh(&pool->sp_lock);
> > + if (pool->sp_nrthreads)
> > + goto found_pool;
> > + spin_unlock_bh(&pool->sp_lock);
> > + return NULL;
> > } else {
> > for (i = 0; i < serv->sv_nrpools; i++) {
> > pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
> > spin_lock_bh(&pool->sp_lock);
> > - if (!list_empty(&pool->sp_all_threads))
> > + if (pool->sp_nrthreads)
> > goto found_pool;
> > spin_unlock_bh(&pool->sp_lock);
> > }
> > @@ -751,16 +754,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
> > }
> >
> > found_pool:
> > - if (!list_empty(&pool->sp_all_threads)) {
> > - struct svc_rqst *rqstp;
> > -
> > - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
> > - set_bit(RQ_VICTIM, &rqstp->rq_flags);
> > - list_del_rcu(&rqstp->rq_all);
> > - task = rqstp->rq_task;
> > - }
> > + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > + set_bit(SP_NEED_VICTIM, &pool->sp_flags);
> > spin_unlock_bh(&pool->sp_lock);
> > - return task;
> > + return pool;
> > }
> >
> > static int
> > @@ -801,18 +798,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> > static int
> > svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
> > {
> > - struct svc_rqst *rqstp;
> > - struct task_struct *task;
> > unsigned int state = serv->sv_nrthreads-1;
> > + struct svc_pool *vpool;
> >
> > do {
> > - task = svc_pool_victim(serv, pool, &state);
> > - if (task == NULL)
> > + vpool = svc_pool_victim(serv, pool, &state);
> > + if (!vpool)
> > break;
> > - rqstp = kthread_data(task);
> > - /* Did we lose a race to svo_function threadfn? */
> > - if (kthread_stop(task) == -EINTR)
> > - svc_exit_thread(rqstp);
> > + svc_pool_wake_idle_thread(serv, vpool);
> > + wait_on_bit(&vpool->sp_flags, SP_VICTIM_REMAINS,
> > + TASK_IDLE);
>
> I'm not sure about this bit. Previously (AFAICT) we didn't shut down the
> threads serially. With this change, we will be. Granted we only have to
> wait until SP_VICTIM_REMAINS is clear. Does that happen early enough in
> the shutdown process that you aren't worried about this?
If by "previously" you mean "before v5.17-rc1~75^2~42", then you are
correct.
When we used send_sig to stop nfsd threads we sent the signals in a
tight loop and didn't wait.
Since then we use kthread_stop() which is synchronous. It sets a flag,
wakes the threads, and waits on a completion.
With this patch the clear_and_wake_up_bit() happens very slightly
*before* the complete() which we currently wait for rather late in
exit() handling (complete_vfork_done() I think).
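Spelling out the two schemes (just a sketch, not the actual code):

	/* before: pick one specific thread, wake it, wait for exactly it */
	task = svc_pool_victim(serv, pool, &state);	/* a task_struct */
	kthread_stop(task);	/* sets a flag, wakes, waits on a completion */

	/* after: ask for a volunteer, wait for whichever thread takes it */
	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
	svc_pool_wake_idle_thread(serv, pool);
	wait_on_bit(&pool->sp_flags, SP_VICTIM_REMAINS, TASK_IDLE);

Both versions stop threads one at a time; the change only affects which
thread we end up waiting for, and the wake-up now arrives slightly
earlier in the exiting thread's teardown.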
Thanks for the review - here and elsewhere.
NeilBrown
>
> > nrservs++;
> > } while (nrservs < 0);
> > return 0;
> > @@ -932,8 +927,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
> >
> > spin_lock_bh(&pool->sp_lock);
> > pool->sp_nrthreads--;
> > - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags))
> > - list_del_rcu(&rqstp->rq_all);
> > spin_unlock_bh(&pool->sp_lock);
> >
> > spin_lock_bh(&serv->sv_lock);
> > @@ -941,6 +934,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
> > spin_unlock_bh(&serv->sv_lock);
> > svc_sock_update_bufs(serv);
> >
> > + if (svc_thread_should_stop(rqstp))
> > + clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
> > svc_rqst_free(rqstp);
> >
> > svc_put(serv);
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index 4fdf1aaa514b..948605e7043b 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -595,8 +595,6 @@ void svc_wake_up(struct svc_serv *serv)
> > smp_wmb();
> > return;
> > }
> > -
> > - trace_svc_wake_up(rqstp);
> > }
> > EXPORT_SYMBOL_GPL(svc_wake_up);
> >
> > @@ -688,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
> > continue;
> >
> > set_current_state(TASK_IDLE);
> > - if (kthread_should_stop()) {
> > + if (svc_thread_should_stop(rqstp)) {
> > set_current_state(TASK_RUNNING);
> > return false;
> > }
> > @@ -726,7 +724,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
> > return false;
> >
> > /* are we shutting down? */
> > - if (kthread_should_stop())
> > + if (svc_thread_should_stop(rqstp))
> > return false;
> >
> > /* are we freezing? */
> > @@ -787,7 +785,7 @@ static void svc_wait_for_work(struct svc_rqst *rqstp)
> > }
> > #endif
> >
> > - if (!kthread_should_stop())
> > + if (!svc_thread_should_stop(rqstp))
> > percpu_counter_inc(&pool->sp_threads_no_work);
> > return;
> > out_found:
> >
> >
>
> --
> Jeff Layton <[email protected]>
>
On Fri, 21 Jul 2023, Chuck Lever III wrote:
>
> > On Jul 19, 2023, at 7:44 PM, Chuck Lever III <[email protected]> wrote:
> >
> >
> >
> >> On Jul 19, 2023, at 7:20 PM, NeilBrown <[email protected]> wrote:
> >>
> >> On Wed, 19 Jul 2023, Chuck Lever III wrote:
> >>>
> >>>> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
> >>>>
> >>>> On Tue, 18 Jul 2023, Chuck Lever wrote:
> >>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> >>>>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
> >>>>>> except one that wants to trace the wakeup. For now we drop that
> >>>>>> tracepoint.
> >>>>>
> >>>>> That's an important tracepoint, IMO.
> >>>>>
> >>>>> It might be better to have svc_pool_wake_idle_thread() return void
> >>>>> right from its introduction, and move the tracepoint into that
> >>>>> function. I can do that and respin if you agree.
> >>>>
> >>>> Mostly I agree.
> >>>>
> >>>> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
> >>>> as there would be no code that can see both the trigger xprt, and the
> >>>> woken rqst.
> >>>>
> >>>> I also wonder if having the trace point when the wake-up is requested
> >>>> makes any sense, as there is no guarantee that thread will handle that
> >>>> xprt.
> >>>>
> >>>> Maybe the trace point should report when the xprt is dequeued. i.e.
> >>>> maybe trace_svc_pool_awoken() should report the pid, and we could have
> >>>> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
> >>>
> >>> I'll come up with something that rearranges the tracepoints so that
> >>> svc_pool_wake_idle_thread() can return void.
> >>
> >> My current draft code has svc_pool_wake_idle_thread() returning bool -
> >> if it found something to wake up - purely for logging.
> >
> > This is also where I have ended up. I'll post an update probably tomorrow
> > my time. Too much other stuff going on to finish it today.
>
> Pushed to https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
> in branch topic-sunrpc-thread-scheduling
>
Thanks.
One little thing to fix.
In
SUNRPC: Report when no service thread is available.
you now have
- return false;
+
+ trace_svc_pool_starved(serv, pool);
+ percpu_counter_inc(&pool->sp_threads_starved);
+ return NULL;
That should still be "return false" at the end.
NeilBrown
On Fri, 21 Jul 2023, Chuck Lever III wrote:
>
> > On Jul 19, 2023, at 7:44 PM, Chuck Lever III <[email protected]> wrote:
> >
> >
> >
> >> On Jul 19, 2023, at 7:20 PM, NeilBrown <[email protected]> wrote:
> >>
> >> On Wed, 19 Jul 2023, Chuck Lever III wrote:
> >>>
> >>>> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
> >>>>
> >>>> On Tue, 18 Jul 2023, Chuck Lever wrote:
> >>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> >>>>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
> >>>>>> except one that wants to trace the wakeup. For now we drop that
> >>>>>> tracepoint.
> >>>>>
> >>>>> That's an important tracepoint, IMO.
> >>>>>
> >>>>> It might be better to have svc_pool_wake_idle_thread() return void
> >>>>> right from its introduction, and move the tracepoint into that
> >>>>> function. I can do that and respin if you agree.
> >>>>
> >>>> Mostly I agree.
> >>>>
> >>>> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
> >>>> as there would be no code that can see both the trigger xprt, and the
> >>>> woken rqst.
> >>>>
> >>>> I also wonder if having the trace point when the wake-up is requested
> >>>> makes any sense, as there is no guarantee that thread will handle that
> >>>> xprt.
> >>>>
> >>>> Maybe the trace point should report when the xprt is dequeued. i.e.
> >>>> maybe trace_svc_pool_awoken() should report the pid, and we could have
> >>>> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
> >>>
> >>> I'll come up with something that rearranges the tracepoints so that
> >>> svc_pool_wake_idle_thread() can return void.
> >>
> >> My current draft code has svc_pool_wake_idle_thread() returning bool -
> >> if it found something to wake up - purely for logging.
> >
> > This is also where I have ended up. I'll post an update probably tomorrow
> > my time. Too much other stuff going on to finish it today.
>
> Pushed to https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
> in branch topic-sunrpc-thread-scheduling
Another thing.
You have made svc_pool_wake_idle_thread() return bool, but for different
reasons than I did.
I wanted bool so I could trace a wake up due to enqueuing an xprt
differently from a wakeup due to a call to svc_wake_up(). I thought the
difference might be important.
You have it returning a bool so that:
- in one case you can set SP_CONGESTED - but that can be safely set
inside svc_pool_wake_idle_thread()
- in another case so that SP_TASK_PENDING can be set. But I think it is
best to set that anyway, and clear it when svc_recv() wakes up.
So maybe it can return void after all.
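Roughly like this - an untested sketch, assuming the svc_wake_up() and
svc_recv() structure already in your branch:

	/* in svc_wake_up(): always mark the pool, then wake someone */
	set_bit(SP_TASK_PENDING, &pool->sp_flags);
	svc_pool_wake_idle_thread(serv, pool);

	/* in svc_recv(): the woken thread consumes the flag */
	clear_bit(SP_TASK_PENDING, &pool->sp_flags);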
Thanks,
NeilBrown
> On Jul 24, 2023, at 12:44 AM, NeilBrown <[email protected]> wrote:
>
> On Fri, 21 Jul 2023, Chuck Lever III wrote:
>>
>>> On Jul 19, 2023, at 7:44 PM, Chuck Lever III <[email protected]> wrote:
>>>
>>>
>>>
>>>> On Jul 19, 2023, at 7:20 PM, NeilBrown <[email protected]> wrote:
>>>>
>>>> On Wed, 19 Jul 2023, Chuck Lever III wrote:
>>>>>
>>>>>> On Jul 18, 2023, at 9:16 PM, NeilBrown <[email protected]> wrote:
>>>>>>
>>>>>> On Tue, 18 Jul 2023, Chuck Lever wrote:
>>>>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>>>>>>> No callers of svc_pool_wake_idle_thread() care which thread was woken -
>>>>>>>> except one that wants to trace the wakeup. For now we drop that
>>>>>>>> tracepoint.
>>>>>>>
>>>>>>> That's an important tracepoint, IMO.
>>>>>>>
>>>>>>> It might be better to have svc_pool_wake_idle_thread() return void
>>>>>>> right from its introduction, and move the tracepoint into that
>>>>>>> function. I can do that and respin if you agree.
>>>>>>
>>>>>> Mostly I agree.
>>>>>>
>>>>>> It isn't clear to me how you would handle trace_svc_xprt_enqueue(),
>>>>>> as there would be no code that can see both the triggering xprt and the
>>>>>> woken rqst.
>>>>>>
>>>>>> I also wonder if having the trace point when the wake-up is requested
>>>>>> makes any sense, as there is no guarantee that thread will handle that
>>>>>> xprt.
>>>>>>
>>>>>> Maybe the trace point should report when the xprt is dequeued. i.e.
>>>>>> maybe trace_svc_pool_awoken() should report the pid, and we could have
>>>>>> trace_svc_xprt_enqueue() only report the xprt, not the rqst.
>>>>>
>>>>> I'll come up with something that rearranges the tracepoints so that
>>>>> svc_pool_wake_idle_thread() can return void.
>>>>
>>>> My current draft code has svc_pool_wake_idle_thread() returning bool -
>>>> if it found something to wake up - purely for logging.
>>>
>>> This is also where I have ended up. I'll post an update probably tomorrow
>>> my time. Too much other stuff going on to finish it today.
>>
>> Pushed to https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git
>> in branch topic-sunrpc-thread-scheduling
>
> Another thing.
> You have made svc_pool_wake_idle_thread() return a bool, but for different
> reasons than me.
>
> I wanted bool so I could trace a wakeup due to enqueuing an xprt
> differently from a wakeup due to a call to svc_wake_up(). I thought the
> difference might be important.
>
> You have it returning a bool so that:
> - in one case you can set SP_CONGESTED - but that can be safely set
> inside svc_pool_wake_idle_thread()
So, set SP_CONGESTED whenever an idle thread cannot be found, no matter
the caller.
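Something like this at the end of svc_pool_wake_idle_thread() - sketch
only, assuming the void-returning variant, with "rqstp" standing for
whatever idle thread the function found, or NULL:

	if (!rqstp) {
		set_bit(SP_CONGESTED, &pool->sp_flags);
		trace_svc_pool_starved(serv, pool);
		percpu_counter_inc(&pool->sp_threads_starved);
		return;
	}
	wake_up_process(rqstp->rq_task);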
> - in another case so that SP_TASK_PENDING can be set. But I think it is
> best to set that anyway, and clear it when svc_recv() wakes up.
Maybe that should be done in a separate patch. Can you give it a try?
> So maybe it can return void after all.
--
Chuck Lever
On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> > When a sequence of numbers is needed for internal use only, an enum is
> > typically best. The sequence will inevitably need to be changed one
> > day, and having an enum means the developer doesn't need to think about
> > renumbering after insertion or deletion. The patch will be easier to
> > review.
>
> Last sentence needs to define the antecedent of "The patch".
I've changed the last sentence in the description to "Such patches
will be easier ..."
I've applied 1/6 through 5/6, with a few cosmetic changes, to the
SUNRPC threads topic branch. 6/6 needed more work:
I've split this into one patch for each new enum.
The XPT_ flags change needed TRACE_DEFINE_ENUM macros to make
show_svc_xprt_flags() work properly. Added.
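The pattern is roughly this (abbreviated sketch): TRACE_DEFINE_ENUM()
exports each value to the tracing infrastructure, since the
__print_flags() symbol table is handed to userspace as text and the
preprocessor no longer substitutes the names once they are enum values:

	/* abbreviated: one TRACE_DEFINE_ENUM() and one table entry per flag */
	TRACE_DEFINE_ENUM(XPT_BUSY);
	TRACE_DEFINE_ENUM(XPT_CONN);
	TRACE_DEFINE_ENUM(XPT_CLOSE);

	#define show_svc_xprt_flags(flags) \
		__print_flags(flags, "|", \
			{ BIT(XPT_BUSY),  "BUSY" }, \
			{ BIT(XPT_CONN),  "CONN" }, \
			{ BIT(XPT_CLOSE), "CLOSE" })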
The new enum for SVC_GARBAGE and friends... those aren't bit flags.
So I've made that a named enum so that it can be used for type-
checking function return values properly.
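i.e. something along these lines - the enum tag name here is only
illustrative:

	/* a named enum lets the compiler check return values */
	enum svc_auth_status {
		SVC_GARBAGE = 1,	/* keep the historic value */
		SVC_SYSERR,
		SVC_VALID,
		/* ... remaining values unchanged ... */
	};

	/* auth_ops methods can then be declared with the named type */
	enum svc_auth_status	(*set_client)(struct svc_rqst *rq);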
I dropped the hunk that changes XPRT_SOCK_CONNECTING and friends.
That should be submitted separately to Anna and Trond.
All this will appear in the nfsd repo later today.
> > Signed-off-by: NeilBrown <[email protected]>
> > ---
> > include/linux/sunrpc/cache.h | 11 +++++++----
> > include/linux/sunrpc/svc.h | 34 ++++++++++++++++++++--------------
> > include/linux/sunrpc/svc_xprt.h | 39 +++++++++++++++++++++------------------
> > include/linux/sunrpc/svcauth.h | 29 ++++++++++++++++-------------
> > include/linux/sunrpc/xprtsock.h | 25 +++++++++++++------------
> > 5 files changed, 77 insertions(+), 61 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
> > index 518bd28f5ab8..3cc4f4f0c764 100644
> > --- a/include/linux/sunrpc/cache.h
> > +++ b/include/linux/sunrpc/cache.h
> > @@ -56,10 +56,13 @@ struct cache_head {
> > struct kref ref;
> > unsigned long flags;
> > };
> > -#define CACHE_VALID 0 /* Entry contains valid data */
> > -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
> > -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
> > -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
> > +/* cache_head.flags */
> > +enum {
> > + CACHE_VALID, /* Entry contains valid data */
> > + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
> > + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
> > + CACHE_CLEANED, /* Entry has been cleaned from cache */
> > +};
>
> Weird comment of the day: Please use a double-tab before the comments
> to leave room for larger flag names in the future.
>
>
> > #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
> >
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index f3df7f963653..83f31a09c853 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -31,7 +31,7 @@
> > * node traffic on multi-node NUMA NFS servers.
> > */
> > struct svc_pool {
> > - unsigned int sp_id; /* pool id; also node id on NUMA */
> > + unsigned int sp_id; /* pool id; also node id on NUMA */
> > spinlock_t sp_lock; /* protects all fields */
> > struct list_head sp_sockets; /* pending sockets */
> > unsigned int sp_nrthreads; /* # of threads in pool */
> > @@ -44,12 +44,15 @@ struct svc_pool {
> > struct percpu_counter sp_threads_starved;
> > struct percpu_counter sp_threads_no_work;
> >
> > -#define SP_TASK_PENDING (0) /* still work to do even if no
> > - * xprt is queued. */
> > -#define SP_CONGESTED (1)
> > unsigned long sp_flags;
> > } ____cacheline_aligned_in_smp;
> >
> > +/* bits for sp_flags */
> > +enum {
> > + SP_TASK_PENDING, /* still work to do even if no xprt is queued */
> > + SP_CONGESTED, /* all threads are busy, none idle */
> > +};
> > +
> > /*
> > * RPC service.
> > *
> > @@ -232,16 +235,6 @@ struct svc_rqst {
> > u32 rq_proc; /* procedure number */
> > u32 rq_prot; /* IP protocol */
> > int rq_cachetype; /* catering to nfsd */
> > -#define RQ_SECURE (0) /* secure port */
> > -#define RQ_LOCAL (1) /* local request */
> > -#define RQ_USEDEFERRAL (2) /* use deferral */
> > -#define RQ_DROPME (3) /* drop current reply */
> > -#define RQ_SPLICE_OK (4) /* turned off in gss privacy
> > - * to prevent encrypting page
> > - * cache pages */
> > -#define RQ_VICTIM (5) /* about to be shut down */
> > -#define RQ_BUSY (6) /* request is busy */
> > -#define RQ_DATA (7) /* request has data */
> > unsigned long rq_flags; /* flags field */
> > ktime_t rq_qtime; /* enqueue time */
> >
> > @@ -272,6 +265,19 @@ struct svc_rqst {
> > void ** rq_lease_breaker; /* The v4 client breaking a lease */
> > };
> >
> > +/* bits for rq_flags */
> > +enum {
> > + RQ_SECURE, /* secure port */
> > + RQ_LOCAL, /* local request */
> > + RQ_USEDEFERRAL, /* use deferral */
> > + RQ_DROPME, /* drop current reply */
> > + RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
> > + * cache pages */
> > + RQ_VICTIM, /* about to be shut down */
> > + RQ_BUSY, /* request is busy */
> > + RQ_DATA, /* request has data */
> > +};
> > +
>
> Also here -- two tab stops instead of one.
>
>
> > #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
> >
> > /*
> > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
> > index a6b12631db21..af383d0349b3 100644
> > --- a/include/linux/sunrpc/svc_xprt.h
> > +++ b/include/linux/sunrpc/svc_xprt.h
> > @@ -56,26 +56,9 @@ struct svc_xprt {
> > struct list_head xpt_list;
> > struct list_head xpt_ready;
> > unsigned long xpt_flags;
> > -#define XPT_BUSY 0 /* enqueued/receiving */
> > -#define XPT_CONN 1 /* conn pending */
> > -#define XPT_CLOSE 2 /* dead or dying */
> > -#define XPT_DATA 3 /* data pending */
> > -#define XPT_TEMP 4 /* connected transport */
> > -#define XPT_DEAD 6 /* transport closed */
> > -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
> > -#define XPT_DEFERRED 8 /* deferred request pending */
> > -#define XPT_OLD 9 /* used for xprt aging mark+sweep */
> > -#define XPT_LISTENER 10 /* listening endpoint */
> > -#define XPT_CACHE_AUTH 11 /* cache auth info */
> > -#define XPT_LOCAL 12 /* connection from loopback interface */
> > -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
> > -#define XPT_CONG_CTRL 14 /* has congestion control */
> > -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
> > -#define XPT_TLS_SESSION 16 /* transport-layer security established */
> > -#define XPT_PEER_AUTH 17 /* peer has been authenticated */
> >
> > struct svc_serv *xpt_server; /* service for transport */
> > - atomic_t xpt_reserved; /* space on outq that is rsvd */
> > + atomic_t xpt_reserved; /* space on outq that is rsvd */
> > atomic_t xpt_nr_rqsts; /* Number of requests */
> > struct mutex xpt_mutex; /* to serialize sending data */
> > spinlock_t xpt_lock; /* protects sk_deferred
> > @@ -96,6 +79,26 @@ struct svc_xprt {
> > struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
> > struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
> > };
> > +/* flag bits for xpt_flags */
> > +enum {
> > + XPT_BUSY, /* enqueued/receiving */
> > + XPT_CONN, /* conn pending */
> > + XPT_CLOSE, /* dead or dying */
> > + XPT_DATA, /* data pending */
> > + XPT_TEMP, /* connected transport */
> > + XPT_DEAD, /* transport closed */
> > + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
> > + XPT_DEFERRED, /* deferred request pending */
> > + XPT_OLD, /* used for xprt aging mark+sweep */
> > + XPT_LISTENER, /* listening endpoint */
> > + XPT_CACHE_AUTH, /* cache auth info */
> > + XPT_LOCAL, /* connection from loopback interface */
> > + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
> > + XPT_CONG_CTRL, /* has congestion control */
> > + XPT_HANDSHAKE, /* xprt requests a handshake */
> > + XPT_TLS_SESSION, /* transport-layer security established */
> > + XPT_PEER_AUTH, /* peer has been authenticated */
> > +};
> >
> > static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
> > {
> > diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
> > index 6d9cc9080aca..8d1d0d0721d2 100644
> > --- a/include/linux/sunrpc/svcauth.h
> > +++ b/include/linux/sunrpc/svcauth.h
> > @@ -133,19 +133,22 @@ struct auth_ops {
> > int (*set_client)(struct svc_rqst *rq);
> > };
> >
> > -#define SVC_GARBAGE 1
> > -#define SVC_SYSERR 2
> > -#define SVC_VALID 3
> > -#define SVC_NEGATIVE 4
> > -#define SVC_OK 5
> > -#define SVC_DROP 6
> > -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
> > - * lost so if there is a tcp connection, it
> > - * should be closed
> > - */
> > -#define SVC_DENIED 8
> > -#define SVC_PENDING 9
> > -#define SVC_COMPLETE 10
> > +/*return values for svc functions that analyse request */
> > +enum {
> > + SVC_GARBAGE,
> > + SVC_SYSERR,
> > + SVC_VALID,
> > + SVC_NEGATIVE,
> > + SVC_OK,
> > + SVC_DROP,
> > + SVC_CLOSE, /* Like SVC_DROP, but request is definitely
> > + * lost so if there is a tcp connection, it
> > + * should be closed
> > + */
> > + SVC_DENIED,
> > + SVC_PENDING,
> > + SVC_COMPLETE,
> > +};
> >
> > struct svc_xprt;
> >
> > diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
> > index 700a1e6c047c..1ed2f446010b 100644
> > --- a/include/linux/sunrpc/xprtsock.h
> > +++ b/include/linux/sunrpc/xprtsock.h
> > @@ -81,17 +81,18 @@ struct sock_xprt {
> > };
> >
> > /*
> > - * TCP RPC flags
> > + * TCP RPC flags in ->sock_state
> > */
> > -#define XPRT_SOCK_CONNECTING 1U
> > -#define XPRT_SOCK_DATA_READY (2)
> > -#define XPRT_SOCK_UPD_TIMEOUT (3)
> > -#define XPRT_SOCK_WAKE_ERROR (4)
> > -#define XPRT_SOCK_WAKE_WRITE (5)
> > -#define XPRT_SOCK_WAKE_PENDING (6)
> > -#define XPRT_SOCK_WAKE_DISCONNECT (7)
> > -#define XPRT_SOCK_CONNECT_SENT (8)
> > -#define XPRT_SOCK_NOSPACE (9)
> > -#define XPRT_SOCK_IGNORE_RECV (10)
> > -
> > +enum {
> > + XPRT_SOCK_CONNECTING,
> > + XPRT_SOCK_DATA_READY,
> > + XPRT_SOCK_UPD_TIMEOUT,
> > + XPRT_SOCK_WAKE_ERROR,
> > + XPRT_SOCK_WAKE_WRITE,
> > + XPRT_SOCK_WAKE_PENDING,
> > + XPRT_SOCK_WAKE_DISCONNECT,
> > + XPRT_SOCK_CONNECT_SENT,
> > + XPRT_SOCK_NOSPACE,
> > + XPRT_SOCK_IGNORE_RECV,
> > +};
> > #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
> >
> >
>
> Let's not change client-side code in this patch. Please split this
> hunk out and send it to Trond and Anna separately.
>
--
Chuck Lever
> On Jul 30, 2023, at 6:14 PM, NeilBrown <[email protected]> wrote:
>
> On Mon, 31 Jul 2023, NeilBrown wrote:
>> On Mon, 31 Jul 2023, Chuck Lever wrote:
>>> On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
>>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>>>> When a sequence of numbers is needed for internal use only, an enum is
>>>>> typically best. The sequence will inevitably need to be changed one
>>>>> day, and having an enum means the developer doesn't need to think about
>>>>> renumbering after insertion or deletion. The patch will be easier to
>>>>> review.
>>>>
>>>> Last sentence needs to define the antecedent of "The patch".
>>>
>>> I've changed the last sentence in the description to "Such patches
>>> will be easier ..."
>>>
>>> I've applied 1/6 through 5/6, with a few cosmetic changes, to the
>>> SUNRPC threads topic branch. 6/6 needed more work:
>>
>> Ah - ok. I was all set to resubmit with various changes and
>> re-ordering. I guess I waited too long.
It's a topic branch. Send diffs and I can squash them in.
>>> All this will appear in the nfsd repo later today.
>>
>> Not yet....
>
> No, they are there - the top date looked rather old so I thought there
> was nothing new yet. Sorry.
There are some old changes in there already, but I'm still testing
the latest set.
--
Chuck Lever
On Mon, 31 Jul 2023, NeilBrown wrote:
> On Mon, 31 Jul 2023, Chuck Lever wrote:
> > On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
> > > On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> > > > When a sequence of numbers is needed for internal use only, an enum is
> > > > typically best. The sequence will inevitably need to be changed one
> > > > day, and having an enum means the developer doesn't need to think about
> > > > renumbering after insertion or deletion. The patch will be easier to
> > > > review.
> > >
> > > Last sentence needs to define the antecedent of "The patch".
> >
> > I've changed the last sentence in the description to "Such patches
> > will be easier ..."
> >
> > I've applied 1/6 through 5/6, with a few cosmetic changes, to the
> > SUNRPC threads topic branch. 6/6 needed more work:
>
> Ah - ok. I was all set to resubmit with various changes and
> re-ordering. I guess I waited too long.
>
> > All this will appear in the nfsd repo later today.
>
> Not yet....
No, they are there - the top date looked rather old so I thought there
was nothing new yet. Sorry.
NeilBrown
On Mon, 31 Jul 2023, Chuck Lever wrote:
> On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
> > On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> > > When a sequence of numbers is needed for internal use only, an enum is
> > > typically best. The sequence will inevitably need to be changed one
> > > day, and having an enum means the developer doesn't need to think about
> > > renumbering after insertion or deletion. The patch will be easier to
> > > review.
> >
> > Last sentence needs to define the antecedent of "The patch".
>
> I've changed the last sentence in the description to "Such patches
> will be easier ..."
>
> I've applied 1/6 through 5/6, with a few cosmetic changes, to the
> SUNRPC threads topic branch. 6/6 needed more work:
Ah - ok. I was all set to resubmit with various changes and
re-ordering. I guess I waited too long.
> All this will appear in the nfsd repo later today.
Not yet....
Thanks,
NeilBrown
> On Jul 30, 2023, at 6:56 PM, NeilBrown <[email protected]> wrote:
>
> On Mon, 31 Jul 2023, Chuck Lever wrote:
>> On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
>>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>>> When a sequence of numbers is needed for internal use only, an enum is
>>>> typically best. The sequence will inevitably need to be changed one
>>>> day, and having an enum means the developer doesn't need to think about
>>>> renumbering after insertion or deletion. The patch will be easier to
>>>> review.
>>>
>>> Last sentence needs to define the antecedent of "The patch".
>>
>> I've changed the last sentence in the description to "Such patches
>> will be easier ..."
>>
>> I've applied 1/6 through 5/6, with a few cosmetic changes, to the
>> SUNRPC threads topic branch. 6/6 needed more work:
>>
>> I've split this into one patch for each new enum.
>
> I don't see this in topic-sunrpc-thread-scheduling.
> Commit 11a5027fd416 ("SUNRPC: change various server-side #defines to enum")
> contains 3 new enums, and the XPT_ and SVC_GARBAGE improvements you
> mention below cannot be found.
Still testing this.
>> The XPT_ flags change needed TRACE_DEFINE_ENUM macros to make
>> show_svc_xprt_flags() work properly. Added.
>>
>> The new enum for SVC_GARBAGE and friends... those aren't bit flags.
>> So I've made that a named enum so that it can be used for type-
>> checking function return values properly.
>>
>> I dropped the hunk that changes XPRT_SOCK_CONNECTING and friends.
>> That should be submitted separately to Anna and Trond.
>>
>> All this will appear in the nfsd repo later today.
--
Chuck Lever
> On Jul 30, 2023, at 11:57 AM, Chuck Lever <[email protected]> wrote:
>
> On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
>> On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
>>> When a sequence of numbers is needed for internal use only, an enum is
>>> typically best. The sequence will inevitably need to be changed one
>>> day, and having an enum means the developer doesn't need to think about
>>> renumbering after insertion or deletion. The patch will be easier to
>>> review.
>>
>> Last sentence needs to define the antecedent of "The patch".
>
> I've changed the last sentence in the description to "Such patches
> will be easier ..."
>
> I've applied 1/6 through 5/6, with a few cosmetic changes, to the
> SUNRPC threads topic branch. 6/6 needed more work:
>
> I've split this into one patch for each new enum.
>
> The XPT_ flags change needed TRACE_DEFINE_ENUM macros to make
> show_svc_xprt_flags() work properly. Added.
>
> The new enum for SVC_GARBAGE and friends... those aren't bit flags.
> So I've made that a named enum so that it can be used for type-
> checking function return values properly.
>
> I dropped the hunk that changes XPRT_SOCK_CONNECTING and friends.
> That should be submitted separately to Anna and Trond.
>
> All this will appear in the nfsd repo later today.
I just pushed these changes to topic-sunrpc-thread-scheduling.
>>> Signed-off-by: NeilBrown <[email protected]>
>>> ---
>>> include/linux/sunrpc/cache.h | 11 +++++++----
>>> include/linux/sunrpc/svc.h | 34 ++++++++++++++++++++--------------
>>> include/linux/sunrpc/svc_xprt.h | 39 +++++++++++++++++++++------------------
>>> include/linux/sunrpc/svcauth.h | 29 ++++++++++++++++-------------
>>> include/linux/sunrpc/xprtsock.h | 25 +++++++++++++------------
>>> 5 files changed, 77 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
>>> index 518bd28f5ab8..3cc4f4f0c764 100644
>>> --- a/include/linux/sunrpc/cache.h
>>> +++ b/include/linux/sunrpc/cache.h
>>> @@ -56,10 +56,13 @@ struct cache_head {
>>> struct kref ref;
>>> unsigned long flags;
>>> };
>>> -#define CACHE_VALID 0 /* Entry contains valid data */
>>> -#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
>>> -#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
>>> -#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
>>> +/* cache_head.flags */
>>> +enum {
>>> + CACHE_VALID, /* Entry contains valid data */
>>> + CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
>>> + CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
>>> + CACHE_CLEANED, /* Entry has been cleaned from cache */
>>> +};
>>
>> Weird comment of the day: Please use a double-tab before the comments
>> to leave room for larger flag names in the future.
>>
>>
>>> #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
>>>
>>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>>> index f3df7f963653..83f31a09c853 100644
>>> --- a/include/linux/sunrpc/svc.h
>>> +++ b/include/linux/sunrpc/svc.h
>>> @@ -31,7 +31,7 @@
>>> * node traffic on multi-node NUMA NFS servers.
>>> */
>>> struct svc_pool {
>>> - unsigned int sp_id; /* pool id; also node id on NUMA */
>>> + unsigned int sp_id; /* pool id; also node id on NUMA */
>>> spinlock_t sp_lock; /* protects all fields */
>>> struct list_head sp_sockets; /* pending sockets */
>>> unsigned int sp_nrthreads; /* # of threads in pool */
>>> @@ -44,12 +44,15 @@ struct svc_pool {
>>> struct percpu_counter sp_threads_starved;
>>> struct percpu_counter sp_threads_no_work;
>>>
>>> -#define SP_TASK_PENDING (0) /* still work to do even if no
>>> - * xprt is queued. */
>>> -#define SP_CONGESTED (1)
>>> unsigned long sp_flags;
>>> } ____cacheline_aligned_in_smp;
>>>
>>> +/* bits for sp_flags */
>>> +enum {
>>> + SP_TASK_PENDING, /* still work to do even if no xprt is queued */
>>> + SP_CONGESTED, /* all threads are busy, none idle */
>>> +};
>>> +
>>> /*
>>> * RPC service.
>>> *
>>> @@ -232,16 +235,6 @@ struct svc_rqst {
>>> u32 rq_proc; /* procedure number */
>>> u32 rq_prot; /* IP protocol */
>>> int rq_cachetype; /* catering to nfsd */
>>> -#define RQ_SECURE (0) /* secure port */
>>> -#define RQ_LOCAL (1) /* local request */
>>> -#define RQ_USEDEFERRAL (2) /* use deferral */
>>> -#define RQ_DROPME (3) /* drop current reply */
>>> -#define RQ_SPLICE_OK (4) /* turned off in gss privacy
>>> - * to prevent encrypting page
>>> - * cache pages */
>>> -#define RQ_VICTIM (5) /* about to be shut down */
>>> -#define RQ_BUSY (6) /* request is busy */
>>> -#define RQ_DATA (7) /* request has data */
>>> unsigned long rq_flags; /* flags field */
>>> ktime_t rq_qtime; /* enqueue time */
>>>
>>> @@ -272,6 +265,19 @@ struct svc_rqst {
>>> void ** rq_lease_breaker; /* The v4 client breaking a lease */
>>> };
>>>
>>> +/* bits for rq_flags */
>>> +enum {
>>> + RQ_SECURE, /* secure port */
>>> + RQ_LOCAL, /* local request */
>>> + RQ_USEDEFERRAL, /* use deferral */
>>> + RQ_DROPME, /* drop current reply */
>>> + RQ_SPLICE_OK, /* turned off in gss privacy to prevent encrypting page
>>> + * cache pages */
>>> + RQ_VICTIM, /* about to be shut down */
>>> + RQ_BUSY, /* request is busy */
>>> + RQ_DATA, /* request has data */
>>> +};
>>> +
>>
>> Also here -- two tab stops instead of one.
>>
>>
>>> #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
>>>
>>> /*
>>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
>>> index a6b12631db21..af383d0349b3 100644
>>> --- a/include/linux/sunrpc/svc_xprt.h
>>> +++ b/include/linux/sunrpc/svc_xprt.h
>>> @@ -56,26 +56,9 @@ struct svc_xprt {
>>> struct list_head xpt_list;
>>> struct list_head xpt_ready;
>>> unsigned long xpt_flags;
>>> -#define XPT_BUSY 0 /* enqueued/receiving */
>>> -#define XPT_CONN 1 /* conn pending */
>>> -#define XPT_CLOSE 2 /* dead or dying */
>>> -#define XPT_DATA 3 /* data pending */
>>> -#define XPT_TEMP 4 /* connected transport */
>>> -#define XPT_DEAD 6 /* transport closed */
>>> -#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
>>> -#define XPT_DEFERRED 8 /* deferred request pending */
>>> -#define XPT_OLD 9 /* used for xprt aging mark+sweep */
>>> -#define XPT_LISTENER 10 /* listening endpoint */
>>> -#define XPT_CACHE_AUTH 11 /* cache auth info */
>>> -#define XPT_LOCAL 12 /* connection from loopback interface */
>>> -#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
>>> -#define XPT_CONG_CTRL 14 /* has congestion control */
>>> -#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
>>> -#define XPT_TLS_SESSION 16 /* transport-layer security established */
>>> -#define XPT_PEER_AUTH 17 /* peer has been authenticated */
>>>
>>> struct svc_serv *xpt_server; /* service for transport */
>>> - atomic_t xpt_reserved; /* space on outq that is rsvd */
>>> + atomic_t xpt_reserved; /* space on outq that is rsvd */
>>> atomic_t xpt_nr_rqsts; /* Number of requests */
>>> struct mutex xpt_mutex; /* to serialize sending data */
>>> spinlock_t xpt_lock; /* protects sk_deferred
>>> @@ -96,6 +79,26 @@ struct svc_xprt {
>>> struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */
>>> struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
>>> };
>>> +/* flag bits for xpt_flags */
>>> +enum {
>>> + XPT_BUSY, /* enqueued/receiving */
>>> + XPT_CONN, /* conn pending */
>>> + XPT_CLOSE, /* dead or dying */
>>> + XPT_DATA, /* data pending */
>>> + XPT_TEMP, /* connected transport */
>>> + XPT_DEAD, /* transport closed */
>>> + XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
>>> + XPT_DEFERRED, /* deferred request pending */
>>> + XPT_OLD, /* used for xprt aging mark+sweep */
>>> + XPT_LISTENER, /* listening endpoint */
>>> + XPT_CACHE_AUTH, /* cache auth info */
>>> + XPT_LOCAL, /* connection from loopback interface */
>>> + XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
>>> + XPT_CONG_CTRL, /* has congestion control */
>>> + XPT_HANDSHAKE, /* xprt requests a handshake */
>>> + XPT_TLS_SESSION, /* transport-layer security established */
>>> + XPT_PEER_AUTH, /* peer has been authenticated */
>>> +};
>>>
>>> static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
>>> {
>>> diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
>>> index 6d9cc9080aca..8d1d0d0721d2 100644
>>> --- a/include/linux/sunrpc/svcauth.h
>>> +++ b/include/linux/sunrpc/svcauth.h
>>> @@ -133,19 +133,22 @@ struct auth_ops {
>>> int (*set_client)(struct svc_rqst *rq);
>>> };
>>>
>>> -#define SVC_GARBAGE 1
>>> -#define SVC_SYSERR 2
>>> -#define SVC_VALID 3
>>> -#define SVC_NEGATIVE 4
>>> -#define SVC_OK 5
>>> -#define SVC_DROP 6
>>> -#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
>>> - * lost so if there is a tcp connection, it
>>> - * should be closed
>>> - */
>>> -#define SVC_DENIED 8
>>> -#define SVC_PENDING 9
>>> -#define SVC_COMPLETE 10
>>> +/*return values for svc functions that analyse request */
>>> +enum {
>>> + SVC_GARBAGE,
>>> + SVC_SYSERR,
>>> + SVC_VALID,
>>> + SVC_NEGATIVE,
>>> + SVC_OK,
>>> + SVC_DROP,
>>> + SVC_CLOSE, /* Like SVC_DROP, but request is definitely
>>> + * lost so if there is a tcp connection, it
>>> + * should be closed
>>> + */
>>> + SVC_DENIED,
>>> + SVC_PENDING,
>>> + SVC_COMPLETE,
>>> +};
>>>
>>> struct svc_xprt;
>>>
>>> diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
>>> index 700a1e6c047c..1ed2f446010b 100644
>>> --- a/include/linux/sunrpc/xprtsock.h
>>> +++ b/include/linux/sunrpc/xprtsock.h
>>> @@ -81,17 +81,18 @@ struct sock_xprt {
>>> };
>>>
>>> /*
>>> - * TCP RPC flags
>>> + * TCP RPC flags in ->sock_state
>>> */
>>> -#define XPRT_SOCK_CONNECTING 1U
>>> -#define XPRT_SOCK_DATA_READY (2)
>>> -#define XPRT_SOCK_UPD_TIMEOUT (3)
>>> -#define XPRT_SOCK_WAKE_ERROR (4)
>>> -#define XPRT_SOCK_WAKE_WRITE (5)
>>> -#define XPRT_SOCK_WAKE_PENDING (6)
>>> -#define XPRT_SOCK_WAKE_DISCONNECT (7)
>>> -#define XPRT_SOCK_CONNECT_SENT (8)
>>> -#define XPRT_SOCK_NOSPACE (9)
>>> -#define XPRT_SOCK_IGNORE_RECV (10)
>>> -
>>> +enum {
>>> + XPRT_SOCK_CONNECTING,
>>> + XPRT_SOCK_DATA_READY,
>>> + XPRT_SOCK_UPD_TIMEOUT,
>>> + XPRT_SOCK_WAKE_ERROR,
>>> + XPRT_SOCK_WAKE_WRITE,
>>> + XPRT_SOCK_WAKE_PENDING,
>>> + XPRT_SOCK_WAKE_DISCONNECT,
>>> + XPRT_SOCK_CONNECT_SENT,
>>> + XPRT_SOCK_NOSPACE,
>>> + XPRT_SOCK_IGNORE_RECV,
>>> +};
>>> #endif /* _LINUX_SUNRPC_XPRTSOCK_H */
>>>
>>>
>>
>> Let's not change client-side code in this patch. Please split this
>> hunk out and send it to Trond and Anna separately.
>>
>
> --
> Chuck Lever
--
Chuck Lever
On Mon, 31 Jul 2023, Chuck Lever wrote:
> On Tue, Jul 18, 2023 at 09:37:58AM -0400, Chuck Lever wrote:
> > On Tue, Jul 18, 2023 at 04:38:08PM +1000, NeilBrown wrote:
> > > When a sequence of numbers is needed for internal use only, an enum is
> > > typically best. The sequence will inevitably need to be changed one
> > > day, and having an enum means the developer doesn't need to think about
> > > renumbering after insertion or deletion. The patch will be easier to
> > > review.
> >
> > Last sentence needs to define the antecedent of "The patch".
>
> I've changed the last sentence in the description to "Such patches
> will be easier ..."
>
> I've applied 1/6 through 5/6, with a few cosmetic changes, to the
> SUNRPC threads topic branch. 6/6 needed more work:
>
> I've split this into one patch for each new enum.
I don't see this in topic-sunrpc-thread-scheduling.
Commit 11a5027fd416 ("SUNRPC: change various server-side #defines to enum")
contains 3 new enums, and the XPT_ and SVC_GARBAGE improvements you
mention below cannot be found.
Thanks,
NeilBrown
>
> The XPT_ flags change needed TRACE_DEFINE_ENUM macros to make
> show_svc_xprt_flags() work properly. Added.
>
> The new enum for SVC_GARBAGE and friends... those aren't bit flags.
> So I've made that a named enum so that it can be used for type-
> checking function return values properly.
>
> I dropped the hunk that changes XPRT_SOCK_CONNECTING and friends.
> That should be submitted separately to Anna and Trond.
>
> All this will appear in the nfsd repo later today.