Introduce rpc_status entry in nfsd debug filesystem in order to dump
pending RPC requests debugging information.
Changes since v5:
- add missing delimiters for nfs4 compound ops
- add a header line for rpc_status handler
- do not dump rq_prog
- do not dump rq_flags in hex
Changes since v4:
- rely on acquire/release APIs and get rid of atomic operation
- fix kdoc for nfsd_rpc_status_open
- get rid of ',' as field delimiter in nfsd_rpc_status hanlder
- move nfsd_rpc_status before nfsd_v4 enum entries
- fix compilantion error if nfsdv4 is not enabled
Changes since v3:
- introduce rq_status_counter in order to detect if the RPC request is
pending and RPC info are stable
- rely on __svc_print_addr to dump IP info
Changes since v2:
- minor changes in nfsd_rpc_status_show output
Changes since v1:
- rework nfsd_rpc_status_show output
Changes since RFCv1:
- riduce time holding nfsd_mutex bumping svc_serv refcoung in
nfsd_rpc_status_open()
- dump rqstp->rq_stime
- add missing kdoc for nfsd_rpc_status_open()
Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=3D366
Lorenzo Bianconi (3):
SUNRPC: add verbose parameter to __svc_print_addr()
NFSD: introduce nfsd_stats_open utility routine
NFSD: add rpc_status entry in nfsd debug filesystem
fs/nfsd/nfs4proc.c | 4 +-
fs/nfsd/nfsctl.c | 11 ++-
fs/nfsd/nfsd.h | 9 +-
fs/nfsd/nfssvc.c | 163 ++++++++++++++++++++++++++++++--
include/linux/sunrpc/svc.h | 1 +
include/linux/sunrpc/svc_xprt.h | 12 +--
net/sunrpc/svc.c | 2 +-
net/sunrpc/svc_xprt.c | 2 +-
8 files changed, 185 insertions(+), 19 deletions(-)
--
2.41.0
Introduce rpc_status entry in nfsd debug filesystem in order to dump
pending RPC requests debugging information.
Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
Reviewed-by: NeilBrown <[email protected]>
Reviewed-by: Jeff Layton <[email protected]>
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
fs/nfsd/nfs4proc.c | 4 +-
fs/nfsd/nfsctl.c | 9 +++
fs/nfsd/nfsd.h | 7 ++
fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
include/linux/sunrpc/svc.h | 1 +
net/sunrpc/svc.c | 2 +-
6 files changed, 159 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index aa4f21f92deb..ff5a1dddc0ed 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
static const struct nfsd4_operation nfsd4_ops[];
-static const char *nfsd4_op_name(unsigned opnum);
-
/*
* Enforce NFSv4.1 COMPOUND ordering rules:
*
@@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
}
}
-static const char *nfsd4_op_name(unsigned opnum)
+const char *nfsd4_op_name(unsigned int opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index dad88bff3b9e..83eb5c6d894e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -47,6 +47,7 @@ enum {
NFSD_MaxBlkSize,
NFSD_MaxConnections,
NFSD_Filecache,
+ NFSD_Rpc_Status,
/*
* The below MUST come last. Otherwise we leave a hole in nfsd_files[]
* with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
return file_inode(file)->i_sb->s_fs_info;
}
+static const struct file_operations nfsd_rpc_status_operations = {
+ .open = nfsd_rpc_status_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = nfsd_stats_release,
+};
+
/*
* write_unlock_ip - Release all locks used by a client
*
@@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
+ [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 55b9d85ed71b..3e8a47b93fd4 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
int nfsd_set_nrthreads(int n, int *, struct net *);
int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_stats_release(struct inode *, struct file *);
+int nfsd_rpc_status_open(struct inode *inode, struct file *file);
void nfsd_shutdown_threads(struct net *net);
static inline void nfsd_put(struct net *net)
@@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
extern void nfsd4_init_leases_net(struct nfsd_net *nn);
+const char *nfsd4_op_name(unsigned int opnum);
#else /* CONFIG_NFSD_V4 */
static inline int nfsd4_is_junction(struct dentry *dentry)
{
return 0;
}
+static inline const char *nfsd4_op_name(unsigned int opnum)
+{
+ return "unknown_operation";
+}
+
static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
#define register_cld_notifier() 0
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 460219030ce1..237be14d3a11 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
+ /*
+ * Release rq_status_counter setting it to an odd value after the rpc
+ * request has been properly parsed. rq_status_counter is used to
+ * notify the consumers if the rqstp fields are stable
+ * (rq_status_counter is odd) or not meaningful (rq_status_counter
+ * is even).
+ */
+ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
+
rp = NULL;
switch (nfsd_cache_lookup(rqstp, &rp)) {
case RC_DOIT:
@@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
goto out_encode_err;
+ /*
+ * Release rq_status_counter setting it to an even value after the rpc
+ * request has been properly processed.
+ */
+ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
+
nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
out_cached_reply:
return 1;
@@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
mutex_unlock(&nfsd_mutex);
return ret;
}
+
+static int nfsd_rpc_status_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = file_inode(m->file);
+ struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
+ int i;
+
+ seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
+
+ rcu_read_lock();
+
+ for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
+ struct svc_rqst *rqstp;
+
+ list_for_each_entry_rcu(rqstp,
+ &nn->nfsd_serv->sv_pools[i].sp_all_threads,
+ rq_all) {
+ struct {
+ struct sockaddr daddr;
+ struct sockaddr saddr;
+ unsigned long rq_flags;
+ const char *pc_name;
+ ktime_t rq_stime;
+ __be32 rq_xid;
+ u32 rq_vers;
+ /* NFSv4 compund */
+ u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
+ } rqstp_info;
+ unsigned int status_counter;
+ char buf[RPC_MAX_ADDRBUFLEN];
+ int opcnt = 0;
+
+ /*
+ * Acquire rq_status_counter before parsing the rqst
+ * fields. rq_status_counter is set to an odd value in
+ * order to notify the consumers the rqstp fields are
+ * meaningful.
+ */
+ status_counter = smp_load_acquire(&rqstp->rq_status_counter);
+ if (!(status_counter & 1))
+ continue;
+
+ rqstp_info.rq_xid = rqstp->rq_xid;
+ rqstp_info.rq_flags = rqstp->rq_flags;
+ rqstp_info.rq_vers = rqstp->rq_vers;
+ rqstp_info.pc_name = svc_proc_name(rqstp);
+ rqstp_info.rq_stime = rqstp->rq_stime;
+ memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
+ sizeof(struct sockaddr));
+ memcpy(&rqstp_info.saddr, svc_addr(rqstp),
+ sizeof(struct sockaddr));
+
+#ifdef CONFIG_NFSD_V4
+ if (rqstp->rq_vers == NFS4_VERSION &&
+ rqstp->rq_proc == NFSPROC4_COMPOUND) {
+ /* NFSv4 compund */
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ int j;
+
+ opcnt = args->opcnt;
+ for (j = 0; j < opcnt; j++) {
+ struct nfsd4_op *op = &args->ops[j];
+
+ rqstp_info.opnum[j] = op->opnum;
+ }
+ }
+#endif /* CONFIG_NFSD_V4 */
+
+ /*
+ * Acquire rq_status_counter before reporting the rqst
+ * fields to the user.
+ */
+ if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
+ continue;
+
+ seq_printf(m,
+ "%04u %04ld NFSv%d %s %016lld",
+ be32_to_cpu(rqstp_info.rq_xid),
+ rqstp_info.rq_flags,
+ rqstp_info.rq_vers,
+ rqstp_info.pc_name,
+ ktime_to_us(rqstp_info.rq_stime));
+ seq_printf(m, " %s",
+ __svc_print_addr(&rqstp_info.saddr, buf,
+ sizeof(buf), false));
+ seq_printf(m, " %s",
+ __svc_print_addr(&rqstp_info.daddr, buf,
+ sizeof(buf), false));
+ if (opcnt) {
+ int j;
+
+ seq_puts(m, " ");
+ for (j = 0; j < opcnt; j++)
+ seq_printf(m, "%s%s",
+ nfsd4_op_name(rqstp_info.opnum[j]),
+ j == opcnt - 1 ? "" : ":");
+ } else {
+ seq_puts(m, " -");
+ }
+ seq_puts(m, "\n");
+ }
+ }
+
+ rcu_read_unlock();
+
+ return 0;
+}
+
+/**
+ * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
+ * @inode: entry inode pointer.
+ * @file: entry file pointer.
+ *
+ * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
+ * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
+ */
+int nfsd_rpc_status_open(struct inode *inode, struct file *file)
+{
+ int ret = nfsd_stats_open(inode);
+
+ if (ret)
+ return ret;
+
+ return single_open(file, nfsd_rpc_status_show, inode->i_private);
+}
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 7838b37bcfa8..b49c0470b4fe 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -251,6 +251,7 @@ struct svc_rqst {
* net namespace
*/
void ** rq_lease_breaker; /* The v4 client breaking a lease */
+ unsigned int rq_status_counter; /* RPC processing counter */
};
/* bits for rq_flags */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index af692bff44ab..83bee19df104 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
return rqstp->rq_procinfo->pc_name;
return "unknown";
}
-
+EXPORT_SYMBOL_GPL(svc_proc_name);
/**
* svc_encode_result_payload - mark a range of bytes as a result payload
--
2.41.0
Introduce verbose parameter to utility routine in order to reduce output
verbosity. This is a preliminary patch to add rpc_status entry in nfsd
debug filesystem in order to dump pending RPC requests debugging
information.
Reviewed-by: NeilBrown <[email protected]>
Reviewed-by: Jeff Layton <[email protected]>
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
include/linux/sunrpc/svc_xprt.h | 12 ++++++------
net/sunrpc/svc_xprt.c | 2 +-
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index fa55d12dc765..2615cc535199 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -213,21 +213,21 @@ static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt)
}
static inline char *__svc_print_addr(const struct sockaddr *addr,
- char *buf, const size_t len)
+ char *buf, const size_t len,
+ bool verbose)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)addr;
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr;
switch (addr->sa_family) {
case AF_INET:
- snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr,
- ntohs(sin->sin_port));
+ snprintf(buf, len, "%pI4%s%hu", &sin->sin_addr,
+ verbose ? ", port=" : " ", ntohs(sin->sin_port));
break;
case AF_INET6:
- snprintf(buf, len, "%pI6, port=%u",
- &sin6->sin6_addr,
- ntohs(sin6->sin6_port));
+ snprintf(buf, len, "%pI6%s%hu", &sin6->sin6_addr,
+ verbose ? ", port=" : " ", ntohs(sin6->sin6_port));
break;
default:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d3280ae70e36..940be13d02b0 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
*/
char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
{
- return __svc_print_addr(svc_addr(rqstp), buf, len);
+ return __svc_print_addr(svc_addr(rqstp), buf, len, true);
}
EXPORT_SYMBOL_GPL(svc_print_addr);
--
2.41.0
On Thu, 2023-08-10 at 10:39 +0200, Lorenzo Bianconi wrote:
> Introduce rpc_status entry in nfsd debug filesystem in order to dump
> pending RPC requests debugging information.
>
> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
> Reviewed-by: NeilBrown <[email protected]>
> Reviewed-by: Jeff Layton <[email protected]>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> fs/nfsd/nfs4proc.c | 4 +-
> fs/nfsd/nfsctl.c | 9 +++
> fs/nfsd/nfsd.h | 7 ++
> fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
> include/linux/sunrpc/svc.h | 1 +
> net/sunrpc/svc.c | 2 +-
> 6 files changed, 159 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index aa4f21f92deb..ff5a1dddc0ed 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
>
> static const struct nfsd4_operation nfsd4_ops[];
>
> -static const char *nfsd4_op_name(unsigned opnum);
> -
> /*
> * Enforce NFSv4.1 COMPOUND ordering rules:
> *
> @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
> }
> }
>
> -static const char *nfsd4_op_name(unsigned opnum)
> +const char *nfsd4_op_name(unsigned int opnum)
> {
> if (opnum < ARRAY_SIZE(nfsd4_ops))
> return nfsd4_ops[opnum].op_name;
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index dad88bff3b9e..83eb5c6d894e 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -47,6 +47,7 @@ enum {
> NFSD_MaxBlkSize,
> NFSD_MaxConnections,
> NFSD_Filecache,
> + NFSD_Rpc_Status,
> /*
> * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
> * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
> @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
> return file_inode(file)->i_sb->s_fs_info;
> }
>
> +static const struct file_operations nfsd_rpc_status_operations = {
> + .open = nfsd_rpc_status_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = nfsd_stats_release,
> +};
> +
> /*
> * write_unlock_ip - Release all locks used by a client
> *
> @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
> [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
> [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
> [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
> + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
> #ifdef CONFIG_NFSD_V4
> [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 55b9d85ed71b..3e8a47b93fd4 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
> int nfsd_set_nrthreads(int n, int *, struct net *);
> int nfsd_pool_stats_open(struct inode *, struct file *);
> int nfsd_stats_release(struct inode *, struct file *);
> +int nfsd_rpc_status_open(struct inode *inode, struct file *file);
> void nfsd_shutdown_threads(struct net *net);
>
> static inline void nfsd_put(struct net *net)
> @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
>
> extern void nfsd4_init_leases_net(struct nfsd_net *nn);
>
> +const char *nfsd4_op_name(unsigned int opnum);
> #else /* CONFIG_NFSD_V4 */
> static inline int nfsd4_is_junction(struct dentry *dentry)
> {
> return 0;
> }
>
> +static inline const char *nfsd4_op_name(unsigned int opnum)
> +{
> + return "unknown_operation";
> +}
> +
> static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
>
> #define register_cld_notifier() 0
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 460219030ce1..237be14d3a11 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> goto out_decode_err;
>
> + /*
> + * Release rq_status_counter setting it to an odd value after the rpc
> + * request has been properly parsed. rq_status_counter is used to
> + * notify the consumers if the rqstp fields are stable
> + * (rq_status_counter is odd) or not meaningful (rq_status_counter
> + * is even).
> + */
> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> +
> rp = NULL;
> switch (nfsd_cache_lookup(rqstp, &rp)) {
> case RC_DOIT:
> @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> goto out_encode_err;
>
> + /*
> + * Release rq_status_counter setting it to an even value after the rpc
> + * request has been properly processed.
> + */
> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> +
> nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> out_cached_reply:
> return 1;
> @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
> mutex_unlock(&nfsd_mutex);
> return ret;
> }
> +
> +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
> +{
> + struct inode *inode = file_inode(m->file);
> + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
> + int i;
> +
> + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
> +
> + rcu_read_lock();
> +
> + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> + struct svc_rqst *rqstp;
> +
> + list_for_each_entry_rcu(rqstp,
> + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
> + rq_all) {
> + struct {
> + struct sockaddr daddr;
> + struct sockaddr saddr;
> + unsigned long rq_flags;
> + const char *pc_name;
> + ktime_t rq_stime;
> + __be32 rq_xid;
> + u32 rq_vers;
> + /* NFSv4 compund */
> + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> + } rqstp_info;
> + unsigned int status_counter;
> + char buf[RPC_MAX_ADDRBUFLEN];
> + int opcnt = 0;
> +
> + /*
> + * Acquire rq_status_counter before parsing the rqst
> + * fields. rq_status_counter is set to an odd value in
> + * order to notify the consumers the rqstp fields are
> + * meaningful.
> + */
> + status_counter = smp_load_acquire(&rqstp->rq_status_counter);
> + if (!(status_counter & 1))
> + continue;
> +
> + rqstp_info.rq_xid = rqstp->rq_xid;
> + rqstp_info.rq_flags = rqstp->rq_flags;
> + rqstp_info.rq_vers = rqstp->rq_vers;
> + rqstp_info.pc_name = svc_proc_name(rqstp);
> + rqstp_info.rq_stime = rqstp->rq_stime;
> + memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
> + sizeof(struct sockaddr));
> + memcpy(&rqstp_info.saddr, svc_addr(rqstp),
> + sizeof(struct sockaddr));
> +
> +#ifdef CONFIG_NFSD_V4
> + if (rqstp->rq_vers == NFS4_VERSION &&
> + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> + /* NFSv4 compund */
> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> + int j;
> +
> + opcnt = args->opcnt;
> + for (j = 0; j < opcnt; j++) {
> + struct nfsd4_op *op = &args->ops[j];
> +
> + rqstp_info.opnum[j] = op->opnum;
> + }
> + }
> +#endif /* CONFIG_NFSD_V4 */
> +
> + /*
> + * Acquire rq_status_counter before reporting the rqst
> + * fields to the user.
> + */
> + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> + continue;
> +
> + seq_printf(m,
> + "%04u %04ld NFSv%d %s %016lld",
> + be32_to_cpu(rqstp_info.rq_xid),
> + rqstp_info.rq_flags,
> + rqstp_info.rq_vers,
> + rqstp_info.pc_name,
> + ktime_to_us(rqstp_info.rq_stime));
> + seq_printf(m, " %s",
> + __svc_print_addr(&rqstp_info.saddr, buf,
> + sizeof(buf), false));
> + seq_printf(m, " %s",
> + __svc_print_addr(&rqstp_info.daddr, buf,
> + sizeof(buf), false));
> + if (opcnt) {
> + int j;
> +
> + seq_puts(m, " ");
> + for (j = 0; j < opcnt; j++)
> + seq_printf(m, "%s%s",
> + nfsd4_op_name(rqstp_info.opnum[j]),
> + j == opcnt - 1 ? "" : ":");
> + } else {
> + seq_puts(m, " -");
> + }
> + seq_puts(m, "\n");
> + }
> + }
> +
> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
> +/**
> + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> + * @inode: entry inode pointer.
> + * @file: entry file pointer.
> + *
> + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> + */
> +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> +{
> + int ret = nfsd_stats_open(inode);
> +
> + if (ret)
> + return ret;
> +
> + return single_open(file, nfsd_rpc_status_show, inode->i_private);
> +}
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 7838b37bcfa8..b49c0470b4fe 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -251,6 +251,7 @@ struct svc_rqst {
> * net namespace
> */
> void ** rq_lease_breaker; /* The v4 client breaking a lease */
> + unsigned int rq_status_counter; /* RPC processing counter */
> };
>
> /* bits for rq_flags */
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index af692bff44ab..83bee19df104 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> return rqstp->rq_procinfo->pc_name;
> return "unknown";
> }
> -
> +EXPORT_SYMBOL_GPL(svc_proc_name);
>
> /**
> * svc_encode_result_payload - mark a range of bytes as a result payload
LGTM (at least until we decide the right sort of interface for this
thing).
Reviewed-by: Jeff Layton <[email protected]>
On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote:
> Introduce rpc_status entry in nfsd debug filesystem in order to dump
> pending RPC requests debugging information.
>
> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
> Reviewed-by: NeilBrown <[email protected]>
> Reviewed-by: Jeff Layton <[email protected]>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> fs/nfsd/nfs4proc.c | 4 +-
> fs/nfsd/nfsctl.c | 9 +++
> fs/nfsd/nfsd.h | 7 ++
> fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
> include/linux/sunrpc/svc.h | 1 +
> net/sunrpc/svc.c | 2 +-
> 6 files changed, 159 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index aa4f21f92deb..ff5a1dddc0ed 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
>
> static const struct nfsd4_operation nfsd4_ops[];
>
> -static const char *nfsd4_op_name(unsigned opnum);
> -
> /*
> * Enforce NFSv4.1 COMPOUND ordering rules:
> *
> @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
> }
> }
>
> -static const char *nfsd4_op_name(unsigned opnum)
> +const char *nfsd4_op_name(unsigned int opnum)
> {
> if (opnum < ARRAY_SIZE(nfsd4_ops))
> return nfsd4_ops[opnum].op_name;
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index dad88bff3b9e..83eb5c6d894e 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -47,6 +47,7 @@ enum {
> NFSD_MaxBlkSize,
> NFSD_MaxConnections,
> NFSD_Filecache,
> + NFSD_Rpc_Status,
> /*
> * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
> * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
> @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
> return file_inode(file)->i_sb->s_fs_info;
> }
>
> +static const struct file_operations nfsd_rpc_status_operations = {
> + .open = nfsd_rpc_status_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = nfsd_stats_release,
> +};
> +
> /*
> * write_unlock_ip - Release all locks used by a client
> *
> @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
> [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
> [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
> [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
> + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
> #ifdef CONFIG_NFSD_V4
> [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
> [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 55b9d85ed71b..3e8a47b93fd4 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
> int nfsd_set_nrthreads(int n, int *, struct net *);
> int nfsd_pool_stats_open(struct inode *, struct file *);
> int nfsd_stats_release(struct inode *, struct file *);
> +int nfsd_rpc_status_open(struct inode *inode, struct file *file);
> void nfsd_shutdown_threads(struct net *net);
>
> static inline void nfsd_put(struct net *net)
> @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
>
> extern void nfsd4_init_leases_net(struct nfsd_net *nn);
>
> +const char *nfsd4_op_name(unsigned int opnum);
> #else /* CONFIG_NFSD_V4 */
> static inline int nfsd4_is_junction(struct dentry *dentry)
> {
> return 0;
> }
>
> +static inline const char *nfsd4_op_name(unsigned int opnum)
> +{
> + return "unknown_operation";
> +}
> +
> static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
>
> #define register_cld_notifier() 0
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 460219030ce1..237be14d3a11 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> goto out_decode_err;
>
> + /*
> + * Release rq_status_counter setting it to an odd value after the rpc
> + * request has been properly parsed. rq_status_counter is used to
> + * notify the consumers if the rqstp fields are stable
> + * (rq_status_counter is odd) or not meaningful (rq_status_counter
> + * is even).
> + */
> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> +
> rp = NULL;
> switch (nfsd_cache_lookup(rqstp, &rp)) {
> case RC_DOIT:
> @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> goto out_encode_err;
>
> + /*
> + * Release rq_status_counter setting it to an even value after the rpc
> + * request has been properly processed.
> + */
> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> +
> nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> out_cached_reply:
> return 1;
> @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
> mutex_unlock(&nfsd_mutex);
> return ret;
> }
> +
> +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
> +{
> + struct inode *inode = file_inode(m->file);
> + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
> + int i;
> +
> + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
> +
> + rcu_read_lock();
> +
> + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> + struct svc_rqst *rqstp;
> +
> + list_for_each_entry_rcu(rqstp,
> + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
> + rq_all) {
> + struct {
> + struct sockaddr daddr;
> + struct sockaddr saddr;
> + unsigned long rq_flags;
> + const char *pc_name;
> + ktime_t rq_stime;
> + __be32 rq_xid;
> + u32 rq_vers;
> + /* NFSv4 compund */
> + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> + } rqstp_info;
> + unsigned int status_counter;
> + char buf[RPC_MAX_ADDRBUFLEN];
> + int opcnt = 0;
> +
> + /*
> + * Acquire rq_status_counter before parsing the rqst
> + * fields. rq_status_counter is set to an odd value in
> + * order to notify the consumers the rqstp fields are
> + * meaningful.
> + */
> + status_counter = smp_load_acquire(&rqstp->rq_status_counter);
> + if (!(status_counter & 1))
> + continue;
> +
> + rqstp_info.rq_xid = rqstp->rq_xid;
> + rqstp_info.rq_flags = rqstp->rq_flags;
> + rqstp_info.rq_vers = rqstp->rq_vers;
> + rqstp_info.pc_name = svc_proc_name(rqstp);
> + rqstp_info.rq_stime = rqstp->rq_stime;
> + memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
> + sizeof(struct sockaddr));
> + memcpy(&rqstp_info.saddr, svc_addr(rqstp),
> + sizeof(struct sockaddr));
> +
> +#ifdef CONFIG_NFSD_V4
> + if (rqstp->rq_vers == NFS4_VERSION &&
> + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> + /* NFSv4 compund */
> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> + int j;
> +
> + opcnt = args->opcnt;
> + for (j = 0; j < opcnt; j++) {
> + struct nfsd4_op *op = &args->ops[j];
> +
> + rqstp_info.opnum[j] = op->opnum;
> + }
> + }
> +#endif /* CONFIG_NFSD_V4 */
> +
> + /*
> + * Acquire rq_status_counter before reporting the rqst
> + * fields to the user.
> + */
> + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> + continue;
> +
> + seq_printf(m,
> + "%04u %04ld NFSv%d %s %016lld",
> + be32_to_cpu(rqstp_info.rq_xid),
It's proper to display XIDs as 8-hexit hexadecimal values, as you
did before. "0x%08x" is the correct format, as that matches the
XID display format used by Wireshark and our tracepoints.
> + rqstp_info.rq_flags,
I didn't mean for you to change the flags format to decimal. I was
trying to point out that the content of this field will need to be
displayed symbolically if we care about an easy user experience.
Let's stick with hex here. A clever user can read the bits directly
from that. All others should have a tool that parses this field and
prints the list of bits in it symbolically.
> + rqstp_info.rq_vers,
> + rqstp_info.pc_name,
> + ktime_to_us(rqstp_info.rq_stime));
> + seq_printf(m, " %s",
> + __svc_print_addr(&rqstp_info.saddr, buf,
> + sizeof(buf), false));
> + seq_printf(m, " %s",
> + __svc_print_addr(&rqstp_info.daddr, buf,
> + sizeof(buf), false));
> + if (opcnt) {
> + int j;
> +
> + seq_puts(m, " ");
> + for (j = 0; j < opcnt; j++)
> + seq_printf(m, "%s%s",
> + nfsd4_op_name(rqstp_info.opnum[j]),
> + j == opcnt - 1 ? "" : ":");
> + } else {
> + seq_puts(m, " -");
> + }
This looks correct to me.
I'm leaning towards moving this to a netlink API that can be
extended over time to handle other stats and also act as an NFSD
control plane, similar to other network subsystems.
Any comments, complaints or rotten fruit from anyone?
> + seq_puts(m, "\n");
> + }
> + }
> +
> + rcu_read_unlock();
> +
> + return 0;
> +}
> +
> +/**
> + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> + * @inode: entry inode pointer.
> + * @file: entry file pointer.
> + *
> + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> + */
> +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> +{
> + int ret = nfsd_stats_open(inode);
> +
> + if (ret)
> + return ret;
> +
> + return single_open(file, nfsd_rpc_status_show, inode->i_private);
> +}
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index 7838b37bcfa8..b49c0470b4fe 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -251,6 +251,7 @@ struct svc_rqst {
> * net namespace
> */
> void ** rq_lease_breaker; /* The v4 client breaking a lease */
> + unsigned int rq_status_counter; /* RPC processing counter */
> };
>
> /* bits for rq_flags */
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index af692bff44ab..83bee19df104 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> return rqstp->rq_procinfo->pc_name;
> return "unknown";
> }
> -
> +EXPORT_SYMBOL_GPL(svc_proc_name);
>
> /**
> * svc_encode_result_payload - mark a range of bytes as a result payload
> --
> 2.41.0
>
--
Chuck Lever
[...]
> > +#ifdef CONFIG_NFSD_V4
> > + if (rqstp->rq_vers == NFS4_VERSION &&
> > + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> > + /* NFSv4 compund */
> > + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> > + int j;
> > +
> > + opcnt = args->opcnt;
> > + for (j = 0; j < opcnt; j++) {
> > + struct nfsd4_op *op = &args->ops[j];
> > +
> > + rqstp_info.opnum[j] = op->opnum;
> > + }
> > + }
> > +#endif /* CONFIG_NFSD_V4 */
> > +
> > + /*
> > + * Acquire rq_status_counter before reporting the rqst
> > + * fields to the user.
> > + */
> > + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> > + continue;
> > +
> > + seq_printf(m,
> > + "%04u %04ld NFSv%d %s %016lld",
> > + be32_to_cpu(rqstp_info.rq_xid),
>
> It's proper to display XIDs as 8-hexit hexadecimal values, as you
> did before. "0x%08x" is the correct format, as that matches the
> XID display format used by Wireshark and our tracepoints.
ops, I misunderstood your previous comments. I will address them in v7 if there
are no other comments.
Regards,
Lorenzo
>
>
> > + rqstp_info.rq_flags,
>
> I didn't mean for you to change the flags format to decimal. I was
> trying to point out that the content of this field will need to be
> displayed symbolically if we care about an easy user experience.
>
> Let's stick with hex here. A clever user can read the bits directly
> from that. All others should have a tool that parses this field and
> prints the list of bits in it symbolically.
>
>
> > + rqstp_info.rq_vers,
> > + rqstp_info.pc_name,
> > + ktime_to_us(rqstp_info.rq_stime));
> > + seq_printf(m, " %s",
> > + __svc_print_addr(&rqstp_info.saddr, buf,
> > + sizeof(buf), false));
> > + seq_printf(m, " %s",
> > + __svc_print_addr(&rqstp_info.daddr, buf,
> > + sizeof(buf), false));
> > + if (opcnt) {
> > + int j;
> > +
> > + seq_puts(m, " ");
> > + for (j = 0; j < opcnt; j++)
> > + seq_printf(m, "%s%s",
> > + nfsd4_op_name(rqstp_info.opnum[j]),
> > + j == opcnt - 1 ? "" : ":");
> > + } else {
> > + seq_puts(m, " -");
> > + }
>
> This looks correct to me.
>
> I'm leaning towards moving this to a netlink API that can be
> extended over time to handle other stats and also act as an NFSD
> control plane, similar to other network subsystems.
>
> Any comments, complaints or rotten fruit from anyone?
>
>
> > + seq_puts(m, "\n");
> > + }
> > + }
> > +
> > + rcu_read_unlock();
> > +
> > + return 0;
> > +}
> > +
> > +/**
> > + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> > + * @inode: entry inode pointer.
> > + * @file: entry file pointer.
> > + *
> > + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> > + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> > + */
> > +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> > +{
> > + int ret = nfsd_stats_open(inode);
> > +
> > + if (ret)
> > + return ret;
> > +
> > + return single_open(file, nfsd_rpc_status_show, inode->i_private);
> > +}
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index 7838b37bcfa8..b49c0470b4fe 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -251,6 +251,7 @@ struct svc_rqst {
> > * net namespace
> > */
> > void ** rq_lease_breaker; /* The v4 client breaking a lease */
> > + unsigned int rq_status_counter; /* RPC processing counter */
> > };
> >
> > /* bits for rq_flags */
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index af692bff44ab..83bee19df104 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> > return rqstp->rq_procinfo->pc_name;
> > return "unknown";
> > }
> > -
> > +EXPORT_SYMBOL_GPL(svc_proc_name);
> >
> > /**
> > * svc_encode_result_payload - mark a range of bytes as a result payload
> > --
> > 2.41.0
> >
>
> --
> Chuck Lever
On Fri, 11 Aug 2023, Chuck Lever wrote:
> On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote:
> > Introduce rpc_status entry in nfsd debug filesystem in order to dump
> > pending RPC requests debugging information.
> >
> > Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
> > Reviewed-by: NeilBrown <[email protected]>
> > Reviewed-by: Jeff Layton <[email protected]>
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > fs/nfsd/nfs4proc.c | 4 +-
> > fs/nfsd/nfsctl.c | 9 +++
> > fs/nfsd/nfsd.h | 7 ++
> > fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
> > include/linux/sunrpc/svc.h | 1 +
> > net/sunrpc/svc.c | 2 +-
> > 6 files changed, 159 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index aa4f21f92deb..ff5a1dddc0ed 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
> >
> > static const struct nfsd4_operation nfsd4_ops[];
> >
> > -static const char *nfsd4_op_name(unsigned opnum);
> > -
> > /*
> > * Enforce NFSv4.1 COMPOUND ordering rules:
> > *
> > @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
> > }
> > }
> >
> > -static const char *nfsd4_op_name(unsigned opnum)
> > +const char *nfsd4_op_name(unsigned int opnum)
> > {
> > if (opnum < ARRAY_SIZE(nfsd4_ops))
> > return nfsd4_ops[opnum].op_name;
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index dad88bff3b9e..83eb5c6d894e 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -47,6 +47,7 @@ enum {
> > NFSD_MaxBlkSize,
> > NFSD_MaxConnections,
> > NFSD_Filecache,
> > + NFSD_Rpc_Status,
> > /*
> > * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
> > * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
> > @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
> > return file_inode(file)->i_sb->s_fs_info;
> > }
> >
> > +static const struct file_operations nfsd_rpc_status_operations = {
> > + .open = nfsd_rpc_status_open,
> > + .read = seq_read,
> > + .llseek = seq_lseek,
> > + .release = nfsd_stats_release,
> > +};
> > +
> > /*
> > * write_unlock_ip - Release all locks used by a client
> > *
> > @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
> > [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
> > [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
> > [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
> > + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
> > #ifdef CONFIG_NFSD_V4
> > [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
> > [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index 55b9d85ed71b..3e8a47b93fd4 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
> > int nfsd_set_nrthreads(int n, int *, struct net *);
> > int nfsd_pool_stats_open(struct inode *, struct file *);
> > int nfsd_stats_release(struct inode *, struct file *);
> > +int nfsd_rpc_status_open(struct inode *inode, struct file *file);
> > void nfsd_shutdown_threads(struct net *net);
> >
> > static inline void nfsd_put(struct net *net)
> > @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> >
> > extern void nfsd4_init_leases_net(struct nfsd_net *nn);
> >
> > +const char *nfsd4_op_name(unsigned int opnum);
> > #else /* CONFIG_NFSD_V4 */
> > static inline int nfsd4_is_junction(struct dentry *dentry)
> > {
> > return 0;
> > }
> >
> > +static inline const char *nfsd4_op_name(unsigned int opnum)
> > +{
> > + return "unknown_operation";
> > +}
> > +
> > static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
> >
> > #define register_cld_notifier() 0
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 460219030ce1..237be14d3a11 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> > if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> > goto out_decode_err;
> >
> > + /*
> > + * Release rq_status_counter setting it to an odd value after the rpc
> > + * request has been properly parsed. rq_status_counter is used to
> > + * notify the consumers if the rqstp fields are stable
> > + * (rq_status_counter is odd) or not meaningful (rq_status_counter
> > + * is even).
> > + */
> > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> > +
> > rp = NULL;
> > switch (nfsd_cache_lookup(rqstp, &rp)) {
> > case RC_DOIT:
> > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> > if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> > goto out_encode_err;
> >
> > + /*
> > + * Release rq_status_counter setting it to an even value after the rpc
> > + * request has been properly processed.
> > + */
> > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> > +
> > nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> > out_cached_reply:
> > return 1;
> > @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
> > mutex_unlock(&nfsd_mutex);
> > return ret;
> > }
> > +
> > +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
> > +{
> > + struct inode *inode = file_inode(m->file);
> > + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
> > + int i;
> > +
> > + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
> > +
> > + rcu_read_lock();
> > +
> > + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> > + struct svc_rqst *rqstp;
> > +
> > + list_for_each_entry_rcu(rqstp,
> > + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
> > + rq_all) {
> > + struct {
> > + struct sockaddr daddr;
> > + struct sockaddr saddr;
> > + unsigned long rq_flags;
> > + const char *pc_name;
> > + ktime_t rq_stime;
> > + __be32 rq_xid;
> > + u32 rq_vers;
> > + /* NFSv4 compund */
> > + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> > + } rqstp_info;
> > + unsigned int status_counter;
> > + char buf[RPC_MAX_ADDRBUFLEN];
> > + int opcnt = 0;
> > +
> > + /*
> > + * Acquire rq_status_counter before parsing the rqst
> > + * fields. rq_status_counter is set to an odd value in
> > + * order to notify the consumers the rqstp fields are
> > + * meaningful.
> > + */
> > + status_counter = smp_load_acquire(&rqstp->rq_status_counter);
> > + if (!(status_counter & 1))
> > + continue;
> > +
> > + rqstp_info.rq_xid = rqstp->rq_xid;
> > + rqstp_info.rq_flags = rqstp->rq_flags;
> > + rqstp_info.rq_vers = rqstp->rq_vers;
> > + rqstp_info.pc_name = svc_proc_name(rqstp);
> > + rqstp_info.rq_stime = rqstp->rq_stime;
> > + memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
> > + sizeof(struct sockaddr));
> > + memcpy(&rqstp_info.saddr, svc_addr(rqstp),
> > + sizeof(struct sockaddr));
> > +
> > +#ifdef CONFIG_NFSD_V4
> > + if (rqstp->rq_vers == NFS4_VERSION &&
> > + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> > + /* NFSv4 compund */
> > + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> > + int j;
> > +
> > + opcnt = args->opcnt;
> > + for (j = 0; j < opcnt; j++) {
> > + struct nfsd4_op *op = &args->ops[j];
> > +
> > + rqstp_info.opnum[j] = op->opnum;
> > + }
> > + }
> > +#endif /* CONFIG_NFSD_V4 */
> > +
> > + /*
> > + * Acquire rq_status_counter before reporting the rqst
> > + * fields to the user.
> > + */
> > + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> > + continue;
> > +
> > + seq_printf(m,
> > + "%04u %04ld NFSv%d %s %016lld",
> > + be32_to_cpu(rqstp_info.rq_xid),
>
> It's proper to display XIDs as 8-hexit hexadecimal values, as you
> did before. "0x%08x" is the correct format, as that matches the
> XID display format used by Wireshark and our tracepoints.
>
>
> > + rqstp_info.rq_flags,
>
> I didn't mean for you to change the flags format to decimal. I was
> trying to point out that the content of this field will need to be
> displayed symbolically if we care about an easy user experience.
>
> Let's stick with hex here. A clever user can read the bits directly
> from that. All others should have a tool that parses this field and
> prints the list of bits in it symbolically.
>
>
> > + rqstp_info.rq_vers,
> > + rqstp_info.pc_name,
> > + ktime_to_us(rqstp_info.rq_stime));
> > + seq_printf(m, " %s",
> > + __svc_print_addr(&rqstp_info.saddr, buf,
> > + sizeof(buf), false));
> > + seq_printf(m, " %s",
> > + __svc_print_addr(&rqstp_info.daddr, buf,
> > + sizeof(buf), false));
> > + if (opcnt) {
> > + int j;
> > +
> > + seq_puts(m, " ");
> > + for (j = 0; j < opcnt; j++)
> > + seq_printf(m, "%s%s",
> > + nfsd4_op_name(rqstp_info.opnum[j]),
> > + j == opcnt - 1 ? "" : ":");
> > + } else {
> > + seq_puts(m, " -");
> > + }
>
> This looks correct to me.
>
> I'm leaning towards moving this to a netlink API that can be
> extended over time to handle other stats and also act as an NFSD
> control plane, similar to other network subsystems.
>
> Any comments, complaints or rotten fruit from anyone?
I think netlink is the best way forward. 'cat' is nice, but not
necessary. We have an established path for distributing tools for
working with nfsd so we get easily get a suitable tool into the hands of
our users.
The only fruit I have relates to the name "rpc_status", and it probably
over-ripe rather than rotten :-)
In the context of RPC, "status" means the success/failure result of a
request. That is not what this file provides. It is a list of active
requests. So maybe "active_rpc".
One advantage of netlink is that the API is based on numbers, not names!
NeilBrown
>
>
> > + seq_puts(m, "\n");
> > + }
> > + }
> > +
> > + rcu_read_unlock();
> > +
> > + return 0;
> > +}
> > +
> > +/**
> > + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> > + * @inode: entry inode pointer.
> > + * @file: entry file pointer.
> > + *
> > + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> > + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> > + */
> > +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> > +{
> > + int ret = nfsd_stats_open(inode);
> > +
> > + if (ret)
> > + return ret;
> > +
> > + return single_open(file, nfsd_rpc_status_show, inode->i_private);
> > +}
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index 7838b37bcfa8..b49c0470b4fe 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -251,6 +251,7 @@ struct svc_rqst {
> > * net namespace
> > */
> > void ** rq_lease_breaker; /* The v4 client breaking a lease */
> > + unsigned int rq_status_counter; /* RPC processing counter */
> > };
> >
> > /* bits for rq_flags */
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index af692bff44ab..83bee19df104 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> > return rqstp->rq_procinfo->pc_name;
> > return "unknown";
> > }
> > -
> > +EXPORT_SYMBOL_GPL(svc_proc_name);
> >
> > /**
> > * svc_encode_result_payload - mark a range of bytes as a result payload
> > --
> > 2.41.0
> >
>
> --
> Chuck Lever
>
> On Aug 10, 2023, at 4:24 PM, NeilBrown <[email protected]> wrote:
>
> On Fri, 11 Aug 2023, Chuck Lever wrote:
>> On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote:
>>> Introduce rpc_status entry in nfsd debug filesystem in order to dump
>>> pending RPC requests debugging information.
>>>
>>> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
>>> Reviewed-by: NeilBrown <[email protected]>
>>> Reviewed-by: Jeff Layton <[email protected]>
>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
>>> ---
>>> fs/nfsd/nfs4proc.c | 4 +-
>>> fs/nfsd/nfsctl.c | 9 +++
>>> fs/nfsd/nfsd.h | 7 ++
>>> fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
>>> include/linux/sunrpc/svc.h | 1 +
>>> net/sunrpc/svc.c | 2 +-
>>> 6 files changed, 159 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>>> index aa4f21f92deb..ff5a1dddc0ed 100644
>>> --- a/fs/nfsd/nfs4proc.c
>>> +++ b/fs/nfsd/nfs4proc.c
>>> @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
>>>
>>> static const struct nfsd4_operation nfsd4_ops[];
>>>
>>> -static const char *nfsd4_op_name(unsigned opnum);
>>> -
>>> /*
>>> * Enforce NFSv4.1 COMPOUND ordering rules:
>>> *
>>> @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
>>> }
>>> }
>>>
>>> -static const char *nfsd4_op_name(unsigned opnum)
>>> +const char *nfsd4_op_name(unsigned int opnum)
>>> {
>>> if (opnum < ARRAY_SIZE(nfsd4_ops))
>>> return nfsd4_ops[opnum].op_name;
>>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
>>> index dad88bff3b9e..83eb5c6d894e 100644
>>> --- a/fs/nfsd/nfsctl.c
>>> +++ b/fs/nfsd/nfsctl.c
>>> @@ -47,6 +47,7 @@ enum {
>>> NFSD_MaxBlkSize,
>>> NFSD_MaxConnections,
>>> NFSD_Filecache,
>>> + NFSD_Rpc_Status,
>>> /*
>>> * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
>>> * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
>>> @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
>>> return file_inode(file)->i_sb->s_fs_info;
>>> }
>>>
>>> +static const struct file_operations nfsd_rpc_status_operations = {
>>> + .open = nfsd_rpc_status_open,
>>> + .read = seq_read,
>>> + .llseek = seq_lseek,
>>> + .release = nfsd_stats_release,
>>> +};
>>> +
>>> /*
>>> * write_unlock_ip - Release all locks used by a client
>>> *
>>> @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
>>> [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
>>> [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
>>> [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
>>> + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
>>> #ifdef CONFIG_NFSD_V4
>>> [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
>>> [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
>>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
>>> index 55b9d85ed71b..3e8a47b93fd4 100644
>>> --- a/fs/nfsd/nfsd.h
>>> +++ b/fs/nfsd/nfsd.h
>>> @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
>>> int nfsd_set_nrthreads(int n, int *, struct net *);
>>> int nfsd_pool_stats_open(struct inode *, struct file *);
>>> int nfsd_stats_release(struct inode *, struct file *);
>>> +int nfsd_rpc_status_open(struct inode *inode, struct file *file);
>>> void nfsd_shutdown_threads(struct net *net);
>>>
>>> static inline void nfsd_put(struct net *net)
>>> @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
>>>
>>> extern void nfsd4_init_leases_net(struct nfsd_net *nn);
>>>
>>> +const char *nfsd4_op_name(unsigned int opnum);
>>> #else /* CONFIG_NFSD_V4 */
>>> static inline int nfsd4_is_junction(struct dentry *dentry)
>>> {
>>> return 0;
>>> }
>>>
>>> +static inline const char *nfsd4_op_name(unsigned int opnum)
>>> +{
>>> + return "unknown_operation";
>>> +}
>>> +
>>> static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
>>>
>>> #define register_cld_notifier() 0
>>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
>>> index 460219030ce1..237be14d3a11 100644
>>> --- a/fs/nfsd/nfssvc.c
>>> +++ b/fs/nfsd/nfssvc.c
>>> @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
>>> if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
>>> goto out_decode_err;
>>>
>>> + /*
>>> + * Release rq_status_counter setting it to an odd value after the rpc
>>> + * request has been properly parsed. rq_status_counter is used to
>>> + * notify the consumers if the rqstp fields are stable
>>> + * (rq_status_counter is odd) or not meaningful (rq_status_counter
>>> + * is even).
>>> + */
>>> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
>>> +
>>> rp = NULL;
>>> switch (nfsd_cache_lookup(rqstp, &rp)) {
>>> case RC_DOIT:
>>> @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
>>> if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
>>> goto out_encode_err;
>>>
>>> + /*
>>> + * Release rq_status_counter setting it to an even value after the rpc
>>> + * request has been properly processed.
>>> + */
>>> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
>>> +
>>> nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
>>> out_cached_reply:
>>> return 1;
>>> @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
>>> mutex_unlock(&nfsd_mutex);
>>> return ret;
>>> }
>>> +
>>> +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
>>> +{
>>> + struct inode *inode = file_inode(m->file);
>>> + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
>>> + int i;
>>> +
>>> + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
>>> +
>>> + rcu_read_lock();
>>> +
>>> + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
>>> + struct svc_rqst *rqstp;
>>> +
>>> + list_for_each_entry_rcu(rqstp,
>>> + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
>>> + rq_all) {
>>> + struct {
>>> + struct sockaddr daddr;
>>> + struct sockaddr saddr;
>>> + unsigned long rq_flags;
>>> + const char *pc_name;
>>> + ktime_t rq_stime;
>>> + __be32 rq_xid;
>>> + u32 rq_vers;
>>> + /* NFSv4 compund */
>>> + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
>>> + } rqstp_info;
>>> + unsigned int status_counter;
>>> + char buf[RPC_MAX_ADDRBUFLEN];
>>> + int opcnt = 0;
>>> +
>>> + /*
>>> + * Acquire rq_status_counter before parsing the rqst
>>> + * fields. rq_status_counter is set to an odd value in
>>> + * order to notify the consumers the rqstp fields are
>>> + * meaningful.
>>> + */
>>> + status_counter = smp_load_acquire(&rqstp->rq_status_counter);
>>> + if (!(status_counter & 1))
>>> + continue;
>>> +
>>> + rqstp_info.rq_xid = rqstp->rq_xid;
>>> + rqstp_info.rq_flags = rqstp->rq_flags;
>>> + rqstp_info.rq_vers = rqstp->rq_vers;
>>> + rqstp_info.pc_name = svc_proc_name(rqstp);
>>> + rqstp_info.rq_stime = rqstp->rq_stime;
>>> + memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
>>> + sizeof(struct sockaddr));
>>> + memcpy(&rqstp_info.saddr, svc_addr(rqstp),
>>> + sizeof(struct sockaddr));
>>> +
>>> +#ifdef CONFIG_NFSD_V4
>>> + if (rqstp->rq_vers == NFS4_VERSION &&
>>> + rqstp->rq_proc == NFSPROC4_COMPOUND) {
>>> + /* NFSv4 compund */
>>> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
>>> + int j;
>>> +
>>> + opcnt = args->opcnt;
>>> + for (j = 0; j < opcnt; j++) {
>>> + struct nfsd4_op *op = &args->ops[j];
>>> +
>>> + rqstp_info.opnum[j] = op->opnum;
>>> + }
>>> + }
>>> +#endif /* CONFIG_NFSD_V4 */
>>> +
>>> + /*
>>> + * Acquire rq_status_counter before reporting the rqst
>>> + * fields to the user.
>>> + */
>>> + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
>>> + continue;
>>> +
>>> + seq_printf(m,
>>> + "%04u %04ld NFSv%d %s %016lld",
>>> + be32_to_cpu(rqstp_info.rq_xid),
>>
>> It's proper to display XIDs as 8-hexit hexadecimal values, as you
>> did before. "0x%08x" is the correct format, as that matches the
>> XID display format used by Wireshark and our tracepoints.
>>
>>
>>> + rqstp_info.rq_flags,
>>
>> I didn't mean for you to change the flags format to decimal. I was
>> trying to point out that the content of this field will need to be
>> displayed symbolically if we care about an easy user experience.
>>
>> Let's stick with hex here. A clever user can read the bits directly
>> from that. All others should have a tool that parses this field and
>> prints the list of bits in it symbolically.
>>
>>
>>> + rqstp_info.rq_vers,
>>> + rqstp_info.pc_name,
>>> + ktime_to_us(rqstp_info.rq_stime));
>>> + seq_printf(m, " %s",
>>> + __svc_print_addr(&rqstp_info.saddr, buf,
>>> + sizeof(buf), false));
>>> + seq_printf(m, " %s",
>>> + __svc_print_addr(&rqstp_info.daddr, buf,
>>> + sizeof(buf), false));
>>> + if (opcnt) {
>>> + int j;
>>> +
>>> + seq_puts(m, " ");
>>> + for (j = 0; j < opcnt; j++)
>>> + seq_printf(m, "%s%s",
>>> + nfsd4_op_name(rqstp_info.opnum[j]),
>>> + j == opcnt - 1 ? "" : ":");
>>> + } else {
>>> + seq_puts(m, " -");
>>> + }
>>
>> This looks correct to me.
>>
>> I'm leaning towards moving this to a netlink API that can be
>> extended over time to handle other stats and also act as an NFSD
>> control plane, similar to other network subsystems.
>>
>> Any comments, complaints or rotten fruit from anyone?
>
> I think netlink is the best way forward. 'cat' is nice, but not
> necessary. We have an established path for distributing tools for
> working with nfsd so we get easily get a suitable tool into the hands of
> our users.
>
> The only fruit I have relates to the name "rpc_status", and it probably
> over-ripe rather than rotten :-)
> In the context of RPC, "status" means the success/failure result of a
> request. That is not what this file provides. It is a list of active
> requests. So maybe "active_rpc".
> One advantage of netlink is that the API is based on numbers, not names!
There won't be a file name with netlink, so right, it won't be an
issue.
Lorenzo, can you proceed with netlink instead of /proc/fs/nfsd?
Maybe start with a yaml spec file?
> NeilBrown
>
>
>>
>>
>>> + seq_puts(m, "\n");
>>> + }
>>> + }
>>> +
>>> + rcu_read_unlock();
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +/**
>>> + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
>>> + * @inode: entry inode pointer.
>>> + * @file: entry file pointer.
>>> + *
>>> + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
>>> + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
>>> + */
>>> +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
>>> +{
>>> + int ret = nfsd_stats_open(inode);
>>> +
>>> + if (ret)
>>> + return ret;
>>> +
>>> + return single_open(file, nfsd_rpc_status_show, inode->i_private);
>>> +}
>>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>>> index 7838b37bcfa8..b49c0470b4fe 100644
>>> --- a/include/linux/sunrpc/svc.h
>>> +++ b/include/linux/sunrpc/svc.h
>>> @@ -251,6 +251,7 @@ struct svc_rqst {
>>> * net namespace
>>> */
>>> void ** rq_lease_breaker; /* The v4 client breaking a lease */
>>> + unsigned int rq_status_counter; /* RPC processing counter */
>>> };
>>>
>>> /* bits for rq_flags */
>>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>>> index af692bff44ab..83bee19df104 100644
>>> --- a/net/sunrpc/svc.c
>>> +++ b/net/sunrpc/svc.c
>>> @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
>>> return rqstp->rq_procinfo->pc_name;
>>> return "unknown";
>>> }
>>> -
>>> +EXPORT_SYMBOL_GPL(svc_proc_name);
>>>
>>> /**
>>> * svc_encode_result_payload - mark a range of bytes as a result payload
>>> --
>>> 2.41.0
>>>
>>
>> --
>> Chuck Lever
>>
>
--
Chuck Lever
>
>
> > On Aug 10, 2023, at 4:24 PM, NeilBrown <[email protected]> wrote:
> >
> > On Fri, 11 Aug 2023, Chuck Lever wrote:
> >> On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote:
> >>> Introduce rpc_status entry in nfsd debug filesystem in order to dump
> >>> pending RPC requests debugging information.
> >>>
> >>> Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
> >>> Reviewed-by: NeilBrown <[email protected]>
> >>> Reviewed-by: Jeff Layton <[email protected]>
> >>> Signed-off-by: Lorenzo Bianconi <[email protected]>
> >>> ---
> >>> fs/nfsd/nfs4proc.c | 4 +-
> >>> fs/nfsd/nfsctl.c | 9 +++
> >>> fs/nfsd/nfsd.h | 7 ++
> >>> fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++
> >>> include/linux/sunrpc/svc.h | 1 +
> >>> net/sunrpc/svc.c | 2 +-
> >>> 6 files changed, 159 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> >>> index aa4f21f92deb..ff5a1dddc0ed 100644
> >>> --- a/fs/nfsd/nfs4proc.c
> >>> +++ b/fs/nfsd/nfs4proc.c
> >>> @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
> >>>
> >>> static const struct nfsd4_operation nfsd4_ops[];
> >>>
> >>> -static const char *nfsd4_op_name(unsigned opnum);
> >>> -
> >>> /*
> >>> * Enforce NFSv4.1 COMPOUND ordering rules:
> >>> *
> >>> @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
> >>> }
> >>> }
> >>>
> >>> -static const char *nfsd4_op_name(unsigned opnum)
> >>> +const char *nfsd4_op_name(unsigned int opnum)
> >>> {
> >>> if (opnum < ARRAY_SIZE(nfsd4_ops))
> >>> return nfsd4_ops[opnum].op_name;
> >>> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> >>> index dad88bff3b9e..83eb5c6d894e 100644
> >>> --- a/fs/nfsd/nfsctl.c
> >>> +++ b/fs/nfsd/nfsctl.c
> >>> @@ -47,6 +47,7 @@ enum {
> >>> NFSD_MaxBlkSize,
> >>> NFSD_MaxConnections,
> >>> NFSD_Filecache,
> >>> + NFSD_Rpc_Status,
> >>> /*
> >>> * The below MUST come last. Otherwise we leave a hole in nfsd_files[]
> >>> * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
> >>> @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
> >>> return file_inode(file)->i_sb->s_fs_info;
> >>> }
> >>>
> >>> +static const struct file_operations nfsd_rpc_status_operations = {
> >>> + .open = nfsd_rpc_status_open,
> >>> + .read = seq_read,
> >>> + .llseek = seq_lseek,
> >>> + .release = nfsd_stats_release,
> >>> +};
> >>> +
> >>> /*
> >>> * write_unlock_ip - Release all locks used by a client
> >>> *
> >>> @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
> >>> [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
> >>> [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
> >>> [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
> >>> + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
> >>> #ifdef CONFIG_NFSD_V4
> >>> [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
> >>> [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
> >>> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> >>> index 55b9d85ed71b..3e8a47b93fd4 100644
> >>> --- a/fs/nfsd/nfsd.h
> >>> +++ b/fs/nfsd/nfsd.h
> >>> @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *);
> >>> int nfsd_set_nrthreads(int n, int *, struct net *);
> >>> int nfsd_pool_stats_open(struct inode *, struct file *);
> >>> int nfsd_stats_release(struct inode *, struct file *);
> >>> +int nfsd_rpc_status_open(struct inode *inode, struct file *file);
> >>> void nfsd_shutdown_threads(struct net *net);
> >>>
> >>> static inline void nfsd_put(struct net *net)
> >>> @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> >>>
> >>> extern void nfsd4_init_leases_net(struct nfsd_net *nn);
> >>>
> >>> +const char *nfsd4_op_name(unsigned int opnum);
> >>> #else /* CONFIG_NFSD_V4 */
> >>> static inline int nfsd4_is_junction(struct dentry *dentry)
> >>> {
> >>> return 0;
> >>> }
> >>>
> >>> +static inline const char *nfsd4_op_name(unsigned int opnum)
> >>> +{
> >>> + return "unknown_operation";
> >>> +}
> >>> +
> >>> static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
> >>>
> >>> #define register_cld_notifier() 0
> >>> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> >>> index 460219030ce1..237be14d3a11 100644
> >>> --- a/fs/nfsd/nfssvc.c
> >>> +++ b/fs/nfsd/nfssvc.c
> >>> @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >>> if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> >>> goto out_decode_err;
> >>>
> >>> + /*
> >>> + * Release rq_status_counter setting it to an odd value after the rpc
> >>> + * request has been properly parsed. rq_status_counter is used to
> >>> + * notify the consumers if the rqstp fields are stable
> >>> + * (rq_status_counter is odd) or not meaningful (rq_status_counter
> >>> + * is even).
> >>> + */
> >>> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> >>> +
> >>> rp = NULL;
> >>> switch (nfsd_cache_lookup(rqstp, &rp)) {
> >>> case RC_DOIT:
> >>> @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >>> if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> >>> goto out_encode_err;
> >>>
> >>> + /*
> >>> + * Release rq_status_counter setting it to an even value after the rpc
> >>> + * request has been properly processed.
> >>> + */
> >>> + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> >>> +
> >>> nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> >>> out_cached_reply:
> >>> return 1;
> >>> @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
> >>> mutex_unlock(&nfsd_mutex);
> >>> return ret;
> >>> }
> >>> +
> >>> +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
> >>> +{
> >>> + struct inode *inode = file_inode(m->file);
> >>> + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
> >>> + int i;
> >>> +
> >>> + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
> >>> +
> >>> + rcu_read_lock();
> >>> +
> >>> + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> >>> + struct svc_rqst *rqstp;
> >>> +
> >>> + list_for_each_entry_rcu(rqstp,
> >>> + &nn->nfsd_serv->sv_pools[i].sp_all_threads,
> >>> + rq_all) {
> >>> + struct {
> >>> + struct sockaddr daddr;
> >>> + struct sockaddr saddr;
> >>> + unsigned long rq_flags;
> >>> + const char *pc_name;
> >>> + ktime_t rq_stime;
> >>> + __be32 rq_xid;
> >>> + u32 rq_vers;
> >>> + /* NFSv4 compund */
> >>> + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> >>> + } rqstp_info;
> >>> + unsigned int status_counter;
> >>> + char buf[RPC_MAX_ADDRBUFLEN];
> >>> + int opcnt = 0;
> >>> +
> >>> + /*
> >>> + * Acquire rq_status_counter before parsing the rqst
> >>> + * fields. rq_status_counter is set to an odd value in
> >>> + * order to notify the consumers the rqstp fields are
> >>> + * meaningful.
> >>> + */
> >>> + status_counter = smp_load_acquire(&rqstp->rq_status_counter);
> >>> + if (!(status_counter & 1))
> >>> + continue;
> >>> +
> >>> + rqstp_info.rq_xid = rqstp->rq_xid;
> >>> + rqstp_info.rq_flags = rqstp->rq_flags;
> >>> + rqstp_info.rq_vers = rqstp->rq_vers;
> >>> + rqstp_info.pc_name = svc_proc_name(rqstp);
> >>> + rqstp_info.rq_stime = rqstp->rq_stime;
> >>> + memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
> >>> + sizeof(struct sockaddr));
> >>> + memcpy(&rqstp_info.saddr, svc_addr(rqstp),
> >>> + sizeof(struct sockaddr));
> >>> +
> >>> +#ifdef CONFIG_NFSD_V4
> >>> + if (rqstp->rq_vers == NFS4_VERSION &&
> >>> + rqstp->rq_proc == NFSPROC4_COMPOUND) {
> >>> + /* NFSv4 compund */
> >>> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> >>> + int j;
> >>> +
> >>> + opcnt = args->opcnt;
> >>> + for (j = 0; j < opcnt; j++) {
> >>> + struct nfsd4_op *op = &args->ops[j];
> >>> +
> >>> + rqstp_info.opnum[j] = op->opnum;
> >>> + }
> >>> + }
> >>> +#endif /* CONFIG_NFSD_V4 */
> >>> +
> >>> + /*
> >>> + * Acquire rq_status_counter before reporting the rqst
> >>> + * fields to the user.
> >>> + */
> >>> + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> >>> + continue;
> >>> +
> >>> + seq_printf(m,
> >>> + "%04u %04ld NFSv%d %s %016lld",
> >>> + be32_to_cpu(rqstp_info.rq_xid),
> >>
> >> It's proper to display XIDs as 8-hexit hexadecimal values, as you
> >> did before. "0x%08x" is the correct format, as that matches the
> >> XID display format used by Wireshark and our tracepoints.
> >>
> >>
> >>> + rqstp_info.rq_flags,
> >>
> >> I didn't mean for you to change the flags format to decimal. I was
> >> trying to point out that the content of this field will need to be
> >> displayed symbolically if we care about an easy user experience.
> >>
> >> Let's stick with hex here. A clever user can read the bits directly
> >> from that. All others should have a tool that parses this field and
> >> prints the list of bits in it symbolically.
> >>
> >>
> >>> + rqstp_info.rq_vers,
> >>> + rqstp_info.pc_name,
> >>> + ktime_to_us(rqstp_info.rq_stime));
> >>> + seq_printf(m, " %s",
> >>> + __svc_print_addr(&rqstp_info.saddr, buf,
> >>> + sizeof(buf), false));
> >>> + seq_printf(m, " %s",
> >>> + __svc_print_addr(&rqstp_info.daddr, buf,
> >>> + sizeof(buf), false));
> >>> + if (opcnt) {
> >>> + int j;
> >>> +
> >>> + seq_puts(m, " ");
> >>> + for (j = 0; j < opcnt; j++)
> >>> + seq_printf(m, "%s%s",
> >>> + nfsd4_op_name(rqstp_info.opnum[j]),
> >>> + j == opcnt - 1 ? "" : ":");
> >>> + } else {
> >>> + seq_puts(m, " -");
> >>> + }
> >>
> >> This looks correct to me.
> >>
> >> I'm leaning towards moving this to a netlink API that can be
> >> extended over time to handle other stats and also act as an NFSD
> >> control plane, similar to other network subsystems.
> >>
> >> Any comments, complaints or rotten fruit from anyone?
> >
> > I think netlink is the best way forward. 'cat' is nice, but not
> > necessary. We have an established path for distributing tools for
> > working with nfsd so we get easily get a suitable tool into the hands of
> > our users.
> >
> > The only fruit I have relates to the name "rpc_status", and it probably
> > over-ripe rather than rotten :-)
> > In the context of RPC, "status" means the success/failure result of a
> > request. That is not what this file provides. It is a list of active
> > requests. So maybe "active_rpc".
> > One advantage of netlink is that the API is based on numbers, not names!
>
> There won't be a file name with netlink, so right, it won't be an
> issue.
>
> Lorenzo, can you proceed with netlink instead of /proc/fs/nfsd?
> Maybe start with a yaml spec file?
ack, I will start working on it soon.
Regards,
Lorenzo
>
>
> > NeilBrown
> >
> >
> >>
> >>
> >>> + seq_puts(m, "\n");
> >>> + }
> >>> + }
> >>> +
> >>> + rcu_read_unlock();
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> +/**
> >>> + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> >>> + * @inode: entry inode pointer.
> >>> + * @file: entry file pointer.
> >>> + *
> >>> + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> >>> + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> >>> + */
> >>> +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> >>> +{
> >>> + int ret = nfsd_stats_open(inode);
> >>> +
> >>> + if (ret)
> >>> + return ret;
> >>> +
> >>> + return single_open(file, nfsd_rpc_status_show, inode->i_private);
> >>> +}
> >>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> >>> index 7838b37bcfa8..b49c0470b4fe 100644
> >>> --- a/include/linux/sunrpc/svc.h
> >>> +++ b/include/linux/sunrpc/svc.h
> >>> @@ -251,6 +251,7 @@ struct svc_rqst {
> >>> * net namespace
> >>> */
> >>> void ** rq_lease_breaker; /* The v4 client breaking a lease */
> >>> + unsigned int rq_status_counter; /* RPC processing counter */
> >>> };
> >>>
> >>> /* bits for rq_flags */
> >>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> >>> index af692bff44ab..83bee19df104 100644
> >>> --- a/net/sunrpc/svc.c
> >>> +++ b/net/sunrpc/svc.c
> >>> @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> >>> return rqstp->rq_procinfo->pc_name;
> >>> return "unknown";
> >>> }
> >>> -
> >>> +EXPORT_SYMBOL_GPL(svc_proc_name);
> >>>
> >>> /**
> >>> * svc_encode_result_payload - mark a range of bytes as a result payload
> >>> --
> >>> 2.41.0
> >>>
> >>
> >> --
> >> Chuck Lever
> >>
> >
>
> --
> Chuck Lever
>
>