This patch series adds:
. XDR encode and decode function for CB_RECALL_ANY op.
. the delegation shrinker that sends the advisory CB_RECALL_ANY
to the clients to release unused delegations.
There is only one nfsd4_callback added for each nfs4_cleint.
Access to it must be serialized via the client flag
NFSD4_CLIENT_CB_RECALL_ANY.
v2:
. modify deleg_reaper to check and send CB_RECALL_ANY to client
only once per 5 secs.
v3:
. modify nfsd4_cb_recall_any_release to use nn->client_lock to
protect cl_recall_any_busy and call put_client_renew_locked
to decrement cl_rpc_users.
v4:
. move changes in nfs4state.c from patch (1/2) to patch(2/2).
. use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
to encode CB_RECALL_ANY arguments.
. add struct nfsd4_cb_recall_any with embedded nfs4_callback
and params for CB_RECALL_ANY op.
. replace cl_recall_any_busy boolean with client flag
NFSD4_CLIENT_CB_RECALL_ANY
. add tracepoints for CB_RECALL_ANY
---
Dai Ngo (3):
NFSD: add support for sending CB_RECALL_ANY
NFSD: add delegation shrinker to react to low memory condition
NFSD: add CB_RECALL_ANY tracepoints
fs/nfsd/netns.h | 3 ++
fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
fs/nfsd/nfs4state.c | 117 +++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 9 ++++
fs/nfsd/trace.h | 53 ++++++++++++++++++++
fs/nfsd/xdr4.h | 5 ++
fs/nfsd/xdr4cb.h | 6 +++
7 files changed, 254 insertions(+), 1 deletion(-)
Add tracepoints to trace start and end of CB_RECALL_ANY operation.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 2 ++
fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 813cdb67b370..eac7212c9218 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2859,6 +2859,7 @@ static int
nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ trace_nfsd_cb_recall_any_done(cb, task);
switch (task->tk_status) {
case -NFS4ERR_DELAY:
rpc_delay(task, 2 * HZ);
@@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..efc69c96bcbd 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,11 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>
#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"
#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
@@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, ra_keep)
+ __field(u32, ra_bmval)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->ra_keep = ra->ra_keep;
+ __entry->ra_bmval = ra->ra_bmval[0];
+ memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->addr, __entry->ra_keep, __entry->ra_bmval
+ )
+);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &cb->cb_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client %08x:%08x addr=%pISpc status=%d",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->addr, __entry->status
+ )
+);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
--
2.9.5
The delegation shrinker is scheduled to run on every shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.
To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5 seconds delay.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/netns.h | 3 ++
fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 8 ++++
3 files changed, 125 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct delayed_work nfsd_shrinker_work;
+
+ struct shrinker nfsd_deleg_shrinker;
+ struct delayed_work nfsd_deleg_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..813cdb67b370 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
kmem_cache_free(client_slab, clp);
}
@@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ put_client_renew_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}
@@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_deleg_shrinker);
+
+ cnt = atomic_long_read(&num_delegations);
+ if (cnt)
+ mod_delayed_work(laundry_wq,
+ &nn->nfsd_deleg_shrinker_work, 0);
+ return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
int
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
+ int retval;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
@@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ if (retval)
+ return retval;
+ nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+ nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+ nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+ retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+ if (retval)
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ return retval;
+
}
void
nfsd4_leases_net_shutdown(struct nfsd_net *nn)
{
unregister_shrinker(&nn->nfsd_client_shrinker);
+ unregister_shrinker(&nn->nfsd_deleg_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
nfs4_process_client_reaplist(&reaplist);
}
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+ struct delayed_work *dwork = to_delayed_work(deleg_work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_deleg_shrinker_work);
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+ (ktime_get_boottime_seconds() -
+ clp->cl_ra_time < 5)) {
+ continue;
+ }
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+ spin_unlock(&nn->client_lock);
+ list_for_each_safe(pos, next, &cblist) {
+ clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
get_net(net);
return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
+#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {
unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
+ struct list_head cl_ra_cblist;
};
/* struct nfs4_client_reset
@@ -642,6 +647,9 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_RECALL_ANY,
};
+#define RCA4_TYPE_MASK_RDATA_DLG 0
+#define RCA4_TYPE_MASK_WDATA_DLG 1
+
/* Returns true iff a is later than b: */
static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
{
--
2.9.5
Add XDR encode and decode function for CB_RECALL_ANY.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 1 +
fs/nfsd/xdr4.h | 5 ++++
fs/nfsd/xdr4cb.h | 6 +++++
4 files changed, 74 insertions(+)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..01226e5233cd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}
/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
+ ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}
/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..6b33cbbe76d3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -639,6 +639,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};
/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..4fd2cf6d1d2d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
union nfsd4_op_u *);
};
+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};
#endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.9.5
> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> Add XDR encode and decode function for CB_RECALL_ANY.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/state.h | 1 +
> fs/nfsd/xdr4.h | 5 ++++
> fs/nfsd/xdr4cb.h | 6 +++++
> 4 files changed, 74 insertions(+)
>
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..01226e5233cd 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> }
>
> /*
> + * CB_RECALLANY4args
> + *
> + * struct CB_RECALLANY4args {
> + * uint32_t craa_objects_to_keep;
> + * bitmap4 craa_type_mask;
> + * };
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
> +{
> + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
> + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
> + ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
My only nit: You should use ARRAY_SIZE() here.
> + hdr->nops++;
> +}
> +
> +/*
> * CB_SEQUENCE4args
> *
> * struct CB_SEQUENCE4args {
> @@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> encode_cb_nops(&hdr);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> + struct xdr_stream *xdr, const void *data)
> +{
> + const struct nfsd4_callback *cb = data;
> + struct nfsd4_cb_recall_any *ra;
> + struct nfs4_cb_compound_hdr hdr = {
> + .ident = cb->cb_clp->cl_cb_ident,
> + .minorversion = cb->cb_clp->cl_minorversion,
> + };
> +
> + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
> + encode_cb_compound4args(xdr, &hdr);
> + encode_cb_sequence4args(xdr, cb, &hdr);
> + encode_cb_recallany4args(xdr, &hdr, ra);
> + encode_cb_nops(&hdr);
> +}
>
> /*
> * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> + struct xdr_stream *xdr,
> + void *data)
> +{
> + struct nfsd4_callback *cb = data;
> + struct nfs4_cb_compound_hdr hdr;
> + int status;
> +
> + status = decode_cb_compound4res(xdr, &hdr);
> + if (unlikely(status))
> + return status;
> + status = decode_cb_sequence4res(xdr, cb);
> + if (unlikely(status || cb->cb_seq_status))
> + return status;
> + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> + return status;
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
> * CB_LAYOUTRECALL4args
> @@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> #endif
> PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
> PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
> + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
> };
>
> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..6b33cbbe76d3 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -639,6 +639,7 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_OFFLOAD,
> NFSPROC4_CLNT_CB_SEQUENCE,
> NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> + NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> /* Returns true iff a is later than b: */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 0eb00105d845..4fd2cf6d1d2d 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -896,5 +896,10 @@ struct nfsd4_operation {
> union nfsd4_op_u *);
> };
>
> +struct nfsd4_cb_recall_any {
> + struct nfsd4_callback ra_cb;
> + u32 ra_keep;
> + u32 ra_bmval[1];
> +};
>
> #endif
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
> #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
> cb_sequence_dec_sz + \
> op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
> + cb_sequence_enc_sz + \
> + 1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
> + cb_sequence_dec_sz + \
> + op_dec_sz)
> --
> 2.9.5
>
--
Chuck Lever
> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/netns.h | 3 ++
> fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 8 ++++
> 3 files changed, 125 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> struct delayed_work nfsd_shrinker_work;
> +
> + struct shrinker nfsd_deleg_shrinker;
> + struct delayed_work nfsd_deleg_shrinker_work;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..813cdb67b370 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> kfree(clp->cl_nii_domain.data);
> kfree(clp->cl_nii_name.data);
> idr_destroy(&clp->cl_stateids);
> + kfree(clp->cl_ra);
> kmem_cache_free(client_slab, clp);
> }
>
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> [3] = {""},
> };
>
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> + struct rpc_task *task)
> +{
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> + spin_lock(&nn->client_lock);
> + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + put_client_renew_locked(clp);
> + spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> + .done = nfsd4_cb_recall_any_done,
> + .release = nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> free_client(clp);
> return NULL;
> }
> + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> + if (!clp->cl_ra) {
> + free_client(clp);
> + return NULL;
> + }
> + clp->cl_ra_time = 0;
> + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> + NFSPROC4_CLNT_CB_RECALL_ANY);
> return clp;
> }
>
> @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + unsigned long cnt;
No reason not to spell out "count".
> + struct nfsd_net *nn = container_of(shrink,
> + struct nfsd_net, nfsd_deleg_shrinker);
> +
> + cnt = atomic_long_read(&num_delegations);
> + if (cnt)
> + mod_delayed_work(laundry_wq,
> + &nn->nfsd_deleg_shrinker_work, 0);
> + return cnt;
> +}
> +
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> struct sysinfo si;
> u64 max_clients;
> + int retval;
>
> nn->nfsd4_lease = 90; /* default lease time */
> nn->nfsd4_grace = 90;
> @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + if (retval)
> + return retval;
> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> + if (retval)
> + unregister_shrinker(&nn->nfsd_client_shrinker);
> + return retval;
> +
> }
>
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> unregister_shrinker(&nn->nfsd_client_shrinker);
> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
>
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> + struct delayed_work *dwork = to_delayed_work(deleg_work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_deleg_shrinker_work);
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> + (ktime_get_boottime_seconds() -
> + clp->cl_ra_time < 5)) {
> + continue;
> + }
> + list_add(&clp->cl_ra_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + clp->cl_ra_time = ktime_get_boottime_seconds();
> + }
> + spin_unlock(&nn->client_lock);
> + list_for_each_safe(pos, next, &cblist) {
The usual idiom for draining a list like this is
while (!list_empty(&cblist)) {
clp = list_first_entry( ... );
> + clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> + list_del_init(&clp->cl_ra_cblist);
> + clp->cl_ra->ra_keep = 0;
> + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> + }
> +
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> get_net(net);
>
> return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> 1 << NFSD4_CLIENT_CB_KILL)
> +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> unsigned long cl_flags;
> const struct cred *cl_cb_cred;
> struct rpc_clnt *cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
>
> unsigned int cl_state;
> atomic_t cl_delegs_in_recall;
> +
> + struct nfsd4_cb_recall_any *cl_ra;
> + time64_t cl_ra_time;
> + struct list_head cl_ra_cblist;
> };
>
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> +#define RCA4_TYPE_MASK_RDATA_DLG 0
> +#define RCA4_TYPE_MASK_WDATA_DLG 1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> --
> 2.9.5
>
--
Chuck Lever
> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 2 ++
> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 55 insertions(+)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 813cdb67b370..eac7212c9218 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2859,6 +2859,7 @@ static int
> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> struct rpc_task *task)
> {
> + trace_nfsd_cb_recall_any_done(cb, task);
> switch (task->tk_status) {
> case -NFS4ERR_DELAY:
> rpc_delay(task, 2 * HZ);
> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
> clp->cl_ra->ra_keep = 0;
> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + trace_nfsd_cb_recall_any(clp->cl_ra);
> nfsd4_run_cb(&clp->cl_ra->ra_cb);
> }
>
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 06a96e955bd0..efc69c96bcbd 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -9,9 +9,11 @@
> #define _NFSD_TRACE_H
>
> #include <linux/tracepoint.h>
> +#include <linux/sunrpc/xprt.h>
>
> #include "export.h"
> #include "nfsfh.h"
> +#include "xdr4.h"
>
> #define NFSD_TRACE_PROC_RES_FIELDS \
> __field(unsigned int, netns_ino) \
> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>
> +TRACE_EVENT(nfsd_cb_recall_any,
> + TP_PROTO(
> + const struct nfsd4_cb_recall_any *ra
> + ),
> + TP_ARGS(ra),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(u32, ra_keep)
> + __field(u32, ra_bmval)
> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
> + ),
> + TP_fast_assign(
> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
> + __entry->ra_keep = ra->ra_keep;
> + __entry->ra_bmval = ra->ra_bmval[0];
> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
> + sizeof(struct sockaddr_in6));
> + ),
> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
> + __entry->cl_boot, __entry->cl_id,
> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
> + )
> +);
This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
> +
> +TRACE_EVENT(nfsd_cb_recall_any_done,
> + TP_PROTO(
> + const struct nfsd4_callback *cb,
> + const struct rpc_task *task
> + ),
> + TP_ARGS(cb, task),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(int, status)
> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
> + ),
> + TP_fast_assign(
> + __entry->status = task->tk_status;
> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
> + sizeof(struct sockaddr_in6));
> + ),
> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
> + __entry->cl_boot, __entry->cl_id,
> + __entry->addr, __entry->status
> + )
> +);
I'd like you to change this to
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
> +
> #endif /* _NFSD_TRACE_H */
>
> #undef TRACE_INCLUDE_PATH
> --
> 2.9.5
>
--
Chuck Lever
On Fri, 2022-11-11 at 15:25 +0000, Chuck Lever III wrote:
>
> > On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
> >
> > The delegation shrinker is scheduled to run on every shrinker's
> > 'count' callback. It scans the client list and sends the
> > courtesy CB_RECALL_ANY to the clients that hold delegations.
> >
> > To avoid flooding the clients with CB_RECALL_ANY requests, the
> > delegation shrinker is scheduled to run after a 5 seconds delay.
> >
> > Signed-off-by: Dai Ngo <[email protected]>
> > ---
> > fs/nfsd/netns.h | 3 ++
> > fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/state.h | 8 ++++
> > 3 files changed, 125 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 8c854ba3285b..394a05fb46d8 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -196,6 +196,9 @@ struct nfsd_net {
> > atomic_t nfsd_courtesy_clients;
> > struct shrinker nfsd_client_shrinker;
> > struct delayed_work nfsd_shrinker_work;
> > +
> > + struct shrinker nfsd_deleg_shrinker;
> > + struct delayed_work nfsd_deleg_shrinker_work;
> > };
> >
> > /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 4e718500a00c..813cdb67b370 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> > kfree(clp->cl_nii_domain.data);
> > kfree(clp->cl_nii_name.data);
> > idr_destroy(&clp->cl_stateids);
> > + kfree(clp->cl_ra);
> > kmem_cache_free(client_slab, clp);
> > }
> >
> > @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> > [3] = {""},
> > };
> >
> > +static int
> > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > + struct rpc_task *task)
> > +{
> > + switch (task->tk_status) {
> > + case -NFS4ERR_DELAY:
> > + rpc_delay(task, 2 * HZ);
> > + return 0;
> > + default:
> > + return 1;
> > + }
> > +}
> > +
> > +static void
> > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> > +
> > + spin_lock(&nn->client_lock);
> > + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + put_client_renew_locked(clp);
> > + spin_unlock(&nn->client_lock);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > + .done = nfsd4_cb_recall_any_done,
> > + .release = nfsd4_cb_recall_any_release,
> > +};
> > +
> > static struct nfs4_client *create_client(struct xdr_netobj name,
> > struct svc_rqst *rqstp, nfs4_verifier *verf)
> > {
> > @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > free_client(clp);
> > return NULL;
> > }
> > + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> > + if (!clp->cl_ra) {
> > + free_client(clp);
> > + return NULL;
> > + }
> > + clp->cl_ra_time = 0;
> > + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> > + NFSPROC4_CLNT_CB_RECALL_ANY);
> > return clp;
> > }
> >
> > @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> > return SHRINK_STOP;
> > }
> >
> > +static unsigned long
> > +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + unsigned long cnt;
>
> No reason not to spell out "count".
>
> > + struct nfsd_net *nn = container_of(shrink,
> > + struct nfsd_net, nfsd_deleg_shrinker);
> > +
> > + cnt = atomic_long_read(&num_delegations);
> > + if (cnt)
> > + mod_delayed_work(laundry_wq,
> > + &nn->nfsd_deleg_shrinker_work, 0);
> > + return cnt;
> > +}
> > +
> > +static unsigned long
> > +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + return SHRINK_STOP;
> > +}
> > +
> > int
> > nfsd4_init_leases_net(struct nfsd_net *nn)
> > {
> > struct sysinfo si;
> > u64 max_clients;
> > + int retval;
> >
> > nn->nfsd4_lease = 90; /* default lease time */
> > nn->nfsd4_grace = 90;
> > @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> > nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> > nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> > nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> > - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + if (retval)
> > + return retval;
> > + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> > + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> > + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> > + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> > + if (retval)
> > + unregister_shrinker(&nn->nfsd_client_shrinker);
> > + return retval;
> > +
> > }
> >
> > void
> > nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> > {
> > unregister_shrinker(&nn->nfsd_client_shrinker);
> > + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> > }
> >
> > static void init_nfs4_replay(struct nfs4_replay *rp)
> > @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> > nfs4_process_client_reaplist(&reaplist);
> > }
> >
> > +static void
> > +deleg_reaper(struct work_struct *deleg_work)
> > +{
> > + struct list_head *pos, *next;
> > + struct nfs4_client *clp;
> > + struct list_head cblist;
> > + struct delayed_work *dwork = to_delayed_work(deleg_work);
> > + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> > + nfsd_deleg_shrinker_work);
> > +
> > + INIT_LIST_HEAD(&cblist);
> > + spin_lock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &nn->client_lru) {
> > + clp = list_entry(pos, struct nfs4_client, cl_lru);
> > + if (clp->cl_state != NFSD4_ACTIVE ||
> > + list_empty(&clp->cl_delegations) ||
> > + atomic_read(&clp->cl_delegs_in_recall) ||
> > + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> > + (ktime_get_boottime_seconds() -
> > + clp->cl_ra_time < 5)) {
> > + continue;
> > + }
> > + list_add(&clp->cl_ra_cblist, &cblist);
> > +
> > + /* release in nfsd4_cb_recall_any_release */
> > + atomic_inc(&clp->cl_rpc_users);
> > + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + clp->cl_ra_time = ktime_get_boottime_seconds();
> > + }
> > + spin_unlock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &cblist) {
>
> The usual idiom for draining a list like this is
>
> while (!list_empty(&cblist)) {
> clp = list_first_entry( ... );
>
Agreed. Note that it's also slightly more efficient to do it that way,
since you don't require a "next" pointer.
>
> > + clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> > + list_del_init(&clp->cl_ra_cblist);
> > + clp->cl_ra->ra_keep = 0;
> > + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> > + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> > + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> > + }
> > +
> > +}
> > +
> > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> > {
> > if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> > @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> >
> > INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> > INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> > + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> > get_net(net);
> >
> > return 0;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 6b33cbbe76d3..12ce9792c5b6 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -368,6 +368,7 @@ struct nfs4_client {
> > #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> > #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> > 1 << NFSD4_CLIENT_CB_KILL)
> > +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> > unsigned long cl_flags;
> > const struct cred *cl_cb_cred;
> > struct rpc_clnt *cl_cb_client;
> > @@ -411,6 +412,10 @@ struct nfs4_client {
> >
> > unsigned int cl_state;
> > atomic_t cl_delegs_in_recall;
> > +
> > + struct nfsd4_cb_recall_any *cl_ra;
> > + time64_t cl_ra_time;
> > + struct list_head cl_ra_cblist;
> > };
> >
> > /* struct nfs4_client_reset
> > @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> > NFSPROC4_CLNT_CB_RECALL_ANY,
> > };
> >
> > +#define RCA4_TYPE_MASK_RDATA_DLG 0
> > +#define RCA4_TYPE_MASK_WDATA_DLG 1
> > +
> > /* Returns true iff a is later than b: */
> > static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > {
> > --
> > 2.9.5
> >
>
> --
> Chuck Lever
>
>
>
--
Jeff Layton <[email protected]>
On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>> + )
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:
[root@nfsvmf24 ~]# uname -r
6.1.0-rc1+
[root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
[root@nfsvmf24 ~]# trace-cmd report
trace-cmd: No such file or directory
Error: expected type 4 but read 5
[nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
cpus=1
nfsd-3976 [000] 410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]
I also tried Fedora 36 and got same error.
Can you verify __get_sockaddr works with tracepoints on your system?
Thanks,
-Dai
>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->status
>> + )
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>
> On Nov 11, 2022, at 4:34 PM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>>
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>>
>>> Signed-off-by: Dai Ngo <[email protected]>
>>> ---
>>> fs/nfsd/nfs4state.c | 2 ++
>>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> struct rpc_task *task)
>>> {
>>> + trace_nfsd_cb_recall_any_done(cb, task);
>>> switch (task->tk_status) {
>>> case -NFS4ERR_DELAY:
>>> rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> clp->cl_ra->ra_keep = 0;
>>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> }
>>>
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>>
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>>
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>>
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> __field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>>
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> + TP_PROTO(
>>> + const struct nfsd4_cb_recall_any *ra
>>> + ),
>>> + TP_ARGS(ra),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(u32, ra_keep)
>>> + __field(u32, ra_bmval)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> + __entry->ra_keep = ra->ra_keep;
>>> + __entry->ra_bmval = ra->ra_bmval[0];
>>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> + )
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>>
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>
> I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:
>
> [root@nfsvmf24 ~]# uname -r
> 6.1.0-rc1+
> [root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
> [root@nfsvmf24 ~]# trace-cmd report
> trace-cmd: No such file or directory
> Error: expected type 4 but read 5
> [nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
> cpus=1
> nfsd-3976 [000] 410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]
>
> I also tried Fedora 36 and got same error.
>
> Can you verify __get_sockaddr works with tracepoints on your system?
The user space libraries still don't have support for __get_sockaddr().
Assuming that eventually those libraries will get the proper support,
in this case I'm willing to take tracepoints that don't parse.
> Thanks,
> -Dai
>
>>
>>
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> + TP_PROTO(
>>> + const struct nfsd4_callback *cb,
>>> + const struct rpc_task *task
>>> + ),
>>> + TP_ARGS(cb, task),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(int, status)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->status = task->tk_status;
>>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->status
>>> + )
>>> +);
>> I'd like you to change this to
>>
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>>
>>
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>>
>>> #undef TRACE_INCLUDE_PATH
>>> --
>>> 2.9.5
>>>
>> --
>> Chuck Lever
--
Chuck Lever
On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>> + )
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->status
>> + )
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
for nfsd_cb_recall_any_done?
Note that nfsd_cb_done_class does not print the server IP address
which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
class to print the IP address from rpc_xprt?
-Dai
>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>
> On Nov 15, 2022, at 12:08 AM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>>
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>>
>>> Signed-off-by: Dai Ngo <[email protected]>
>>> ---
>>> fs/nfsd/nfs4state.c | 2 ++
>>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> struct rpc_task *task)
>>> {
>>> + trace_nfsd_cb_recall_any_done(cb, task);
>>> switch (task->tk_status) {
>>> case -NFS4ERR_DELAY:
>>> rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> clp->cl_ra->ra_keep = 0;
>>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> }
>>>
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>>
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>>
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>>
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> __field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>>
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> + TP_PROTO(
>>> + const struct nfsd4_cb_recall_any *ra
>>> + ),
>>> + TP_ARGS(ra),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(u32, ra_keep)
>>> + __field(u32, ra_bmval)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> + __entry->ra_keep = ra->ra_keep;
>>> + __entry->ra_bmval = ra->ra_bmval[0];
>>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> + )
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>>
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>>
>>
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> + TP_PROTO(
>>> + const struct nfsd4_callback *cb,
>>> + const struct rpc_task *task
>>> + ),
>>> + TP_ARGS(cb, task),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(int, status)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->status = task->tk_status;
>>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->status
>>> + )
>>> +);
>> I'd like you to change this to
>>
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>
> TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
> CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
> for nfsd_cb_recall_any_done?
Ah, I didn't remember that a state ID was recorded. OK, then a
separate TRACE_EVENT is appropriate for nfsd_cb_recall_any_done.
> Note that nfsd_cb_done_class does not print the server IP address
> which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
> class to print the IP address from rpc_xprt?
The IP address is recorded by the Call side tracepoints. I'm OK
leaving the IP address out of the Reply side tracepoints.
> -Dai
>
>>
>>
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>>
>>> #undef TRACE_INCLUDE_PATH
>>> --
>>> 2.9.5
>>>
>> --
>> Chuck Lever
--
Chuck Lever