2022-11-10 04:18:02

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v4 0/3] add support for CB_RECALL_ANY and the delegation shrinker

This patch series adds:

. XDR encode and decode function for CB_RECALL_ANY op.

. the delegation shrinker that sends the advisory CB_RECALL_ANY
to the clients to release unused delegations.
There is only one nfsd4_callback added for each nfs4_cleint.
Access to it must be serialized via the client flag
NFSD4_CLIENT_CB_RECALL_ANY.

v2:
. modify deleg_reaper to check and send CB_RECALL_ANY to client
only once per 5 secs.
v3:
. modify nfsd4_cb_recall_any_release to use nn->client_lock to
protect cl_recall_any_busy and call put_client_renew_locked
to decrement cl_rpc_users.

v4:
. move changes in nfs4state.c from patch (1/2) to patch(2/2).
. use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
to encode CB_RECALL_ANY arguments.
. add struct nfsd4_cb_recall_any with embedded nfs4_callback
and params for CB_RECALL_ANY op.
. replace cl_recall_any_busy boolean with client flag
NFSD4_CLIENT_CB_RECALL_ANY
. add tracepoints for CB_RECALL_ANY
---

Dai Ngo (3):
NFSD: add support for sending CB_RECALL_ANY
NFSD: add delegation shrinker to react to low memory condition
NFSD: add CB_RECALL_ANY tracepoints

fs/nfsd/netns.h | 3 ++
fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
fs/nfsd/nfs4state.c | 117 +++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 9 ++++
fs/nfsd/trace.h | 53 ++++++++++++++++++++
fs/nfsd/xdr4.h | 5 ++
fs/nfsd/xdr4cb.h | 6 +++
7 files changed, 254 insertions(+), 1 deletion(-)


2022-11-10 04:18:39

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints

Add tracepoints to trace start and end of CB_RECALL_ANY operation.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 2 ++
fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 813cdb67b370..eac7212c9218 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2859,6 +2859,7 @@ static int
nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ trace_nfsd_cb_recall_any_done(cb, task);
switch (task->tk_status) {
case -NFS4ERR_DELAY:
rpc_delay(task, 2 * HZ);
@@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..efc69c96bcbd 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,11 @@
#define _NFSD_TRACE_H

#include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>

#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"

#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
@@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);

+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, ra_keep)
+ __field(u32, ra_bmval)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->ra_keep = ra->ra_keep;
+ __entry->ra_bmval = ra->ra_bmval[0];
+ memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->addr, __entry->ra_keep, __entry->ra_bmval
+ )
+);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &cb->cb_clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client %08x:%08x addr=%pISpc status=%d",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->addr, __entry->status
+ )
+);
+
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH
--
2.9.5


2022-11-10 04:18:39

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition

The delegation shrinker is scheduled to run on every shrinker's
'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5 seconds delay.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/netns.h | 3 ++
fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 8 ++++
3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct delayed_work nfsd_shrinker_work;
+
+ struct shrinker nfsd_deleg_shrinker;
+ struct delayed_work nfsd_deleg_shrinker_work;
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4e718500a00c..813cdb67b370 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
kmem_cache_free(client_slab, clp);
}

@@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};

+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ put_client_renew_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}

@@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}

+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_deleg_shrinker);
+
+ cnt = atomic_long_read(&num_delegations);
+ if (cnt)
+ mod_delayed_work(laundry_wq,
+ &nn->nfsd_deleg_shrinker_work, 0);
+ return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
int
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
+ int retval;

nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
@@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ if (retval)
+ return retval;
+ nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+ nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+ nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+ retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+ if (retval)
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ return retval;
+
}

void
nfsd4_leases_net_shutdown(struct nfsd_net *nn)
{
unregister_shrinker(&nn->nfsd_client_shrinker);
+ unregister_shrinker(&nn->nfsd_deleg_shrinker);
}

static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
nfs4_process_client_reaplist(&reaplist);
}

+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+ struct delayed_work *dwork = to_delayed_work(deleg_work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_deleg_shrinker_work);
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+ (ktime_get_boottime_seconds() -
+ clp->cl_ra_time < 5)) {
+ continue;
+ }
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+ spin_unlock(&nn->client_lock);
+ list_for_each_safe(pos, next, &cblist) {
+ clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
get_net(net);

return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
+#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {

unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
+ struct list_head cl_ra_cblist;
};

/* struct nfs4_client_reset
@@ -642,6 +647,9 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_RECALL_ANY,
};

+#define RCA4_TYPE_MASK_RDATA_DLG 0
+#define RCA4_TYPE_MASK_WDATA_DLG 1
+
/* Returns true iff a is later than b: */
static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
{
--
2.9.5


2022-11-10 04:19:19

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY

Add XDR encode and decode function for CB_RECALL_ANY.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 1 +
fs/nfsd/xdr4.h | 5 ++++
fs/nfsd/xdr4cb.h | 6 +++++
4 files changed, 74 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..01226e5233cd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}

/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
+ ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}

/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};

static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..6b33cbbe76d3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -639,6 +639,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};

/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..4fd2cf6d1d2d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
union nfsd4_op_u *);
};

+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};

#endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.9.5


2022-11-11 15:37:56

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v4 1/3] NFSD: add support for sending CB_RECALL_ANY



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> Add XDR encode and decode function for CB_RECALL_ANY.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
> fs/nfsd/state.h | 1 +
> fs/nfsd/xdr4.h | 5 ++++
> fs/nfsd/xdr4cb.h | 6 +++++
> 4 files changed, 74 insertions(+)
>
> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
> index f0e69edf5f0f..01226e5233cd 100644
> --- a/fs/nfsd/nfs4callback.c
> +++ b/fs/nfsd/nfs4callback.c
> @@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
> }
>
> /*
> + * CB_RECALLANY4args
> + *
> + * struct CB_RECALLANY4args {
> + * uint32_t craa_objects_to_keep;
> + * bitmap4 craa_type_mask;
> + * };
> + */
> +static void
> +encode_cb_recallany4args(struct xdr_stream *xdr,
> + struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
> +{
> + encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
> + WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
> + WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
> + ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);

My only nit: You should use ARRAY_SIZE() here.


> + hdr->nops++;
> +}
> +
> +/*
> * CB_SEQUENCE4args
> *
> * struct CB_SEQUENCE4args {
> @@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
> encode_cb_nops(&hdr);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static void
> +nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
> + struct xdr_stream *xdr, const void *data)
> +{
> + const struct nfsd4_callback *cb = data;
> + struct nfsd4_cb_recall_any *ra;
> + struct nfs4_cb_compound_hdr hdr = {
> + .ident = cb->cb_clp->cl_cb_ident,
> + .minorversion = cb->cb_clp->cl_minorversion,
> + };
> +
> + ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
> + encode_cb_compound4args(xdr, &hdr);
> + encode_cb_sequence4args(xdr, cb, &hdr);
> + encode_cb_recallany4args(xdr, &hdr, ra);
> + encode_cb_nops(&hdr);
> +}
>
> /*
> * NFSv4.0 and NFSv4.1 XDR decode functions
> @@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
> return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
> }
>
> +/*
> + * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
> + */
> +static int
> +nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
> + struct xdr_stream *xdr,
> + void *data)
> +{
> + struct nfsd4_callback *cb = data;
> + struct nfs4_cb_compound_hdr hdr;
> + int status;
> +
> + status = decode_cb_compound4res(xdr, &hdr);
> + if (unlikely(status))
> + return status;
> + status = decode_cb_sequence4res(xdr, cb);
> + if (unlikely(status || cb->cb_seq_status))
> + return status;
> + status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
> + return status;
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
> * CB_LAYOUTRECALL4args
> @@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
> #endif
> PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
> PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
> + PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
> };
>
> static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e2daef3cc003..6b33cbbe76d3 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -639,6 +639,7 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_OFFLOAD,
> NFSPROC4_CLNT_CB_SEQUENCE,
> NFSPROC4_CLNT_CB_NOTIFY_LOCK,
> + NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> /* Returns true iff a is later than b: */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 0eb00105d845..4fd2cf6d1d2d 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -896,5 +896,10 @@ struct nfsd4_operation {
> union nfsd4_op_u *);
> };
>
> +struct nfsd4_cb_recall_any {
> + struct nfsd4_callback ra_cb;
> + u32 ra_keep;
> + u32 ra_bmval[1];
> +};
>
> #endif
> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
> index 547cf07cf4e0..0d39af1b00a0 100644
> --- a/fs/nfsd/xdr4cb.h
> +++ b/fs/nfsd/xdr4cb.h
> @@ -48,3 +48,9 @@
> #define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
> cb_sequence_dec_sz + \
> op_dec_sz)
> +#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
> + cb_sequence_enc_sz + \
> + 1 + 1 + 1)
> +#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
> + cb_sequence_dec_sz + \
> + op_dec_sz)
> --
> 2.9.5
>

--
Chuck Lever




2022-11-11 15:38:03

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/netns.h | 3 ++
> fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 8 ++++
> 3 files changed, 125 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> struct delayed_work nfsd_shrinker_work;
> +
> + struct shrinker nfsd_deleg_shrinker;
> + struct delayed_work nfsd_deleg_shrinker_work;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 4e718500a00c..813cdb67b370 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> kfree(clp->cl_nii_domain.data);
> kfree(clp->cl_nii_name.data);
> idr_destroy(&clp->cl_stateids);
> + kfree(clp->cl_ra);
> kmem_cache_free(client_slab, clp);
> }
>
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> [3] = {""},
> };
>
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> + struct rpc_task *task)
> +{
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> + spin_lock(&nn->client_lock);
> + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + put_client_renew_locked(clp);
> + spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> + .done = nfsd4_cb_recall_any_done,
> + .release = nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> free_client(clp);
> return NULL;
> }
> + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> + if (!clp->cl_ra) {
> + free_client(clp);
> + return NULL;
> + }
> + clp->cl_ra_time = 0;
> + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> + NFSPROC4_CLNT_CB_RECALL_ANY);
> return clp;
> }
>
> @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + unsigned long cnt;

No reason not to spell out "count".

> + struct nfsd_net *nn = container_of(shrink,
> + struct nfsd_net, nfsd_deleg_shrinker);
> +
> + cnt = atomic_long_read(&num_delegations);
> + if (cnt)
> + mod_delayed_work(laundry_wq,
> + &nn->nfsd_deleg_shrinker_work, 0);
> + return cnt;
> +}
> +
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> struct sysinfo si;
> u64 max_clients;
> + int retval;
>
> nn->nfsd4_lease = 90; /* default lease time */
> nn->nfsd4_grace = 90;
> @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + if (retval)
> + return retval;
> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> + if (retval)
> + unregister_shrinker(&nn->nfsd_client_shrinker);
> + return retval;
> +
> }
>
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> unregister_shrinker(&nn->nfsd_client_shrinker);
> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
>
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> + struct delayed_work *dwork = to_delayed_work(deleg_work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_deleg_shrinker_work);
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> + (ktime_get_boottime_seconds() -
> + clp->cl_ra_time < 5)) {
> + continue;
> + }
> + list_add(&clp->cl_ra_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + clp->cl_ra_time = ktime_get_boottime_seconds();
> + }
> + spin_unlock(&nn->client_lock);
> + list_for_each_safe(pos, next, &cblist) {

The usual idiom for draining a list like this is

while (!list_empty(&cblist)) {
clp = list_first_entry( ... );


> + clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> + list_del_init(&clp->cl_ra_cblist);
> + clp->cl_ra->ra_keep = 0;
> + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> + }
> +
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> get_net(net);
>
> return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> 1 << NFSD4_CLIENT_CB_KILL)
> +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> unsigned long cl_flags;
> const struct cred *cl_cb_cred;
> struct rpc_clnt *cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
>
> unsigned int cl_state;
> atomic_t cl_delegs_in_recall;
> +
> + struct nfsd4_cb_recall_any *cl_ra;
> + time64_t cl_ra_time;
> + struct list_head cl_ra_cblist;
> };
>
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> +#define RCA4_TYPE_MASK_RDATA_DLG 0
> +#define RCA4_TYPE_MASK_WDATA_DLG 1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> --
> 2.9.5
>

--
Chuck Lever




2022-11-11 15:38:29

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints



> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>
> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 2 ++
> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 55 insertions(+)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 813cdb67b370..eac7212c9218 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2859,6 +2859,7 @@ static int
> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> struct rpc_task *task)
> {
> + trace_nfsd_cb_recall_any_done(cb, task);
> switch (task->tk_status) {
> case -NFS4ERR_DELAY:
> rpc_delay(task, 2 * HZ);
> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
> clp->cl_ra->ra_keep = 0;
> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + trace_nfsd_cb_recall_any(clp->cl_ra);
> nfsd4_run_cb(&clp->cl_ra->ra_cb);
> }
>
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 06a96e955bd0..efc69c96bcbd 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -9,9 +9,11 @@
> #define _NFSD_TRACE_H
>
> #include <linux/tracepoint.h>
> +#include <linux/sunrpc/xprt.h>
>
> #include "export.h"
> #include "nfsfh.h"
> +#include "xdr4.h"
>
> #define NFSD_TRACE_PROC_RES_FIELDS \
> __field(unsigned int, netns_ino) \
> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>
> +TRACE_EVENT(nfsd_cb_recall_any,
> + TP_PROTO(
> + const struct nfsd4_cb_recall_any *ra
> + ),
> + TP_ARGS(ra),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(u32, ra_keep)
> + __field(u32, ra_bmval)
> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
> + ),
> + TP_fast_assign(
> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
> + __entry->ra_keep = ra->ra_keep;
> + __entry->ra_bmval = ra->ra_bmval[0];
> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
> + sizeof(struct sockaddr_in6));
> + ),
> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
> + __entry->cl_boot, __entry->cl_id,
> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
> + )
> +);

This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"

And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.


> +
> +TRACE_EVENT(nfsd_cb_recall_any_done,
> + TP_PROTO(
> + const struct nfsd4_callback *cb,
> + const struct rpc_task *task
> + ),
> + TP_ARGS(cb, task),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(int, status)
> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
> + ),
> + TP_fast_assign(
> + __entry->status = task->tk_status;
> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
> + sizeof(struct sockaddr_in6));
> + ),
> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
> + __entry->cl_boot, __entry->cl_id,
> + __entry->addr, __entry->status
> + )
> +);

I'd like you to change this to

DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);


> +
> #endif /* _NFSD_TRACE_H */
>
> #undef TRACE_INCLUDE_PATH
> --
> 2.9.5
>

--
Chuck Lever




2022-11-11 16:16:10

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v4 2/3] NFSD: add delegation shrinker to react to low memory condition

On Fri, 2022-11-11 at 15:25 +0000, Chuck Lever III wrote:
>
> > On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
> >
> > The delegation shrinker is scheduled to run on every shrinker's
> > 'count' callback. It scans the client list and sends the
> > courtesy CB_RECALL_ANY to the clients that hold delegations.
> >
> > To avoid flooding the clients with CB_RECALL_ANY requests, the
> > delegation shrinker is scheduled to run after a 5 seconds delay.
> >
> > Signed-off-by: Dai Ngo <[email protected]>
> > ---
> > fs/nfsd/netns.h | 3 ++
> > fs/nfsd/nfs4state.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++-
> > fs/nfsd/state.h | 8 ++++
> > 3 files changed, 125 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> > index 8c854ba3285b..394a05fb46d8 100644
> > --- a/fs/nfsd/netns.h
> > +++ b/fs/nfsd/netns.h
> > @@ -196,6 +196,9 @@ struct nfsd_net {
> > atomic_t nfsd_courtesy_clients;
> > struct shrinker nfsd_client_shrinker;
> > struct delayed_work nfsd_shrinker_work;
> > +
> > + struct shrinker nfsd_deleg_shrinker;
> > + struct delayed_work nfsd_deleg_shrinker_work;
> > };
> >
> > /* Simple check to find out if a given net was properly initialized */
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 4e718500a00c..813cdb67b370 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> > kfree(clp->cl_nii_domain.data);
> > kfree(clp->cl_nii_name.data);
> > idr_destroy(&clp->cl_stateids);
> > + kfree(clp->cl_ra);
> > kmem_cache_free(client_slab, clp);
> > }
> >
> > @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> > [3] = {""},
> > };
> >
> > +static int
> > +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> > + struct rpc_task *task)
> > +{
> > + switch (task->tk_status) {
> > + case -NFS4ERR_DELAY:
> > + rpc_delay(task, 2 * HZ);
> > + return 0;
> > + default:
> > + return 1;
> > + }
> > +}
> > +
> > +static void
> > +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> > +{
> > + struct nfs4_client *clp = cb->cb_clp;
> > + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> > +
> > + spin_lock(&nn->client_lock);
> > + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + put_client_renew_locked(clp);
> > + spin_unlock(&nn->client_lock);
> > +}
> > +
> > +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> > + .done = nfsd4_cb_recall_any_done,
> > + .release = nfsd4_cb_recall_any_release,
> > +};
> > +
> > static struct nfs4_client *create_client(struct xdr_netobj name,
> > struct svc_rqst *rqstp, nfs4_verifier *verf)
> > {
> > @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> > free_client(clp);
> > return NULL;
> > }
> > + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> > + if (!clp->cl_ra) {
> > + free_client(clp);
> > + return NULL;
> > + }
> > + clp->cl_ra_time = 0;
> > + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> > + NFSPROC4_CLNT_CB_RECALL_ANY);
> > return clp;
> > }
> >
> > @@ -4365,11 +4404,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> > return SHRINK_STOP;
> > }
> >
> > +static unsigned long
> > +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + unsigned long cnt;
>
> No reason not to spell out "count".
>
> > + struct nfsd_net *nn = container_of(shrink,
> > + struct nfsd_net, nfsd_deleg_shrinker);
> > +
> > + cnt = atomic_long_read(&num_delegations);
> > + if (cnt)
> > + mod_delayed_work(laundry_wq,
> > + &nn->nfsd_deleg_shrinker_work, 0);
> > + return cnt;
> > +}
> > +
> > +static unsigned long
> > +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> > +{
> > + return SHRINK_STOP;
> > +}
> > +
> > int
> > nfsd4_init_leases_net(struct nfsd_net *nn)
> > {
> > struct sysinfo si;
> > u64 max_clients;
> > + int retval;
> >
> > nn->nfsd4_lease = 90; /* default lease time */
> > nn->nfsd4_grace = 90;
> > @@ -4390,13 +4450,24 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> > nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> > nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> > nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> > - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> > + if (retval)
> > + return retval;
> > + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> > + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> > + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> > + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> > + if (retval)
> > + unregister_shrinker(&nn->nfsd_client_shrinker);
> > + return retval;
> > +
> > }
> >
> > void
> > nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> > {
> > unregister_shrinker(&nn->nfsd_client_shrinker);
> > + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> > }
> >
> > static void init_nfs4_replay(struct nfs4_replay *rp)
> > @@ -6135,6 +6206,47 @@ courtesy_client_reaper(struct work_struct *reaper)
> > nfs4_process_client_reaplist(&reaplist);
> > }
> >
> > +static void
> > +deleg_reaper(struct work_struct *deleg_work)
> > +{
> > + struct list_head *pos, *next;
> > + struct nfs4_client *clp;
> > + struct list_head cblist;
> > + struct delayed_work *dwork = to_delayed_work(deleg_work);
> > + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> > + nfsd_deleg_shrinker_work);
> > +
> > + INIT_LIST_HEAD(&cblist);
> > + spin_lock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &nn->client_lru) {
> > + clp = list_entry(pos, struct nfs4_client, cl_lru);
> > + if (clp->cl_state != NFSD4_ACTIVE ||
> > + list_empty(&clp->cl_delegations) ||
> > + atomic_read(&clp->cl_delegs_in_recall) ||
> > + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> > + (ktime_get_boottime_seconds() -
> > + clp->cl_ra_time < 5)) {
> > + continue;
> > + }
> > + list_add(&clp->cl_ra_cblist, &cblist);
> > +
> > + /* release in nfsd4_cb_recall_any_release */
> > + atomic_inc(&clp->cl_rpc_users);
> > + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> > + clp->cl_ra_time = ktime_get_boottime_seconds();
> > + }
> > + spin_unlock(&nn->client_lock);
> > + list_for_each_safe(pos, next, &cblist) {
>
> The usual idiom for draining a list like this is
>
> while (!list_empty(&cblist)) {
> clp = list_first_entry( ... );
>

Agreed. Note that it's also slightly more efficient to do it that way,
since you don't require a "next" pointer.

>
> > + clp = list_entry(pos, struct nfs4_client, cl_ra_cblist);
> > + list_del_init(&clp->cl_ra_cblist);
> > + clp->cl_ra->ra_keep = 0;
> > + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> > + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> > + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> > + }
> > +
> > +}
> > +
> > static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> > {
> > if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> > @@ -7958,6 +8070,7 @@ static int nfs4_state_create_net(struct net *net)
> >
> > INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> > INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> > + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> > get_net(net);
> >
> > return 0;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 6b33cbbe76d3..12ce9792c5b6 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -368,6 +368,7 @@ struct nfs4_client {
> > #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> > #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> > 1 << NFSD4_CLIENT_CB_KILL)
> > +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> > unsigned long cl_flags;
> > const struct cred *cl_cb_cred;
> > struct rpc_clnt *cl_cb_client;
> > @@ -411,6 +412,10 @@ struct nfs4_client {
> >
> > unsigned int cl_state;
> > atomic_t cl_delegs_in_recall;
> > +
> > + struct nfsd4_cb_recall_any *cl_ra;
> > + time64_t cl_ra_time;
> > + struct list_head cl_ra_cblist;
> > };
> >
> > /* struct nfs4_client_reset
> > @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> > NFSPROC4_CLNT_CB_RECALL_ANY,
> > };
> >
> > +#define RCA4_TYPE_MASK_RDATA_DLG 0
> > +#define RCA4_TYPE_MASK_WDATA_DLG 1
> > +
> > /* Returns true iff a is later than b: */
> > static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> > {
> > --
> > 2.9.5
> >
>
> --
> Chuck Lever
>
>
>

--
Jeff Layton <[email protected]>

2022-11-11 21:35:37

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints


On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>> + )
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.

I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:

[root@nfsvmf24 ~]# uname -r
6.1.0-rc1+
[root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
[root@nfsvmf24 ~]# trace-cmd report
trace-cmd: No such file or directory
Error: expected type 4 but read 5
[nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
cpus=1
nfsd-3976 [000] 410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]

I also tried Fedora 36 and got same error.

Can you verify __get_sockaddr works with tracepoints on your system?

Thanks,
-Dai

>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->status
>> + )
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

2022-11-11 22:09:01

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints



> On Nov 11, 2022, at 4:34 PM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>>
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>>
>>> Signed-off-by: Dai Ngo <[email protected]>
>>> ---
>>> fs/nfsd/nfs4state.c | 2 ++
>>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> struct rpc_task *task)
>>> {
>>> + trace_nfsd_cb_recall_any_done(cb, task);
>>> switch (task->tk_status) {
>>> case -NFS4ERR_DELAY:
>>> rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> clp->cl_ra->ra_keep = 0;
>>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> }
>>>
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>>
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>>
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>>
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> __field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>>
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> + TP_PROTO(
>>> + const struct nfsd4_cb_recall_any *ra
>>> + ),
>>> + TP_ARGS(ra),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(u32, ra_keep)
>>> + __field(u32, ra_bmval)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> + __entry->ra_keep = ra->ra_keep;
>>> + __entry->ra_bmval = ra->ra_bmval[0];
>>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> + )
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>>
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>
> I tried but it did not work. I got the same error as 'nfsd_cb_args' tracepoint:
>
> [root@nfsvmf24 ~]# uname -r
> 6.1.0-rc1+
> [root@nfsvmf24 ~]# trace-cmd record -e nfsd_cb_args
> [root@nfsvmf24 ~]# trace-cmd report
> trace-cmd: No such file or directory
> Error: expected type 4 but read 5
> [nfsd:nfsd_cb_args]*function __get_sockaddr not defined*
> cpus=1
> nfsd-3976 [000] 410.956975: nfsd_cb_args: [*FAILED TO PARSE*] cl_boot=1668201586 cl_id=2938128369 prog=1073741824 ident=1 addr=ARRAY[02, 00, 80, 93, 0a, 50, 6f, 5f, 00, ..]
>
> I also tried Fedora 36 and got same error.
>
> Can you verify __get_sockaddr works with tracepoints on your system?

The user space libraries still don't have support for __get_sockaddr().

Assuming that eventually those libraries will get the proper support,
in this case I'm willing to take tracepoints that don't parse.


> Thanks,
> -Dai
>
>>
>>
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> + TP_PROTO(
>>> + const struct nfsd4_callback *cb,
>>> + const struct rpc_task *task
>>> + ),
>>> + TP_ARGS(cb, task),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(int, status)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->status = task->tk_status;
>>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->status
>>> + )
>>> +);
>> I'd like you to change this to
>>
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>>
>>
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>>
>>> #undef TRACE_INCLUDE_PATH
>>> --
>>> 2.9.5
>>>
>> --
>> Chuck Lever

--
Chuck Lever




2022-11-15 05:24:13

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints


On 11/11/22 7:25 AM, Chuck Lever III wrote:
>
>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 55 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 813cdb67b370..eac7212c9218 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>>
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..efc69c96bcbd 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>> + )
>> +);
> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>
> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>
>
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>> + sizeof(struct sockaddr_in6));
>> + ),
>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>> + __entry->cl_boot, __entry->cl_id,
>> + __entry->addr, __entry->status
>> + )
>> +);
> I'd like you to change this to
>
> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);

TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
for nfsd_cb_recall_any_done?

Note that nfsd_cb_done_class does not print the server IP address
which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
class to print the IP address from rpc_xprt?

-Dai

>
>
>> +
>> #endif /* _NFSD_TRACE_H */
>>
>> #undef TRACE_INCLUDE_PATH
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

2022-11-15 14:27:33

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v4 3/3] NFSD: add CB_RECALL_ANY tracepoints



> On Nov 15, 2022, at 12:08 AM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/11/22 7:25 AM, Chuck Lever III wrote:
>>
>>> On Nov 9, 2022, at 11:17 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>>
>>> Signed-off-by: Dai Ngo <[email protected]>
>>> ---
>>> fs/nfsd/nfs4state.c | 2 ++
>>> fs/nfsd/trace.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 2 files changed, 55 insertions(+)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 813cdb67b370..eac7212c9218 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -2859,6 +2859,7 @@ static int
>>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>>> struct rpc_task *task)
>>> {
>>> + trace_nfsd_cb_recall_any_done(cb, task);
>>> switch (task->tk_status) {
>>> case -NFS4ERR_DELAY:
>>> rpc_delay(task, 2 * HZ);
>>> @@ -6242,6 +6243,7 @@ deleg_reaper(struct work_struct *deleg_work)
>>> clp->cl_ra->ra_keep = 0;
>>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>>> }
>>>
>>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>>> index 06a96e955bd0..efc69c96bcbd 100644
>>> --- a/fs/nfsd/trace.h
>>> +++ b/fs/nfsd/trace.h
>>> @@ -9,9 +9,11 @@
>>> #define _NFSD_TRACE_H
>>>
>>> #include <linux/tracepoint.h>
>>> +#include <linux/sunrpc/xprt.h>
>>>
>>> #include "export.h"
>>> #include "nfsfh.h"
>>> +#include "xdr4.h"
>>>
>>> #define NFSD_TRACE_PROC_RES_FIELDS \
>>> __field(unsigned int, netns_ino) \
>>> @@ -1510,6 +1512,57 @@ DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
>>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
>>>
>>> +TRACE_EVENT(nfsd_cb_recall_any,
>>> + TP_PROTO(
>>> + const struct nfsd4_cb_recall_any *ra
>>> + ),
>>> + TP_ARGS(ra),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(u32, ra_keep)
>>> + __field(u32, ra_bmval)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>>> + __entry->ra_keep = ra->ra_keep;
>>> + __entry->ra_bmval = ra->ra_bmval[0];
>>> + memcpy(__entry->addr, &ra->ra_cb.cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->ra_keep, __entry->ra_bmval
>>> + )
>>> +);
>> This one should go earlier in the file, after "TRACE_EVENT(nfsd_cb_offload,"
>>
>> And let's use __sockaddr() and friends like the other nfsd_cb_ tracepoints.
>>
>>
>>> +
>>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>>> + TP_PROTO(
>>> + const struct nfsd4_callback *cb,
>>> + const struct rpc_task *task
>>> + ),
>>> + TP_ARGS(cb, task),
>>> + TP_STRUCT__entry(
>>> + __field(u32, cl_boot)
>>> + __field(u32, cl_id)
>>> + __field(int, status)
>>> + __array(unsigned char, addr, sizeof(struct sockaddr_in6))
>>> + ),
>>> + TP_fast_assign(
>>> + __entry->status = task->tk_status;
>>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>>> + memcpy(__entry->addr, &cb->cb_clp->cl_addr,
>>> + sizeof(struct sockaddr_in6));
>>> + ),
>>> + TP_printk("client %08x:%08x addr=%pISpc status=%d",
>>> + __entry->cl_boot, __entry->cl_id,
>>> + __entry->addr, __entry->status
>>> + )
>>> +);
>> I'd like you to change this to
>>
>> DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_any_done);
>
> TP_PRORO of DEFINE_NFSD_CB_DONE_EVENT requires a stateid_t which
> CB_RECALL_ANY does not have. Can we can create a dummy stateid_t
> for nfsd_cb_recall_any_done?

Ah, I didn't remember that a state ID was recorded. OK, then a
separate TRACE_EVENT is appropriate for nfsd_cb_recall_any_done.


> Note that nfsd_cb_done_class does not print the server IP address
> which is more useful for tracing. Should I modify nfsd_cb_recall_any_done
> class to print the IP address from rpc_xprt?

The IP address is recorded by the Call side tracepoints. I'm OK
leaving the IP address out of the Reply side tracepoints.


> -Dai
>
>>
>>
>>> +
>>> #endif /* _NFSD_TRACE_H */
>>>
>>> #undef TRACE_INCLUDE_PATH
>>> --
>>> 2.9.5
>>>
>> --
>> Chuck Lever

--
Chuck Lever