2022-11-17 03:50:30

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper

This patch series adds:

. refactor courtesy_client_reaper to a generic low memory
shrinker.

. XDR encode and decode function for CB_RECALL_ANY op.

. the delegation reaper that sends the advisory CB_RECALL_ANY
to the clients to release unused delegations.
There is only one nfsd4_callback added for each nfs4_cleint.
Access to it must be serialized via the client flag
NFSD4_CLIENT_CB_RECALL_ANY.

. Add CB_RECALL_ANY tracepoints.

v2:
. modify deleg_reaper to check and send CB_RECALL_ANY to client
only once per 5 secs.
v3:
. modify nfsd4_cb_recall_any_release to use nn->client_lock to
protect cl_recall_any_busy and call put_client_renew_locked
to decrement cl_rpc_users.

v4:
. move changes in nfs4state.c from patch (1/2) to patch(2/2).
. use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
to encode CB_RECALL_ANY arguments.
. add struct nfsd4_cb_recall_any with embedded nfs4_callback
and params for CB_RECALL_ANY op.
. replace cl_recall_any_busy boolean with client flag
NFSD4_CLIENT_CB_RECALL_ANY
. add tracepoints for CB_RECALL_ANY

v5:
. refactor courtesy_client_reaper to a generic low memory
shrinker
. merge courtesy client shrinker and delegtion shrinker into
one.
. reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
in nfsd/trace.h
. use __get_sockaddr to display server IP address in
tracepoints.
. modify encode_cb_recallany4args to replace sizeof with
ARRAY_SIZE.

---

Dai Ngo (4):
NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
NFSD: add support for sending CB_RECALL_ANY
NFSD: add delegation shrinker to react to low memory condition
NFSD: add CB_RECALL_ANY tracepoints

fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
fs/nfsd/state.h | 9 ++++
fs/nfsd/trace.h | 49 +++++++++++++++++++
fs/nfsd/xdr4.h | 5 ++
fs/nfsd/xdr4cb.h | 6 +++
6 files changed, 234 insertions(+), 13 deletions(-)


2022-11-17 03:50:43

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v5 1/4] NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker

Refactoring courtesy_client_reaper to generic low memory
shrinker so it can be used for other purposes.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 836bd825ca4a..142481bc96de 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
}

static unsigned long
-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
+nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
int cnt;
struct nfsd_net *nn = container_of(shrink,
@@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
}

static unsigned long
-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
+nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
return SHRINK_STOP;
}
@@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);

atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
+ nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
}
@@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
}

static void
-courtesy_client_reaper(struct work_struct *reaper)
+courtesy_client_reaper(struct nfsd_net *nn)
{
struct list_head reaplist;
- struct delayed_work *dwork = to_delayed_work(reaper);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
- nfsd_shrinker_work);

nfs4_get_courtesy_client_reaplist(nn, &reaplist);
nfs4_process_client_reaplist(&reaplist);
}

+static void
+nfsd4_lowmem_shrinker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
get_net(net);

return 0;
--
2.9.5


2022-11-17 03:51:19

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v5 2/4] NFSD: add support for sending CB_RECALL_ANY

Add XDR encode and decode function for CB_RECALL_ANY.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 1 +
fs/nfsd/xdr4.h | 5 ++++
fs/nfsd/xdr4cb.h | 6 +++++
4 files changed, 74 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..01226e5233cd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}

/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
+ ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}

/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}

+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};

static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..6b33cbbe76d3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -639,6 +639,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};

/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..4fd2cf6d1d2d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
union nfsd4_op_u *);
};

+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};

#endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.9.5


2022-11-17 04:00:36

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v5 4/4] NFSD: add CB_RECALL_ANY tracepoints

Add tracepoints to trace start and end of CB_RECALL_ANY operation.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 2 ++
fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 13f326ae928c..935d669e2526 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2859,6 +2859,7 @@ static int
nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ trace_nfsd_cb_recall_any_done(cb, task);
switch (task->tk_status) {
case -NFS4ERR_DELAY:
rpc_delay(task, 2 * HZ);
@@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
}
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..bee951c335f1 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,11 @@
#define _NFSD_TRACE_H

#include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>

#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"

#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
@@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);

+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, ra_keep)
+ __field(u32, ra_bmval)
+ __sockaddr(addr, sizeof(struct sockaddr_storage))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->ra_keep = ra->ra_keep;
+ __entry->ra_bmval = ra->ra_bmval[0];
+ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
+ sizeof(struct sockaddr_storage))
+ ),
+ TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
+ __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval
+ )
+);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->status
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_cb_done_class,
TP_PROTO(
const stateid_t *stp,
--
2.9.5


2022-11-17 04:00:52

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v5 3/4] NFSD: add delegation reaper to react to low memory condition

The delegation reaper is called by nfsd memory shrinker's on
the 'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation reaper sends only one CB_RECALL_ANY request to each
client per 5 seconds.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++---
fs/nfsd/state.h | 8 +++++
2 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 142481bc96de..13f326ae928c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
kmem_cache_free(client_slab, clp);
}

@@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};

+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ put_client_renew_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}

@@ -4349,14 +4388,16 @@ nfsd4_init_slabs(void)
static unsigned long
nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
- int cnt;
+ int count;
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);

- cnt = atomic_read(&nn->nfsd_courtesy_clients);
- if (cnt > 0)
+ count = atomic_read(&nn->nfsd_courtesy_clients);
+ if (!count)
+ count = atomic_long_read(&num_delegations);
+ if (count)
mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
- return (unsigned long)cnt;
+ return (unsigned long)count;
}

static unsigned long
@@ -6134,6 +6175,45 @@ courtesy_client_reaper(struct nfsd_net *nn)
}

static void
+deleg_reaper(struct nfsd_net *nn)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+ (ktime_get_boottime_seconds() -
+ clp->cl_ra_time < 5)) {
+ continue;
+ }
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&cblist)) {
+ clp = list_first_entry(&cblist, struct nfs4_client,
+ cl_ra_cblist);
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+}
+
+static void
nfsd4_lowmem_shrinker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
@@ -6141,6 +6221,7 @@ nfsd4_lowmem_shrinker(struct work_struct *work)
nfsd_shrinker_work);

courtesy_client_reaper(nn);
+ deleg_reaper(nn);
}

static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
+#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {

unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
+ struct list_head cl_ra_cblist;
};

/* struct nfs4_client_reset
@@ -642,6 +647,9 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_RECALL_ANY,
};

+#define RCA4_TYPE_MASK_RDATA_DLG 0
+#define RCA4_TYPE_MASK_WDATA_DLG 1
+
/* Returns true iff a is later than b: */
static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
{
--
2.9.5


2022-11-17 15:00:39

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper



> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> This patch series adds:
>
> . refactor courtesy_client_reaper to a generic low memory
> shrinker.
>
> . XDR encode and decode function for CB_RECALL_ANY op.
>
> . the delegation reaper that sends the advisory CB_RECALL_ANY
> to the clients to release unused delegations.
> There is only one nfsd4_callback added for each nfs4_cleint.
> Access to it must be serialized via the client flag
> NFSD4_CLIENT_CB_RECALL_ANY.
>
> . Add CB_RECALL_ANY tracepoints.
>
> v2:
> . modify deleg_reaper to check and send CB_RECALL_ANY to client
> only once per 5 secs.
> v3:
> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
> protect cl_recall_any_busy and call put_client_renew_locked
> to decrement cl_rpc_users.
>
> v4:
> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
> to encode CB_RECALL_ANY arguments.
> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
> and params for CB_RECALL_ANY op.
> . replace cl_recall_any_busy boolean with client flag
> NFSD4_CLIENT_CB_RECALL_ANY
> . add tracepoints for CB_RECALL_ANY
>
> v5:
> . refactor courtesy_client_reaper to a generic low memory
> shrinker
> . merge courtesy client shrinker and delegtion shrinker into
> one.
> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
> in nfsd/trace.h
> . use __get_sockaddr to display server IP address in
> tracepoints.
> . modify encode_cb_recallany4args to replace sizeof with
> ARRAY_SIZE.

Hi-

I'm going to apply this version of the series with some minor
changes. I'll reply to the individual patches where we can
discuss those.


> ---
>
> Dai Ngo (4):
> NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
> NFSD: add support for sending CB_RECALL_ANY
> NFSD: add delegation shrinker to react to low memory condition
> NFSD: add CB_RECALL_ANY tracepoints
>
> fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
> fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
> fs/nfsd/state.h | 9 ++++
> fs/nfsd/trace.h | 49 +++++++++++++++++++
> fs/nfsd/xdr4.h | 5 ++
> fs/nfsd/xdr4cb.h | 6 +++
> 6 files changed, 234 insertions(+), 13 deletions(-)

--
Chuck Lever




2022-11-17 15:00:39

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 1/4] NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker



> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> Refactoring courtesy_client_reaper to generic low memory
> shrinker so it can be used for other purposes.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
> 1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 836bd825ca4a..142481bc96de 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
> }
>
> static unsigned long
> -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
> +nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> {
> int cnt;
> struct nfsd_net *nn = container_of(shrink,
> @@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
> }
>
> static unsigned long
> -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> +nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> {
> return SHRINK_STOP;
> }
> @@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
>
> atomic_set(&nn->nfsd_courtesy_clients, 0);
> - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> + nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
> + nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> }
> @@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
> }
>
> static void
> -courtesy_client_reaper(struct work_struct *reaper)
> +courtesy_client_reaper(struct nfsd_net *nn)
> {
> struct list_head reaplist;
> - struct delayed_work *dwork = to_delayed_work(reaper);
> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> - nfsd_shrinker_work);
>
> nfs4_get_courtesy_client_reaplist(nn, &reaplist);
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +nfsd4_lowmem_shrinker(struct work_struct *work)

All shrinkers run due to low memory, so I think this name
is a bit redundant. How about nfsd4_state_shrinker?


> +{
> + struct delayed_work *dwork = to_delayed_work(work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_shrinker_work);
> +
> + courtesy_client_reaper(nn);
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
> get_net(net);
>
> return 0;
> --
> 2.9.5
>

--
Chuck Lever




2022-11-17 15:05:45

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 4/4] NFSD: add CB_RECALL_ANY tracepoints



> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 2 ++
> fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 13f326ae928c..935d669e2526 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2859,6 +2859,7 @@ static int
> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> struct rpc_task *task)
> {
> + trace_nfsd_cb_recall_any_done(cb, task);
> switch (task->tk_status) {
> case -NFS4ERR_DELAY:
> rpc_delay(task, 2 * HZ);
> @@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
> clp->cl_ra->ra_keep = 0;
> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + trace_nfsd_cb_recall_any(clp->cl_ra);
> nfsd4_run_cb(&clp->cl_ra->ra_cb);
> }
> }
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 06a96e955bd0..bee951c335f1 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -9,9 +9,11 @@
> #define _NFSD_TRACE_H
>
> #include <linux/tracepoint.h>
> +#include <linux/sunrpc/xprt.h>
>
> #include "export.h"
> #include "nfsfh.h"
> +#include "xdr4.h"
>
> #define NFSD_TRACE_PROC_RES_FIELDS \
> __field(unsigned int, netns_ino) \
> @@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
> __entry->fh_hash, __entry->count, __entry->status)
> );
>
> +TRACE_EVENT(nfsd_cb_recall_any,
> + TP_PROTO(
> + const struct nfsd4_cb_recall_any *ra
> + ),
> + TP_ARGS(ra),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(u32, ra_keep)
> + __field(u32, ra_bmval)
> + __sockaddr(addr, sizeof(struct sockaddr_storage))
> + ),
> + TP_fast_assign(
> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
> + __entry->ra_keep = ra->ra_keep;
> + __entry->ra_bmval = ra->ra_bmval[0];
> + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
> + sizeof(struct sockaddr_storage))
> + ),
> + TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
> + __entry->cl_boot, __entry->cl_id,
> + __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval

This needs a "show_recall_any_bitmap" to convert to human-readable
symbols.


> + )
> +);
> +
> +TRACE_EVENT(nfsd_cb_recall_any_done,
> + TP_PROTO(
> + const struct nfsd4_callback *cb,
> + const struct rpc_task *task
> + ),
> + TP_ARGS(cb, task),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(int, status)
> + ),
> + TP_fast_assign(
> + __entry->status = task->tk_status;
> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
> + ),
> + TP_printk("client %08x:%08x status=%d",
> + __entry->cl_boot, __entry->cl_id, __entry->status
> + )
> +);
> +
> DECLARE_EVENT_CLASS(nfsd_cb_done_class,
> TP_PROTO(
> const stateid_t *stp,
> --
> 2.9.5
>

--
Chuck Lever




2022-11-17 15:06:44

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 3/4] NFSD: add delegation reaper to react to low memory condition



> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> The delegation reaper is called by nfsd memory shrinker's on
> the 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation reaper sends only one CB_RECALL_ANY request to each
> client per 5 seconds.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> fs/nfsd/state.h | 8 +++++
> 2 files changed, 93 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 142481bc96de..13f326ae928c 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> kfree(clp->cl_nii_domain.data);
> kfree(clp->cl_nii_name.data);
> idr_destroy(&clp->cl_stateids);
> + kfree(clp->cl_ra);
> kmem_cache_free(client_slab, clp);
> }
>
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> [3] = {""},
> };
>
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> + struct rpc_task *task)
> +{
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> + spin_lock(&nn->client_lock);
> + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + put_client_renew_locked(clp);
> + spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> + .done = nfsd4_cb_recall_any_done,
> + .release = nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> free_client(clp);
> return NULL;
> }
> + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> + if (!clp->cl_ra) {
> + free_client(clp);
> + return NULL;
> + }
> + clp->cl_ra_time = 0;
> + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> + NFSPROC4_CLNT_CB_RECALL_ANY);
> return clp;
> }
>
> @@ -4349,14 +4388,16 @@ nfsd4_init_slabs(void)
> static unsigned long
> nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> {
> - int cnt;
> + int count;
> struct nfsd_net *nn = container_of(shrink,
> struct nfsd_net, nfsd_client_shrinker);
>
> - cnt = atomic_read(&nn->nfsd_courtesy_clients);
> - if (cnt > 0)
> + count = atomic_read(&nn->nfsd_courtesy_clients);
> + if (!count)
> + count = atomic_long_read(&num_delegations);
> + if (count)
> mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
> - return (unsigned long)cnt;
> + return (unsigned long)count;
> }
>
> static unsigned long
> @@ -6134,6 +6175,45 @@ courtesy_client_reaper(struct nfsd_net *nn)
> }
>
> static void
> +deleg_reaper(struct nfsd_net *nn)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> + (ktime_get_boottime_seconds() -
> + clp->cl_ra_time < 5)) {
> + continue;
> + }
> + list_add(&clp->cl_ra_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + clp->cl_ra_time = ktime_get_boottime_seconds();
> + }
> + spin_unlock(&nn->client_lock);
> +
> + while (!list_empty(&cblist)) {
> + clp = list_first_entry(&cblist, struct nfs4_client,
> + cl_ra_cblist);
> + list_del_init(&clp->cl_ra_cblist);
> + clp->cl_ra->ra_keep = 0;
> + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);

Linux NFSD doesn't hand out write delegations. I don't think we
should set WDATA_DLG yet...?


> + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> + }
> +}
> +
> +static void
> nfsd4_lowmem_shrinker(struct work_struct *work)
> {
> struct delayed_work *dwork = to_delayed_work(work);
> @@ -6141,6 +6221,7 @@ nfsd4_lowmem_shrinker(struct work_struct *work)
> nfsd_shrinker_work);
>
> courtesy_client_reaper(nn);
> + deleg_reaper(nn);
> }
>
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> 1 << NFSD4_CLIENT_CB_KILL)
> +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> unsigned long cl_flags;
> const struct cred *cl_cb_cred;
> struct rpc_clnt *cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
>
> unsigned int cl_state;
> atomic_t cl_delegs_in_recall;
> +
> + struct nfsd4_cb_recall_any *cl_ra;
> + time64_t cl_ra_time;
> + struct list_head cl_ra_cblist;
> };
>
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> +#define RCA4_TYPE_MASK_RDATA_DLG 0
> +#define RCA4_TYPE_MASK_WDATA_DLG 1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> --
> 2.9.5
>

--
Chuck Lever




2022-11-17 17:01:28

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v5 1/4] NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker


On 11/17/22 6:45 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> Refactoring courtesy_client_reaper to generic low memory
>> shrinker so it can be used for other purposes.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 836bd825ca4a..142481bc96de 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
>> }
>>
>> static unsigned long
>> -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
>> +nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> {
>> int cnt;
>> struct nfsd_net *nn = container_of(shrink,
>> @@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
>> }
>>
>> static unsigned long
>> -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>> +nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
>> {
>> return SHRINK_STOP;
>> }
>> @@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>> nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
>>
>> atomic_set(&nn->nfsd_courtesy_clients, 0);
>> - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>> - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>> + nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
>> + nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
>> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
>> return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> }
>> @@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
>> }
>>
>> static void
>> -courtesy_client_reaper(struct work_struct *reaper)
>> +courtesy_client_reaper(struct nfsd_net *nn)
>> {
>> struct list_head reaplist;
>> - struct delayed_work *dwork = to_delayed_work(reaper);
>> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> - nfsd_shrinker_work);
>>
>> nfs4_get_courtesy_client_reaplist(nn, &reaplist);
>> nfs4_process_client_reaplist(&reaplist);
>> }
>>
>> +static void
>> +nfsd4_lowmem_shrinker(struct work_struct *work)
> All shrinkers run due to low memory, so I think this name
> is a bit redundant. How about nfsd4_state_shrinker?

ok, it makes sense.

Thanks,
-Dai

>
>
>> +{
>> + struct delayed_work *dwork = to_delayed_work(work);
>> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> + nfsd_shrinker_work);
>> +
>> + courtesy_client_reaper(nn);
>> +}
>> +
>> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>> {
>> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
>> @@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
>> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>>
>> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
>> + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
>> get_net(net);
>>
>> return 0;
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

2022-11-17 17:02:28

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper


On 11/17/22 6:44 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> This patch series adds:
>>
>> . refactor courtesy_client_reaper to a generic low memory
>> shrinker.
>>
>> . XDR encode and decode function for CB_RECALL_ANY op.
>>
>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>> to the clients to release unused delegations.
>> There is only one nfsd4_callback added for each nfs4_cleint.
>> Access to it must be serialized via the client flag
>> NFSD4_CLIENT_CB_RECALL_ANY.
>>
>> . Add CB_RECALL_ANY tracepoints.
>>
>> v2:
>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>> only once per 5 secs.
>> v3:
>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>> protect cl_recall_any_busy and call put_client_renew_locked
>> to decrement cl_rpc_users.
>>
>> v4:
>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>> to encode CB_RECALL_ANY arguments.
>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>> and params for CB_RECALL_ANY op.
>> . replace cl_recall_any_busy boolean with client flag
>> NFSD4_CLIENT_CB_RECALL_ANY
>> . add tracepoints for CB_RECALL_ANY
>>
>> v5:
>> . refactor courtesy_client_reaper to a generic low memory
>> shrinker
>> . merge courtesy client shrinker and delegtion shrinker into
>> one.
>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>> in nfsd/trace.h
>> . use __get_sockaddr to display server IP address in
>> tracepoints.
>> . modify encode_cb_recallany4args to replace sizeof with
>> ARRAY_SIZE.
> Hi-
>
> I'm going to apply this version of the series with some minor
> changes. I'll reply to the individual patches where we can
> discuss those.

Thank you Chuck!

-Dai

>
>
>> ---
>>
>> Dai Ngo (4):
>> NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
>> NFSD: add support for sending CB_RECALL_ANY
>> NFSD: add delegation shrinker to react to low memory condition
>> NFSD: add CB_RECALL_ANY tracepoints
>>
>> fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
>> fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
>> fs/nfsd/state.h | 9 ++++
>> fs/nfsd/trace.h | 49 +++++++++++++++++++
>> fs/nfsd/xdr4.h | 5 ++
>> fs/nfsd/xdr4cb.h | 6 +++
>> 6 files changed, 234 insertions(+), 13 deletions(-)
> --
> Chuck Lever
>
>
>

2022-11-17 17:02:41

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v5 4/4] NFSD: add CB_RECALL_ANY tracepoints


On 11/17/22 6:45 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 51 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 13f326ae928c..935d669e2526 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>> }
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..bee951c335f1 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
>> __entry->fh_hash, __entry->count, __entry->status)
>> );
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __sockaddr(addr, sizeof(struct sockaddr_storage))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_storage))
>> + ),
>> + TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval
> This needs a "show_recall_any_bitmap" to convert to human-readable
> symbols.

Yes, it makes sense.

Thanks,
-Dai

>
>
>> + )
>> +);
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + ),
>> + TP_printk("client %08x:%08x status=%d",
>> + __entry->cl_boot, __entry->cl_id, __entry->status
>> + )
>> +);
>> +
>> DECLARE_EVENT_CLASS(nfsd_cb_done_class,
>> TP_PROTO(
>> const stateid_t *stp,
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>

2022-11-17 17:07:30

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper



> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>
>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> This patch series adds:
>>>
>>> . refactor courtesy_client_reaper to a generic low memory
>>> shrinker.
>>>
>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>
>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>> to the clients to release unused delegations.
>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>> Access to it must be serialized via the client flag
>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>
>>> . Add CB_RECALL_ANY tracepoints.
>>>
>>> v2:
>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>> only once per 5 secs.
>>> v3:
>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>> protect cl_recall_any_busy and call put_client_renew_locked
>>> to decrement cl_rpc_users.
>>>
>>> v4:
>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>> to encode CB_RECALL_ANY arguments.
>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>> and params for CB_RECALL_ANY op.
>>> . replace cl_recall_any_busy boolean with client flag
>>> NFSD4_CLIENT_CB_RECALL_ANY
>>> . add tracepoints for CB_RECALL_ANY
>>>
>>> v5:
>>> . refactor courtesy_client_reaper to a generic low memory
>>> shrinker
>>> . merge courtesy client shrinker and delegtion shrinker into
>>> one.
>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>> in nfsd/trace.h
>>> . use __get_sockaddr to display server IP address in
>>> tracepoints.
>>> . modify encode_cb_recallany4args to replace sizeof with
>>> ARRAY_SIZE.
>> Hi-
>>
>> I'm going to apply this version of the series with some minor
>> changes. I'll reply to the individual patches where we can
>> discuss those.
>
> Thank you Chuck!

Changes folded in and pushed to nfsd's for-next. For the trace
point patch, I've rebased your series on top of the patch that
relocates include/trace/events/nfs.h so that the new
show_rca_mask() macro can go in the common nfs.h instead of
the NFSD-specific fs/nfsd/trace.h.

Feel free to pull and test to make sure I didn't do anything
bone-headed.


--
Chuck Lever




2022-11-17 18:45:04

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper


On 11/17/22 9:04 AM, Chuck Lever III wrote:
>
>> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>>
>>
>> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>>
>>>> This patch series adds:
>>>>
>>>> . refactor courtesy_client_reaper to a generic low memory
>>>> shrinker.
>>>>
>>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>>
>>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>>> to the clients to release unused delegations.
>>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>>> Access to it must be serialized via the client flag
>>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>>
>>>> . Add CB_RECALL_ANY tracepoints.
>>>>
>>>> v2:
>>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>>> only once per 5 secs.
>>>> v3:
>>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>>> protect cl_recall_any_busy and call put_client_renew_locked
>>>> to decrement cl_rpc_users.
>>>>
>>>> v4:
>>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>>> to encode CB_RECALL_ANY arguments.
>>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>>> and params for CB_RECALL_ANY op.
>>>> . replace cl_recall_any_busy boolean with client flag
>>>> NFSD4_CLIENT_CB_RECALL_ANY
>>>> . add tracepoints for CB_RECALL_ANY
>>>>
>>>> v5:
>>>> . refactor courtesy_client_reaper to a generic low memory
>>>> shrinker
>>>> . merge courtesy client shrinker and delegtion shrinker into
>>>> one.
>>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>>> in nfsd/trace.h
>>>> . use __get_sockaddr to display server IP address in
>>>> tracepoints.
>>>> . modify encode_cb_recallany4args to replace sizeof with
>>>> ARRAY_SIZE.
>>> Hi-
>>>
>>> I'm going to apply this version of the series with some minor
>>> changes. I'll reply to the individual patches where we can
>>> discuss those.
>> Thank you Chuck!
> Changes folded in and pushed to nfsd's for-next. For the trace
> point patch, I've rebased your series on top of the patch that
> relocates include/trace/events/nfs.h so that the new
> show_rca_mask() macro can go in the common nfs.h instead of
> the NFSD-specific fs/nfsd/trace.h.
>
> Feel free to pull and test to make sure I didn't do anything
> bone-headed.

I removed the get_sockaddr temporarily to test show_rca_mask.
It works fine:

[root@nfsvmf24 ~]# trace-cmd report
trace-cmd: No such file or directory
Error: expected type 4 but read 5
cpus=1
kworker/u2:6-2297 [000] 1349.863391: nfsd_cb_recall_any: client 63767ac9:adb1a3fb keep=0 bmval0=RDATA_DLG
kworker/u2:0-8698 [000] 1349.869652: nfsd_cb_recall_any_done: client 63767ac9:adb1a3fb status=0

Thanks,
-Dai

>
>
> --
> Chuck Lever
>
>
>

2022-11-17 18:55:25

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v5 0/4] add support for CB_RECALL_ANY and the delegation reaper



> On Nov 17, 2022, at 1:43 PM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/17/22 9:04 AM, Chuck Lever III wrote:
>>
>>> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>>>
>>>
>>> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>>>
>>>>> This patch series adds:
>>>>>
>>>>> . refactor courtesy_client_reaper to a generic low memory
>>>>> shrinker.
>>>>>
>>>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>>>
>>>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>>>> to the clients to release unused delegations.
>>>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>>>> Access to it must be serialized via the client flag
>>>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>>>
>>>>> . Add CB_RECALL_ANY tracepoints.
>>>>>
>>>>> v2:
>>>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>>>> only once per 5 secs.
>>>>> v3:
>>>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>>>> protect cl_recall_any_busy and call put_client_renew_locked
>>>>> to decrement cl_rpc_users.
>>>>>
>>>>> v4:
>>>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>>>> to encode CB_RECALL_ANY arguments.
>>>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>>>> and params for CB_RECALL_ANY op.
>>>>> . replace cl_recall_any_busy boolean with client flag
>>>>> NFSD4_CLIENT_CB_RECALL_ANY
>>>>> . add tracepoints for CB_RECALL_ANY
>>>>>
>>>>> v5:
>>>>> . refactor courtesy_client_reaper to a generic low memory
>>>>> shrinker
>>>>> . merge courtesy client shrinker and delegtion shrinker into
>>>>> one.
>>>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>>>> in nfsd/trace.h
>>>>> . use __get_sockaddr to display server IP address in
>>>>> tracepoints.
>>>>> . modify encode_cb_recallany4args to replace sizeof with
>>>>> ARRAY_SIZE.
>>>> Hi-
>>>>
>>>> I'm going to apply this version of the series with some minor
>>>> changes. I'll reply to the individual patches where we can
>>>> discuss those.
>>> Thank you Chuck!
>> Changes folded in and pushed to nfsd's for-next. For the trace
>> point patch, I've rebased your series on top of the patch that
>> relocates include/trace/events/nfs.h so that the new
>> show_rca_mask() macro can go in the common nfs.h instead of
>> the NFSD-specific fs/nfsd/trace.h.
>>
>> Feel free to pull and test to make sure I didn't do anything
>> bone-headed.
>
> I removed the get_sockaddr temporarily to test show_rca_mask.
> It works fine:
>
> [root@nfsvmf24 ~]# trace-cmd report
> trace-cmd: No such file or directory
> Error: expected type 4 but read 5
> cpus=1
> kworker/u2:6-2297 [000] 1349.863391: nfsd_cb_recall_any: client 63767ac9:adb1a3fb keep=0 bmval0=RDATA_DLG
> kworker/u2:0-8698 [000] 1349.869652: nfsd_cb_recall_any_done: client 63767ac9:adb1a3fb status=0

Perfect, thank you!


--
Chuck Lever