This patch series adds:
. refactor courtesy_client_reaper to a generic low memory
shrinker.
. XDR encode and decode function for CB_RECALL_ANY op.
. the delegation reaper that sends the advisory CB_RECALL_ANY
to the clients to release unused delegations.
There is only one nfsd4_callback added for each nfs4_cleint.
Access to it must be serialized via the client flag
NFSD4_CLIENT_CB_RECALL_ANY.
. Add CB_RECALL_ANY tracepoints.
v2:
. modify deleg_reaper to check and send CB_RECALL_ANY to client
only once per 5 secs.
v3:
. modify nfsd4_cb_recall_any_release to use nn->client_lock to
protect cl_recall_any_busy and call put_client_renew_locked
to decrement cl_rpc_users.
v4:
. move changes in nfs4state.c from patch (1/2) to patch(2/2).
. use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
to encode CB_RECALL_ANY arguments.
. add struct nfsd4_cb_recall_any with embedded nfs4_callback
and params for CB_RECALL_ANY op.
. replace cl_recall_any_busy boolean with client flag
NFSD4_CLIENT_CB_RECALL_ANY
. add tracepoints for CB_RECALL_ANY
v5:
. refactor courtesy_client_reaper to a generic low memory
shrinker
. merge courtesy client shrinker and delegtion shrinker into
one.
. reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
in nfsd/trace.h
. use __get_sockaddr to display server IP address in
tracepoints.
. modify encode_cb_recallany4args to replace sizeof with
ARRAY_SIZE.
---
Dai Ngo (4):
NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
NFSD: add support for sending CB_RECALL_ANY
NFSD: add delegation shrinker to react to low memory condition
NFSD: add CB_RECALL_ANY tracepoints
fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
fs/nfsd/state.h | 9 ++++
fs/nfsd/trace.h | 49 +++++++++++++++++++
fs/nfsd/xdr4.h | 5 ++
fs/nfsd/xdr4cb.h | 6 +++
6 files changed, 234 insertions(+), 13 deletions(-)
Refactoring courtesy_client_reaper to generic low memory
shrinker so it can be used for other purposes.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 836bd825ca4a..142481bc96de 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
}
static unsigned long
-nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
+nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
int cnt;
struct nfsd_net *nn = container_of(shrink,
@@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
}
static unsigned long
-nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
+nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
{
return SHRINK_STOP;
}
@@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
atomic_set(&nn->nfsd_courtesy_clients, 0);
- nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
- nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
+ nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
+ nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
}
@@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
}
static void
-courtesy_client_reaper(struct work_struct *reaper)
+courtesy_client_reaper(struct nfsd_net *nn)
{
struct list_head reaplist;
- struct delayed_work *dwork = to_delayed_work(reaper);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
- nfsd_shrinker_work);
nfs4_get_courtesy_client_reaplist(nn, &reaplist);
nfs4_process_client_reaplist(&reaplist);
}
+static void
+nfsd4_lowmem_shrinker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_shrinker_work);
+
+ courtesy_client_reaper(nn);
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
get_net(net);
return 0;
--
2.9.5
Add XDR encode and decode function for CB_RECALL_ANY.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4callback.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 1 +
fs/nfsd/xdr4.h | 5 ++++
fs/nfsd/xdr4cb.h | 6 +++++
4 files changed, 74 insertions(+)
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f..01226e5233cd 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -329,6 +329,25 @@ static void encode_cb_recall4args(struct xdr_stream *xdr,
}
/*
+ * CB_RECALLANY4args
+ *
+ * struct CB_RECALLANY4args {
+ * uint32_t craa_objects_to_keep;
+ * bitmap4 craa_type_mask;
+ * };
+ */
+static void
+encode_cb_recallany4args(struct xdr_stream *xdr,
+ struct nfs4_cb_compound_hdr *hdr, struct nfsd4_cb_recall_any *ra)
+{
+ encode_nfs_cb_opnum4(xdr, OP_CB_RECALL_ANY);
+ WARN_ON_ONCE(xdr_stream_encode_u32(xdr, ra->ra_keep) < 0);
+ WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr,
+ ra->ra_bmval, sizeof(ra->ra_bmval) / sizeof(u32)) < 0);
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -482,6 +501,26 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_nops(&hdr);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static void
+nfs4_xdr_enc_cb_recall_any(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfsd4_cb_recall_any *ra;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ ra = container_of(cb, struct nfsd4_cb_recall_any, ra_cb);
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_recallany4args(xdr, &hdr, ra);
+ encode_cb_nops(&hdr);
+}
/*
* NFSv4.0 and NFSv4.1 XDR decode functions
@@ -520,6 +559,28 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
}
+/*
+ * 20.6. Operation 8: CB_RECALL_ANY - Keep Any N Recallable Objects
+ */
+static int
+nfs4_xdr_dec_cb_recall_any(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ status = decode_cb_op_status(xdr, OP_CB_RECALL_ANY, &cb->cb_status);
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* CB_LAYOUTRECALL4args
@@ -783,6 +844,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
#endif
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
+ PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e2daef3cc003..6b33cbbe76d3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -639,6 +639,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_OFFLOAD,
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
+ NFSPROC4_CLNT_CB_RECALL_ANY,
};
/* Returns true iff a is later than b: */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 0eb00105d845..4fd2cf6d1d2d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -896,5 +896,10 @@ struct nfsd4_operation {
union nfsd4_op_u *);
};
+struct nfsd4_cb_recall_any {
+ struct nfsd4_callback ra_cb;
+ u32 ra_keep;
+ u32 ra_bmval[1];
+};
#endif
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 547cf07cf4e0..0d39af1b00a0 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -48,3 +48,9 @@
#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+#define NFS4_enc_cb_recall_any_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + 1 + 1)
+#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.9.5
Add tracepoints to trace start and end of CB_RECALL_ANY operation.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 2 ++
fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 13f326ae928c..935d669e2526 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2859,6 +2859,7 @@ static int
nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ trace_nfsd_cb_recall_any_done(cb, task);
switch (task->tk_status) {
case -NFS4ERR_DELAY:
rpc_delay(task, 2 * HZ);
@@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
}
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 06a96e955bd0..bee951c335f1 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,9 +9,11 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include <linux/sunrpc/xprt.h>
#include "export.h"
#include "nfsfh.h"
+#include "xdr4.h"
#define NFSD_TRACE_PROC_RES_FIELDS \
__field(unsigned int, netns_ino) \
@@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);
+TRACE_EVENT(nfsd_cb_recall_any,
+ TP_PROTO(
+ const struct nfsd4_cb_recall_any *ra
+ ),
+ TP_ARGS(ra),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, ra_keep)
+ __field(u32, ra_bmval)
+ __sockaddr(addr, sizeof(struct sockaddr_storage))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
+ __entry->ra_keep = ra->ra_keep;
+ __entry->ra_bmval = ra->ra_bmval[0];
+ __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
+ sizeof(struct sockaddr_storage))
+ ),
+ TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
+ __entry->cl_boot, __entry->cl_id,
+ __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval
+ )
+);
+
+TRACE_EVENT(nfsd_cb_recall_any_done,
+ TP_PROTO(
+ const struct nfsd4_callback *cb,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(cb, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->status = task->tk_status;
+ __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
+ __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->status
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_cb_done_class,
TP_PROTO(
const stateid_t *stp,
--
2.9.5
The delegation reaper is called by nfsd memory shrinker's on
the 'count' callback. It scans the client list and sends the
courtesy CB_RECALL_ANY to the clients that hold delegations.
To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation reaper sends only one CB_RECALL_ANY request to each
client per 5 seconds.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/nfs4state.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++---
fs/nfsd/state.h | 8 +++++
2 files changed, 93 insertions(+), 4 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 142481bc96de..13f326ae928c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
kfree(clp->cl_nii_domain.data);
kfree(clp->cl_nii_name.data);
idr_destroy(&clp->cl_stateids);
+ kfree(clp->cl_ra);
kmem_cache_free(client_slab, clp);
}
@@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
[3] = {""},
};
+static int
+nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
+ struct rpc_task *task)
+{
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_client *clp = cb->cb_clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+ spin_lock(&nn->client_lock);
+ clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ put_client_renew_locked(clp);
+ spin_unlock(&nn->client_lock);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
+ .done = nfsd4_cb_recall_any_done,
+ .release = nfsd4_cb_recall_any_release,
+};
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
+ clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
+ if (!clp->cl_ra) {
+ free_client(clp);
+ return NULL;
+ }
+ clp->cl_ra_time = 0;
+ nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
+ NFSPROC4_CLNT_CB_RECALL_ANY);
return clp;
}
@@ -4349,14 +4388,16 @@ nfsd4_init_slabs(void)
static unsigned long
nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
{
- int cnt;
+ int count;
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);
- cnt = atomic_read(&nn->nfsd_courtesy_clients);
- if (cnt > 0)
+ count = atomic_read(&nn->nfsd_courtesy_clients);
+ if (!count)
+ count = atomic_long_read(&num_delegations);
+ if (count)
mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
- return (unsigned long)cnt;
+ return (unsigned long)count;
}
static unsigned long
@@ -6134,6 +6175,45 @@ courtesy_client_reaper(struct nfsd_net *nn)
}
static void
+deleg_reaper(struct nfsd_net *nn)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
+ (ktime_get_boottime_seconds() -
+ clp->cl_ra_time < 5)) {
+ continue;
+ }
+ list_add(&clp->cl_ra_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
+ clp->cl_ra_time = ktime_get_boottime_seconds();
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&cblist)) {
+ clp = list_first_entry(&cblist, struct nfs4_client,
+ cl_ra_cblist);
+ list_del_init(&clp->cl_ra_cblist);
+ clp->cl_ra->ra_keep = 0;
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_ra->ra_cb);
+ }
+}
+
+static void
nfsd4_lowmem_shrinker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
@@ -6141,6 +6221,7 @@ nfsd4_lowmem_shrinker(struct work_struct *work)
nfsd_shrinker_work);
courtesy_client_reaper(nn);
+ deleg_reaper(nn);
}
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6b33cbbe76d3..12ce9792c5b6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -368,6 +368,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
+#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
@@ -411,6 +412,10 @@ struct nfs4_client {
unsigned int cl_state;
atomic_t cl_delegs_in_recall;
+
+ struct nfsd4_cb_recall_any *cl_ra;
+ time64_t cl_ra_time;
+ struct list_head cl_ra_cblist;
};
/* struct nfs4_client_reset
@@ -642,6 +647,9 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_RECALL_ANY,
};
+#define RCA4_TYPE_MASK_RDATA_DLG 0
+#define RCA4_TYPE_MASK_WDATA_DLG 1
+
/* Returns true iff a is later than b: */
static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
{
--
2.9.5
> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> This patch series adds:
>
> . refactor courtesy_client_reaper to a generic low memory
> shrinker.
>
> . XDR encode and decode function for CB_RECALL_ANY op.
>
> . the delegation reaper that sends the advisory CB_RECALL_ANY
> to the clients to release unused delegations.
> There is only one nfsd4_callback added for each nfs4_cleint.
> Access to it must be serialized via the client flag
> NFSD4_CLIENT_CB_RECALL_ANY.
>
> . Add CB_RECALL_ANY tracepoints.
>
> v2:
> . modify deleg_reaper to check and send CB_RECALL_ANY to client
> only once per 5 secs.
> v3:
> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
> protect cl_recall_any_busy and call put_client_renew_locked
> to decrement cl_rpc_users.
>
> v4:
> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
> to encode CB_RECALL_ANY arguments.
> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
> and params for CB_RECALL_ANY op.
> . replace cl_recall_any_busy boolean with client flag
> NFSD4_CLIENT_CB_RECALL_ANY
> . add tracepoints for CB_RECALL_ANY
>
> v5:
> . refactor courtesy_client_reaper to a generic low memory
> shrinker
> . merge courtesy client shrinker and delegtion shrinker into
> one.
> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
> in nfsd/trace.h
> . use __get_sockaddr to display server IP address in
> tracepoints.
> . modify encode_cb_recallany4args to replace sizeof with
> ARRAY_SIZE.
Hi-
I'm going to apply this version of the series with some minor
changes. I'll reply to the individual patches where we can
discuss those.
> ---
>
> Dai Ngo (4):
> NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
> NFSD: add support for sending CB_RECALL_ANY
> NFSD: add delegation shrinker to react to low memory condition
> NFSD: add CB_RECALL_ANY tracepoints
>
> fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
> fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
> fs/nfsd/state.h | 9 ++++
> fs/nfsd/trace.h | 49 +++++++++++++++++++
> fs/nfsd/xdr4.h | 5 ++
> fs/nfsd/xdr4cb.h | 6 +++
> 6 files changed, 234 insertions(+), 13 deletions(-)
--
Chuck Lever
> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> Refactoring courtesy_client_reaper to generic low memory
> shrinker so it can be used for other purposes.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
> 1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 836bd825ca4a..142481bc96de 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
> }
>
> static unsigned long
> -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
> +nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> {
> int cnt;
> struct nfsd_net *nn = container_of(shrink,
> @@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
> }
>
> static unsigned long
> -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> +nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> {
> return SHRINK_STOP;
> }
> @@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
>
> atomic_set(&nn->nfsd_courtesy_clients, 0);
> - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> + nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
> + nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> }
> @@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
> }
>
> static void
> -courtesy_client_reaper(struct work_struct *reaper)
> +courtesy_client_reaper(struct nfsd_net *nn)
> {
> struct list_head reaplist;
> - struct delayed_work *dwork = to_delayed_work(reaper);
> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> - nfsd_shrinker_work);
>
> nfs4_get_courtesy_client_reaplist(nn, &reaplist);
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +nfsd4_lowmem_shrinker(struct work_struct *work)
All shrinkers run due to low memory, so I think this name
is a bit redundant. How about nfsd4_state_shrinker?
> +{
> + struct delayed_work *dwork = to_delayed_work(work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_shrinker_work);
> +
> + courtesy_client_reaper(nn);
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
> get_net(net);
>
> return 0;
> --
> 2.9.5
>
--
Chuck Lever
> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 2 ++
> fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 51 insertions(+)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 13f326ae928c..935d669e2526 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2859,6 +2859,7 @@ static int
> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> struct rpc_task *task)
> {
> + trace_nfsd_cb_recall_any_done(cb, task);
> switch (task->tk_status) {
> case -NFS4ERR_DELAY:
> rpc_delay(task, 2 * HZ);
> @@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
> clp->cl_ra->ra_keep = 0;
> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + trace_nfsd_cb_recall_any(clp->cl_ra);
> nfsd4_run_cb(&clp->cl_ra->ra_cb);
> }
> }
> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
> index 06a96e955bd0..bee951c335f1 100644
> --- a/fs/nfsd/trace.h
> +++ b/fs/nfsd/trace.h
> @@ -9,9 +9,11 @@
> #define _NFSD_TRACE_H
>
> #include <linux/tracepoint.h>
> +#include <linux/sunrpc/xprt.h>
>
> #include "export.h"
> #include "nfsfh.h"
> +#include "xdr4.h"
>
> #define NFSD_TRACE_PROC_RES_FIELDS \
> __field(unsigned int, netns_ino) \
> @@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
> __entry->fh_hash, __entry->count, __entry->status)
> );
>
> +TRACE_EVENT(nfsd_cb_recall_any,
> + TP_PROTO(
> + const struct nfsd4_cb_recall_any *ra
> + ),
> + TP_ARGS(ra),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(u32, ra_keep)
> + __field(u32, ra_bmval)
> + __sockaddr(addr, sizeof(struct sockaddr_storage))
> + ),
> + TP_fast_assign(
> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
> + __entry->ra_keep = ra->ra_keep;
> + __entry->ra_bmval = ra->ra_bmval[0];
> + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
> + sizeof(struct sockaddr_storage))
> + ),
> + TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
> + __entry->cl_boot, __entry->cl_id,
> + __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval
This needs a "show_recall_any_bitmap" to convert to human-readable
symbols.
> + )
> +);
> +
> +TRACE_EVENT(nfsd_cb_recall_any_done,
> + TP_PROTO(
> + const struct nfsd4_callback *cb,
> + const struct rpc_task *task
> + ),
> + TP_ARGS(cb, task),
> + TP_STRUCT__entry(
> + __field(u32, cl_boot)
> + __field(u32, cl_id)
> + __field(int, status)
> + ),
> + TP_fast_assign(
> + __entry->status = task->tk_status;
> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
> + ),
> + TP_printk("client %08x:%08x status=%d",
> + __entry->cl_boot, __entry->cl_id, __entry->status
> + )
> +);
> +
> DECLARE_EVENT_CLASS(nfsd_cb_done_class,
> TP_PROTO(
> const stateid_t *stp,
> --
> 2.9.5
>
--
Chuck Lever
> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>
> The delegation reaper is called by nfsd memory shrinker's on
> the 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation reaper sends only one CB_RECALL_ANY request to each
> client per 5 seconds.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/nfs4state.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> fs/nfsd/state.h | 8 +++++
> 2 files changed, 93 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 142481bc96de..13f326ae928c 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2131,6 +2131,7 @@ static void __free_client(struct kref *k)
> kfree(clp->cl_nii_domain.data);
> kfree(clp->cl_nii_name.data);
> idr_destroy(&clp->cl_stateids);
> + kfree(clp->cl_ra);
> kmem_cache_free(client_slab, clp);
> }
>
> @@ -2854,6 +2855,36 @@ static const struct tree_descr client_files[] = {
> [3] = {""},
> };
>
> +static int
> +nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
> + struct rpc_task *task)
> +{
> + switch (task->tk_status) {
> + case -NFS4ERR_DELAY:
> + rpc_delay(task, 2 * HZ);
> + return 0;
> + default:
> + return 1;
> + }
> +}
> +
> +static void
> +nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
> +{
> + struct nfs4_client *clp = cb->cb_clp;
> + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
> +
> + spin_lock(&nn->client_lock);
> + clear_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + put_client_renew_locked(clp);
> + spin_unlock(&nn->client_lock);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
> + .done = nfsd4_cb_recall_any_done,
> + .release = nfsd4_cb_recall_any_release,
> +};
> +
> static struct nfs4_client *create_client(struct xdr_netobj name,
> struct svc_rqst *rqstp, nfs4_verifier *verf)
> {
> @@ -2891,6 +2922,14 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
> free_client(clp);
> return NULL;
> }
> + clp->cl_ra = kzalloc(sizeof(*clp->cl_ra), GFP_KERNEL);
> + if (!clp->cl_ra) {
> + free_client(clp);
> + return NULL;
> + }
> + clp->cl_ra_time = 0;
> + nfsd4_init_cb(&clp->cl_ra->ra_cb, clp, &nfsd4_cb_recall_any_ops,
> + NFSPROC4_CLNT_CB_RECALL_ANY);
> return clp;
> }
>
> @@ -4349,14 +4388,16 @@ nfsd4_init_slabs(void)
> static unsigned long
> nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> {
> - int cnt;
> + int count;
> struct nfsd_net *nn = container_of(shrink,
> struct nfsd_net, nfsd_client_shrinker);
>
> - cnt = atomic_read(&nn->nfsd_courtesy_clients);
> - if (cnt > 0)
> + count = atomic_read(&nn->nfsd_courtesy_clients);
> + if (!count)
> + count = atomic_long_read(&num_delegations);
> + if (count)
> mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
> - return (unsigned long)cnt;
> + return (unsigned long)count;
> }
>
> static unsigned long
> @@ -6134,6 +6175,45 @@ courtesy_client_reaper(struct nfsd_net *nn)
> }
>
> static void
> +deleg_reaper(struct nfsd_net *nn)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + test_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags) ||
> + (ktime_get_boottime_seconds() -
> + clp->cl_ra_time < 5)) {
> + continue;
> + }
> + list_add(&clp->cl_ra_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + set_bit(NFSD4_CLIENT_CB_RECALL_ANY, &clp->cl_flags);
> + clp->cl_ra_time = ktime_get_boottime_seconds();
> + }
> + spin_unlock(&nn->client_lock);
> +
> + while (!list_empty(&cblist)) {
> + clp = list_first_entry(&cblist, struct nfs4_client,
> + cl_ra_cblist);
> + list_del_init(&clp->cl_ra_cblist);
> + clp->cl_ra->ra_keep = 0;
> + clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
Linux NFSD doesn't hand out write delegations. I don't think we
should set WDATA_DLG yet...?
> + nfsd4_run_cb(&clp->cl_ra->ra_cb);
> + }
> +}
> +
> +static void
> nfsd4_lowmem_shrinker(struct work_struct *work)
> {
> struct delayed_work *dwork = to_delayed_work(work);
> @@ -6141,6 +6221,7 @@ nfsd4_lowmem_shrinker(struct work_struct *work)
> nfsd_shrinker_work);
>
> courtesy_client_reaper(nn);
> + deleg_reaper(nn);
> }
>
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 6b33cbbe76d3..12ce9792c5b6 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -368,6 +368,7 @@ struct nfs4_client {
> #define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
> #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
> 1 << NFSD4_CLIENT_CB_KILL)
> +#define NFSD4_CLIENT_CB_RECALL_ANY (6)
> unsigned long cl_flags;
> const struct cred *cl_cb_cred;
> struct rpc_clnt *cl_cb_client;
> @@ -411,6 +412,10 @@ struct nfs4_client {
>
> unsigned int cl_state;
> atomic_t cl_delegs_in_recall;
> +
> + struct nfsd4_cb_recall_any *cl_ra;
> + time64_t cl_ra_time;
> + struct list_head cl_ra_cblist;
> };
>
> /* struct nfs4_client_reset
> @@ -642,6 +647,9 @@ enum nfsd4_cb_op {
> NFSPROC4_CLNT_CB_RECALL_ANY,
> };
>
> +#define RCA4_TYPE_MASK_RDATA_DLG 0
> +#define RCA4_TYPE_MASK_WDATA_DLG 1
> +
> /* Returns true iff a is later than b: */
> static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> {
> --
> 2.9.5
>
--
Chuck Lever
On 11/17/22 6:45 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> Refactoring courtesy_client_reaper to generic low memory
>> shrinker so it can be used for other purposes.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 836bd825ca4a..142481bc96de 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4347,7 +4347,7 @@ nfsd4_init_slabs(void)
>> }
>>
>> static unsigned long
>> -nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
>> +nfsd_lowmem_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> {
>> int cnt;
>> struct nfsd_net *nn = container_of(shrink,
>> @@ -4360,7 +4360,7 @@ nfsd_courtesy_client_count(struct shrinker *shrink, struct shrink_control *sc)
>> }
>>
>> static unsigned long
>> -nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>> +nfsd_lowmem_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
>> {
>> return SHRINK_STOP;
>> }
>> @@ -4387,8 +4387,8 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>> nn->nfs4_max_clients = max_t(int, max_clients, NFS4_CLIENTS_PER_GB);
>>
>> atomic_set(&nn->nfsd_courtesy_clients, 0);
>> - nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>> - nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>> + nn->nfsd_client_shrinker.scan_objects = nfsd_lowmem_shrinker_scan;
>> + nn->nfsd_client_shrinker.count_objects = nfsd_lowmem_shrinker_count;
>> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
>> return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> }
>> @@ -6125,17 +6125,24 @@ laundromat_main(struct work_struct *laundry)
>> }
>>
>> static void
>> -courtesy_client_reaper(struct work_struct *reaper)
>> +courtesy_client_reaper(struct nfsd_net *nn)
>> {
>> struct list_head reaplist;
>> - struct delayed_work *dwork = to_delayed_work(reaper);
>> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> - nfsd_shrinker_work);
>>
>> nfs4_get_courtesy_client_reaplist(nn, &reaplist);
>> nfs4_process_client_reaplist(&reaplist);
>> }
>>
>> +static void
>> +nfsd4_lowmem_shrinker(struct work_struct *work)
> All shrinkers run due to low memory, so I think this name
> is a bit redundant. How about nfsd4_state_shrinker?
ok, it makes sense.
Thanks,
-Dai
>
>
>> +{
>> + struct delayed_work *dwork = to_delayed_work(work);
>> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> + nfsd_shrinker_work);
>> +
>> + courtesy_client_reaper(nn);
>> +}
>> +
>> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>> {
>> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
>> @@ -7958,7 +7965,7 @@ static int nfs4_state_create_net(struct net *net)
>> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>>
>> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
>> + INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_lowmem_shrinker);
>> get_net(net);
>>
>> return 0;
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>
On 11/17/22 6:44 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> This patch series adds:
>>
>> . refactor courtesy_client_reaper to a generic low memory
>> shrinker.
>>
>> . XDR encode and decode function for CB_RECALL_ANY op.
>>
>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>> to the clients to release unused delegations.
>> There is only one nfsd4_callback added for each nfs4_cleint.
>> Access to it must be serialized via the client flag
>> NFSD4_CLIENT_CB_RECALL_ANY.
>>
>> . Add CB_RECALL_ANY tracepoints.
>>
>> v2:
>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>> only once per 5 secs.
>> v3:
>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>> protect cl_recall_any_busy and call put_client_renew_locked
>> to decrement cl_rpc_users.
>>
>> v4:
>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>> to encode CB_RECALL_ANY arguments.
>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>> and params for CB_RECALL_ANY op.
>> . replace cl_recall_any_busy boolean with client flag
>> NFSD4_CLIENT_CB_RECALL_ANY
>> . add tracepoints for CB_RECALL_ANY
>>
>> v5:
>> . refactor courtesy_client_reaper to a generic low memory
>> shrinker
>> . merge courtesy client shrinker and delegtion shrinker into
>> one.
>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>> in nfsd/trace.h
>> . use __get_sockaddr to display server IP address in
>> tracepoints.
>> . modify encode_cb_recallany4args to replace sizeof with
>> ARRAY_SIZE.
> Hi-
>
> I'm going to apply this version of the series with some minor
> changes. I'll reply to the individual patches where we can
> discuss those.
Thank you Chuck!
-Dai
>
>
>> ---
>>
>> Dai Ngo (4):
>> NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
>> NFSD: add support for sending CB_RECALL_ANY
>> NFSD: add delegation shrinker to react to low memory condition
>> NFSD: add CB_RECALL_ANY tracepoints
>>
>> fs/nfsd/nfs4callback.c | 62 +++++++++++++++++++++++
>> fs/nfsd/nfs4state.c | 116 +++++++++++++++++++++++++++++++++++++++-----
>> fs/nfsd/state.h | 9 ++++
>> fs/nfsd/trace.h | 49 +++++++++++++++++++
>> fs/nfsd/xdr4.h | 5 ++
>> fs/nfsd/xdr4cb.h | 6 +++
>> 6 files changed, 234 insertions(+), 13 deletions(-)
> --
> Chuck Lever
>
>
>
On 11/17/22 6:45 AM, Chuck Lever III wrote:
>
>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>
>> Add tracepoints to trace start and end of CB_RECALL_ANY operation.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/nfs4state.c | 2 ++
>> fs/nfsd/trace.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 51 insertions(+)
>>
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 13f326ae928c..935d669e2526 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -2859,6 +2859,7 @@ static int
>> nfsd4_cb_recall_any_done(struct nfsd4_callback *cb,
>> struct rpc_task *task)
>> {
>> + trace_nfsd_cb_recall_any_done(cb, task);
>> switch (task->tk_status) {
>> case -NFS4ERR_DELAY:
>> rpc_delay(task, 2 * HZ);
>> @@ -6209,6 +6210,7 @@ deleg_reaper(struct nfsd_net *nn)
>> clp->cl_ra->ra_keep = 0;
>> clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + trace_nfsd_cb_recall_any(clp->cl_ra);
>> nfsd4_run_cb(&clp->cl_ra->ra_cb);
>> }
>> }
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index 06a96e955bd0..bee951c335f1 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -9,9 +9,11 @@
>> #define _NFSD_TRACE_H
>>
>> #include <linux/tracepoint.h>
>> +#include <linux/sunrpc/xprt.h>
>>
>> #include "export.h"
>> #include "nfsfh.h"
>> +#include "xdr4.h"
>>
>> #define NFSD_TRACE_PROC_RES_FIELDS \
>> __field(unsigned int, netns_ino) \
>> @@ -1471,6 +1473,53 @@ TRACE_EVENT(nfsd_cb_offload,
>> __entry->fh_hash, __entry->count, __entry->status)
>> );
>>
>> +TRACE_EVENT(nfsd_cb_recall_any,
>> + TP_PROTO(
>> + const struct nfsd4_cb_recall_any *ra
>> + ),
>> + TP_ARGS(ra),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(u32, ra_keep)
>> + __field(u32, ra_bmval)
>> + __sockaddr(addr, sizeof(struct sockaddr_storage))
>> + ),
>> + TP_fast_assign(
>> + __entry->cl_boot = ra->ra_cb.cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = ra->ra_cb.cb_clp->cl_clientid.cl_id;
>> + __entry->ra_keep = ra->ra_keep;
>> + __entry->ra_bmval = ra->ra_bmval[0];
>> + __assign_sockaddr(addr, &ra->ra_cb.cb_clp->cl_addr,
>> + sizeof(struct sockaddr_storage))
>> + ),
>> + TP_printk("Client %08x:%08x addr=%pISpc ra_keep=%d ra_bmval=0x%x",
>> + __entry->cl_boot, __entry->cl_id,
>> + __get_sockaddr(addr), __entry->ra_keep, __entry->ra_bmval
> This needs a "show_recall_any_bitmap" to convert to human-readable
> symbols.
Yes, it makes sense.
Thanks,
-Dai
>
>
>> + )
>> +);
>> +
>> +TRACE_EVENT(nfsd_cb_recall_any_done,
>> + TP_PROTO(
>> + const struct nfsd4_callback *cb,
>> + const struct rpc_task *task
>> + ),
>> + TP_ARGS(cb, task),
>> + TP_STRUCT__entry(
>> + __field(u32, cl_boot)
>> + __field(u32, cl_id)
>> + __field(int, status)
>> + ),
>> + TP_fast_assign(
>> + __entry->status = task->tk_status;
>> + __entry->cl_boot = cb->cb_clp->cl_clientid.cl_boot;
>> + __entry->cl_id = cb->cb_clp->cl_clientid.cl_id;
>> + ),
>> + TP_printk("client %08x:%08x status=%d",
>> + __entry->cl_boot, __entry->cl_id, __entry->status
>> + )
>> +);
>> +
>> DECLARE_EVENT_CLASS(nfsd_cb_done_class,
>> TP_PROTO(
>> const stateid_t *stp,
>> --
>> 2.9.5
>>
> --
> Chuck Lever
>
>
>
> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>
>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>
>>> This patch series adds:
>>>
>>> . refactor courtesy_client_reaper to a generic low memory
>>> shrinker.
>>>
>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>
>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>> to the clients to release unused delegations.
>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>> Access to it must be serialized via the client flag
>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>
>>> . Add CB_RECALL_ANY tracepoints.
>>>
>>> v2:
>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>> only once per 5 secs.
>>> v3:
>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>> protect cl_recall_any_busy and call put_client_renew_locked
>>> to decrement cl_rpc_users.
>>>
>>> v4:
>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>> to encode CB_RECALL_ANY arguments.
>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>> and params for CB_RECALL_ANY op.
>>> . replace cl_recall_any_busy boolean with client flag
>>> NFSD4_CLIENT_CB_RECALL_ANY
>>> . add tracepoints for CB_RECALL_ANY
>>>
>>> v5:
>>> . refactor courtesy_client_reaper to a generic low memory
>>> shrinker
>>> . merge courtesy client shrinker and delegtion shrinker into
>>> one.
>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>> in nfsd/trace.h
>>> . use __get_sockaddr to display server IP address in
>>> tracepoints.
>>> . modify encode_cb_recallany4args to replace sizeof with
>>> ARRAY_SIZE.
>> Hi-
>>
>> I'm going to apply this version of the series with some minor
>> changes. I'll reply to the individual patches where we can
>> discuss those.
>
> Thank you Chuck!
Changes folded in and pushed to nfsd's for-next. For the trace
point patch, I've rebased your series on top of the patch that
relocates include/trace/events/nfs.h so that the new
show_rca_mask() macro can go in the common nfs.h instead of
the NFSD-specific fs/nfsd/trace.h.
Feel free to pull and test to make sure I didn't do anything
bone-headed.
--
Chuck Lever
On 11/17/22 9:04 AM, Chuck Lever III wrote:
>
>> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>>
>>
>> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>>
>>>> This patch series adds:
>>>>
>>>> . refactor courtesy_client_reaper to a generic low memory
>>>> shrinker.
>>>>
>>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>>
>>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>>> to the clients to release unused delegations.
>>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>>> Access to it must be serialized via the client flag
>>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>>
>>>> . Add CB_RECALL_ANY tracepoints.
>>>>
>>>> v2:
>>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>>> only once per 5 secs.
>>>> v3:
>>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>>> protect cl_recall_any_busy and call put_client_renew_locked
>>>> to decrement cl_rpc_users.
>>>>
>>>> v4:
>>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>>> to encode CB_RECALL_ANY arguments.
>>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>>> and params for CB_RECALL_ANY op.
>>>> . replace cl_recall_any_busy boolean with client flag
>>>> NFSD4_CLIENT_CB_RECALL_ANY
>>>> . add tracepoints for CB_RECALL_ANY
>>>>
>>>> v5:
>>>> . refactor courtesy_client_reaper to a generic low memory
>>>> shrinker
>>>> . merge courtesy client shrinker and delegtion shrinker into
>>>> one.
>>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>>> in nfsd/trace.h
>>>> . use __get_sockaddr to display server IP address in
>>>> tracepoints.
>>>> . modify encode_cb_recallany4args to replace sizeof with
>>>> ARRAY_SIZE.
>>> Hi-
>>>
>>> I'm going to apply this version of the series with some minor
>>> changes. I'll reply to the individual patches where we can
>>> discuss those.
>> Thank you Chuck!
> Changes folded in and pushed to nfsd's for-next. For the trace
> point patch, I've rebased your series on top of the patch that
> relocates include/trace/events/nfs.h so that the new
> show_rca_mask() macro can go in the common nfs.h instead of
> the NFSD-specific fs/nfsd/trace.h.
>
> Feel free to pull and test to make sure I didn't do anything
> bone-headed.
I removed the get_sockaddr temporarily to test show_rca_mask.
It works fine:
[root@nfsvmf24 ~]# trace-cmd report
trace-cmd: No such file or directory
Error: expected type 4 but read 5
cpus=1
kworker/u2:6-2297 [000] 1349.863391: nfsd_cb_recall_any: client 63767ac9:adb1a3fb keep=0 bmval0=RDATA_DLG
kworker/u2:0-8698 [000] 1349.869652: nfsd_cb_recall_any_done: client 63767ac9:adb1a3fb status=0
Thanks,
-Dai
>
>
> --
> Chuck Lever
>
>
>
> On Nov 17, 2022, at 1:43 PM, Dai Ngo <[email protected]> wrote:
>
>
> On 11/17/22 9:04 AM, Chuck Lever III wrote:
>>
>>> On Nov 17, 2022, at 11:53 AM, Dai Ngo <[email protected]> wrote:
>>>
>>>
>>> On 11/17/22 6:44 AM, Chuck Lever III wrote:
>>>>> On Nov 16, 2022, at 10:44 PM, Dai Ngo <[email protected]> wrote:
>>>>>
>>>>> This patch series adds:
>>>>>
>>>>> . refactor courtesy_client_reaper to a generic low memory
>>>>> shrinker.
>>>>>
>>>>> . XDR encode and decode function for CB_RECALL_ANY op.
>>>>>
>>>>> . the delegation reaper that sends the advisory CB_RECALL_ANY
>>>>> to the clients to release unused delegations.
>>>>> There is only one nfsd4_callback added for each nfs4_cleint.
>>>>> Access to it must be serialized via the client flag
>>>>> NFSD4_CLIENT_CB_RECALL_ANY.
>>>>>
>>>>> . Add CB_RECALL_ANY tracepoints.
>>>>>
>>>>> v2:
>>>>> . modify deleg_reaper to check and send CB_RECALL_ANY to client
>>>>> only once per 5 secs.
>>>>> v3:
>>>>> . modify nfsd4_cb_recall_any_release to use nn->client_lock to
>>>>> protect cl_recall_any_busy and call put_client_renew_locked
>>>>> to decrement cl_rpc_users.
>>>>>
>>>>> v4:
>>>>> . move changes in nfs4state.c from patch (1/2) to patch(2/2).
>>>>> . use xdr_stream_encode_u32 and xdr_stream_encode_uint32_array
>>>>> to encode CB_RECALL_ANY arguments.
>>>>> . add struct nfsd4_cb_recall_any with embedded nfs4_callback
>>>>> and params for CB_RECALL_ANY op.
>>>>> . replace cl_recall_any_busy boolean with client flag
>>>>> NFSD4_CLIENT_CB_RECALL_ANY
>>>>> . add tracepoints for CB_RECALL_ANY
>>>>>
>>>>> v5:
>>>>> . refactor courtesy_client_reaper to a generic low memory
>>>>> shrinker
>>>>> . merge courtesy client shrinker and delegtion shrinker into
>>>>> one.
>>>>> . reposition nfsd_cb_recall_any and nfsd_cb_recall_any_done
>>>>> in nfsd/trace.h
>>>>> . use __get_sockaddr to display server IP address in
>>>>> tracepoints.
>>>>> . modify encode_cb_recallany4args to replace sizeof with
>>>>> ARRAY_SIZE.
>>>> Hi-
>>>>
>>>> I'm going to apply this version of the series with some minor
>>>> changes. I'll reply to the individual patches where we can
>>>> discuss those.
>>> Thank you Chuck!
>> Changes folded in and pushed to nfsd's for-next. For the trace
>> point patch, I've rebased your series on top of the patch that
>> relocates include/trace/events/nfs.h so that the new
>> show_rca_mask() macro can go in the common nfs.h instead of
>> the NFSD-specific fs/nfsd/trace.h.
>>
>> Feel free to pull and test to make sure I didn't do anything
>> bone-headed.
>
> I removed the get_sockaddr temporarily to test show_rca_mask.
> It works fine:
>
> [root@nfsvmf24 ~]# trace-cmd report
> trace-cmd: No such file or directory
> Error: expected type 4 but read 5
> cpus=1
> kworker/u2:6-2297 [000] 1349.863391: nfsd_cb_recall_any: client 63767ac9:adb1a3fb keep=0 bmval0=RDATA_DLG
> kworker/u2:0-8698 [000] 1349.869652: nfsd_cb_recall_any_done: client 63767ac9:adb1a3fb status=0
Perfect, thank you!
--
Chuck Lever