2022-10-18 05:33:52

by Dai Ngo

[permalink] [raw]
Subject: [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition

The delegation shrinker is scheduled to run on every invocation
of the shrinker's 'count' callback. It scans the client list and
sends the courtesy CB_RECALL_ANY to the clients that hold delegations.

To avoid flooding the clients with CB_RECALL_ANY requests, the
delegation shrinker is scheduled to run after a 5-second delay.

Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/netns.h | 3 +++
fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 1 +
3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct delayed_work nfsd_shrinker_work;
+
+ struct shrinker nfsd_deleg_shrinker;
+ struct delayed_work nfsd_deleg_shrinker_work;
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c60c937dece6..bdaea0e6e72f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}

+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_deleg_shrinker);
+
+ cnt = atomic_long_read(&num_delegations);
+ if (cnt)
+ mod_delayed_work(laundry_wq,
+ &nn->nfsd_deleg_shrinker_work, 5 * HZ);
+ return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
int
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
+ int retval;

nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
@@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ if (retval)
+ return retval;
+ nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+ nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+ nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+ retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+ if (retval)
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ return retval;
}

void
nfsd4_leases_net_shutdown(struct nfsd_net *nn)
{
unregister_shrinker(&nn->nfsd_client_shrinker);
+ unregister_shrinker(&nn->nfsd_deleg_shrinker);
}

static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
nfs4_process_client_reaplist(&reaplist);
}

+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+ struct delayed_work *dwork = to_delayed_work(deleg_work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_deleg_shrinker_work);
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ clp->cl_recall_any_busy) {
+ continue;
+ }
+ clp->cl_recall_any_busy = true;
+ list_add(&clp->cl_recall_any_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ }
+ spin_unlock(&nn->client_lock);
+ list_for_each_safe(pos, next, &cblist) {
+ clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
+ list_del_init(&clp->cl_recall_any_cblist);
+ clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_recall_any);
+ }
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
get_net(net);

return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 49ca06169642..05b572ce6605 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -415,6 +415,7 @@ struct nfs4_client {
bool cl_recall_any_busy;
uint32_t cl_recall_any_bm;
struct nfsd4_callback cl_recall_any;
+ struct list_head cl_recall_any_cblist;
};

/* struct nfs4_client_reset
--
2.9.5


2022-10-18 13:13:12

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition

On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
> The delegation shrinker is scheduled to run on every shrinker's
> 'count' callback. It scans the client list and sends the
> courtesy CB_RECALL_ANY to the clients that hold delegations.
>
> To avoid flooding the clients with CB_RECALL_ANY requests, the
> delegation shrinker is scheduled to run after a 5 seconds delay.
>

But...when the kernel needs memory, it generally needs it _now_, and 5s
is a long time. It'd be better to not delay the initial sending of
CB_RECALL_ANY callbacks this much.

Maybe the logic should be changed such that it runs no more frequently
than once every 5s, but don't delay the initial sending of
CB_RECALL_ANYs.

> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/netns.h | 3 +++
> fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 1 +
> 3 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> struct delayed_work nfsd_shrinker_work;
> +
> + struct shrinker nfsd_deleg_shrinker;
> + struct delayed_work nfsd_deleg_shrinker_work;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c60c937dece6..bdaea0e6e72f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + unsigned long cnt;
> + struct nfsd_net *nn = container_of(shrink,
> + struct nfsd_net, nfsd_deleg_shrinker);
> +
> + cnt = atomic_long_read(&num_delegations);
> + if (cnt)
> + mod_delayed_work(laundry_wq,
> + &nn->nfsd_deleg_shrinker_work, 5 * HZ);
> + return cnt;
> +}
> +

What's the rationale for doing all of this in the count callback? Why
not just return the value here and leave scheduling to the scan
callback?

> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + return SHRINK_STOP;
> +}
> +
> int
> nfsd4_init_leases_net(struct nfsd_net *nn)
> {
> struct sysinfo si;
> u64 max_clients;
> + int retval;
>
> nn->nfsd4_lease = 90; /* default lease time */
> nn->nfsd4_grace = 90;
> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
> + if (retval)
> + return retval;
> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
> + if (retval)
> + unregister_shrinker(&nn->nfsd_client_shrinker);
> + return retval;
> }
>
> void
> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
> {
> unregister_shrinker(&nn->nfsd_client_shrinker);
> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
> }
>
> static void init_nfs4_replay(struct nfs4_replay *rp)
> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
> nfs4_process_client_reaplist(&reaplist);
> }
>
> +static void
> +deleg_reaper(struct work_struct *deleg_work)
> +{
> + struct list_head *pos, *next;
> + struct nfs4_client *clp;
> + struct list_head cblist;
> + struct delayed_work *dwork = to_delayed_work(deleg_work);
> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + nfsd_deleg_shrinker_work);
> +
> + INIT_LIST_HEAD(&cblist);
> + spin_lock(&nn->client_lock);
> + list_for_each_safe(pos, next, &nn->client_lru) {
> + clp = list_entry(pos, struct nfs4_client, cl_lru);
> + if (clp->cl_state != NFSD4_ACTIVE ||
> + list_empty(&clp->cl_delegations) ||
> + atomic_read(&clp->cl_delegs_in_recall) ||
> + clp->cl_recall_any_busy) {
> + continue;
> + }
> + clp->cl_recall_any_busy = true;
> + list_add(&clp->cl_recall_any_cblist, &cblist);
> +
> + /* release in nfsd4_cb_recall_any_release */
> + atomic_inc(&clp->cl_rpc_users);
> + }
> + spin_unlock(&nn->client_lock);
> + list_for_each_safe(pos, next, &cblist) {
> + clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
> + list_del_init(&clp->cl_recall_any_cblist);
> + clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
> + nfsd4_run_cb(&clp->cl_recall_any);
> + }
> +}
> +
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> {
> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
> get_net(net);
>
> return 0;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 49ca06169642..05b572ce6605 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -415,6 +415,7 @@ struct nfs4_client {
> bool cl_recall_any_busy;
> uint32_t cl_recall_any_bm;
> struct nfsd4_callback cl_recall_any;
> + struct list_head cl_recall_any_cblist;
> };
>
> /* struct nfs4_client_reset

--
Jeff Layton <[email protected]>

2022-10-18 15:56:48

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH 2/2] NFSD: add delegation shrinker to react to low memory condition


On 10/18/22 6:07 AM, Jeff Layton wrote:
> On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
>> The delegation shrinker is scheduled to run on every shrinker's
>> 'count' callback. It scans the client list and sends the
>> courtesy CB_RECALL_ANY to the clients that hold delegations.
>>
>> To avoid flooding the clients with CB_RECALL_ANY requests, the
>> delegation shrinker is scheduled to run after a 5 seconds delay.
>>
> But...when the kernel needs memory, it generally needs it _now_, and 5s
> is a long time. It'd be better to not delay the initial sending of
> CB_RECALL_ANY callbacks this much.

yes, makes sense.

>
> Maybe the logic should be changed such that it runs no more frequently
> than once every 5s, but don't delay the initial sending of
> CB_RECALL_ANYs.

I'll make this adjustment.

Thanks,
-Dai

>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> fs/nfsd/netns.h | 3 +++
>> fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> fs/nfsd/state.h | 1 +
>> 3 files changed, 73 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 8c854ba3285b..394a05fb46d8 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -196,6 +196,9 @@ struct nfsd_net {
>> atomic_t nfsd_courtesy_clients;
>> struct shrinker nfsd_client_shrinker;
>> struct delayed_work nfsd_shrinker_work;
>> +
>> + struct shrinker nfsd_deleg_shrinker;
>> + struct delayed_work nfsd_deleg_shrinker_work;
>> };
>>
>> /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index c60c937dece6..bdaea0e6e72f 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
>> return SHRINK_STOP;
>> }
>>
>> +static unsigned long
>> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> + unsigned long cnt;
>> + struct nfsd_net *nn = container_of(shrink,
>> + struct nfsd_net, nfsd_deleg_shrinker);
>> +
>> + cnt = atomic_long_read(&num_delegations);
>> + if (cnt)
>> + mod_delayed_work(laundry_wq,
>> + &nn->nfsd_deleg_shrinker_work, 5 * HZ);
>> + return cnt;
>> +}
>> +
> What's the rationale for doing all of this in the count callback? Why
> not just return the value here and leave scheduling to the scan
> callback?
>
>> +static unsigned long
>> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
>> +{
>> + return SHRINK_STOP;
>> +}
>> +
>> int
>> nfsd4_init_leases_net(struct nfsd_net *nn)
>> {
>> struct sysinfo si;
>> u64 max_clients;
>> + int retval;
>>
>> nn->nfsd4_lease = 90; /* default lease time */
>> nn->nfsd4_grace = 90;
>> @@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
>> nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
>> nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
>> nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
>> - return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> + retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
>> + if (retval)
>> + return retval;
>> + nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
>> + nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
>> + nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
>> + retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
>> + if (retval)
>> + unregister_shrinker(&nn->nfsd_client_shrinker);
>> + return retval;
>> }
>>
>> void
>> nfsd4_leases_net_shutdown(struct nfsd_net *nn)
>> {
>> unregister_shrinker(&nn->nfsd_client_shrinker);
>> + unregister_shrinker(&nn->nfsd_deleg_shrinker);
>> }
>>
>> static void init_nfs4_replay(struct nfs4_replay *rp)
>> @@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
>> nfs4_process_client_reaplist(&reaplist);
>> }
>>
>> +static void
>> +deleg_reaper(struct work_struct *deleg_work)
>> +{
>> + struct list_head *pos, *next;
>> + struct nfs4_client *clp;
>> + struct list_head cblist;
>> + struct delayed_work *dwork = to_delayed_work(deleg_work);
>> + struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> + nfsd_deleg_shrinker_work);
>> +
>> + INIT_LIST_HEAD(&cblist);
>> + spin_lock(&nn->client_lock);
>> + list_for_each_safe(pos, next, &nn->client_lru) {
>> + clp = list_entry(pos, struct nfs4_client, cl_lru);
>> + if (clp->cl_state != NFSD4_ACTIVE ||
>> + list_empty(&clp->cl_delegations) ||
>> + atomic_read(&clp->cl_delegs_in_recall) ||
>> + clp->cl_recall_any_busy) {
>> + continue;
>> + }
>> + clp->cl_recall_any_busy = true;
>> + list_add(&clp->cl_recall_any_cblist, &cblist);
>> +
>> + /* release in nfsd4_cb_recall_any_release */
>> + atomic_inc(&clp->cl_rpc_users);
>> + }
>> + spin_unlock(&nn->client_lock);
>> + list_for_each_safe(pos, next, &cblist) {
>> + clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
>> + list_del_init(&clp->cl_recall_any_cblist);
>> + clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
>> + BIT(RCA4_TYPE_MASK_WDATA_DLG);
>> + nfsd4_run_cb(&clp->cl_recall_any);
>> + }
>> +}
>> +
>> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>> {
>> if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
>> @@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
>>
>> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>> INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
>> + INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
>> get_net(net);
>>
>> return 0;
>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>> index 49ca06169642..05b572ce6605 100644
>> --- a/fs/nfsd/state.h
>> +++ b/fs/nfsd/state.h
>> @@ -415,6 +415,7 @@ struct nfs4_client {
>> bool cl_recall_any_busy;
>> uint32_t cl_recall_any_bm;
>> struct nfsd4_callback cl_recall_any;
>> + struct list_head cl_recall_any_cblist;
>> };
>>
>> /* struct nfs4_client_reset