The delegation shrinker work is scheduled whenever the shrinker's
'count' callback is invoked. It scans the client list and sends a
courtesy CB_RECALL_ANY to each client that holds delegations.

To avoid flooding clients with CB_RECALL_ANY requests, the delegation
shrinker work is scheduled to run after a 5-second delay.
Signed-off-by: Dai Ngo <[email protected]>
---
fs/nfsd/netns.h | 3 +++
fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/state.h | 1 +
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..394a05fb46d8 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -196,6 +196,9 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
struct delayed_work nfsd_shrinker_work;
+
+ struct shrinker nfsd_deleg_shrinker;
+ struct delayed_work nfsd_deleg_shrinker_work;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c60c937dece6..bdaea0e6e72f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
return SHRINK_STOP;
}
+static unsigned long
+nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ unsigned long cnt;
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_deleg_shrinker);
+
+ cnt = atomic_long_read(&num_delegations);
+ if (cnt)
+ mod_delayed_work(laundry_wq,
+ &nn->nfsd_deleg_shrinker_work, 5 * HZ);
+ return cnt;
+}
+
+static unsigned long
+nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ return SHRINK_STOP;
+}
+
int
nfsd4_init_leases_net(struct nfsd_net *nn)
{
struct sysinfo si;
u64 max_clients;
+ int retval;
nn->nfsd4_lease = 90; /* default lease time */
nn->nfsd4_grace = 90;
@@ -4417,13 +4438,23 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
nn->nfsd_client_shrinker.scan_objects = nfsd_courtesy_client_scan;
nn->nfsd_client_shrinker.count_objects = nfsd_courtesy_client_count;
nn->nfsd_client_shrinker.seeks = DEFAULT_SEEKS;
- return register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ retval = register_shrinker(&nn->nfsd_client_shrinker, "nfsd-client");
+ if (retval)
+ return retval;
+ nn->nfsd_deleg_shrinker.scan_objects = nfsd_deleg_shrinker_scan;
+ nn->nfsd_deleg_shrinker.count_objects = nfsd_deleg_shrinker_count;
+ nn->nfsd_deleg_shrinker.seeks = DEFAULT_SEEKS;
+ retval = register_shrinker(&nn->nfsd_deleg_shrinker, "nfsd-delegation");
+ if (retval)
+ unregister_shrinker(&nn->nfsd_client_shrinker);
+ return retval;
}
void
nfsd4_leases_net_shutdown(struct nfsd_net *nn)
{
unregister_shrinker(&nn->nfsd_client_shrinker);
+ unregister_shrinker(&nn->nfsd_deleg_shrinker);
}
static void init_nfs4_replay(struct nfs4_replay *rp)
@@ -6162,6 +6193,42 @@ courtesy_client_reaper(struct work_struct *reaper)
nfs4_process_client_reaplist(&reaplist);
}
+static void
+deleg_reaper(struct work_struct *deleg_work)
+{
+ struct list_head *pos, *next;
+ struct nfs4_client *clp;
+ struct list_head cblist;
+ struct delayed_work *dwork = to_delayed_work(deleg_work);
+ struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ nfsd_deleg_shrinker_work);
+
+ INIT_LIST_HEAD(&cblist);
+ spin_lock(&nn->client_lock);
+ list_for_each_safe(pos, next, &nn->client_lru) {
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (clp->cl_state != NFSD4_ACTIVE ||
+ list_empty(&clp->cl_delegations) ||
+ atomic_read(&clp->cl_delegs_in_recall) ||
+ clp->cl_recall_any_busy) {
+ continue;
+ }
+ clp->cl_recall_any_busy = true;
+ list_add(&clp->cl_recall_any_cblist, &cblist);
+
+ /* release in nfsd4_cb_recall_any_release */
+ atomic_inc(&clp->cl_rpc_users);
+ }
+ spin_unlock(&nn->client_lock);
+ list_for_each_safe(pos, next, &cblist) {
+ clp = list_entry(pos, struct nfs4_client, cl_recall_any_cblist);
+ list_del_init(&clp->cl_recall_any_cblist);
+ clp->cl_recall_any_bm = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
+ nfsd4_run_cb(&clp->cl_recall_any);
+ }
+}
+
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
{
if (!fh_match(&fhp->fh_handle, &stp->sc_file->fi_fhandle))
@@ -7985,6 +8052,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, courtesy_client_reaper);
+ INIT_DELAYED_WORK(&nn->nfsd_deleg_shrinker_work, deleg_reaper);
get_net(net);
return 0;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 49ca06169642..05b572ce6605 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -415,6 +415,7 @@ struct nfs4_client {
bool cl_recall_any_busy;
uint32_t cl_recall_any_bm;
struct nfsd4_callback cl_recall_any;
+ struct list_head cl_recall_any_cblist;
};
/* struct nfs4_client_reset
--
2.9.5

On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
> The delegation shrinker work is scheduled whenever the shrinker's
> 'count' callback is invoked. It scans the client list and sends a
> courtesy CB_RECALL_ANY to each client that holds delegations.
>
> To avoid flooding clients with CB_RECALL_ANY requests, the delegation
> shrinker work is scheduled to run after a 5-second delay.
>
But...when the kernel needs memory, it generally needs it _now_, and 5s
is a long time. It'd be better to not delay the initial sending of
CB_RECALL_ANY callbacks this much.
Maybe the logic should be changed such that it runs no more frequently
than once every 5s, but don't delay the initial sending of
CB_RECALL_ANYs.
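
Something like this, maybe (totally untested sketch; nfsd_last_recall_any
is a hypothetical new jiffies timestamp field in struct nfsd_net, and I'm
ignoring races on updating it for simplicity):

	static unsigned long
	nfsd_deleg_shrinker_count(struct shrinker *shrink,
				  struct shrink_control *sc)
	{
		struct nfsd_net *nn = container_of(shrink,
				struct nfsd_net, nfsd_deleg_shrinker);
		unsigned long cnt = atomic_long_read(&num_delegations);

		if (!cnt)
			return 0;

		/* Kick off the first batch of CB_RECALL_ANYs immediately,
		 * then rate-limit subsequent runs to one per 5s. */
		if (time_after(jiffies, nn->nfsd_last_recall_any + 5 * HZ)) {
			nn->nfsd_last_recall_any = jiffies;
			mod_delayed_work(laundry_wq,
					&nn->nfsd_deleg_shrinker_work, 0);
		}
		return cnt;
	}

That way the server reacts to memory pressure right away, but a burst of
count callbacks doesn't turn into a burst of callbacks to the clients.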
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> fs/nfsd/netns.h | 3 +++
> fs/nfsd/nfs4state.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nfsd/state.h | 1 +
> 3 files changed, 73 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..394a05fb46d8 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -196,6 +196,9 @@ struct nfsd_net {
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> struct delayed_work nfsd_shrinker_work;
> +
> + struct shrinker nfsd_deleg_shrinker;
> + struct delayed_work nfsd_deleg_shrinker_work;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index c60c937dece6..bdaea0e6e72f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4392,11 +4392,32 @@ nfsd_courtesy_client_scan(struct shrinker *shrink, struct shrink_control *sc)
> return SHRINK_STOP;
> }
>
> +static unsigned long
> +nfsd_deleg_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + unsigned long cnt;
> + struct nfsd_net *nn = container_of(shrink,
> + struct nfsd_net, nfsd_deleg_shrinker);
> +
> + cnt = atomic_long_read(&num_delegations);
> + if (cnt)
> + mod_delayed_work(laundry_wq,
> + &nn->nfsd_deleg_shrinker_work, 5 * HZ);
> + return cnt;
> +}
> +
What's the rationale for doing all of this in the count callback? Why
not just return the value here and leave scheduling to the scan
callback?
> +static unsigned long
> +nfsd_deleg_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + return SHRINK_STOP;
> +}
> +
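IOW, something like this (again untested, just to illustrate the idea):

	static unsigned long
	nfsd_deleg_shrinker_count(struct shrinker *shrink,
				  struct shrink_control *sc)
	{
		/* just report how much is reclaimable */
		return atomic_long_read(&num_delegations);
	}

	static unsigned long
	nfsd_deleg_shrinker_scan(struct shrinker *shrink,
				 struct shrink_control *sc)
	{
		struct nfsd_net *nn = container_of(shrink,
				struct nfsd_net, nfsd_deleg_shrinker);

		/* only schedule the recall work once the VM has actually
		 * decided to reclaim from this shrinker */
		mod_delayed_work(laundry_wq, &nn->nfsd_deleg_shrinker_work, 0);
		return SHRINK_STOP;
	}

Since the delegations are recalled asynchronously, returning SHRINK_STOP
from scan still looks right.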
--
Jeff Layton <[email protected]>

On 10/18/22 6:07 AM, Jeff Layton wrote:
> On Mon, 2022-10-17 at 22:15 -0700, Dai Ngo wrote:
>> The delegation shrinker work is scheduled whenever the shrinker's
>> 'count' callback is invoked. It scans the client list and sends a
>> courtesy CB_RECALL_ANY to each client that holds delegations.
>>
>> To avoid flooding clients with CB_RECALL_ANY requests, the delegation
>> shrinker work is scheduled to run after a 5-second delay.
>>
> But...when the kernel needs memory, it generally needs it _now_, and 5s
> is a long time. It'd be better to not delay the initial sending of
> CB_RECALL_ANY callbacks this much.
yes, makes sense.
>
> Maybe the logic should be changed such that it runs no more frequently
> than once every 5s, but don't delay the initial sending of
> CB_RECALL_ANYs.
I'll make this adjustment.
Thanks,
-Dai