2023-01-11 10:39:15

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v3 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

Currently nfsd4_state_shrinker_worker can be scheduled multiple times
from nfsd4_state_shrinker_count when memory is low. This causes
the WARN_ON_ONCE in __queue_delayed_work to trigger.

This patch allows only one instance of nfsd4_state_shrinker_worker
at a time using the nfsd_shrinker_active flag, protected by the
client_lock.

Change nfsd_shrinker_work from delayed_work to work_struct since we
don't use the delay.

Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.

Cancel work_struct nfsd_shrinker_work after unregistering shrinker
in nfs4_state_shutdown_net.

Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
Reported-by: Mike Galbraith <[email protected]>
Signed-off-by: Dai Ngo <[email protected]>
---
v2:
. Change nfsd_shrinker_work from delayed_work to work_struct
. Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
. Cancel work_struct nfsd_shrinker_work after unregistering shrinker
v3:
. set nfsd_shrinker_active earlier in nfsd4_state_shrinker_count

fs/nfsd/netns.h | 3 ++-
fs/nfsd/nfs4state.c | 24 +++++++++++++++++++-----
2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..b0c7b657324b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -195,7 +195,8 @@ struct nfsd_net {

atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
+ bool nfsd_shrinker_active;
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7cfefd7c205..35ec4cba88b3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4407,11 +4407,22 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);

+ spin_lock(&nn->client_lock);
+ if (nn->nfsd_shrinker_active) {
+ spin_unlock(&nn->client_lock);
+ return 0;
+ }
+ nn->nfsd_shrinker_active = true;
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
count = atomic_long_read(&num_delegations);
- if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ if (count) {
+ spin_unlock(&nn->client_lock);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ } else {
+ nn->nfsd_shrinker_active = false;
+ spin_unlock(&nn->client_lock);
+ }
return (unsigned long)count;
}

@@ -6233,12 +6244,14 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);

courtesy_client_reaper(nn);
deleg_reaper(nn);
+ spin_lock(&nn->client_lock);
+ nn->nfsd_shrinker_active = false;
+ spin_unlock(&nn->client_lock);
}

static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
@@ -8064,7 +8077,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);

nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
@@ -8171,6 +8184,7 @@ nfs4_state_shutdown_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);

--
2.9.5


2023-01-11 10:49:55

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

On Wed, 2023-01-11 at 02:24 -0800, Dai Ngo wrote:
> Currently nfsd4_state_shrinker_worker can be scheduled multiple times
> from nfsd4_state_shrinker_count when memory is low. This causes
> the WARN_ON_ONCE in __queue_delayed_work to trigger.
>
> This patch allows only one instance of nfsd4_state_shrinker_worker
> at a time using the nfsd_shrinker_active flag, protected by the
> client_lock.
>
> Change nfsd_shrinker_work from delayed_work to work_struct since we
> don't use the delay.
>
> Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.
>
> Cancel work_struct nfsd_shrinker_work after unregistering shrinker
> in nfs4_state_shutdown_net
>
> Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
> Reported-by: Mike Galbraith <[email protected]>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> v2:
> . Change nfsd_shrinker_work from delayed_work to work_struct
> . Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
> . Cancel work_struct nfsd_shrinker_work after unregistering shrinker
> v3:
> . set nfsd_shrinker_active earlier in nfsd4_state_shrinker_count
>
> fs/nfsd/netns.h | 3 ++-
> fs/nfsd/nfs4state.c | 24 +++++++++++++++++++-----
> 2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..b0c7b657324b 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -195,7 +195,8 @@ struct nfsd_net {
>
> atomic_t nfsd_courtesy_clients;
> struct shrinker nfsd_client_shrinker;
> - struct delayed_work nfsd_shrinker_work;
> + struct work_struct nfsd_shrinker_work;
> + bool nfsd_shrinker_active;
> };
>
> /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a7cfefd7c205..35ec4cba88b3 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4407,11 +4407,22 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
> struct nfsd_net *nn = container_of(shrink,
> struct nfsd_net, nfsd_client_shrinker);
>
> + spin_lock(&nn->client_lock);
> + if (nn->nfsd_shrinker_active) {
> + spin_unlock(&nn->client_lock);
> + return 0;
> + }
> + nn->nfsd_shrinker_active = true;
> count = atomic_read(&nn->nfsd_courtesy_clients);
> if (!count)
> count = atomic_long_read(&num_delegations);
> - if (count)
> - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
> + if (count) {
> + spin_unlock(&nn->client_lock);
> + queue_work(laundry_wq, &nn->nfsd_shrinker_work);
> + } else {
> + nn->nfsd_shrinker_active = false;
> + spin_unlock(&nn->client_lock);
> + }

The change to normal work_struct is an improvement, but NAK on this
patch. The spinlocking and flag are not needed here. I seriously doubt
that we have a clear understanding of this problem.

> return (unsigned long)count;
> }
>
> @@ -6233,12 +6244,14 @@ deleg_reaper(struct nfsd_net *nn)
> static void
> nfsd4_state_shrinker_worker(struct work_struct *work)
> {
> - struct delayed_work *dwork = to_delayed_work(work);
> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> + struct nfsd_net *nn = container_of(work, struct nfsd_net,
> nfsd_shrinker_work);
>
> courtesy_client_reaper(nn);
> deleg_reaper(nn);
> + spin_lock(&nn->client_lock);
> + nn->nfsd_shrinker_active = false;
> + spin_unlock(&nn->client_lock);
> }
>
> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> @@ -8064,7 +8077,7 @@ static int nfs4_state_create_net(struct net *net)
> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>
> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
> + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
> get_net(net);
>
> nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
> @@ -8171,6 +8184,7 @@ nfs4_state_shutdown_net(struct net *net)
> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>
> unregister_shrinker(&nn->nfsd_client_shrinker);
> + cancel_work(&nn->nfsd_shrinker_work);
> cancel_delayed_work_sync(&nn->laundromat_work);
> locks_end_grace(&nn->nfsd4_manager);
>

--
Jeff Layton <[email protected]>

2023-01-11 11:09:31

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

No change: the last 2 hunks don't apply to virgin source; wedging the
patch in still results in 1 kernel/workqueue.c:1499 warning.

On Wed, 2023-01-11 at 02:24 -0800, Dai Ngo wrote:
> Currently nfsd4_state_shrinker_worker can be scheduled multiple times
> from nfsd4_state_shrinker_count when memory is low. This causes
> the WARN_ON_ONCE in __queue_delayed_work to trigger.
>
> This patch allows only one instance of nfsd4_state_shrinker_worker
> at a time using the nfsd_shrinker_active flag, protected by the
> client_lock.
>
> Change nfsd_shrinker_work from delayed_work to work_struct since we
> don't use the delay.
>
> Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.
>
> Cancel work_struct nfsd_shrinker_work after unregistering shrinker
> in nfs4_state_shutdown_net
>
> Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
> Reported-by: Mike Galbraith <[email protected]>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> v2:
>   . Change nfsd_shrinker_work from delayed_work to work_struct
>   . Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
>   . Cancel work_struct nfsd_shrinker_work after unregistering shrinker
> v3:
>   . set nfsd_shrinker_active earlier in nfsd4_state_shrinker_count
>
>  fs/nfsd/netns.h     |  3 ++-
>  fs/nfsd/nfs4state.c | 24 +++++++++++++++++++-----
>  2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..b0c7b657324b 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -195,7 +195,8 @@ struct nfsd_net {
>  
>         atomic_t                nfsd_courtesy_clients;
>         struct shrinker         nfsd_client_shrinker;
> -       struct delayed_work     nfsd_shrinker_work;
> +       struct work_struct      nfsd_shrinker_work;
> +       bool                    nfsd_shrinker_active;
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a7cfefd7c205..35ec4cba88b3 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4407,11 +4407,22 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>         struct nfsd_net *nn = container_of(shrink,
>                         struct nfsd_net, nfsd_client_shrinker);
>  
> +       spin_lock(&nn->client_lock);
> +       if (nn->nfsd_shrinker_active) {
> +               spin_unlock(&nn->client_lock);
> +               return 0;
> +       }
> +       nn->nfsd_shrinker_active = true;
>         count = atomic_read(&nn->nfsd_courtesy_clients);
>         if (!count)
>                 count = atomic_long_read(&num_delegations);
> -       if (count)
> -               mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
> +       if (count) {
> +               spin_unlock(&nn->client_lock);
> +               queue_work(laundry_wq, &nn->nfsd_shrinker_work);
> +       } else {
> +               nn->nfsd_shrinker_active = false;
> +               spin_unlock(&nn->client_lock);
> +       }
>         return (unsigned long)count;
>  }
>  
> @@ -6233,12 +6244,14 @@ deleg_reaper(struct nfsd_net *nn)
>  static void
>  nfsd4_state_shrinker_worker(struct work_struct *work)
>  {
> -       struct delayed_work *dwork = to_delayed_work(work);
> -       struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> +       struct nfsd_net *nn = container_of(work, struct nfsd_net,
>                                 nfsd_shrinker_work);
>  
>         courtesy_client_reaper(nn);
>         deleg_reaper(nn);
> +       spin_lock(&nn->client_lock);
> +       nn->nfsd_shrinker_active = false;
> +       spin_unlock(&nn->client_lock);
>  }
>  
>  static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> @@ -8064,7 +8077,7 @@ static int nfs4_state_create_net(struct net *net)
>         INIT_LIST_HEAD(&nn->blocked_locks_lru);
>  
>         INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> -       INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
> +       INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
>         get_net(net);
>  
>         nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
> @@ -8171,6 +8184,7 @@ nfs4_state_shutdown_net(struct net *net)
>         struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  
>         unregister_shrinker(&nn->nfsd_client_shrinker);
> +       cancel_work(&nn->nfsd_shrinker_work);
>         cancel_delayed_work_sync(&nn->laundromat_work);
>         locks_end_grace(&nn->nfsd4_manager);
>  

2023-01-11 11:41:11

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v3 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work


On 1/11/23 2:47 AM, Jeff Layton wrote:
> On Wed, 2023-01-11 at 02:24 -0800, Dai Ngo wrote:
>> Currently nfsd4_state_shrinker_worker can be scheduled multiple times
>> from nfsd4_state_shrinker_count when memory is low. This causes
>> the WARN_ON_ONCE in __queue_delayed_work to trigger.
>>
>> This patch allows only one instance of nfsd4_state_shrinker_worker
>> at a time using the nfsd_shrinker_active flag, protected by the
>> client_lock.
>>
>> Change nfsd_shrinker_work from delayed_work to work_struct since we
>> don't use the delay.
>>
>> Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.
>>
>> Cancel work_struct nfsd_shrinker_work after unregistering shrinker
>> in nfs4_state_shutdown_net
>>
>> Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
>> Reported-by: Mike Galbraith <[email protected]>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> v2:
>> . Change nfsd_shrinker_work from delayed_work to work_struct
>> . Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
>> . Cancel work_struct nfsd_shrinker_work after unregistering shrinker
>> v3:
>> . set nfsd_shrinker_active earlier in nfsd4_state_shrinker_count
>>
>> fs/nfsd/netns.h | 3 ++-
>> fs/nfsd/nfs4state.c | 24 +++++++++++++++++++-----
>> 2 files changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
>> index 8c854ba3285b..b0c7b657324b 100644
>> --- a/fs/nfsd/netns.h
>> +++ b/fs/nfsd/netns.h
>> @@ -195,7 +195,8 @@ struct nfsd_net {
>>
>> atomic_t nfsd_courtesy_clients;
>> struct shrinker nfsd_client_shrinker;
>> - struct delayed_work nfsd_shrinker_work;
>> + struct work_struct nfsd_shrinker_work;
>> + bool nfsd_shrinker_active;
>> };
>>
>> /* Simple check to find out if a given net was properly initialized */
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index a7cfefd7c205..35ec4cba88b3 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -4407,11 +4407,22 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>> struct nfsd_net *nn = container_of(shrink,
>> struct nfsd_net, nfsd_client_shrinker);
>>
>> + spin_lock(&nn->client_lock);
>> + if (nn->nfsd_shrinker_active) {
>> + spin_unlock(&nn->client_lock);
>> + return 0;
>> + }
>> + nn->nfsd_shrinker_active = true;
>> count = atomic_read(&nn->nfsd_courtesy_clients);
>> if (!count)
>> count = atomic_long_read(&num_delegations);
>> - if (count)
>> - mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
>> + if (count) {
>> + spin_unlock(&nn->client_lock);
>> + queue_work(laundry_wq, &nn->nfsd_shrinker_work);
>> + } else {
>> + nn->nfsd_shrinker_active = false;
>> + spin_unlock(&nn->client_lock);
>> + }
> The change to normal work_struct is an improvement, but NAK on this
> patch. The spinlocking and flag are not needed here. I seriously doubt
> that we have a clear understanding of this problem.

Agreed. We need to get to the bottom of this.

-Dai

>
>> return (unsigned long)count;
>> }
>>
>> @@ -6233,12 +6244,14 @@ deleg_reaper(struct nfsd_net *nn)
>> static void
>> nfsd4_state_shrinker_worker(struct work_struct *work)
>> {
>> - struct delayed_work *dwork = to_delayed_work(work);
>> - struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
>> + struct nfsd_net *nn = container_of(work, struct nfsd_net,
>> nfsd_shrinker_work);
>>
>> courtesy_client_reaper(nn);
>> deleg_reaper(nn);
>> + spin_lock(&nn->client_lock);
>> + nn->nfsd_shrinker_active = false;
>> + spin_unlock(&nn->client_lock);
>> }
>>
>> static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
>> @@ -8064,7 +8077,7 @@ static int nfs4_state_create_net(struct net *net)
>> INIT_LIST_HEAD(&nn->blocked_locks_lru);
>>
>> INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
>> - INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
>> + INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
>> get_net(net);
>>
>> nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
>> @@ -8171,6 +8184,7 @@ nfs4_state_shutdown_net(struct net *net)
>> struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>>
>> unregister_shrinker(&nn->nfsd_client_shrinker);
>> + cancel_work(&nn->nfsd_shrinker_work);
>> cancel_delayed_work_sync(&nn->laundromat_work);
>> locks_end_grace(&nn->nfsd4_manager);
>>