2023-01-11 04:53:31

by Dai Ngo

[permalink] [raw]
Subject: [PATCH v2 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

Currently nfsd4_state_shrinker_worker can be schduled multiple times
from nfsd4_state_shrinker_count when memory is low. This causes
the WARN_ON_ONCE in __queue_delayed_work to trigger.

This patch allows only one instance of nfsd4_state_shrinker_worker
at a time using the nfsd_shrinker_active flag, protected by the
client_lock.

Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.

Change nfsd_shrinker_work from delayed_work to work_struct since we
don't use the delay.

Cancel work_struct nfsd_shrinker_work after unregistering shrinker
in nfs4_state_shutdown_net

Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
Reported-by: Mike Galbraith <[email protected]>
Signed-off-by: Dai Ngo <[email protected]>
---
v2:
. Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
. Change nfsd_shrinker_work from delayed_work to work_struct
. Cancel work_struct nfsd_shrinker_work after unregistering shrinker

fs/nfsd/netns.h | 3 ++-
fs/nfsd/nfs4state.c | 22 +++++++++++++++++-----
2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 8c854ba3285b..b0c7b657324b 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -195,7 +195,8 @@ struct nfsd_net {

atomic_t nfsd_courtesy_clients;
struct shrinker nfsd_client_shrinker;
- struct delayed_work nfsd_shrinker_work;
+ struct work_struct nfsd_shrinker_work;
+ bool nfsd_shrinker_active;
};

/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7cfefd7c205..6508f9c79315 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4407,11 +4407,20 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
struct nfsd_net *nn = container_of(shrink,
struct nfsd_net, nfsd_client_shrinker);

+ spin_lock(&nn->client_lock);
+ if (nn->nfsd_shrinker_active) {
+ spin_unlock(&nn->client_lock);
+ return 0;
+ }
count = atomic_read(&nn->nfsd_courtesy_clients);
if (!count)
count = atomic_long_read(&num_delegations);
- if (count)
- mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
+ if (count) {
+ nn->nfsd_shrinker_active = true;
+ spin_unlock(&nn->client_lock);
+ queue_work(laundry_wq, &nn->nfsd_shrinker_work);
+ } else
+ spin_unlock(&nn->client_lock);
return (unsigned long)count;
}

@@ -6233,12 +6242,14 @@ deleg_reaper(struct nfsd_net *nn)
static void
nfsd4_state_shrinker_worker(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
+ struct nfsd_net *nn = container_of(work, struct nfsd_net,
nfsd_shrinker_work);

courtesy_client_reaper(nn);
deleg_reaper(nn);
+ spin_lock(&nn->client_lock);
+ nn->nfsd_shrinker_active = 0;
+ spin_unlock(&nn->client_lock);
}

static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
@@ -8064,7 +8075,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->blocked_locks_lru);

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
- INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
+ INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
get_net(net);

nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
@@ -8171,6 +8182,7 @@ nfs4_state_shutdown_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

unregister_shrinker(&nn->nfsd_client_shrinker);
+ cancel_work(&nn->nfsd_shrinker_work);
cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);

--
2.9.5


2023-01-11 06:04:37

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

The last two hunks don't apply to virgin source, but after wedging them
in, reproducer no longer inspires box to moan, groan and brick.

-Mike

On Tue, 2023-01-10 at 20:43 -0800, Dai Ngo wrote:
> Currently nfsd4_state_shrinker_worker can be schduled multiple times
> from nfsd4_state_shrinker_count when memory is low. This causes
> the WARN_ON_ONCE in __queue_delayed_work to trigger.
>
> This patch allows only one instance of nfsd4_state_shrinker_worker
> at a time using the nfsd_shrinker_active flag, protected by the
> client_lock.
>
> Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work.
>
> Change nfsd_shrinker_work from delayed_work to work_struct since we
> don't use the delay.
>
> Cancel work_struct nfsd_shrinker_work after unregistering shrinker
> in nfs4_state_shutdown_net
>
> Fixes: 44df6f439a17 ("NFSD: add delegation reaper to react to low memory condition")
> Reported-by: Mike Galbraith <[email protected]>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> v2:
>   . Replace mod_delayed_work in nfsd4_state_shrinker_count with queue_work
>   . Change nfsd_shrinker_work from delayed_work to work_struct
>   . Cancel work_struct nfsd_shrinker_work after unregistering shrinker
>
>  fs/nfsd/netns.h     |  3 ++-
>  fs/nfsd/nfs4state.c | 22 +++++++++++++++++-----
>  2 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 8c854ba3285b..b0c7b657324b 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -195,7 +195,8 @@ struct nfsd_net {
>  
>         atomic_t                nfsd_courtesy_clients;
>         struct shrinker         nfsd_client_shrinker;
> -       struct delayed_work     nfsd_shrinker_work;
> +       struct work_struct      nfsd_shrinker_work;
> +       bool                    nfsd_shrinker_active;
>  };
>  
>  /* Simple check to find out if a given net was properly initialized */
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a7cfefd7c205..6508f9c79315 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4407,11 +4407,20 @@ nfsd4_state_shrinker_count(struct shrinker *shrink, struct shrink_control *sc)
>         struct nfsd_net *nn = container_of(shrink,
>                         struct nfsd_net, nfsd_client_shrinker);
>  
> +       spin_lock(&nn->client_lock);
> +       if (nn->nfsd_shrinker_active) {
> +               spin_unlock(&nn->client_lock);
> +               return 0;
> +       }
>         count = atomic_read(&nn->nfsd_courtesy_clients);
>         if (!count)
>                 count = atomic_long_read(&num_delegations);
> -       if (count)
> -               mod_delayed_work(laundry_wq, &nn->nfsd_shrinker_work, 0);
> +       if (count) {
> +               nn->nfsd_shrinker_active = true;
> +               spin_unlock(&nn->client_lock);
> +               queue_work(laundry_wq, &nn->nfsd_shrinker_work);
> +       } else
> +               spin_unlock(&nn->client_lock);
>         return (unsigned long)count;
>  }
>  
> @@ -6233,12 +6242,14 @@ deleg_reaper(struct nfsd_net *nn)
>  static void
>  nfsd4_state_shrinker_worker(struct work_struct *work)
>  {
> -       struct delayed_work *dwork = to_delayed_work(work);
> -       struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
> +       struct nfsd_net *nn = container_of(work, struct nfsd_net,
>                                 nfsd_shrinker_work);
>  
>         courtesy_client_reaper(nn);
>         deleg_reaper(nn);
> +       spin_lock(&nn->client_lock);
> +       nn->nfsd_shrinker_active = 0;
> +       spin_unlock(&nn->client_lock);
>  }
>  
>  static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stid *stp)
> @@ -8064,7 +8075,7 @@ static int nfs4_state_create_net(struct net *net)
>         INIT_LIST_HEAD(&nn->blocked_locks_lru);
>  
>         INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
> -       INIT_DELAYED_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
> +       INIT_WORK(&nn->nfsd_shrinker_work, nfsd4_state_shrinker_worker);
>         get_net(net);
>  
>         nn->nfsd_client_shrinker.scan_objects = nfsd4_state_shrinker_scan;
> @@ -8171,6 +8182,7 @@ nfs4_state_shutdown_net(struct net *net)
>         struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  
>         unregister_shrinker(&nn->nfsd_client_shrinker);
> +       cancel_work(&nn->nfsd_shrinker_work);
>         cancel_delayed_work_sync(&nn->laundromat_work);
>         locks_end_grace(&nn->nfsd4_manager);
>  

2023-01-11 06:15:54

by Mike Galbraith

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

On Wed, 2023-01-11 at 06:57 +0100, Mike Galbraith wrote:
> The last two hunks don't apply to virgin source, but after wedging them
> in, reproducer no longer inspires box to moan, groan and brick.

Unless of course I actually boot the freshly installed f-ing kernel
before moving on to repro procedure <thwap>. No brick, but 1 moan.

[ 50.248802] __vm_enough_memory: pid: 4180, comm: min_free_kbytes, no enough memory for the allocation
[ 50.272784] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
[ 50.273398] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
[ 50.932149] __vm_enough_memory: pid: 4279, comm: min_free_kbytes, no enough memory for the allocation
[ 51.732046] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
[ 51.732119] __vm_enough_memory: pid: 4363, comm: min_free_kbytes, no enough memory for the allocation
[ 51.733205] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
[ 51.735339] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
[ 51.736425] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
[ 51.737616] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
[ 51.816074] plasmashell[4413]: segfault at 0 ip 00007fd4cef2ce95 sp 00007ffd66c9c1a0 error 6 in swrast_dri.so[7fd4ce200000+16d2000] likely on CPU 1 (core 0, socket 0)
[ 51.817541] Code: c7 86 f0 1a 00 00 00 00 00 00 c1 e9 03 f3 48 ab 4c 89 e7 e8 bd 76 a7 ff be 10 06 01 00 bf 01 00 00 00 e8 fe 30 46 ff 4c 89 e6 <48> 89 18 48 89 ef 49 89 c5 e8 4d 69 a7 ff 0f 1f 44 00 00 48 89 ef
[ 54.930735] ------------[ cut here ]------------
[ 54.931137] WARNING: CPU: 6 PID: 77 at kernel/workqueue.c:1499 __queue_work+0x33b/0x3d0
[ 54.931747] Modules linked in: netconsole(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) netfs(E) af_packet(E) ip6table_mangle(E) ip6table_raw(E) iptable_raw(E) bridge(E) stp(E) llc(E) rfkill(E) nfnetlink(E) ebtable_filter(E) ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) bpfilter(E) joydev(E) snd_hda_codec_generic(E) ledtrig_audio(E) intel_rapl_msr(E) intel_rapl_common(E) snd_hda_intel(E) snd_intel_dspcfg(E) snd_hda_codec(E) snd_hwdep(E) kvm_intel(E) snd_hda_core(E) snd_pcm(E) iTCO_wdt(E) intel_pmc_bxt(E) kvm(E) snd_timer(E) iTCO_vendor_support(E) irqbypass(E) snd(E) i2c_i801(E) pcspkr(E) lpc_ich(E) i2c_smbus(E) soundcore(E) virtio_balloon(E) mfd_core(E) virtio_net(E) net_failover(E) failover(E) button(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sch_fq_codel(E) fuse(E) sunrpc(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) crc16(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) crct10dif_pclmul(E) qxl(E) crc32_pclmul(E) drm_ttm_helper(E)
[ 54.931812] crc32c_intel(E) ghash_clmulni_intel(E) ttm(E) sha512_ssse3(E) sha512_generic(E) drm_kms_helper(E) xhci_pci(E) ahci(E) syscopyarea(E) sysfillrect(E) aesni_intel(E) sysimgblt(E) crypto_simd(E) xhci_hcd(E) libahci(E) virtio_blk(E) virtio_console(E) cryptd(E) serio_raw(E) libata(E) usbcore(E) virtio_pci(E) virtio_pci_legacy_dev(E) usb_common(E) drm(E) virtio_pci_modern_dev(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) msr(E) virtio_rng(E) virtio(E) virtio_ring(E) autofs4(E)
[ 54.942372] CPU: 6 PID: 77 Comm: kswapd0 Kdump: loaded Tainted: G E 6.2.0.g7dd4b80-master #67
[ 54.943162] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
[ 54.944039] RIP: 0010:__queue_work+0x33b/0x3d0
[ 54.944434] Code: 25 40 d3 02 00 f6 47 2c 20 74 18 e8 6f 6f 00 00 48 85 c0 74 0e 48 8b 40 20 48 3b 68 08 0f 84 f5 fc ff ff 0f 0b e9 fe fd ff ff <0f> 0b e9 ee fd ff ff 83 c9 02 49 8d 57 68 e9 d7 fd ff ff 80 3d 83
[ 54.945938] RSP: 0018:ffff888100dd7c50 EFLAGS: 00010003
[ 54.946573] RAX: ffff8881042ac350 RBX: ffffffff81fcc880 RCX: 0000000000000000
[ 54.947176] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888100078000
[ 54.947749] RBP: ffff88810713e800 R08: ffff888100400028 R09: ffff888100400000
[ 54.948313] R10: 0000000000000000 R11: ffffffff8225d5c8 R12: 0000000000000008
[ 54.948928] R13: 0000000000000006 R14: ffff8881042ac348 R15: ffff888103462400
[ 54.949492] FS: 0000000000000000(0000) GS:ffff888277d80000(0000) knlGS:0000000000000000
[ 54.950153] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 54.950617] CR2: 00007f8dff5ff9c8 CR3: 000000014ac78002 CR4: 0000000000170ee0
[ 54.951174] Call Trace:
[ 54.951389] <TASK>
[ 54.951575] queue_work_on+0x24/0x30
[ 54.951876] nfsd4_state_shrinker_count+0x69/0x80 [nfsd]
[ 54.952357] shrink_slab.constprop.94+0x9d/0x370
[ 54.952740] shrink_node+0x1c5/0x420
[ 54.953044] balance_pgdat+0x25f/0x530
[ 54.953359] ? __pfx_autoremove_wake_function+0x10/0x10
[ 54.953785] kswapd+0x12c/0x360
[ 54.954062] ? __pfx_autoremove_wake_function+0x10/0x10
[ 54.954483] ? __pfx_kswapd+0x10/0x10
[ 54.954787] kthread+0xc0/0xe0
[ 54.955053] ? __pfx_kthread+0x10/0x10
[ 54.955360] ret_from_fork+0x29/0x50
[ 54.955655] </TASK>
[ 54.955844] ---[ end trace 0000000000000000 ]---

2023-01-11 08:44:52

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

Thank you Mike! looking...

-Dai

On 1/10/23 10:13 PM, Mike Galbraith wrote:
> On Wed, 2023-01-11 at 06:57 +0100, Mike Galbraith wrote:
>> The last two hunks don't apply to virgin source, but after wedging them
>> in, reproducer no longer inspires box to moan, groan and brick.
> Unless of course I actually boot the freshly installed f-ing kernel
> before moving on to repro procedure <thwap>. No brick, but 1 moan.
>
> [ 50.248802] __vm_enough_memory: pid: 4180, comm: min_free_kbytes, no enough memory for the allocation
> [ 50.272784] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
> [ 50.273398] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
> [ 50.932149] __vm_enough_memory: pid: 4279, comm: min_free_kbytes, no enough memory for the allocation
> [ 51.732046] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.732119] __vm_enough_memory: pid: 4363, comm: min_free_kbytes, no enough memory for the allocation
> [ 51.733205] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.735339] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.736425] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.737616] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.816074] plasmashell[4413]: segfault at 0 ip 00007fd4cef2ce95 sp 00007ffd66c9c1a0 error 6 in swrast_dri.so[7fd4ce200000+16d2000] likely on CPU 1 (core 0, socket 0)
> [ 51.817541] Code: c7 86 f0 1a 00 00 00 00 00 00 c1 e9 03 f3 48 ab 4c 89 e7 e8 bd 76 a7 ff be 10 06 01 00 bf 01 00 00 00 e8 fe 30 46 ff 4c 89 e6 <48> 89 18 48 89 ef 49 89 c5 e8 4d 69 a7 ff 0f 1f 44 00 00 48 89 ef
> [ 54.930735] ------------[ cut here ]------------
> [ 54.931137] WARNING: CPU: 6 PID: 77 at kernel/workqueue.c:1499 __queue_work+0x33b/0x3d0
> [ 54.931747] Modules linked in: netconsole(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) netfs(E) af_packet(E) ip6table_mangle(E) ip6table_raw(E) iptable_raw(E) bridge(E) stp(E) llc(E) rfkill(E) nfnetlink(E) ebtable_filter(E) ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) bpfilter(E) joydev(E) snd_hda_codec_generic(E) ledtrig_audio(E) intel_rapl_msr(E) intel_rapl_common(E) snd_hda_intel(E) snd_intel_dspcfg(E) snd_hda_codec(E) snd_hwdep(E) kvm_intel(E) snd_hda_core(E) snd_pcm(E) iTCO_wdt(E) intel_pmc_bxt(E) kvm(E) snd_timer(E) iTCO_vendor_support(E) irqbypass(E) snd(E) i2c_i801(E) pcspkr(E) lpc_ich(E) i2c_smbus(E) soundcore(E) virtio_balloon(E) mfd_core(E) virtio_net(E) net_failover(E) failover(E) button(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sch_fq_codel(E) fuse(E) sunrpc(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) crc16(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) crct10dif_pclmul(E) qxl(E) crc32_pclmul(E) drm_ttm_helper(E)
> [ 54.931812] crc32c_intel(E) ghash_clmulni_intel(E) ttm(E) sha512_ssse3(E) sha512_generic(E) drm_kms_helper(E) xhci_pci(E) ahci(E) syscopyarea(E) sysfillrect(E) aesni_intel(E) sysimgblt(E) crypto_simd(E) xhci_hcd(E) libahci(E) virtio_blk(E) virtio_console(E) cryptd(E) serio_raw(E) libata(E) usbcore(E) virtio_pci(E) virtio_pci_legacy_dev(E) usb_common(E) drm(E) virtio_pci_modern_dev(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) msr(E) virtio_rng(E) virtio(E) virtio_ring(E) autofs4(E)
> [ 54.942372] CPU: 6 PID: 77 Comm: kswapd0 Kdump: loaded Tainted: G E 6.2.0.g7dd4b80-master #67
> [ 54.943162] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
> [ 54.944039] RIP: 0010:__queue_work+0x33b/0x3d0
> [ 54.944434] Code: 25 40 d3 02 00 f6 47 2c 20 74 18 e8 6f 6f 00 00 48 85 c0 74 0e 48 8b 40 20 48 3b 68 08 0f 84 f5 fc ff ff 0f 0b e9 fe fd ff ff <0f> 0b e9 ee fd ff ff 83 c9 02 49 8d 57 68 e9 d7 fd ff ff 80 3d 83
> [ 54.945938] RSP: 0018:ffff888100dd7c50 EFLAGS: 00010003
> [ 54.946573] RAX: ffff8881042ac350 RBX: ffffffff81fcc880 RCX: 0000000000000000
> [ 54.947176] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888100078000
> [ 54.947749] RBP: ffff88810713e800 R08: ffff888100400028 R09: ffff888100400000
> [ 54.948313] R10: 0000000000000000 R11: ffffffff8225d5c8 R12: 0000000000000008
> [ 54.948928] R13: 0000000000000006 R14: ffff8881042ac348 R15: ffff888103462400
> [ 54.949492] FS: 0000000000000000(0000) GS:ffff888277d80000(0000) knlGS:0000000000000000
> [ 54.950153] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 54.950617] CR2: 00007f8dff5ff9c8 CR3: 000000014ac78002 CR4: 0000000000170ee0
> [ 54.951174] Call Trace:
> [ 54.951389] <TASK>
> [ 54.951575] queue_work_on+0x24/0x30
> [ 54.951876] nfsd4_state_shrinker_count+0x69/0x80 [nfsd]
> [ 54.952357] shrink_slab.constprop.94+0x9d/0x370
> [ 54.952740] shrink_node+0x1c5/0x420
> [ 54.953044] balance_pgdat+0x25f/0x530
> [ 54.953359] ? __pfx_autoremove_wake_function+0x10/0x10
> [ 54.953785] kswapd+0x12c/0x360
> [ 54.954062] ? __pfx_autoremove_wake_function+0x10/0x10
> [ 54.954483] ? __pfx_kswapd+0x10/0x10
> [ 54.954787] kthread+0xc0/0xe0
> [ 54.955053] ? __pfx_kthread+0x10/0x10
> [ 54.955360] ret_from_fork+0x29/0x50
> [ 54.955655] </TASK>
> [ 54.955844] ---[ end trace 0000000000000000 ]---
>

2023-01-11 10:17:55

by Dai Ngo

[permalink] [raw]
Subject: Re: [PATCH v2 1/1] NFSD: fix WARN_ON_ONCE in __queue_delayed_work

Hi Jeff,

As you mentioned before, we should not need to use the spinlock in
nfsd4_state_shrinker_count to prevent calling queue_work multiple
times since queue_work_on checks for WORK_STRUCT_PENDING_BIT and
just skip the call to __queue_work.

However, somehow it does not work as expected, the WARN_ONCE in
__queue_work is still triggered regardless of whether the spinlock
is used or not. The spinlock only helps to reduce the frequency of
the WARN_ON.

The previous patch uses mod_delayed_work and that shuts off the
WARN_ON completely. I suspect there is timing problem in queue_work.

I'll try to rearranging the code in nfsd4_state_shrinker_count to
see if it helps to stop the WARN_ON.

-Dai

On 1/10/23 10:13 PM, Mike Galbraith wrote:
> On Wed, 2023-01-11 at 06:57 +0100, Mike Galbraith wrote:
>> The last two hunks don't apply to virgin source, but after wedging them
>> in, reproducer no longer inspires box to moan, groan and brick.
> Unless of course I actually boot the freshly installed f-ing kernel
> before moving on to repro procedure <thwap>. No brick, but 1 moan.
>
> [ 50.248802] __vm_enough_memory: pid: 4180, comm: min_free_kbytes, no enough memory for the allocation
> [ 50.272784] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
> [ 50.273398] __vm_enough_memory: pid: 2022, comm: plasmashell, no enough memory for the allocation
> [ 50.932149] __vm_enough_memory: pid: 4279, comm: min_free_kbytes, no enough memory for the allocation
> [ 51.732046] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.732119] __vm_enough_memory: pid: 4363, comm: min_free_kbytes, no enough memory for the allocation
> [ 51.733205] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.735339] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.736425] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.737616] __vm_enough_memory: pid: 4489, comm: QQmlThread, no enough memory for the allocation
> [ 51.816074] plasmashell[4413]: segfault at 0 ip 00007fd4cef2ce95 sp 00007ffd66c9c1a0 error 6 in swrast_dri.so[7fd4ce200000+16d2000] likely on CPU 1 (core 0, socket 0)
> [ 51.817541] Code: c7 86 f0 1a 00 00 00 00 00 00 c1 e9 03 f3 48 ab 4c 89 e7 e8 bd 76 a7 ff be 10 06 01 00 bf 01 00 00 00 e8 fe 30 46 ff 4c 89 e6 <48> 89 18 48 89 ef 49 89 c5 e8 4d 69 a7 ff 0f 1f 44 00 00 48 89 ef
> [ 54.930735] ------------[ cut here ]------------
> [ 54.931137] WARNING: CPU: 6 PID: 77 at kernel/workqueue.c:1499 __queue_work+0x33b/0x3d0
> [ 54.931747] Modules linked in: netconsole(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) netfs(E) af_packet(E) ip6table_mangle(E) ip6table_raw(E) iptable_raw(E) bridge(E) stp(E) llc(E) rfkill(E) nfnetlink(E) ebtable_filter(E) ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) bpfilter(E) joydev(E) snd_hda_codec_generic(E) ledtrig_audio(E) intel_rapl_msr(E) intel_rapl_common(E) snd_hda_intel(E) snd_intel_dspcfg(E) snd_hda_codec(E) snd_hwdep(E) kvm_intel(E) snd_hda_core(E) snd_pcm(E) iTCO_wdt(E) intel_pmc_bxt(E) kvm(E) snd_timer(E) iTCO_vendor_support(E) irqbypass(E) snd(E) i2c_i801(E) pcspkr(E) lpc_ich(E) i2c_smbus(E) soundcore(E) virtio_balloon(E) mfd_core(E) virtio_net(E) net_failover(E) failover(E) button(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sch_fq_codel(E) fuse(E) sunrpc(E) configfs(E) ip_tables(E) x_tables(E) ext4(E) crc16(E) mbcache(E) jbd2(E) hid_generic(E) usbhid(E) crct10dif_pclmul(E) qxl(E) crc32_pclmul(E) drm_ttm_helper(E)
> [ 54.931812] crc32c_intel(E) ghash_clmulni_intel(E) ttm(E) sha512_ssse3(E) sha512_generic(E) drm_kms_helper(E) xhci_pci(E) ahci(E) syscopyarea(E) sysfillrect(E) aesni_intel(E) sysimgblt(E) crypto_simd(E) xhci_hcd(E) libahci(E) virtio_blk(E) virtio_console(E) cryptd(E) serio_raw(E) libata(E) usbcore(E) virtio_pci(E) virtio_pci_legacy_dev(E) usb_common(E) drm(E) virtio_pci_modern_dev(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) scsi_common(E) msr(E) virtio_rng(E) virtio(E) virtio_ring(E) autofs4(E)
> [ 54.942372] CPU: 6 PID: 77 Comm: kswapd0 Kdump: loaded Tainted: G E 6.2.0.g7dd4b80-master #67
> [ 54.943162] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-0-g2dd4b9b-rebuilt.opensuse.org 04/01/2014
> [ 54.944039] RIP: 0010:__queue_work+0x33b/0x3d0
> [ 54.944434] Code: 25 40 d3 02 00 f6 47 2c 20 74 18 e8 6f 6f 00 00 48 85 c0 74 0e 48 8b 40 20 48 3b 68 08 0f 84 f5 fc ff ff 0f 0b e9 fe fd ff ff <0f> 0b e9 ee fd ff ff 83 c9 02 49 8d 57 68 e9 d7 fd ff ff 80 3d 83
> [ 54.945938] RSP: 0018:ffff888100dd7c50 EFLAGS: 00010003
> [ 54.946573] RAX: ffff8881042ac350 RBX: ffffffff81fcc880 RCX: 0000000000000000
> [ 54.947176] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff888100078000
> [ 54.947749] RBP: ffff88810713e800 R08: ffff888100400028 R09: ffff888100400000
> [ 54.948313] R10: 0000000000000000 R11: ffffffff8225d5c8 R12: 0000000000000008
> [ 54.948928] R13: 0000000000000006 R14: ffff8881042ac348 R15: ffff888103462400
> [ 54.949492] FS: 0000000000000000(0000) GS:ffff888277d80000(0000) knlGS:0000000000000000
> [ 54.950153] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 54.950617] CR2: 00007f8dff5ff9c8 CR3: 000000014ac78002 CR4: 0000000000170ee0
> [ 54.951174] Call Trace:
> [ 54.951389] <TASK>
> [ 54.951575] queue_work_on+0x24/0x30
> [ 54.951876] nfsd4_state_shrinker_count+0x69/0x80 [nfsd]
> [ 54.952357] shrink_slab.constprop.94+0x9d/0x370
> [ 54.952740] shrink_node+0x1c5/0x420
> [ 54.953044] balance_pgdat+0x25f/0x530
> [ 54.953359] ? __pfx_autoremove_wake_function+0x10/0x10
> [ 54.953785] kswapd+0x12c/0x360
> [ 54.954062] ? __pfx_autoremove_wake_function+0x10/0x10
> [ 54.954483] ? __pfx_kswapd+0x10/0x10
> [ 54.954787] kthread+0xc0/0xe0
> [ 54.955053] ? __pfx_kthread+0x10/0x10
> [ 54.955360] ret_from_fork+0x29/0x50
> [ 54.955655] </TASK>
> [ 54.955844] ---[ end trace 0000000000000000 ]---
>