2018-03-05 22:27:01

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Kernel ops with flexfiles

Hi Trond et al.

looks like I can always reproduce the following oops with a simple cp:

[10722.729463] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver Registering...
[10736.187403] ------------[ cut here ]------------
[10736.187405] Kernel BUG at 00000000f71645f7 [verbose debug info unavailable]
[10736.187417] ------------[ cut here ]------------
[10736.187432] refcount_t hit zero at pnfs_layout_remove_lseg+0x46/0x90 [nfsv4] in kworker/1:0[16409], uid/euid: 0/0
[10736.187436] WARNING: CPU: 1 PID: 16409 at kernel/panic.c:657 refcount_error_report+0x94/0x9e
[10736.187436] Modules linked in: nfs_layout_flexfiles rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_nat veth ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_addrtype br_netfilter overlay tun rfcomm fuse ccm nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack xt_multiport ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security ebtable_filter ebtables ip6table_filter ip6_tables cmac binfmt_misc bnep sunrpc vfat fat arc4 snd_soc_skl snd_hda_codec_hdmi snd_soc_skl_ipc snd_hda_ext_core snd_soc_sst_dsp
[10736.187463] snd_soc_sst_ipc snd_soc_acpi uvcvideo snd_soc_core videobuf2_vmalloc btusb btrtl intel_rapl btbcm btintel videobuf2_memops x86_pkg_temp_thermal intel_powerclamp videobuf2_v4l2 bluetooth snd_hda_codec_realtek videobuf2_core iwlmvm snd_hda_codec_generic coretemp videodev kvm_intel snd_compress snd_pcm_dmaengine ac97_bus mac80211 media snd_hda_intel kvm snd_hda_codec iTCO_wdt iTCO_vendor_support iwlwifi mei_wdt dell_wmi ecdh_generic wmi_bmof dell_smbios_wmi dell_laptop sparse_keymap snd_hda_core dell_wmi_descriptor ppdev dell_smbios_smm dell_smbios dcdbas snd_hwdep irqbypass crct10dif_pclmul dell_smm_hwmon crc32_pclmul snd_seq cfg80211 snd_seq_device ghash_clmulni_intel snd_pcm intel_cstate intel_uncore i2c_i801 intel_rapl_perf snd_timer joydev rtsx_pci_ms memstick snd mei_me soundcore
[10736.187491] mei processor_thermal_device shpchp intel_soc_dts_iosf intel_pch_thermal wmi parport_pc parport dell_rbtn int3400_thermal acpi_thermal_rel acpi_pad int3403_thermal rfkill int340x_thermal_zone i915 rtsx_pci_sdmmc mmc_core i2c_algo_bit drm_kms_helper drm e1000e crc32c_intel serio_raw rtsx_pci ptp pps_core video
[10736.187504] CPU: 1 PID: 16409 Comm: kworker/1:0 Tainted: G W 4.15.6-300.fc27.x86_64 #1
[10736.187505] Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS 1.6.3 06/15/2016
[10736.187518] Workqueue: nfsiod rpc_async_release [sunrpc]
[10736.187520] RIP: 0010:refcount_error_report+0x94/0x9e
[10736.187521] RSP: 0018:ffffa5c6d196fac0 EFLAGS: 00010282
[10736.187522] RAX: 0000000000000000 RBX: ffffffff93088273 RCX: 0000000000000006
[10736.187523] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9a780dc968f0
[10736.187523] RBP: ffffa5c6d196fc08 R08: 0000000000000651 R09: 0000000000000004
[10736.187524] R10: ffffffff93206a80 R11: 0000000000000001 R12: ffff9a776fe63f80
[10736.187525] R13: 0000000000000000 R14: ffffffff930795b5 R15: 0000000000000004
[10736.187526] FS: 0000000000000000(0000) GS:ffff9a780dc80000(0000) knlGS:0000000000000000
[10736.187527] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[10736.187528] CR2: 000000000000000c CR3: 000000036120a005 CR4: 00000000003606e0
[10736.187529] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[10736.187529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[10736.187530] Call Trace:
[10736.187534] ex_handler_refcount+0x4e/0x80
[10736.187536] fixup_exception+0x33/0x40
[10736.187538] do_trap+0x83/0x140
[10736.187540] do_error_trap+0x9d/0x120
[10736.187550] ? nfs42_proc_clone+0x2c7/0x309 [nfsv4]
[10736.187552] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
[10736.187553] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
[10736.187555] ? update_load_avg+0x558/0x6b0
[10736.187557] invalid_op+0x51/0x70
[10736.187566] RIP: 0010:pnfs_layout_remove_lseg+0x46/0x90 [nfsv4]
[10736.187566] RSP: 0018:ffffa5c6d196fcb8 EFLAGS: 00010246
[10736.187567] RAX: ffff9a765cc07f28 RBX: ffff9a757fe5f000 RCX: ffff9a765cc07f00
[10736.187568] RDX: ffff9a765cc07f28 RSI: ffff9a757fe5f000 RDI: ffff9a757fe5f000
[10736.187569] RBP: ffff9a765cc07f00 R08: 0000000000000004 R09: 0fb19d5a01000000
[10736.187570] R10: ffffa5c6d196fda8 R11: ffffffffc10e3470 R12: ffff9a765cc07f01
[10736.187570] R13: ffff9a756cc2d8c0 R14: 0000000000000000 R15: ffff9a765cc07f00
[10736.187579] ? pnfs_layout_remove_lseg+0x1d/0x90 [nfsv4]
[10736.187585] pnfs_lseg_dec_and_remove_zero+0x37/0x70 [nfsv4]
[10736.187593] mark_lseg_invalid+0x29/0x50 [nfsv4]
[10736.187600] pnfs_roc+0x1ad/0x310 [nfsv4]
[10736.187606] ? nfs4_do_close+0x1c9/0x2e0 [nfsv4]
[10736.187611] nfs4_do_close+0x1c9/0x2e0 [nfsv4]
[10736.187620] __put_nfs_open_context+0x7c/0x100 [nfs]
[10736.187627] nfs_commitdata_release+0x15/0x30 [nfs]
[10736.187634] rpc_free_task+0x2d/0x70 [sunrpc]
[10736.187637] process_one_work+0x175/0x390
[10736.187640] worker_thread+0x2e/0x380
[10736.187641] ? process_one_work+0x390/0x390
[10736.187644] kthread+0x113/0x130
[10736.187645] ? kthread_create_worker_on_cpu+0x70/0x70
[10736.187647] ? kthread_create_worker_on_cpu+0x70/0x70
[10736.187648] ret_from_fork+0x35/0x40
[10736.187649] Code: 48 8b 95 80 00 00 00 41 55 49 8d 8c 24 48 07 00 00 45 8b 84 24 68 05 00 00 41 89 c1 48 89 de 48 c7 c7 c0 c7 08 93 e8 4c fa ff ff <0f> 0b 58 5b 5d 41 5c 41 5d c3 0f 1f 44 00 00 55 48 89 e5 41 56
[10736.187673] ---[ end trace ae865330f8bfd4f0 ]---
[10736.187699] ------------[ cut here ]------------
[10736.187700] Kernel BUG at 00000000c11ed4b1 [verbose debug info unavailable]


If I downgrade my kernel to any 4.14 release, the oops is gone. The 4.16-rc4 kernel panics as well.
Let me know if you need more info or testing.


Regards,
Tigran


2018-03-06 22:13:14

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: Kernel ops with flexfiles



Looks like it has sneaked in (or been made visible) with one of the refcount changes added in 4.15.
I am trying to bisect it.

Tigran.

----- Original Message -----
> From: "Tigran Mkrtchyan" <[email protected]>
> To: "linux-nfs" <[email protected]>
> Cc: "Trond Myklebust" <[email protected]>
> Sent: Monday, March 5, 2018 11:26:59 PM
> Subject: Kernel ops with flexfiles

> Hi Trond et al.
>
> looks like I can always reproduce the following ops with a simple cp:
>
> [10722.729463] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver
> Registering...
> [10736.187403] ------------[ cut here ]------------
> [10736.187405] Kernel BUG at 00000000f71645f7 [verbose debug info unavailable]
> [10736.187417] ------------[ cut here ]------------
> [10736.187432] refcount_t hit zero at pnfs_layout_remove_lseg+0x46/0x90 [nfsv4]
> in kworker/1:0[16409], uid/euid: 0/0
> [10736.187436] WARNING: CPU: 1 PID: 16409 at kernel/panic.c:657
> refcount_error_report+0x94/0x9e
> [10736.187436] Modules linked in: nfs_layout_flexfiles rpcsec_gss_krb5
> auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_nat veth
> ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_addrtype br_netfilter overlay tun
> rfcomm fuse ccm nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT
> ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack xt_multiport ip_set
> nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat
> nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_raw
> ip6table_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4
> nf_nat nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security
> ebtable_filter ebtables ip6table_filter ip6_tables cmac binfmt_misc bnep sunrpc
> vfat fat arc4 snd_soc_skl snd_hda_codec_hdmi snd_soc_skl_ipc snd_hda_ext_core
> snd_soc_sst_dsp
> [10736.187463] snd_soc_sst_ipc snd_soc_acpi uvcvideo snd_soc_core
> videobuf2_vmalloc btusb btrtl intel_rapl btbcm btintel videobuf2_memops
> x86_pkg_temp_thermal intel_powerclamp videobuf2_v4l2 bluetooth
> snd_hda_codec_realtek videobuf2_core iwlmvm snd_hda_codec_generic coretemp
> videodev kvm_intel snd_compress snd_pcm_dmaengine ac97_bus mac80211 media
> snd_hda_intel kvm snd_hda_codec iTCO_wdt iTCO_vendor_support iwlwifi mei_wdt
> dell_wmi ecdh_generic wmi_bmof dell_smbios_wmi dell_laptop sparse_keymap
> snd_hda_core dell_wmi_descriptor ppdev dell_smbios_smm dell_smbios dcdbas
> snd_hwdep irqbypass crct10dif_pclmul dell_smm_hwmon crc32_pclmul snd_seq
> cfg80211 snd_seq_device ghash_clmulni_intel snd_pcm intel_cstate intel_uncore
> i2c_i801 intel_rapl_perf snd_timer joydev rtsx_pci_ms memstick snd mei_me
> soundcore
> [10736.187491] mei processor_thermal_device shpchp intel_soc_dts_iosf
> intel_pch_thermal wmi parport_pc parport dell_rbtn int3400_thermal
> acpi_thermal_rel acpi_pad int3403_thermal rfkill int340x_thermal_zone i915
> rtsx_pci_sdmmc mmc_core i2c_algo_bit drm_kms_helper drm e1000e crc32c_intel
> serio_raw rtsx_pci ptp pps_core video
> [10736.187504] CPU: 1 PID: 16409 Comm: kworker/1:0 Tainted: G W
> 4.15.6-300.fc27.x86_64 #1
> [10736.187505] Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS 1.6.3
> 06/15/2016
> [10736.187518] Workqueue: nfsiod rpc_async_release [sunrpc]
> [10736.187520] RIP: 0010:refcount_error_report+0x94/0x9e
> [10736.187521] RSP: 0018:ffffa5c6d196fac0 EFLAGS: 00010282
> [10736.187522] RAX: 0000000000000000 RBX: ffffffff93088273 RCX: 0000000000000006
> [10736.187523] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9a780dc968f0
> [10736.187523] RBP: ffffa5c6d196fc08 R08: 0000000000000651 R09: 0000000000000004
> [10736.187524] R10: ffffffff93206a80 R11: 0000000000000001 R12: ffff9a776fe63f80
> [10736.187525] R13: 0000000000000000 R14: ffffffff930795b5 R15: 0000000000000004
> [10736.187526] FS: 0000000000000000(0000) GS:ffff9a780dc80000(0000)
> knlGS:0000000000000000
> [10736.187527] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [10736.187528] CR2: 000000000000000c CR3: 000000036120a005 CR4: 00000000003606e0
> [10736.187529] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [10736.187529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [10736.187530] Call Trace:
> [10736.187534] ex_handler_refcount+0x4e/0x80
> [10736.187536] fixup_exception+0x33/0x40
> [10736.187538] do_trap+0x83/0x140
> [10736.187540] do_error_trap+0x9d/0x120
> [10736.187550] ? nfs42_proc_clone+0x2c7/0x309 [nfsv4]
> [10736.187552] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
> [10736.187553] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
> [10736.187555] ? update_load_avg+0x558/0x6b0
> [10736.187557] invalid_op+0x51/0x70
> [10736.187566] RIP: 0010:pnfs_layout_remove_lseg+0x46/0x90 [nfsv4]
> [10736.187566] RSP: 0018:ffffa5c6d196fcb8 EFLAGS: 00010246
> [10736.187567] RAX: ffff9a765cc07f28 RBX: ffff9a757fe5f000 RCX: ffff9a765cc07f00
> [10736.187568] RDX: ffff9a765cc07f28 RSI: ffff9a757fe5f000 RDI: ffff9a757fe5f000
> [10736.187569] RBP: ffff9a765cc07f00 R08: 0000000000000004 R09: 0fb19d5a01000000
> [10736.187570] R10: ffffa5c6d196fda8 R11: ffffffffc10e3470 R12: ffff9a765cc07f01
> [10736.187570] R13: ffff9a756cc2d8c0 R14: 0000000000000000 R15: ffff9a765cc07f00
> [10736.187579] ? pnfs_layout_remove_lseg+0x1d/0x90 [nfsv4]
> [10736.187585] pnfs_lseg_dec_and_remove_zero+0x37/0x70 [nfsv4]
> [10736.187593] mark_lseg_invalid+0x29/0x50 [nfsv4]
> [10736.187600] pnfs_roc+0x1ad/0x310 [nfsv4]
> [10736.187606] ? nfs4_do_close+0x1c9/0x2e0 [nfsv4]
> [10736.187611] nfs4_do_close+0x1c9/0x2e0 [nfsv4]
> [10736.187620] __put_nfs_open_context+0x7c/0x100 [nfs]
> [10736.187627] nfs_commitdata_release+0x15/0x30 [nfs]
> [10736.187634] rpc_free_task+0x2d/0x70 [sunrpc]
> [10736.187637] process_one_work+0x175/0x390
> [10736.187640] worker_thread+0x2e/0x380
> [10736.187641] ? process_one_work+0x390/0x390
> [10736.187644] kthread+0x113/0x130
> [10736.187645] ? kthread_create_worker_on_cpu+0x70/0x70
> [10736.187647] ? kthread_create_worker_on_cpu+0x70/0x70
> [10736.187648] ret_from_fork+0x35/0x40
> [10736.187649] Code: 48 8b 95 80 00 00 00 41 55 49 8d 8c 24 48 07 00 00 45 8b 84
> 24 68 05 00 00 41 89 c1 48 89 de 48 c7 c7 c0 c7 08 93 e8 4c fa ff ff <0f> 0b 58
> 5b 5d 41 5c 41 5d c3 0f 1f 44 00 00 55 48 89 e5 41 56
> [10736.187673] ---[ end trace ae865330f8bfd4f0 ]---
> [10736.187699] ------------[ cut here ]------------
> [10736.187700] Kernel BUG at 00000000c11ed4b1 [verbose debug info unavailable]
>
>
> If I downgrade my kernel to any of 4.14 - ops is gone. The 4.16-rc4 panics as
> well.
> Let me know if you need more info or testing.
>
>
> Regards,
> Tigran
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2018-03-07 06:47:58

by Christoph Hellwig

[permalink] [raw]
Subject: Re: Kernel ops with flexfiles

FYI, I see very similar issues with blocklayout, also caused by the
refcount changes. But I didn't really have any time to dig
into it yet.

2018-03-07 17:00:29

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: Kernel ops with flexfiles

After some digging I found that the number of increments and decrements of
lo->plh_refcount doesn't match. While the number of pnfs_get_layout_hdr calls
matches the number of pnfs_put_layout_hdr calls, pnfs_layout_remove_lseg does yet
another decrement. Something like this fixes the issue:

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..a7b01cd87e6a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -490,7 +490,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
- refcount_dec(&lo->plh_refcount);
+ refcount_dec_not_one(&lo->plh_refcount);
if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
return;
if (list_empty(&lo->plh_segs) &&


Or maybe we don't need refcount_dec(&lo->plh_refcount) at all.
Can someone comment on it?

Thanks,
Tigran.


----- Original Message -----
> From: "Christoph Hellwig" <[email protected]>
> To: "Tigran Mkrtchyan" <[email protected]>
> Cc: "linux-nfs" <[email protected]>, "Trond Myklebust" <[email protected]>
> Sent: Wednesday, March 7, 2018 7:47:56 AM
> Subject: Re: Kernel ops with flexfiles

> FYI, I see very similar issues with blocklayout, also caused by the
> refcount changes. But I didn't really didn't have any time to dig
> into it yet.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2018-03-07 20:09:29

by Trond Myklebust

[permalink] [raw]
Subject: Re: Kernel ops with flexfiles

Hi Tigran,

On Mon, 2018-03-05 at 23:26 +0100, Mkrtchyan, Tigran wrote:
> Hi Trond et al.
>
> looks like I can always reproduce the following ops with a simple cp:
>
> [10722.729463] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver
> Registering...
> [10736.187403] ------------[ cut here ]------------
> [10736.187405] Kernel BUG at 00000000f71645f7 [verbose debug info
> unavailable]
> [10736.187417] ------------[ cut here ]------------
> [10736.187432] refcount_t hit zero at
> pnfs_layout_remove_lseg+0x46/0x90 [nfsv4] in kworker/1:0[16409],
> uid/euid: 0/0
> [10736.187436] WARNING: CPU: 1 PID: 16409 at kernel/panic.c:657
> refcount_error_report+0x94/0x9e
> [10736.187436] Modules linked in: nfs_layout_flexfiles
> rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace
> fscache xt_nat veth ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_addrtype
> br_netfilter overlay tun rfcomm fuse ccm nf_conntrack_netbios_ns
> nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6
> xt_conntrack xt_multiport ip_set nfnetlink ebtable_nat ebtable_broute
> bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6
> nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security
> iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat
> nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security
> ebtable_filter ebtables ip6table_filter ip6_tables cmac binfmt_misc
> bnep sunrpc vfat fat arc4 snd_soc_skl snd_hda_codec_hdmi
> snd_soc_skl_ipc snd_hda_ext_core snd_soc_sst_dsp
> [10736.187463]  snd_soc_sst_ipc snd_soc_acpi uvcvideo snd_soc_core
> videobuf2_vmalloc btusb btrtl intel_rapl btbcm btintel
> videobuf2_memops x86_pkg_temp_thermal intel_powerclamp videobuf2_v4l2
> bluetooth snd_hda_codec_realtek videobuf2_core iwlmvm
> snd_hda_codec_generic coretemp videodev kvm_intel snd_compress
> snd_pcm_dmaengine ac97_bus mac80211 media snd_hda_intel kvm
> snd_hda_codec iTCO_wdt iTCO_vendor_support iwlwifi mei_wdt dell_wmi
> ecdh_generic wmi_bmof dell_smbios_wmi dell_laptop sparse_keymap
> snd_hda_core dell_wmi_descriptor ppdev dell_smbios_smm dell_smbios
> dcdbas snd_hwdep irqbypass crct10dif_pclmul dell_smm_hwmon
> crc32_pclmul snd_seq cfg80211 snd_seq_device ghash_clmulni_intel
> snd_pcm intel_cstate intel_uncore i2c_i801 intel_rapl_perf snd_timer
> joydev rtsx_pci_ms memstick snd mei_me soundcore
> [10736.187491]  mei processor_thermal_device shpchp
> intel_soc_dts_iosf intel_pch_thermal wmi parport_pc parport dell_rbtn
> int3400_thermal acpi_thermal_rel acpi_pad int3403_thermal rfkill
> int340x_thermal_zone i915 rtsx_pci_sdmmc mmc_core i2c_algo_bit
> drm_kms_helper drm e1000e crc32c_intel serio_raw rtsx_pci ptp
> pps_core video
> [10736.187504] CPU: 1 PID: 16409 Comm: kworker/1:0 Tainted:
> G        W        4.15.6-300.fc27.x86_64 #1
> [10736.187505] Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS
> 1.6.3 06/15/2016
> [10736.187518] Workqueue: nfsiod rpc_async_release [sunrpc]
> [10736.187520] RIP: 0010:refcount_error_report+0x94/0x9e
> [10736.187521] RSP: 0018:ffffa5c6d196fac0 EFLAGS: 00010282
> [10736.187522] RAX: 0000000000000000 RBX: ffffffff93088273 RCX:
> 0000000000000006
> [10736.187523] RDX: 0000000000000007 RSI: 0000000000000082 RDI:
> ffff9a780dc968f0
> [10736.187523] RBP: ffffa5c6d196fc08 R08: 0000000000000651 R09:
> 0000000000000004
> [10736.187524] R10: ffffffff93206a80 R11: 0000000000000001 R12:
> ffff9a776fe63f80
> [10736.187525] R13: 0000000000000000 R14: ffffffff930795b5 R15:
> 0000000000000004
> [10736.187526] FS:  0000000000000000(0000) GS:ffff9a780dc80000(0000)
> knlGS:0000000000000000
> [10736.187527] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [10736.187528] CR2: 000000000000000c CR3: 000000036120a005 CR4:
> 00000000003606e0
> [10736.187529] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> 0000000000000000
> [10736.187529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> 0000000000000400
> [10736.187530] Call Trace:
> [10736.187534]  ex_handler_refcount+0x4e/0x80
> [10736.187536]  fixup_exception+0x33/0x40
> [10736.187538]  do_trap+0x83/0x140
> [10736.187540]  do_error_trap+0x9d/0x120
> [10736.187550]  ? nfs42_proc_clone+0x2c7/0x309 [nfsv4]
> [10736.187552]  ? __update_load_avg_se.isra.30+0x1b6/0x1c0
> [10736.187553]  ? __update_load_avg_se.isra.30+0x1b6/0x1c0
> [10736.187555]  ? update_load_avg+0x558/0x6b0
> [10736.187557]  invalid_op+0x51/0x70
> [10736.187566] RIP: 0010:pnfs_layout_remove_lseg+0x46/0x90 [nfsv4]
> [10736.187566] RSP: 0018:ffffa5c6d196fcb8 EFLAGS: 00010246
> [10736.187567] RAX: ffff9a765cc07f28 RBX: ffff9a757fe5f000 RCX:
> ffff9a765cc07f00
> [10736.187568] RDX: ffff9a765cc07f28 RSI: ffff9a757fe5f000 RDI:
> ffff9a757fe5f000
> [10736.187569] RBP: ffff9a765cc07f00 R08: 0000000000000004 R09:
> 0fb19d5a01000000
> [10736.187570] R10: ffffa5c6d196fda8 R11: ffffffffc10e3470 R12:
> ffff9a765cc07f01
> [10736.187570] R13: ffff9a756cc2d8c0 R14: 0000000000000000 R15:
> ffff9a765cc07f00
> [10736.187579]  ? pnfs_layout_remove_lseg+0x1d/0x90 [nfsv4]
> [10736.187585]  pnfs_lseg_dec_and_remove_zero+0x37/0x70 [nfsv4]
> [10736.187593]  mark_lseg_invalid+0x29/0x50 [nfsv4]
> [10736.187600]  pnfs_roc+0x1ad/0x310 [nfsv4]
> [10736.187606]  ? nfs4_do_close+0x1c9/0x2e0 [nfsv4]
> [10736.187611]  nfs4_do_close+0x1c9/0x2e0 [nfsv4]
> [10736.187620]  __put_nfs_open_context+0x7c/0x100 [nfs]
> [10736.187627]  nfs_commitdata_release+0x15/0x30 [nfs]
> [10736.187634]  rpc_free_task+0x2d/0x70 [sunrpc]
> [10736.187637]  process_one_work+0x175/0x390
> [10736.187640]  worker_thread+0x2e/0x380
> [10736.187641]  ? process_one_work+0x390/0x390
> [10736.187644]  kthread+0x113/0x130
> [10736.187645]  ? kthread_create_worker_on_cpu+0x70/0x70
> [10736.187647]  ? kthread_create_worker_on_cpu+0x70/0x70
> [10736.187648]  ret_from_fork+0x35/0x40
> [10736.187649] Code: 48 8b 95 80 00 00 00 41 55 49 8d 8c 24 48 07 00
> 00 45 8b 84 24 68 05 00 00 41 89 c1 48 89 de 48 c7 c7 c0 c7 08 93 e8
> 4c fa ff ff <0f> 0b 58 5b 5d 41 5c 41 5d c3 0f 1f 44 00 00 55 48 89
> e5 41 56
> [10736.187673] ---[ end trace ae865330f8bfd4f0 ]---
> [10736.187699] ------------[ cut here ]------------
> [10736.187700] Kernel BUG at 00000000c11ed4b1 [verbose debug info
> unavailable]
>
>
> If I downgrade my kernel to any of 4.14 - ops is gone. The 4.16-rc4
> panics as well.
> Let me know if you need more info or testing.

Does the following patch fix it?
8<--------------------------------------------------------
From a5c81f5040a9e986eafb728719dfab2d588fe2d0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <[email protected]>
Date: Wed, 7 Mar 2018 14:49:06 -0500
Subject: [PATCH] pNFS: Prevent the layout header refcount going to zero in
 pnfs_roc()

Ensure that we hold a reference to the layout header when processing
the pNFS return-on-close so that the refcount value does not inadvertently
go to zero.

Reported-by: Tigran Mkrtchyan <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
Cc: [email protected] # v4.10+
---
 fs/nfs/pnfs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c13e826614b5..ee723aa153a3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
 void
 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-	struct inode *inode = lo->plh_inode;
+	struct inode *inode;
 
+	if (!lo)
+		return;
+	inode = lo->plh_inode;
 	pnfs_layoutreturn_before_put_layout_hdr(lo);
 
 	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
@@ -1241,10 +1244,12 @@ bool pnfs_roc(struct inode *ino,
 	spin_lock(&ino->i_lock);
 	lo = nfsi->layout;
 	if (!lo || !pnfs_layout_is_valid(lo) ||
-	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		lo = NULL;
 		goto out_noroc;
+	}
+	pnfs_get_layout_hdr(lo);
 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
-		pnfs_get_layout_hdr(lo);
 		spin_unlock(&ino->i_lock);
 		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
 				TASK_UNINTERRUPTIBLE);
@@ -1312,10 +1317,12 @@ bool pnfs_roc(struct inode *ino,
 		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
 		if (ld->prepare_layoutreturn)
 			ld->prepare_layoutreturn(args);
+		pnfs_put_layout_hdr(lo);
 		return true;
 	}
 	if (layoutreturn)
 		pnfs_send_layoutreturn(lo, &stateid, iomode, true);
+	pnfs_put_layout_hdr(lo);
 	return false;
 }
 
-- 
2.14.3

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
[email protected]


2018-03-07 20:56:18

by Mkrtchyan, Tigran

[permalink] [raw]
Subject: Re: Kernel ops with flexfiles

Hi Trond,

----- Original Message -----
> From: "Trond Myklebust" <[email protected]>
> To: "Tigran Mkrtchyan" <[email protected]>, "linux-nfs" <linux-nfs@vger.kernel.org>
> Sent: Wednesday, March 7, 2018 9:09:24 PM
> Subject: Re: Kernel ops with flexfiles

> Hi Tigran,
>=20
> On Mon, 2018-03-05 at 23:26 +0100, Mkrtchyan, Tigran wrote:
>> Hi Trond et al.
>>=20
>> looks like I can always reproduce the following oops with a simple cp:
>>=20
>> [10722.729463] nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver
>> Registering...
>> [10736.187403] ------------[ cut here ]------------
>> [10736.187405] Kernel BUG at 00000000f71645f7 [verbose debug info
>> unavailable]
>> [10736.187417] ------------[ cut here ]------------
>> [10736.187432] refcount_t hit zero at
>> pnfs_layout_remove_lseg+0x46/0x90 [nfsv4] in kworker/1:0[16409],
>> uid/euid: 0/0
>> [10736.187436] WARNING: CPU: 1 PID: 16409 at kernel/panic.c:657
>> refcount_error_report+0x94/0x9e
>> [10736.187436] Modules linked in: nfs_layout_flexfiles
>> rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace
>> fscache xt_nat veth ipt_MASQUERADE nf_nat_masquerade_ipv4 xt_addrtype
>> br_netfilter overlay tun rfcomm fuse ccm nf_conntrack_netbios_ns
>> nf_conntrack_broadcast xt_CT ip6t_rpfilter ip6t_REJECT nf_reject_ipv6
>> xt_conntrack xt_multiport ip_set nfnetlink ebtable_nat ebtable_broute
>> bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6
>> nf_nat_ipv6 ip6table_mangle ip6table_raw ip6table_security
>> iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat
>> nf_conntrack libcrc32c iptable_mangle iptable_raw iptable_security
>> ebtable_filter ebtables ip6table_filter ip6_tables cmac binfmt_misc
>> bnep sunrpc vfat fat arc4 snd_soc_skl snd_hda_codec_hdmi
>> snd_soc_skl_ipc snd_hda_ext_core snd_soc_sst_dsp
>> [10736.187463] snd_soc_sst_ipc snd_soc_acpi uvcvideo snd_soc_core
>> videobuf2_vmalloc btusb btrtl intel_rapl btbcm btintel
>> videobuf2_memops x86_pkg_temp_thermal intel_powerclamp videobuf2_v4l2
>> bluetooth snd_hda_codec_realtek videobuf2_core iwlmvm
>> snd_hda_codec_generic coretemp videodev kvm_intel snd_compress
>> snd_pcm_dmaengine ac97_bus mac80211 media snd_hda_intel kvm
>> snd_hda_codec iTCO_wdt iTCO_vendor_support iwlwifi mei_wdt dell_wmi
>> ecdh_generic wmi_bmof dell_smbios_wmi dell_laptop sparse_keymap
>> snd_hda_core dell_wmi_descriptor ppdev dell_smbios_smm dell_smbios
>> dcdbas snd_hwdep irqbypass crct10dif_pclmul dell_smm_hwmon
>> crc32_pclmul snd_seq cfg80211 snd_seq_device ghash_clmulni_intel
>> snd_pcm intel_cstate intel_uncore i2c_i801 intel_rapl_perf snd_timer
>> joydev rtsx_pci_ms memstick snd mei_me soundcore
>> [10736.187491] mei processor_thermal_device shpchp
>> intel_soc_dts_iosf intel_pch_thermal wmi parport_pc parport dell_rbtn
>> int3400_thermal acpi_thermal_rel acpi_pad int3403_thermal rfkill
>> int340x_thermal_zone i915 rtsx_pci_sdmmc mmc_core i2c_algo_bit
>> drm_kms_helper drm e1000e crc32c_intel serio_raw rtsx_pci ptp
>> pps_core video
>> [10736.187504] CPU: 1 PID: 16409 Comm: kworker/1:0 Tainted:
>> G W 4.15.6-300.fc27.x86_64 #1
>> [10736.187505] Hardware name: Dell Inc. Latitude E7470/0T6HHJ, BIOS
>> 1.6.3 06/15/2016
>> [10736.187518] Workqueue: nfsiod rpc_async_release [sunrpc]
>> [10736.187520] RIP: 0010:refcount_error_report+0x94/0x9e
>> [10736.187521] RSP: 0018:ffffa5c6d196fac0 EFLAGS: 00010282
>> [10736.187522] RAX: 0000000000000000 RBX: ffffffff93088273 RCX:
>> 0000000000000006
>> [10736.187523] RDX: 0000000000000007 RSI: 0000000000000082 RDI:
>> ffff9a780dc968f0
>> [10736.187523] RBP: ffffa5c6d196fc08 R08: 0000000000000651 R09:
>> 0000000000000004
>> [10736.187524] R10: ffffffff93206a80 R11: 0000000000000001 R12:
>> ffff9a776fe63f80
>> [10736.187525] R13: 0000000000000000 R14: ffffffff930795b5 R15:
>> 0000000000000004
>> [10736.187526] FS: 0000000000000000(0000) GS:ffff9a780dc80000(0000)
>> knlGS:0000000000000000
>> [10736.187527] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [10736.187528] CR2: 000000000000000c CR3: 000000036120a005 CR4:
>> 00000000003606e0
>> [10736.187529] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>> 0000000000000000
>> [10736.187529] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>> 0000000000000400
>> [10736.187530] Call Trace:
>> [10736.187534] ex_handler_refcount+0x4e/0x80
>> [10736.187536] fixup_exception+0x33/0x40
>> [10736.187538] do_trap+0x83/0x140
>> [10736.187540] do_error_trap+0x9d/0x120
>> [10736.187550] ? nfs42_proc_clone+0x2c7/0x309 [nfsv4]
>> [10736.187552] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
>> [10736.187553] ? __update_load_avg_se.isra.30+0x1b6/0x1c0
>> [10736.187555] ? update_load_avg+0x558/0x6b0
>> [10736.187557] invalid_op+0x51/0x70
>> [10736.187566] RIP: 0010:pnfs_layout_remove_lseg+0x46/0x90 [nfsv4]
>> [10736.187566] RSP: 0018:ffffa5c6d196fcb8 EFLAGS: 00010246
>> [10736.187567] RAX: ffff9a765cc07f28 RBX: ffff9a757fe5f000 RCX:
>> ffff9a765cc07f00
>> [10736.187568] RDX: ffff9a765cc07f28 RSI: ffff9a757fe5f000 RDI:
>> ffff9a757fe5f000
>> [10736.187569] RBP: ffff9a765cc07f00 R08: 0000000000000004 R09:
>> 0fb19d5a01000000
>> [10736.187570] R10: ffffa5c6d196fda8 R11: ffffffffc10e3470 R12:
>> ffff9a765cc07f01
>> [10736.187570] R13: ffff9a756cc2d8c0 R14: 0000000000000000 R15:
>> ffff9a765cc07f00
>> [10736.187579] ? pnfs_layout_remove_lseg+0x1d/0x90 [nfsv4]
>> [10736.187585] pnfs_lseg_dec_and_remove_zero+0x37/0x70 [nfsv4]
>> [10736.187593] mark_lseg_invalid+0x29/0x50 [nfsv4]
>> [10736.187600] pnfs_roc+0x1ad/0x310 [nfsv4]
>> [10736.187606] ? nfs4_do_close+0x1c9/0x2e0 [nfsv4]
>> [10736.187611] nfs4_do_close+0x1c9/0x2e0 [nfsv4]
>> [10736.187620] __put_nfs_open_context+0x7c/0x100 [nfs]
>> [10736.187627] nfs_commitdata_release+0x15/0x30 [nfs]
>> [10736.187634] rpc_free_task+0x2d/0x70 [sunrpc]
>> [10736.187637] process_one_work+0x175/0x390
>> [10736.187640] worker_thread+0x2e/0x380
>> [10736.187641] ? process_one_work+0x390/0x390
>> [10736.187644] kthread+0x113/0x130
>> [10736.187645] ? kthread_create_worker_on_cpu+0x70/0x70
>> [10736.187647] ? kthread_create_worker_on_cpu+0x70/0x70
>> [10736.187648] ret_from_fork+0x35/0x40
>> [10736.187649] Code: 48 8b 95 80 00 00 00 41 55 49 8d 8c 24 48 07 00
>> 00 45 8b 84 24 68 05 00 00 41 89 c1 48 89 de 48 c7 c7 c0 c7 08 93 e8
>> 4c fa ff ff <0f> 0b 58 5b 5d 41 5c 41 5d c3 0f 1f 44 00 00 55 48 89
>> e5 41 56
>> [10736.187673] ---[ end trace ae865330f8bfd4f0 ]---
>> [10736.187699] ------------[ cut here ]------------
>> [10736.187700] Kernel BUG at 00000000c11ed4b1 [verbose debug info
>> unavailable]
>>=20
>>=20
>> If I downgrade my kernel to any 4.14 release, the oops is gone. 4.16-rc4
>> panics as well.
>> Let me know if you need more info or testing.
>=20
> Does the following patch fix it?


Looks good.

Tested-by: ...

Tigran.


> 8<--------------------------------------------------------
> From a5c81f5040a9e986eafb728719dfab2d588fe2d0 Mon Sep 17 00:00:00 2001
> From: Trond Myklebust <[email protected]>
> Date: Wed, 7 Mar 2018 14:49:06 -0500
> Subject: [PATCH] pNFS: Prevent the layout header refcount going to zero in
> pnfs_roc()
>=20
> Ensure that we hold a reference to the layout header when processing
> the pNFS return-on-close so that the refcount value does not inadvertently
> go to zero.
>=20
> Reported-by: Tigran Mkrtchyan <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> Cc: [email protected] # v4.10+
> ---
> fs/nfs/pnfs.c | 13 ++++++++++---
> 1 file changed, 10 insertions(+), 3 deletions(-)
>=20
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index c13e826614b5..ee723aa153a3 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -292,8 +292,11 @@ pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
> void
> pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
> {
> -	struct inode *inode = lo->plh_inode;
> +	struct inode *inode;
>
> +	if (!lo)
> +		return;
> +	inode = lo->plh_inode;
> 	pnfs_layoutreturn_before_put_layout_hdr(lo);
>
> 	if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
> @@ -1241,10 +1244,12 @@ bool pnfs_roc(struct inode *ino,
> 	spin_lock(&ino->i_lock);
> 	lo = nfsi->layout;
> 	if (!lo || !pnfs_layout_is_valid(lo) ||
> -	 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
> +	 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
> +		lo = NULL;
> 		goto out_noroc;
> +	}
> +	pnfs_get_layout_hdr(lo);
> 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
> -		pnfs_get_layout_hdr(lo);
> 		spin_unlock(&ino->i_lock);
> 		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
> 				TASK_UNINTERRUPTIBLE);
> @@ -1312,10 +1317,12 @@ bool pnfs_roc(struct inode *ino,
> 		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
> 		if (ld->prepare_layoutreturn)
> 			ld->prepare_layoutreturn(args);
> +		pnfs_put_layout_hdr(lo);
> 		return true;
> 	}
> 	if (layoutreturn)
> 		pnfs_send_layoutreturn(lo, &stateid, iomode, true);
> +	pnfs_put_layout_hdr(lo);
> 	return false;
> }
>=20
> --
> 2.14.3
>=20
> --
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> [email protected]
> N=EF=BF=BD=EF=BF=BD=EF=BF=BD=EF=BF=BD=EF=BF=BDr=EF=BF=BD=EF=BF=BDy=EF=BF=
=BD=EF=BF=BD=EF=BF=BDb=EF=BF=BDX=EF=BF=BD=EF=BF=BD=C7=A7v=EF=BF=BD^=EF=BF=
=BD)=DE=BA{.n=EF=BF=BD+=EF=BF=BD=EF=BF=BD=EF=BF=BD=EF=BF=BD{=EF=BF=BD=EF=BF=
=BD=EF=BF=BD"=EF=BF=BD=EF=BF=BD^n=EF=BF=BDr=EF=BF=BD=EF=BF=BD=EF=BF=BDz=EF=
=BF=BD=EF=BF=BD=EF=BF=BDh=EF=BF=BD=EF=BF=BD=EF=BF=BD=EF=BF=BD&=EF=BF=BD=EF=
=BF=BD=EF=BF=BDG=EF=BF=BD=EF=BF=BD=EF=BF=BDh=EF=BF=BD(=EF=BF=BD=E9=9A=8E=EF=
=BF=BD=DD=A2j"=EF=BF=BD=EF=BF=BD=EF=BF=BDm=EF=BF=BD=EF=BF=BD=EF=BF=BD=EF=BF=
=BD=EF=BF=BDz=EF=BF=BD=DE=96=EF=BF=BD=EF=BF=BD=EF=BF=BDf=EF=BF=BD=EF=BF=BD=
=EF=BF=BDh=EF=BF=BD=EF=BF=BD=EF=BF=BD~=EF=BF=BDm=EF=BF=BD