Hello,
I have just hit a problem where a process writing a large file (bigger than
the available space) to an NFS mountpoint hangs forever (even after the
filesystem is full).
When traced with `ltrace`, it appears to be looping in read()/write().
Has anyone else hit this problem? And do we need to fix it?
Thanks,
Yongcheng
Steps:
~~~~~~~~~~~~
- NFS server
1. Prepare a partition with arbitrary size (e.g., 4G).
2. Format the partition as xfs or ext4.
3. Mount the partition and then export the directory via NFS.
- NFS client
1. Mount the NFS server's directory.
2. Run dd to write a file in that directory. The write should hit the
size limit, and dd should return "No space left on device".
# dd if=/dev/zero of=/mnt/nfs/file bs=100K count=4G
For example (I tested this on a single host):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[root ~]# truncate --size 4G /tmp/test.img
[root ~]# losetup -f
/dev/loop0
[root ~]# losetup /dev/loop0 /tmp/test.img
[root ~]# mkdir /export_test /mnt/mnt_test
[root ~]# mkfs.ext4 /dev/loop0
mke2fs 1.44.6 (5-Mar-2019)
Discarding device blocks: done
Creating filesystem with 1048576 4k blocks and 262144 inodes
Filesystem UUID: c3bd7b89-b134-4930-82f3-7489baa76849
Superblock backups stored on blocks:
32768, 98304, 163840, 229376, 294912, 819200, 884736
Allocating group tables: done
Writing inode tables: done
Creating journal (16384 blocks): done
Writing superblocks and filesystem accounting information: done
[root ~]# echo $?
0
[root ~]# mount -t ext4 /dev/loop0 /export_test/
[root ~]# systemctl start nfs-server
[root ~]# exportfs -o rw,no_root_squash *:/export_test/
[root ~]# mount -t nfs $HOSTNAME:/export_test /mnt/mnt_test
[root ~]# dd if=/dev/random of=/mnt/mnt_test/testfile bs=100K count=4G
...
# hang
...
[root ~]# umount /mnt/mnt_test/
[root ~]#
[root ~]# systemctl stop nfs-server
[root ~]#
[root ~]# umount /export_test/
[root ~]# mount | grep loop
[root ~]# wipefs -a /dev/loop0
/dev/loop0: 2 bytes were erased at offset 0x00000438 (ext4): 53 ef
[root ~]#
[root ~]#
[root ~]# mkfs.xfs /dev/loop0
meta-data=/dev/loop0 isize=512 agcount=4, agsize=262144 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=1, sparse=1, rmapbt=0
= reflink=1
data = bsize=4096 blocks=1048576, imaxpct=25
= sunit=0 swidth=0 blks
naming =version 2 bsize=4096 ascii-ci=0, ftype=1
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=0 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
[root ~]#
[root ~]# mount -t xfs /dev/loop0 /export_test/
[root ~]# systemctl start nfs-server
[root ~]# exportfs -o rw,no_root_squash *:/export_test/
[root ~]# mount -t nfs $HOSTNAME:/export_test /mnt/mnt_test
[root ~]# dd if=/dev/zero of=/mnt/mnt_test/testfile count=4G
...
# hang
...
#
# debug from another terminal
#
[root temp]# nfsstat -m
/mnt/mnt_test from nfsserver.redhat.com:/export_test
Flags: rw,relatime,vers=4.2,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=10.73.4.183,local_lock=none,addr=10.73.4.183
[root temp]# df -h /mnt/mnt_test/
Filesystem Size Used Avail Use% Mounted on
nfsserver.redhat.com:/export_test 4.0G 4.0G 1.0M 100% /mnt/mnt_test
[root temp]# ps aux | grep -v grep | grep -w dd
root 5569 97.5 0.0 4792 1700 pts/0 R+ 03:10 132:13 dd if=/dev/zero of=/mnt/mnt_test/testfile count=4G
[root temp]# ltrace -p 5569
memcpy(0x56179886d000, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 0x56179886d000
write(1, "", 512) = 512
read(0, "", 512) = 512
memcpy(0x56179886d000, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 0x56179886d000
write(1, "", 512) = 512
read(0, "", 512) = 512
memcpy(0x56179886d000, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 0x56179886d000
write(1, "", 512) = 512
read(0, "", 512) = 512
...
...
...
...