2021-05-12 10:38:30

by Richard W.M. Jones

[permalink] [raw]
Subject: [PATCH v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE)

Version 2 restores the #comments in the script in the git commit
message. The patch itself is identical.

Rich.



2021-05-12 10:41:25

by Richard W.M. Jones

[permalink] [raw]
Subject: [PATCH v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE)

libnbd's nbdfuse utility would like to translate fallocate zero
requests into NBD_CMD_WRITE_ZEROES. Currently the fuse module filters
these out, returning -EOPNOTSUPP. This commit treats these almost the
same way as FALLOC_FL_PUNCH_HOLE except not calling
truncate_pagecache_range.

A way to test this is with the following script:

--------------------
#!/bin/bash
# Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
# https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
set -e
set -x

export output=$PWD/output
rm -f test.img $output

# Create an nbdkit instance that prints the NBD requests seen.
nbdkit sh - <<'EOF'
case "$1" in
get_size) echo 1M ;;
can_write|can_trim|can_zero|can_fast_zero) ;;
pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
pwrite) echo "$@" >>$output; cat >/dev/null ;;
trim|zero) echo "$@" >>$output ;;
*) exit 2 ;;
esac
EOF

# Fuse-mount NBD instance as a file.
touch test.img
nbdfuse test.img nbd://localhost & sleep 2
ls -lh test.img

# Run a read, write, trim and zero request.
dd if=test.img of=/dev/null bs=512 skip=1024 count=1
dd if=/dev/zero of=test.img bs=512 skip=2048 count=1
fallocate -p -l 512 -o 4096 test.img
fallocate -z -l 512 -o 8192 test.img

# Print the output from the NBD server.
cat $output

# Clean up.
fusermount3 -u test.img
killall nbdkit
rm test.img $output
--------------------

which will print:

pread 4096 524288 # number depends on readahead
pwrite 512 0
trim 512 4096
zero 512 8192 may_trim

The last line indicates that the FALLOC_FL_ZERO_RANGE request was
successfully passed through by the kernel module to nbdfuse,
translated to NBD_CMD_WRITE_ZEROES and sent through to the server.

Signed-off-by: Richard W.M. Jones <[email protected]>
---
fs/fuse/file.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 09ef2a4d25ed..22e8e88c78d4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2907,11 +2907,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
};
int err;
bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
- (mode & FALLOC_FL_PUNCH_HOLE);
+ (mode & FALLOC_FL_PUNCH_HOLE) ||
+ (mode & FALLOC_FL_ZERO_RANGE);

bool block_faults = FUSE_IS_DAX(inode) && lock_inode;

- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;

if (fm->fc->no_fallocate)
@@ -2926,7 +2928,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
goto out;
}

- if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if ((mode & FALLOC_FL_PUNCH_HOLE) ||
+ (mode & FALLOC_FL_ZERO_RANGE)) {
loff_t endbyte = offset + length - 1;

err = fuse_writeback_range(inode, offset, endbyte);
--
2.31.1

2021-05-12 14:29:15

by Shachar Sharon

[permalink] [raw]
Subject: Re: [PATCH v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE)

On Wed, May 12, 2021 at 11:37:04AM +0100, Richard W.M. Jones wrote:
>libnbd's nbdfuse utility would like to translate fallocate zero
>requests into NBD_CMD_WRITE_ZEROES. Currently the fuse module filters
>these out, returning -EOPNOTSUPP. This commit treats these almost the
>same way as FALLOC_FL_PUNCH_HOLE except not calling
>truncate_pagecache_range.
>
Why don't you call 'truncate_pagecache_range' ?

>A way to test this is with the following script:
>
>--------------------
> #!/bin/bash
> # Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
> # https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
> set -e
> set -x
>
> export output=$PWD/output
> rm -f test.img $output
>
> # Create an nbdkit instance that prints the NBD requests seen.
> nbdkit sh - <<'EOF'
> case "$1" in
> get_size) echo 1M ;;
> can_write|can_trim|can_zero|can_fast_zero) ;;
> pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
> pwrite) echo "$@" >>$output; cat >/dev/null ;;
> trim|zero) echo "$@" >>$output ;;
> *) exit 2 ;;
> esac
> EOF
>
> # Fuse-mount NBD instance as a file.
> touch test.img
> nbdfuse test.img nbd://localhost & sleep 2
> ls -lh test.img
>
> # Run a read, write, trim and zero request.
> dd if=test.img of=/dev/null bs=512 skip=1024 count=1
> dd if=/dev/zero of=test.img bs=512 skip=2048 count=1
> fallocate -p -l 512 -o 4096 test.img
> fallocate -z -l 512 -o 8192 test.img
>
> # Print the output from the NBD server.
> cat $output
>
> # Clean up.
> fusermount3 -u test.img
> killall nbdkit
> rm test.img $output
> --------------------
>
>which will print:
>
> pread 4096 524288 # number depends on readahead
> pwrite 512 0
> trim 512 4096
> zero 512 8192 may_trim
>
>The last line indicates that the FALLOC_FL_ZERO_RANGE request was
>successfully passed through by the kernel module to nbdfuse,
>translated to NBD_CMD_WRITE_ZEROES and sent through to the server.
>
>Signed-off-by: Richard W.M. Jones <[email protected]>
>---
> fs/fuse/file.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
>diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>index 09ef2a4d25ed..22e8e88c78d4 100644
>--- a/fs/fuse/file.c
>+++ b/fs/fuse/file.c
>@@ -2907,11 +2907,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> };
> int err;
> bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
>- (mode & FALLOC_FL_PUNCH_HOLE);
>+ (mode & FALLOC_FL_PUNCH_HOLE) ||
>+ (mode & FALLOC_FL_ZERO_RANGE);
To stay aligned with existing code style, consider:
- (mode & FALLOC_FL_PUNCH_HOLE);
+ (mode & (FALLOC_FL_PUNCH_HOLE |
+          FALLOC_FL_ZERO_RANGE));

>
> bool block_faults = FUSE_IS_DAX(inode) && lock_inode;
>
>- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
>+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
>+ FALLOC_FL_ZERO_RANGE))
> return -EOPNOTSUPP;
>
> if (fm->fc->no_fallocate)
>@@ -2926,7 +2928,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> goto out;
> }
>
>- if (mode & FALLOC_FL_PUNCH_HOLE) {
>+ if ((mode & FALLOC_FL_PUNCH_HOLE) ||
>+ (mode & FALLOC_FL_ZERO_RANGE)) {
> loff_t endbyte = offset + length - 1;
>
> err = fuse_writeback_range(inode, offset, endbyte);
>--
>2.31.1
>

2021-05-12 14:52:58

by Richard W.M. Jones

[permalink] [raw]
Subject: Re: [PATCH v2] fuse: Allow fallocate(FALLOC_FL_ZERO_RANGE)

On Wed, May 12, 2021 at 05:27:22PM +0300, Shachar Sharon wrote:
> On Wed, May 12, 2021 at 11:37:04AM +0100, Richard W.M. Jones wrote:
> >libnbd's nbdfuse utility would like to translate fallocate zero
> >requests into NBD_CMD_WRITE_ZEROES. Currently the fuse module filters
> >these out, returning -EOPNOTSUPP. This commit treats these almost the
> >same way as FALLOC_FL_PUNCH_HOLE except not calling
> >truncate_pagecache_range.
> >
> Why don't you call 'truncate_pagecache_range' ?

Very good point. I just assumed that it would only be useful when
hole-punching, but now I actually read the description of the function
I see we need it.

Also looking at other filesystems that also support FALLOC_FL_ZERO_RANGE:

ext4_zero_range -> calls truncate_pagecache_range
f2fs_zero_range -> calls it
xfs -> calls it indirectly
btrfs_zero_range -> does not call it (?)

I'll add this, and retest everything.

> >A way to test this is with the following script:

In my next version I'll also address this script which is rather
long-winded. I think there's an easier way for people to test this:

> >--------------------
> > #!/bin/bash
> > # Requires fuse >= 3, nbdkit >= 1.8, and latest nbdfuse from
> > # https://gitlab.com/nbdkit/libnbd/-/tree/master/fuse
> > set -e
> > set -x
> >
> > export output=$PWD/output
> > rm -f test.img $output
> >
> > # Create an nbdkit instance that prints the NBD requests seen.
> > nbdkit sh - <<'EOF'
> > case "$1" in
> > get_size) echo 1M ;;
> > can_write|can_trim|can_zero|can_fast_zero) ;;
> > pread) echo "$@" >>$output; dd if=/dev/zero count=$3 iflag=count_bytes ;;
> > pwrite) echo "$@" >>$output; cat >/dev/null ;;
> > trim|zero) echo "$@" >>$output ;;
> > *) exit 2 ;;
> > esac
[etc]
> >diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> >index 09ef2a4d25ed..22e8e88c78d4 100644
> >--- a/fs/fuse/file.c
> >+++ b/fs/fuse/file.c
> >@@ -2907,11 +2907,13 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> > };
> > int err;
> > bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
> >- (mode & FALLOC_FL_PUNCH_HOLE);
> >+ (mode & FALLOC_FL_PUNCH_HOLE) ||
> >+ (mode & FALLOC_FL_ZERO_RANGE);
> To stay aligned with existing code style, consider:
> - (mode & FALLOC_FL_PUNCH_HOLE);
> + (mode & (FALLOC_FL_PUNCH_HOLE |
> +          FALLOC_FL_ZERO_RANGE));

Good idea.

Thanks for the quick review.

Rich.

--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines. Supports shell scripting,
bindings from many languages. http://libguestfs.org