There are several FUSE filesystems that can implement server-side copy
or other efficient copy/duplication/clone methods. The copy_file_range()
syscall is the standard interface that users have access to while not
depending on external libraries that bypass FUSE.
Signed-off-by: Niels de Vos <[email protected]>
---
fs/fuse/file.c | 65 +++++++++++++++++++++++
fs/fuse/fuse_i.h | 3 ++
include/uapi/linux/fuse.h | 106 ++++++++++++++++++++++----------------
3 files changed, 130 insertions(+), 44 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a201fb0ac64f..ffc6c294a639 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3009,6 +3009,70 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
+static long fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct fuse_file *ff_in = file_in->private_data;
+ struct fuse_file *ff_out = file_out->private_data;
+ struct inode *inode_out = file_inode(file_out);
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
+ struct fuse_conn *fc = ff_in->fc;
+ FUSE_ARGS(args);
+ struct fuse_copy_file_range_in inarg = {
+ .fh_in = ff_in->fh,
+ .off_in = pos_in,
+ .fh_out = ff_out->fh,
+ .off_out = pos_out,
+ .len = len,
+ .flags = flags
+ };
+ struct fuse_copy_file_range_out outarg;
+ long err;
+
+ if (fc->no_copy_file_range)
+ return -EOPNOTSUPP;
+
+ inode_lock(inode_out);
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+
+ args.in.h.opcode = FUSE_COPY_FILE_RANGE;
+ args.in.h.nodeid = ff_in->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (err)
+ goto out;
+
+ /* we might have extended the file */
+ if (outarg.size > 0) {
+ /* Size of inode_out may not have changed in case of
+ * overwrites, oh well. */
+ bool changed = fuse_write_update_size(inode_out,
+ pos_out + outarg.size);
+
+ if (changed && fc->writeback_cache)
+ file_update_time(file_out);
+ }
+
+ fuse_invalidate_attr(inode_out);
+
+ err = outarg.size;
+out:
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+ inode_unlock(inode_out);
+
+ return err;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
@@ -3025,6 +3089,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
.fallocate = fuse_file_fallocate,
+ .copy_file_range = fuse_copy_file_range,
};
static const struct file_operations fuse_direct_io_file_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5256ad333b05..ea848bb7d9e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -637,6 +637,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1;
+ /** Does the filesystem support copy_file_range? */
+ unsigned no_copy_file_range:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 92fa24c24c92..77330fa510f1 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -116,6 +116,9 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FUSE_COPY_FILE_RANGE
*/
#ifndef _LINUX_FUSE_H
@@ -337,50 +340,51 @@ struct fuse_file_lock {
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
- FUSE_IOCTL = 39,
- FUSE_POLL = 40,
- FUSE_NOTIFY_REPLY = 41,
- FUSE_BATCH_FORGET = 42,
- FUSE_FALLOCATE = 43,
- FUSE_READDIRPLUS = 44,
- FUSE_RENAME2 = 45,
- FUSE_LSEEK = 46,
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
+ FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -792,4 +796,18 @@ struct fuse_lseek_out {
uint64_t offset;
};
+struct fuse_copy_file_range_in {
+ uint64_t fh_in;
+ uint64_t off_in;
+ uint64_t fh_out;
+ uint64_t off_out;
+ uint64_t len;
+ uint32_t flags;
+};
+
+struct fuse_copy_file_range_out {
+ uint32_t size;
+ uint32_t padding;
+};
+
#endif /* _LINUX_FUSE_H */
--
2.17.1
There are several FUSE filesystems that can implement server-side copy
or other efficient copy/duplication/clone methods. The copy_file_range()
syscall is the standard interface that users have access to while not
depending on external libraries that bypass FUSE.
Signed-off-by: Niels de Vos <[email protected]>
---
v2: return ssize_t instead of long
---
fs/fuse/file.c | 65 +++++++++++++++++++++++
fs/fuse/fuse_i.h | 3 ++
include/uapi/linux/fuse.h | 106 ++++++++++++++++++++++----------------
3 files changed, 130 insertions(+), 44 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 67648ccbdd43..be8b3da4b788 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3009,6 +3009,70 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
+static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct fuse_file *ff_in = file_in->private_data;
+ struct fuse_file *ff_out = file_out->private_data;
+ struct inode *inode_out = file_inode(file_out);
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
+ struct fuse_conn *fc = ff_in->fc;
+ FUSE_ARGS(args);
+ struct fuse_copy_file_range_in inarg = {
+ .fh_in = ff_in->fh,
+ .off_in = pos_in,
+ .fh_out = ff_out->fh,
+ .off_out = pos_out,
+ .len = len,
+ .flags = flags
+ };
+ struct fuse_copy_file_range_out outarg;
+ ssize_t err;
+
+ if (fc->no_copy_file_range)
+ return -EOPNOTSUPP;
+
+ inode_lock(inode_out);
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+
+ args.in.h.opcode = FUSE_COPY_FILE_RANGE;
+ args.in.h.nodeid = ff_in->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (err)
+ goto out;
+
+ /* we might have extended the file */
+ if (outarg.size > 0) {
+ /* Size of inode_out may not have changed in case of
+ * overwrites, oh well. */
+ bool changed = fuse_write_update_size(inode_out,
+ pos_out + outarg.size);
+
+ if (changed && fc->writeback_cache)
+ file_update_time(file_out);
+ }
+
+ fuse_invalidate_attr(inode_out);
+
+ err = outarg.size;
+out:
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+ inode_unlock(inode_out);
+
+ return err;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
@@ -3025,6 +3089,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
.fallocate = fuse_file_fallocate,
+ .copy_file_range = fuse_copy_file_range,
};
static const struct file_operations fuse_direct_io_file_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5256ad333b05..ea848bb7d9e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -637,6 +637,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1;
+ /** Does the filesystem support copy_file_range? */
+ unsigned no_copy_file_range:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 92fa24c24c92..77330fa510f1 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -116,6 +116,9 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FUSE_COPY_FILE_RANGE
*/
#ifndef _LINUX_FUSE_H
@@ -337,50 +340,51 @@ struct fuse_file_lock {
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
- FUSE_IOCTL = 39,
- FUSE_POLL = 40,
- FUSE_NOTIFY_REPLY = 41,
- FUSE_BATCH_FORGET = 42,
- FUSE_FALLOCATE = 43,
- FUSE_READDIRPLUS = 44,
- FUSE_RENAME2 = 45,
- FUSE_LSEEK = 46,
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
+ FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -792,4 +796,18 @@ struct fuse_lseek_out {
uint64_t offset;
};
+struct fuse_copy_file_range_in {
+ uint64_t fh_in;
+ uint64_t off_in;
+ uint64_t fh_out;
+ uint64_t off_out;
+ uint64_t len;
+ uint32_t flags;
+};
+
+struct fuse_copy_file_range_out {
+ uint32_t size;
+ uint32_t padding;
+};
+
#endif /* _LINUX_FUSE_H */
--
2.17.1
Hi Niels,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on fuse/for-next]
[also build test ERROR on v4.18-rc2 next-20180626]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Niels-de-Vos/fuse-add-support-for-copy_file_range/20180627-155404
base: https://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 8.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=8.1.0 make.cross ARCH=xtensa
All errors (new ones prefixed by >>):
>> fs//fuse/file.c:3092:21: error: initialization of 'ssize_t (*)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int)' {aka 'int (*)(struct file *, long long int, struct file *, long long int, unsigned int, unsigned int)'} from incompatible pointer type 'long int (*)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int)' {aka 'long int (*)(struct file *, long long int, struct file *, long long int, unsigned int, unsigned int)'} [-Werror=incompatible-pointer-types]
.copy_file_range = fuse_copy_file_range,
^~~~~~~~~~~~~~~~~~~~
fs//fuse/file.c:3092:21: note: (near initialization for 'fuse_file_operations.copy_file_range')
cc1: some warnings being treated as errors
vim +3092 fs//fuse/file.c
3075
3076 static const struct file_operations fuse_file_operations = {
3077 .llseek = fuse_file_llseek,
3078 .read_iter = fuse_file_read_iter,
3079 .write_iter = fuse_file_write_iter,
3080 .mmap = fuse_file_mmap,
3081 .open = fuse_open,
3082 .flush = fuse_flush,
3083 .release = fuse_release,
3084 .fsync = fuse_fsync,
3085 .lock = fuse_file_lock,
3086 .flock = fuse_file_flock,
3087 .splice_read = generic_file_splice_read,
3088 .unlocked_ioctl = fuse_file_ioctl,
3089 .compat_ioctl = fuse_file_compat_ioctl,
3090 .poll = fuse_file_poll,
3091 .fallocate = fuse_file_fallocate,
> 3092 .copy_file_range = fuse_copy_file_range,
3093 };
3094
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi Niels,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on fuse/for-next]
[also build test ERROR on v4.18-rc2 next-20180626]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Niels-de-Vos/fuse-add-support-for-copy_file_range/20180627-155404
base: https://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
config: mips-allyesconfig (attached as .config)
compiler: mips-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.2.0 make.cross ARCH=mips
All errors (new ones prefixed by >>):
>> fs/fuse/file.c:3092:21: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types]
.copy_file_range = fuse_copy_file_range,
^~~~~~~~~~~~~~~~~~~~
fs/fuse/file.c:3092:21: note: (near initialization for 'fuse_file_operations.copy_file_range')
cc1: some warnings being treated as errors
vim +3092 fs/fuse/file.c
3075
3076 static const struct file_operations fuse_file_operations = {
3077 .llseek = fuse_file_llseek,
3078 .read_iter = fuse_file_read_iter,
3079 .write_iter = fuse_file_write_iter,
3080 .mmap = fuse_file_mmap,
3081 .open = fuse_open,
3082 .flush = fuse_flush,
3083 .release = fuse_release,
3084 .fsync = fuse_fsync,
3085 .lock = fuse_file_lock,
3086 .flock = fuse_file_flock,
3087 .splice_read = generic_file_splice_read,
3088 .unlocked_ioctl = fuse_file_ioctl,
3089 .compat_ioctl = fuse_file_compat_ioctl,
3090 .poll = fuse_file_poll,
3091 .fallocate = fuse_file_fallocate,
> 3092 .copy_file_range = fuse_copy_file_range,
3093 };
3094
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
On Wed, Jun 27, 2018 at 10:46:48AM +0200, Niels de Vos wrote:
> There are several FUSE filesystems that can implement server-side copy
> or other efficient copy/duplication/clone methods. The copy_file_range()
> syscall is the standard interface that users have access to while not
> depending on external libraries that bypass FUSE.
>
> Signed-off-by: Niels de Vos <[email protected]>
NACK on this from myself; see the reason further below.
>
> ---
> v2: return ssize_t instead of long
> ---
> fs/fuse/file.c | 65 +++++++++++++++++++++++
> fs/fuse/fuse_i.h | 3 ++
> include/uapi/linux/fuse.h | 106 ++++++++++++++++++++++----------------
> 3 files changed, 130 insertions(+), 44 deletions(-)
...
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 92fa24c24c92..77330fa510f1 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
...
> @@ -792,4 +796,18 @@ struct fuse_lseek_out {
> uint64_t offset;
> };
>
> +struct fuse_copy_file_range_in {
> + uint64_t fh_in;
> + uint64_t off_in;
It seems that libfuse has the expectation of having access to the nodeid
when a file-descriptor is passed to userspace.
From Marcin Sulikowski in https://github.com/libfuse/libfuse/pull/259:
> There is a caveat though – the kernel module change proposed in
> https://lkml.org/lkml/2018/6/27/114 provides only ino_in argument
> because that is passed in the request's header (args.in.h.nodeid =
> ff_in->nodeid; in fs/fuse/fuse.c in the kernel) whereas ino_out is not
> given by the kernel to the userspace implementation. Isn't this an
> oversight in the kernel module? The lack of the inode number would
> make it impossible to create high-level interface which follows the
> libfuse's convention where the high-level callbacks are given paths to
> files which are to be modified.
So, the next version of this patch will add nodeid_out to the struct.
Niels
> + uint64_t fh_out;
> + uint64_t off_out;
> + uint64_t len;
> + uint32_t flags;
> +};
> +
> +struct fuse_copy_file_range_out {
> + uint32_t size;
> + uint32_t padding;
> +};
> +
> #endif /* _LINUX_FUSE_H */
> --
> 2.17.1
>
There are several FUSE filesystems that can implement server-side copy
or other efficient copy/duplication/clone methods. The copy_file_range()
syscall is the standard interface that users have access to while not
depending on external libraries that bypass FUSE.
Signed-off-by: Niels de Vos <[email protected]>
---
v2: return ssize_t instead of long
v3: add nodeid_out to fuse_copy_file_range_in for libfuse expectations
---
fs/fuse/file.c | 66 +++++++++++++++++++++++
fs/fuse/fuse_i.h | 3 ++
include/uapi/linux/fuse.h | 107 ++++++++++++++++++++++----------------
3 files changed, 132 insertions(+), 44 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 67648ccbdd43..864939a1215d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3009,6 +3009,71 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
+static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct fuse_file *ff_in = file_in->private_data;
+ struct fuse_file *ff_out = file_out->private_data;
+ struct inode *inode_out = file_inode(file_out);
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
+ struct fuse_conn *fc = ff_in->fc;
+ FUSE_ARGS(args);
+ struct fuse_copy_file_range_in inarg = {
+ .fh_in = ff_in->fh,
+ .off_in = pos_in,
+ .nodeid_out = ff_out->nodeid,
+ .fh_out = ff_out->fh,
+ .off_out = pos_out,
+ .len = len,
+ .flags = flags
+ };
+ struct fuse_copy_file_range_out outarg;
+ ssize_t err;
+
+ if (fc->no_copy_file_range)
+ return -EOPNOTSUPP;
+
+ inode_lock(inode_out);
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+
+ args.in.h.opcode = FUSE_COPY_FILE_RANGE;
+ args.in.h.nodeid = ff_in->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (err)
+ goto out;
+
+ /* we might have extended the file */
+ if (outarg.size > 0) {
+ /* Size of inode_out may not have changed in case of
+ * overwrites, oh well. */
+ bool changed = fuse_write_update_size(inode_out,
+ pos_out + outarg.size);
+
+ if (changed && fc->writeback_cache)
+ file_update_time(file_out);
+ }
+
+ fuse_invalidate_attr(inode_out);
+
+ err = outarg.size;
+out:
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+ inode_unlock(inode_out);
+
+ return err;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
@@ -3025,6 +3090,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
.fallocate = fuse_file_fallocate,
+ .copy_file_range = fuse_copy_file_range,
};
static const struct file_operations fuse_direct_io_file_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5256ad333b05..ea848bb7d9e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -637,6 +637,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1;
+ /** Does the filesystem support copy_file_range? */
+ unsigned no_copy_file_range:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 92fa24c24c92..84aa810e04c8 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -116,6 +116,9 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FUSE_COPY_FILE_RANGE
*/
#ifndef _LINUX_FUSE_H
@@ -337,50 +340,51 @@ struct fuse_file_lock {
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
- FUSE_IOCTL = 39,
- FUSE_POLL = 40,
- FUSE_NOTIFY_REPLY = 41,
- FUSE_BATCH_FORGET = 42,
- FUSE_FALLOCATE = 43,
- FUSE_READDIRPLUS = 44,
- FUSE_RENAME2 = 45,
- FUSE_LSEEK = 46,
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
+ FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -792,4 +796,19 @@ struct fuse_lseek_out {
uint64_t offset;
};
+struct fuse_copy_file_range_in {
+ uint64_t fh_in;
+ uint64_t off_in;
+ uint64_t nodeid_out;
+ uint64_t fh_out;
+ uint64_t off_out;
+ uint64_t len;
+ uint32_t flags;
+};
+
+struct fuse_copy_file_range_out {
+ uint32_t size;
+ uint32_t padding;
+};
+
#endif /* _LINUX_FUSE_H */
--
2.17.1
Hi Miklos,
On Fri, Jun 29, 2018 at 02:53:41PM +0200, Niels de Vos wrote:
> There are several FUSE filesystems that can implement server-side copy
> or other efficient copy/duplication/clone methods. The copy_file_range()
> syscall is the standard interface that users have access to while not
> depending on external libraries that bypass FUSE.
Could you have a look at this patch? A review would be most welcome.
This has been tested with libfuse, and the pull-request for that is
available at https://github.com/libfuse/libfuse/pull/259
Marcin has already had a look too; with his feedback we arrived at v3 of
this change.
Thanks,
Niels
> Signed-off-by: Niels de Vos <[email protected]>
>
> ---
> v2: return ssize_t instead of long
> v3: add nodeid_out to fuse_copy_file_range_in for libfuse expectations
> ---
> fs/fuse/file.c | 66 +++++++++++++++++++++++
> fs/fuse/fuse_i.h | 3 ++
> include/uapi/linux/fuse.h | 107 ++++++++++++++++++++++----------------
> 3 files changed, 132 insertions(+), 44 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 67648ccbdd43..864939a1215d 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -3009,6 +3009,71 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> return err;
> }
>
> +static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out,
> + size_t len, unsigned int flags)
> +{
> + struct fuse_file *ff_in = file_in->private_data;
> + struct fuse_file *ff_out = file_out->private_data;
> + struct inode *inode_out = file_inode(file_out);
> + struct fuse_inode *fi_out = get_fuse_inode(inode_out);
> + struct fuse_conn *fc = ff_in->fc;
> + FUSE_ARGS(args);
> + struct fuse_copy_file_range_in inarg = {
> + .fh_in = ff_in->fh,
> + .off_in = pos_in,
> + .nodeid_out = ff_out->nodeid,
> + .fh_out = ff_out->fh,
> + .off_out = pos_out,
> + .len = len,
> + .flags = flags
> + };
> + struct fuse_copy_file_range_out outarg;
> + ssize_t err;
> +
> + if (fc->no_copy_file_range)
> + return -EOPNOTSUPP;
> +
> + inode_lock(inode_out);
> + set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
> +
> + args.in.h.opcode = FUSE_COPY_FILE_RANGE;
> + args.in.h.nodeid = ff_in->nodeid;
> + args.in.numargs = 1;
> + args.in.args[0].size = sizeof(inarg);
> + args.in.args[0].value = &inarg;
> + args.out.numargs = 1;
> + args.out.args[0].size = sizeof(outarg);
> + args.out.args[0].value = &outarg;
> + err = fuse_simple_request(fc, &args);
> + if (err == -ENOSYS) {
> + fc->no_copy_file_range = 1;
> + err = -EOPNOTSUPP;
> + }
> + if (err)
> + goto out;
> +
> + /* we might have extended the file */
> + if (outarg.size > 0) {
> + /* Size of inode_out may not have changed in case of
> + * overwrites, oh well. */
> + bool changed = fuse_write_update_size(inode_out,
> + pos_out + outarg.size);
> +
> + if (changed && fc->writeback_cache)
> + file_update_time(file_out);
> + }
> +
> + fuse_invalidate_attr(inode_out);
> +
> + err = outarg.size;
> +out:
> + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
> + inode_unlock(inode_out);
> +
> + return err;
> +}
> +
> static const struct file_operations fuse_file_operations = {
> .llseek = fuse_file_llseek,
> .read_iter = fuse_file_read_iter,
> @@ -3025,6 +3090,7 @@ static const struct file_operations fuse_file_operations = {
> .compat_ioctl = fuse_file_compat_ioctl,
> .poll = fuse_file_poll,
> .fallocate = fuse_file_fallocate,
> + .copy_file_range = fuse_copy_file_range,
> };
>
> static const struct file_operations fuse_direct_io_file_operations = {
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 5256ad333b05..ea848bb7d9e2 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -637,6 +637,9 @@ struct fuse_conn {
> /** Allow other than the mounter user to access the filesystem ? */
> unsigned allow_other:1;
>
> + /** Does the filesystem support copy_file_range? */
> + unsigned no_copy_file_range:1;
> +
> /** The number of requests waiting for completion */
> atomic_t num_waiting;
>
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 92fa24c24c92..84aa810e04c8 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -116,6 +116,9 @@
> *
> * 7.27
> * - add FUSE_ABORT_ERROR
> + *
> + * 7.28
> + * - add FUSE_COPY_FILE_RANGE
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -337,50 +340,51 @@ struct fuse_file_lock {
> #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
>
> enum fuse_opcode {
> - FUSE_LOOKUP = 1,
> - FUSE_FORGET = 2, /* no reply */
> - FUSE_GETATTR = 3,
> - FUSE_SETATTR = 4,
> - FUSE_READLINK = 5,
> - FUSE_SYMLINK = 6,
> - FUSE_MKNOD = 8,
> - FUSE_MKDIR = 9,
> - FUSE_UNLINK = 10,
> - FUSE_RMDIR = 11,
> - FUSE_RENAME = 12,
> - FUSE_LINK = 13,
> - FUSE_OPEN = 14,
> - FUSE_READ = 15,
> - FUSE_WRITE = 16,
> - FUSE_STATFS = 17,
> - FUSE_RELEASE = 18,
> - FUSE_FSYNC = 20,
> - FUSE_SETXATTR = 21,
> - FUSE_GETXATTR = 22,
> - FUSE_LISTXATTR = 23,
> - FUSE_REMOVEXATTR = 24,
> - FUSE_FLUSH = 25,
> - FUSE_INIT = 26,
> - FUSE_OPENDIR = 27,
> - FUSE_READDIR = 28,
> - FUSE_RELEASEDIR = 29,
> - FUSE_FSYNCDIR = 30,
> - FUSE_GETLK = 31,
> - FUSE_SETLK = 32,
> - FUSE_SETLKW = 33,
> - FUSE_ACCESS = 34,
> - FUSE_CREATE = 35,
> - FUSE_INTERRUPT = 36,
> - FUSE_BMAP = 37,
> - FUSE_DESTROY = 38,
> - FUSE_IOCTL = 39,
> - FUSE_POLL = 40,
> - FUSE_NOTIFY_REPLY = 41,
> - FUSE_BATCH_FORGET = 42,
> - FUSE_FALLOCATE = 43,
> - FUSE_READDIRPLUS = 44,
> - FUSE_RENAME2 = 45,
> - FUSE_LSEEK = 46,
> + FUSE_LOOKUP = 1,
> + FUSE_FORGET = 2, /* no reply */
> + FUSE_GETATTR = 3,
> + FUSE_SETATTR = 4,
> + FUSE_READLINK = 5,
> + FUSE_SYMLINK = 6,
> + FUSE_MKNOD = 8,
> + FUSE_MKDIR = 9,
> + FUSE_UNLINK = 10,
> + FUSE_RMDIR = 11,
> + FUSE_RENAME = 12,
> + FUSE_LINK = 13,
> + FUSE_OPEN = 14,
> + FUSE_READ = 15,
> + FUSE_WRITE = 16,
> + FUSE_STATFS = 17,
> + FUSE_RELEASE = 18,
> + FUSE_FSYNC = 20,
> + FUSE_SETXATTR = 21,
> + FUSE_GETXATTR = 22,
> + FUSE_LISTXATTR = 23,
> + FUSE_REMOVEXATTR = 24,
> + FUSE_FLUSH = 25,
> + FUSE_INIT = 26,
> + FUSE_OPENDIR = 27,
> + FUSE_READDIR = 28,
> + FUSE_RELEASEDIR = 29,
> + FUSE_FSYNCDIR = 30,
> + FUSE_GETLK = 31,
> + FUSE_SETLK = 32,
> + FUSE_SETLKW = 33,
> + FUSE_ACCESS = 34,
> + FUSE_CREATE = 35,
> + FUSE_INTERRUPT = 36,
> + FUSE_BMAP = 37,
> + FUSE_DESTROY = 38,
> + FUSE_IOCTL = 39,
> + FUSE_POLL = 40,
> + FUSE_NOTIFY_REPLY = 41,
> + FUSE_BATCH_FORGET = 42,
> + FUSE_FALLOCATE = 43,
> + FUSE_READDIRPLUS = 44,
> + FUSE_RENAME2 = 45,
> + FUSE_LSEEK = 46,
> + FUSE_COPY_FILE_RANGE = 47,
>
> /* CUSE specific operations */
> CUSE_INIT = 4096,
> @@ -792,4 +796,19 @@ struct fuse_lseek_out {
> uint64_t offset;
> };
>
> +struct fuse_copy_file_range_in {
> + uint64_t fh_in;
> + uint64_t off_in;
> + uint64_t nodeid_out;
> + uint64_t fh_out;
> + uint64_t off_out;
> + uint64_t len;
> + uint32_t flags;
> +};
> +
> +struct fuse_copy_file_range_out {
> + uint32_t size;
> + uint32_t padding;
> +};
> +
> #endif /* _LINUX_FUSE_H */
> --
> 2.17.1
>
On Fri, Jun 29, 2018 at 2:53 PM, Niels de Vos <[email protected]> wrote:
> There are several FUSE filesystems that can implement server-side copy
> or other efficient copy/duplication/clone methods. The copy_file_range()
> syscall is the standard interface that users have access to while not
> depending on external libraries that bypass FUSE.
>
> Signed-off-by: Niels de Vos <[email protected]>
>
> ---
> v2: return ssize_t instead of long
> v3: add nodeid_out to fuse_copy_file_range_in for libfuse expectations
> ---
> fs/fuse/file.c | 66 +++++++++++++++++++++++
> fs/fuse/fuse_i.h | 3 ++
> include/uapi/linux/fuse.h | 107 ++++++++++++++++++++++----------------
> 3 files changed, 132 insertions(+), 44 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 67648ccbdd43..864939a1215d 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -3009,6 +3009,71 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> return err;
> }
>
> +static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out,
> + size_t len, unsigned int flags)
> +{
> + struct fuse_file *ff_in = file_in->private_data;
> + struct fuse_file *ff_out = file_out->private_data;
> + struct inode *inode_out = file_inode(file_out);
> + struct fuse_inode *fi_out = get_fuse_inode(inode_out);
> + struct fuse_conn *fc = ff_in->fc;
> + FUSE_ARGS(args);
> + struct fuse_copy_file_range_in inarg = {
> + .fh_in = ff_in->fh,
> + .off_in = pos_in,
> + .nodeid_out = ff_out->nodeid,
> + .fh_out = ff_out->fh,
> + .off_out = pos_out,
> + .len = len,
> + .flags = flags
> + };
> + struct fuse_copy_file_range_out outarg;
> + ssize_t err;
> +
> + if (fc->no_copy_file_range)
> + return -EOPNOTSUPP;
> +
> + inode_lock(inode_out);
> + set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
This one is only needed in the non-writeback-cache case and only if
the operation is size extending.
Here's how the writeback-cache is supposed to work: the kernel buffers
writes, just like a normal filesystem, as well as buffering related
metadata updates (size & [cm]time), again, just like a normal
filesystem. This means we just don't care about i_size being updated
in userspace, any such change will be overwritten when the metadata is
flushed out.
In writeback-cache mode, when we do any other data modification, we
need to first flush out the cache so that the order of writes is not
mixed up. See fallocate() for example. We could be selective and
only flush the range covered by [pos, pos+len], but just flushing
everything is okay.
I could add these, but you already have a test for this set up, so, I
wouldn't mind if you post a new version.
> +
> + args.in.h.opcode = FUSE_COPY_FILE_RANGE;
> + args.in.h.nodeid = ff_in->nodeid;
> + args.in.numargs = 1;
> + args.in.args[0].size = sizeof(inarg);
> + args.in.args[0].value = &inarg;
> + args.out.numargs = 1;
> + args.out.args[0].size = sizeof(outarg);
> + args.out.args[0].value = &outarg;
> + err = fuse_simple_request(fc, &args);
> + if (err == -ENOSYS) {
> + fc->no_copy_file_range = 1;
> + err = -EOPNOTSUPP;
> + }
> + if (err)
> + goto out;
> +
> + /* we might have extended the file */
> + if (outarg.size > 0) {
> + /* Size of inode_out may not have changed in case of
> + * overwrites, oh well. */
> + bool changed = fuse_write_update_size(inode_out,
> + pos_out + outarg.size);
> +
> + if (changed && fc->writeback_cache)
> + file_update_time(file_out);
> + }
> +
> + fuse_invalidate_attr(inode_out);
> +
> + err = outarg.size;
> +out:
> + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
> + inode_unlock(inode_out);
> +
> + return err;
> +}
> +
> static const struct file_operations fuse_file_operations = {
> .llseek = fuse_file_llseek,
> .read_iter = fuse_file_read_iter,
> @@ -3025,6 +3090,7 @@ static const struct file_operations fuse_file_operations = {
> .compat_ioctl = fuse_file_compat_ioctl,
> .poll = fuse_file_poll,
> .fallocate = fuse_file_fallocate,
> + .copy_file_range = fuse_copy_file_range,
> };
>
> static const struct file_operations fuse_direct_io_file_operations = {
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 5256ad333b05..ea848bb7d9e2 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -637,6 +637,9 @@ struct fuse_conn {
> /** Allow other than the mounter user to access the filesystem ? */
> unsigned allow_other:1;
>
> + /** Does the filesystem support copy_file_range? */
> + unsigned no_copy_file_range:1;
> +
> /** The number of requests waiting for completion */
> atomic_t num_waiting;
>
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 92fa24c24c92..84aa810e04c8 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -116,6 +116,9 @@
> *
> * 7.27
> * - add FUSE_ABORT_ERROR
> + *
> + * 7.28
> + * - add FUSE_COPY_FILE_RANGE
> */
>
> #ifndef _LINUX_FUSE_H
> @@ -337,50 +340,51 @@ struct fuse_file_lock {
> #define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
>
> enum fuse_opcode {
> - FUSE_LOOKUP = 1,
> - FUSE_FORGET = 2, /* no reply */
> - FUSE_GETATTR = 3,
> - FUSE_SETATTR = 4,
> - FUSE_READLINK = 5,
> - FUSE_SYMLINK = 6,
> - FUSE_MKNOD = 8,
> - FUSE_MKDIR = 9,
> - FUSE_UNLINK = 10,
> - FUSE_RMDIR = 11,
> - FUSE_RENAME = 12,
> - FUSE_LINK = 13,
> - FUSE_OPEN = 14,
> - FUSE_READ = 15,
> - FUSE_WRITE = 16,
> - FUSE_STATFS = 17,
> - FUSE_RELEASE = 18,
> - FUSE_FSYNC = 20,
> - FUSE_SETXATTR = 21,
> - FUSE_GETXATTR = 22,
> - FUSE_LISTXATTR = 23,
> - FUSE_REMOVEXATTR = 24,
> - FUSE_FLUSH = 25,
> - FUSE_INIT = 26,
> - FUSE_OPENDIR = 27,
> - FUSE_READDIR = 28,
> - FUSE_RELEASEDIR = 29,
> - FUSE_FSYNCDIR = 30,
> - FUSE_GETLK = 31,
> - FUSE_SETLK = 32,
> - FUSE_SETLKW = 33,
> - FUSE_ACCESS = 34,
> - FUSE_CREATE = 35,
> - FUSE_INTERRUPT = 36,
> - FUSE_BMAP = 37,
> - FUSE_DESTROY = 38,
> - FUSE_IOCTL = 39,
> - FUSE_POLL = 40,
> - FUSE_NOTIFY_REPLY = 41,
> - FUSE_BATCH_FORGET = 42,
> - FUSE_FALLOCATE = 43,
> - FUSE_READDIRPLUS = 44,
> - FUSE_RENAME2 = 45,
> - FUSE_LSEEK = 46,
> + FUSE_LOOKUP = 1,
> + FUSE_FORGET = 2, /* no reply */
> + FUSE_GETATTR = 3,
> + FUSE_SETATTR = 4,
> + FUSE_READLINK = 5,
> + FUSE_SYMLINK = 6,
> + FUSE_MKNOD = 8,
> + FUSE_MKDIR = 9,
> + FUSE_UNLINK = 10,
> + FUSE_RMDIR = 11,
> + FUSE_RENAME = 12,
> + FUSE_LINK = 13,
> + FUSE_OPEN = 14,
> + FUSE_READ = 15,
> + FUSE_WRITE = 16,
> + FUSE_STATFS = 17,
> + FUSE_RELEASE = 18,
> + FUSE_FSYNC = 20,
> + FUSE_SETXATTR = 21,
> + FUSE_GETXATTR = 22,
> + FUSE_LISTXATTR = 23,
> + FUSE_REMOVEXATTR = 24,
> + FUSE_FLUSH = 25,
> + FUSE_INIT = 26,
> + FUSE_OPENDIR = 27,
> + FUSE_READDIR = 28,
> + FUSE_RELEASEDIR = 29,
> + FUSE_FSYNCDIR = 30,
> + FUSE_GETLK = 31,
> + FUSE_SETLK = 32,
> + FUSE_SETLKW = 33,
> + FUSE_ACCESS = 34,
> + FUSE_CREATE = 35,
> + FUSE_INTERRUPT = 36,
> + FUSE_BMAP = 37,
> + FUSE_DESTROY = 38,
> + FUSE_IOCTL = 39,
> + FUSE_POLL = 40,
> + FUSE_NOTIFY_REPLY = 41,
> + FUSE_BATCH_FORGET = 42,
> + FUSE_FALLOCATE = 43,
> + FUSE_READDIRPLUS = 44,
> + FUSE_RENAME2 = 45,
> + FUSE_LSEEK = 46,
> + FUSE_COPY_FILE_RANGE = 47,
Nit: please do tabulation with tabs instead of spaces.
>
> /* CUSE specific operations */
> CUSE_INIT = 4096,
> @@ -792,4 +796,19 @@ struct fuse_lseek_out {
> uint64_t offset;
> };
>
> +struct fuse_copy_file_range_in {
> + uint64_t fh_in;
> + uint64_t off_in;
> + uint64_t nodeid_out;
> + uint64_t fh_out;
> + uint64_t off_out;
> + uint64_t len;
> + uint32_t flags;
Why not uint64_t for flags?
> +};
> +
> +struct fuse_copy_file_range_out {
> + uint32_t size;
> + uint32_t padding;
> +};
Could reuse "struct fuse_write_out" for this. Helps with the
userspace interface as well, since the same fuse_reply_write()
function can be used.
Thanks,
Miklos
On Tue, Aug 07, 2018 at 02:02:35PM +0200, Miklos Szeredi wrote:
> On Fri, Jun 29, 2018 at 2:53 PM, Niels de Vos <[email protected]> wrote:
> > There are several FUSE filesystems that can implement server-side copy
> > or other efficient copy/duplication/clone methods. The copy_file_range()
> > syscall is the standard interface that users have access to while not
> > depending on external libraries that bypass FUSE.
> >
> > Signed-off-by: Niels de Vos <[email protected]>
> >
> > ---
> > v2: return ssize_t instead of long
> > v3: add nodeid_out to fuse_copy_file_range_in for libfuse expectations
> > ---
> > fs/fuse/file.c | 66 +++++++++++++++++++++++
> > fs/fuse/fuse_i.h | 3 ++
> > include/uapi/linux/fuse.h | 107 ++++++++++++++++++++++----------------
> > 3 files changed, 132 insertions(+), 44 deletions(-)
> >
> > diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> > index 67648ccbdd43..864939a1215d 100644
> > --- a/fs/fuse/file.c
> > +++ b/fs/fuse/file.c
> > @@ -3009,6 +3009,71 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
> > return err;
> > }
> >
> > +static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
> > + struct file *file_out, loff_t pos_out,
> > + size_t len, unsigned int flags)
> > +{
> > + struct fuse_file *ff_in = file_in->private_data;
> > + struct fuse_file *ff_out = file_out->private_data;
> > + struct inode *inode_out = file_inode(file_out);
> > + struct fuse_inode *fi_out = get_fuse_inode(inode_out);
> > + struct fuse_conn *fc = ff_in->fc;
> > + FUSE_ARGS(args);
> > + struct fuse_copy_file_range_in inarg = {
> > + .fh_in = ff_in->fh,
> > + .off_in = pos_in,
> > + .nodeid_out = ff_out->nodeid,
> > + .fh_out = ff_out->fh,
> > + .off_out = pos_out,
> > + .len = len,
> > + .flags = flags
> > + };
> > + struct fuse_copy_file_range_out outarg;
> > + ssize_t err;
> > +
> > + if (fc->no_copy_file_range)
> > + return -EOPNOTSUPP;
> > +
> > + inode_lock(inode_out);
> > + set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
>
> This one is only needed in the non-writeback-cache case and only if
> the operation is size extending.
>
> Here's how the writeback-cache is supposed to work: the kernel buffers
> writes, just like a normal filesystem, as well as buffering related
> metadata updates (size & [cm]time), again, just like a normal
> filesystem. This means we just don't care about i_size being updated
> in userspace, any such change will be overwritten when the metadata is
> flushed out.
>
> In writeback-cache mode, when we do any other data modification, we
> need to first flush out the cache so that the order of writes is not
> mixed up. See fallocate() for example. We could be selective and
> only flush the range covered by [pos, pos+len], but just flushing
> everything is okay.
Thanks! I think I understood what you mean and I'll be sending an
updated version soon.
> I could add these, but you already have a test for this set up, so, I
> wouldn't mind if you post a new version.
No problem. I got something ready and tested on my side.
...
> > + FUSE_POLL = 40,
> > + FUSE_NOTIFY_REPLY = 41,
> > + FUSE_BATCH_FORGET = 42,
> > + FUSE_FALLOCATE = 43,
> > + FUSE_READDIRPLUS = 44,
> > + FUSE_RENAME2 = 45,
> > + FUSE_LSEEK = 46,
> > + FUSE_COPY_FILE_RANGE = 47,
>
> Nit: please do tabulation with tabs instead of spaces.
Will do.
> >
> > /* CUSE specific operations */
> > CUSE_INIT = 4096,
> > @@ -792,4 +796,19 @@ struct fuse_lseek_out {
> > uint64_t offset;
> > };
> >
> > +struct fuse_copy_file_range_in {
> > + uint64_t fh_in;
> > + uint64_t off_in;
> > + uint64_t nodeid_out;
> > + uint64_t fh_out;
> > + uint64_t off_out;
> > + uint64_t len;
> > + uint32_t flags;
>
> Why not uint64_t for flags?
Everything else uses uint32_t for flags in this file. I'll make it
uint64_t in the next update.
> > +};
> > +
> > +struct fuse_copy_file_range_out {
> > + uint32_t size;
> > + uint32_t padding;
> > +};
>
> Could reuse "struct fuse_write_out" for this. Helps with the
> userspace interface as well, since the same fuse_reply_write()
> function can be used.
I considered that before as well. In case the interface changes, an
updated struct fuse_copy_file_range_out can always be added later. And
hopefully there is no reason to change it at all.
At the moment I am running a few more tests to verify an updated patch,
and will send it out later today.
Niels
There are several FUSE filesystems that can implement server-side copy
or other efficient copy/duplication/clone methods. The copy_file_range()
syscall is the standard interface that users have access to while not
depending on external libraries that bypass FUSE.
Signed-off-by: Niels de Vos <[email protected]>
---
libfuse: https://github.com/libfuse/libfuse/pull/259
v2: return ssize_t instead of long
v3: add nodeid_out to fuse_copy_file_range_in for libfuse expectations
v4: use fuse_write_out as response, correct write-back handling
---
fs/fuse/file.c | 80 +++++++++++++++++++++++++++++
fs/fuse/fuse_i.h | 3 ++
include/uapi/linux/fuse.h | 104 +++++++++++++++++++++-----------------
3 files changed, 142 insertions(+), 45 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a201fb0ac64f..6aea3675fd88 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3009,6 +3009,85 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return err;
}
+static ssize_t fuse_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t len, unsigned int flags)
+{
+ struct fuse_file *ff_in = file_in->private_data;
+ struct fuse_file *ff_out = file_out->private_data;
+ struct inode *inode_out = file_inode(file_out);
+ struct fuse_inode *fi_out = get_fuse_inode(inode_out);
+ struct fuse_conn *fc = ff_in->fc;
+ FUSE_ARGS(args);
+ struct fuse_copy_file_range_in inarg = {
+ .fh_in = ff_in->fh,
+ .off_in = pos_in,
+ .nodeid_out = ff_out->nodeid,
+ .fh_out = ff_out->fh,
+ .off_out = pos_out,
+ .len = len,
+ .flags = flags
+ };
+ struct fuse_write_out outarg;
+ ssize_t err;
+ /* mark unstable when write-back is not used, and file_out gets
+ * extended */
+ bool is_unstable = (!fc->writeback_cache) &&
+ ((pos_out + len) > inode_out->i_size);
+
+ if (fc->no_copy_file_range)
+ return -EOPNOTSUPP;
+
+ inode_lock(inode_out);
+
+ if (fc->writeback_cache) {
+ err = filemap_write_and_wait_range(inode_out->i_mapping,
+ pos_out, pos_out + len);
+ if (err)
+ goto out;
+
+ fuse_sync_writes(inode_out);
+ }
+
+ if (is_unstable)
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+
+ args.in.h.opcode = FUSE_COPY_FILE_RANGE;
+ args.in.h.nodeid = ff_in->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
+ if (err == -ENOSYS) {
+ fc->no_copy_file_range = 1;
+ err = -EOPNOTSUPP;
+ }
+ if (err)
+ goto out;
+
+ if (fc->writeback_cache) {
+ bool changed = fuse_write_update_size(inode_out,
+ pos_out + outarg.size);
+
+ if (changed)
+ file_update_time(file_out);
+ }
+
+ fuse_invalidate_attr(inode_out);
+
+ err = outarg.size;
+out:
+ if (is_unstable)
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
+
+ inode_unlock(inode_out);
+
+ return err;
+}
+
static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
@@ -3025,6 +3104,7 @@ static const struct file_operations fuse_file_operations = {
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
.fallocate = fuse_file_fallocate,
+ .copy_file_range = fuse_copy_file_range,
};
static const struct file_operations fuse_direct_io_file_operations = {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 5256ad333b05..ea848bb7d9e2 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -637,6 +637,9 @@ struct fuse_conn {
/** Allow other than the mounter user to access the filesystem ? */
unsigned allow_other:1;
+ /** Does the filesystem support copy_file_range? */
+ unsigned no_copy_file_range:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 92fa24c24c92..c806a17beaef 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -116,6 +116,9 @@
*
* 7.27
* - add FUSE_ABORT_ERROR
+ *
+ * 7.28
+ * - add FUSE_COPY_FILE_RANGE
*/
#ifndef _LINUX_FUSE_H
@@ -337,53 +340,54 @@ struct fuse_file_lock {
#define FUSE_POLL_SCHEDULE_NOTIFY (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
- FUSE_IOCTL = 39,
- FUSE_POLL = 40,
- FUSE_NOTIFY_REPLY = 41,
- FUSE_BATCH_FORGET = 42,
- FUSE_FALLOCATE = 43,
- FUSE_READDIRPLUS = 44,
- FUSE_RENAME2 = 45,
- FUSE_LSEEK = 46,
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
+ FUSE_COPY_FILE_RANGE = 47,
/* CUSE specific operations */
- CUSE_INIT = 4096,
+ CUSE_INIT = 4096,
};
enum fuse_notify_code {
@@ -792,4 +796,14 @@ struct fuse_lseek_out {
uint64_t offset;
};
+struct fuse_copy_file_range_in {
+ uint64_t fh_in;
+ uint64_t off_in;
+ uint64_t nodeid_out;
+ uint64_t fh_out;
+ uint64_t off_out;
+ uint64_t len;
+ uint64_t flags;
+};
+
#endif /* _LINUX_FUSE_H */
--
2.17.1
On Tue, Aug 21, 2018 at 2:36 PM, Niels de Vos <[email protected]> wrote:
> There are several FUSE filesystems that can implement server-side copy
> or other efficient copy/duplication/clone methods. The copy_file_range()
> syscall is the standard interface that users have access to while not
> depending on external libraries that bypass FUSE.
>
> Signed-off-by: Niels de Vos <[email protected]>
Applied and pushed to
git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git#for-next
Thanks,
Miklos