Subject: RFC: generic file operation for fstrim ioctl()

Hello friends,


here's an RFC for a generic implementation of the FITRIM ioctl:

* introduce a new file_operation call vector for fstrim
* move the generic pieces (eg. cap check, copy from/to userland, etc)
into a generic implementation that then calls into the fileop
* passing to the old ioctl fileop when fstrim op is NULL
* convert a few fs'es to the new schema

Rationale: it seems that all file systems implement generic stuff like
permission checks and buffer copy from/to userspace, all on their own.
We already have a common place for generic ioctl() handling (do_vfs_ioctl).
I feel its time for factoring this out the common fstrim pieces, too.

The first patch of this series introduces a new file_operation and calls
it on FITRIM (from do_vfs_ioctl) if it's set - otherwise just passes
the ioctl to file_ioctl(), as it had been before. So, this only becomes
active on a fs thats converted to the new file_operation. Subsequent patches
do the conversion for a few file systems.

Note this is just an RFC, don't apply it - there might still be bugs in there.


have fun,

--mtx






Subject: [RFC PATCH 6/6] fs: f2fs: move fstrim to file_operation

Use the newly introduced file_operation callback for FITRIM ioctl.
This removes some common code, eg. permission check, buffer copying,
which is now done by generic vfs code.
---
fs/f2fs/file.c | 21 ++++-----------------
1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ceb575f99048..4c3f5eab22aa 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2252,38 +2252,27 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
return ret;
}

-static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
+static long f2fs_ioc_fitrim(struct file *file, struct fstrim_range *range)
{
struct inode *inode = file_inode(filp);
struct super_block *sb = inode->i_sb;
struct request_queue *q = bdev_get_queue(sb->s_bdev);
- struct fstrim_range range;
int ret;

- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (!f2fs_hw_support_discard(F2FS_SB(sb)))
return -EOPNOTSUPP;

- if (copy_from_user(&range, (struct fstrim_range __user *)arg,
- sizeof(range)))
- return -EFAULT;
-
ret = mnt_want_write_file(filp);
if (ret)
return ret;

- range.minlen = max((unsigned int)range.minlen,
+ range->minlen = max((unsigned int)range->minlen,
q->limits.discard_granularity);
- ret = f2fs_trim_fs(F2FS_SB(sb), &range);
+ ret = f2fs_trim_fs(F2FS_SB(sb), range);
mnt_drop_write_file(filp);
if (ret < 0)
return ret;

- if (copy_to_user((struct fstrim_range __user *)arg, &range,
- sizeof(range)))
- return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
@@ -4124,8 +4113,6 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_abort_volatile_write(filp);
case F2FS_IOC_SHUTDOWN:
return f2fs_ioc_shutdown(filp, arg);
- case FITRIM:
- return f2fs_ioc_fitrim(filp, arg);
case FS_IOC_SET_ENCRYPTION_POLICY:
return f2fs_ioc_set_encryption_policy(filp, arg);
case FS_IOC_GET_ENCRYPTION_POLICY:
@@ -4405,7 +4392,6 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
case F2FS_IOC_ABORT_VOLATILE_WRITE:
case F2FS_IOC_SHUTDOWN:
- case FITRIM:
case FS_IOC_SET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_PWSALT:
case FS_IOC_GET_ENCRYPTION_POLICY:
@@ -4461,4 +4447,5 @@ const struct file_operations f2fs_file_operations = {
#endif
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
+ .fitrim = f2fs_ioc_fitrim,
};
--
2.20.1

Subject: [RFC PATCH 5/6] fs: fat: move fstrim to file_operation

Use the newly introduced file_operation callback for FITRIM ioctl.
This removes some common code, eg. permission check, buffer copying,
which is now done by generic vfs code.
---
fs/fat/file.c | 25 ++++---------------------
1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/fs/fat/file.c b/fs/fat/file.c
index 13855ba49cd9..5f57f06341d0 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -122,35 +122,20 @@ static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr)
return put_user(sbi->vol_id, user_attr);
}

-static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
+static int fat_ioctl_fitrim(struct file *file, struct fstrim_range *range)
{
+ struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- struct fstrim_range __user *user_range;
- struct fstrim_range range;
struct request_queue *q = bdev_get_queue(sb->s_bdev);
int err;

- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (!blk_queue_discard(q))
return -EOPNOTSUPP;

- user_range = (struct fstrim_range __user *)arg;
- if (copy_from_user(&range, user_range, sizeof(range)))
- return -EFAULT;
-
- range.minlen = max_t(unsigned int, range.minlen,
+ range->minlen = max_t(unsigned int, range->minlen,
q->limits.discard_granularity);

- err = fat_trim_fs(inode, &range);
- if (err < 0)
- return err;
-
- if (copy_to_user(user_range, &range, sizeof(range)))
- return -EFAULT;
-
- return 0;
+ return fat_trim_fs(inode, range);
}

long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -165,8 +150,6 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return fat_ioctl_set_attributes(filp, user_attr);
case FAT_IOCTL_GET_VOLUME_ID:
return fat_ioctl_get_volume_id(inode, user_attr);
- case FITRIM:
- return fat_ioctl_fitrim(inode, arg);
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}
--
2.20.1

Subject: [RFC PATCH 4/6] fs: btrfs: move fstrim to file_operation

Use the newly introduced file_operation callback for FITRIM ioctl.
This removes some common code, eg. permission check, buffer copying,
which is now done by generic vfs code.
---
fs/btrfs/ctree.h | 1 +
fs/btrfs/inode.c | 1 +
fs/btrfs/ioctl.c | 26 +++++++-------------------
3 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9fb76829a281..0361d95fe690 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3206,6 +3206,7 @@ void btrfs_update_inode_bytes(struct btrfs_inode *inode,

/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+long btrfs_ioctl_fitrim(struct file *file, struct fstrim_range *range);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int btrfs_fileattr_set(struct user_namespace *mnt_userns,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 46f392943f4d..5f0d1032c890 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10640,6 +10640,7 @@ static const struct file_operations btrfs_dir_file_operations = {
#endif
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
+ .fitrim = btrfs_ioctl_fitrim,
};

/*
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5dc2fd843ae3..38b1de381836 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -372,19 +372,16 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
return put_user(inode->i_generation, arg);
}

-static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
- void __user *arg)
+long btrfs_ioctl_fitrim(struct file *file, struct fstrim_range *range)
{
+ struct inode *inode = file_inode(file);
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_device *device;
struct request_queue *q;
- struct fstrim_range range;
u64 minlen = ULLONG_MAX;
u64 num_devices = 0;
int ret;

- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
/*
* btrfs_trim_block_group() depends on space cache, which is not
* available in zoned filesystem. So, disallow fitrim on a zoned
@@ -419,26 +416,19 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,

if (!num_devices)
return -EOPNOTSUPP;
- if (copy_from_user(&range, arg, sizeof(range)))
- return -EFAULT;

/*
* NOTE: Don't truncate the range using super->total_bytes. Bytenr of
* block group is in the logical address space, which can be any
* sectorsize aligned bytenr in the range [0, U64_MAX].
*/
- if (range.len < fs_info->sb->s_blocksize)
+ if (range->len < fs_info->sb->s_blocksize)
return -EINVAL;

- range.minlen = max(range.minlen, minlen);
- ret = btrfs_trim_fs(fs_info, &range);
- if (ret < 0)
- return ret;
-
- if (copy_to_user(arg, &range, sizeof(range)))
- return -EFAULT;
+ range->minlen = max(range->minlen, minlen);
+ ret = btrfs_trim_fs(fs_info, range);

- return 0;
+ return ret;
}

int __pure btrfs_is_empty_uuid(u8 *uuid)
@@ -4796,8 +4786,6 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_fslabel(fs_info, argp);
case FS_IOC_SETFSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
- case FITRIM:
- return btrfs_ioctl_fitrim(fs_info, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
--
2.20.1

Subject: [RFC PATCH 1/6] fs: generic file operation for fstrim

In order to factor out common parts of fstrim ioctl's, introduce a new
file_operation field for the file system specific parts.

The generic ioctl code (do_vfs_ioctl) calls an helper for doing the common
actions (eg. permission checks or buffer copying). If the file system
doesn't supply an fstrim function, the call will be passed down directly
to the file system, as it used to be.

This change also enables an easier way for possible future relaxing of
permissions, eg. allowing unprivileged users to trim their own files
sounds like an apelling idea.

Signed-off-by: Enrico Weigelt, metux IT consult <[email protected]>
---
fs/ioctl.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 1 +
2 files changed, 49 insertions(+)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1e2204fa9963..3a638f1eb0f5 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -961,6 +961,51 @@ static int ioctl_fssetxattr(struct file *file, void __user *argp)
return err;
}

+/*
+ * ioctl_fitrim() generic handling for FITRIM ioctl() via dedicated file
+ * operation. It does't common things like copying the arg from/to userland,
+ * permission check, etc.
+ *
+ * If the handler in file_operation is NULL, just pass the ioctl down to the
+ * generic ioctl() handler, as it used to be.
+ */
+static int ioctl_fitrim(struct file *filp, unsigned int fd, unsigned int cmd,
+ void __user *argp)
+{
+ struct fstrim_range;
+ int ret;
+
+
+ if (S_ISREG(inode->i_mode))
+ return file_ioctl(filp, cmd, argp);
+ break;
+
+ /* if the fs doesn't implement the fitrim operation, just pass it to
+ the fs's ioctl() operation, so remaining implementations are kept
+ intact. this can be removed when all fs'es are converted */
+ if (!filp->f_op->fitrim) {
+ if (S_ISREG(inode->i_mode))
+ return file_ioctl(filp, cmd, argp);
+
+ return -ENOIOCTLCMD;
+ }
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (copy_from_user(&range, (struct fstrim_range __user)*argp,
+ sizeof(range)))
+ return -EFAULT;
+
+ ret = filp->f_op->fitrim(filp, &range);
+
+ if (copy_to_user((struct fstrim_range __user)*argp, &range,
+ sizeof(range)))
+ return -EFAULT;
+
+ return ret;
+}
+
/*
* do_vfs_ioctl() is not for drivers and not intended to be EXPORT_SYMBOL()'d.
* It's just a simple helper for sys_ioctl and compat_sys_ioctl.
@@ -1043,6 +1088,9 @@ static int do_vfs_ioctl(struct file *filp, unsigned int fd,
case FS_IOC_FSSETXATTR:
return ioctl_fssetxattr(filp, argp);

+ case FITRIM:
+ return ioctl_fitrim(filp, cmd, argp);
+
default:
if (S_ISREG(inode->i_mode))
return file_ioctl(filp, cmd, argp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..9e2f0cd5c787 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2059,6 +2059,7 @@ struct file_operations {
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
+ long (*fstrim)(struct file *, struct fstrim_range *range);
} __randomize_layout;

struct inode_operations {
--
2.20.1