From: Daeho Jeong <[email protected]>
Added a new ioctl to send discard commands or/and zero out
to whole data area of a regular file for security reason.
Signed-off-by: Daeho Jeong <[email protected]>
---
fs/f2fs/f2fs.h | 8 +++
fs/f2fs/file.c | 149 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 157 insertions(+)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c812fb8e2d9c..ca139fac5a73 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -434,6 +434,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS \
_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
+#define F2FS_IOC_SEC_TRIM_FILE _IOW(F2FS_IOCTL_MAGIC, 20, __u32)
#define F2FS_IOC_GET_VOLUME_NAME FS_IOC_GETFSLABEL
#define F2FS_IOC_SET_VOLUME_NAME FS_IOC_SETFSLABEL
@@ -453,6 +454,13 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
#define F2FS_GOING_DOWN_NEED_FSCK 0x4 /* going down to trigger fsck */
+/*
+ * Flags used by F2FS_IOC_SEC_TRIM_FILE
+ */
+#define F2FS_TRIM_FILE_DISCARD 0x1 /* send discard command */
+#define F2FS_TRIM_FILE_ZEROOUT 0x2 /* zero out */
+#define F2FS_TRIM_FILE_MASK 0x3
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index dfa1ac2d751a..df5149a4a627 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3749,6 +3749,152 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
return ret;
}
+static int f2fs_secure_erase(struct block_device *bdev, block_t block,
+ block_t len, u32 flags)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ sector_t sector = SECTOR_FROM_BLOCK(block);
+ sector_t nr_sects = SECTOR_FROM_BLOCK(len);
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ if (flags & F2FS_TRIM_FILE_DISCARD)
+ ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
+ blk_queue_secure_erase(q) ?
+ BLKDEV_DISCARD_SECURE : 0);
+
+ if (!ret && flags & F2FS_TRIM_FILE_ZEROOUT)
+ ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
+
+ return ret;
+}
+
+static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct block_device *prev_bdev = NULL;
+ loff_t file_size;
+ pgoff_t index, pg_start = 0, pg_end;
+ block_t prev_block = 0, len = 0;
+ u32 flags;
+ int ret = 0;
+
+ if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
+ f2fs_compressed_file(inode))
+ return -EINVAL;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (f2fs_lfs_mode(sbi))
+ return -EOPNOTSUPP;
+
+ if (get_user(flags, (u32 __user *)arg))
+ return -EFAULT;
+ if (!(flags & F2FS_TRIM_FILE_MASK))
+ return -EINVAL;
+
+ if (flags & F2FS_TRIM_FILE_DISCARD && !f2fs_hw_support_discard(sbi))
+ return -EOPNOTSUPP;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ inode_lock(inode);
+
+ file_size = i_size_read(inode);
+ if (!file_size)
+ goto err;
+ pg_end = (pgoff_t)round_up(file_size, PAGE_SIZE) >> PAGE_SHIFT;
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto err;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ ret = filemap_write_and_wait(mapping);
+ if (ret)
+ goto out;
+
+ truncate_inode_pages(mapping, 0);
+
+ for (index = pg_start; index < pg_end;) {
+ struct dnode_of_data dn;
+ unsigned int end_offset;
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (ret)
+ goto out;
+
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+ if (pg_end < end_offset + index)
+ end_offset = pg_end - index;
+
+ for (; dn.ofs_in_node < end_offset;
+ dn.ofs_in_node++, index++) {
+ struct block_device *cur_bdev;
+ block_t blkaddr = f2fs_data_blkaddr(&dn);
+
+ if (__is_valid_data_blkaddr(blkaddr)) {
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC_ENHANCE)) {
+ ret = -EFSCORRUPTED;
+ goto out;
+ }
+ } else
+ continue;
+
+ cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
+ if (f2fs_is_multi_device(sbi)) {
+ int i = f2fs_target_device_index(sbi, blkaddr);
+
+ blkaddr -= FDEV(i).start_blk;
+ }
+
+ if (len) {
+ if (prev_bdev == cur_bdev &&
+ blkaddr == prev_block + len) {
+ len++;
+ } else {
+ ret = f2fs_secure_erase(prev_bdev,
+ prev_block, len, flags);
+ if (ret)
+ goto out;
+
+ len = 0;
+ }
+ }
+
+ if (!len) {
+ prev_bdev = cur_bdev;
+ prev_block = blkaddr;
+ len = 1;
+ }
+ }
+
+ f2fs_put_dnode(&dn);
+ }
+
+ if (len)
+ ret = f2fs_secure_erase(prev_bdev, prev_block, len, flags);
+out:
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+err:
+ inode_unlock(inode);
+ mnt_drop_write_file(filp);
+
+ return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3835,6 +3981,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_release_compress_blocks(filp, arg);
case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
return f2fs_reserve_compress_blocks(filp, arg);
+ case F2FS_IOC_SEC_TRIM_FILE:
+ return f2fs_sec_trim_file(filp, arg);
default:
return -ENOTTY;
}
@@ -4004,6 +4152,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_COMPRESS_BLOCKS:
case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
+ case F2FS_IOC_SEC_TRIM_FILE:
break;
default:
return -ENOIOCTLCMD;
--
2.27.0.278.ge193c7cf3a9-goog
On Tue, Jun 09, 2020 at 03:01:37PM +0900, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Added a new ioctl to send discard commands or/and zero out
> to whole data area of a regular file for security reason.
With this ioctl available, what is the exact procedure to write and then later
securely erase a file on f2fs? In particular, how can the user prevent f2fs
from making multiple copies of file data blocks as part of garbage collection?
> +static int f2fs_secure_erase(struct block_device *bdev, block_t block,
> + block_t len, u32 flags)
> +{
> + struct request_queue *q = bdev_get_queue(bdev);
> + sector_t sector = SECTOR_FROM_BLOCK(block);
> + sector_t nr_sects = SECTOR_FROM_BLOCK(len);
> + int ret = 0;
> +
> + if (!q)
> + return -ENXIO;
Why can the request_queue be NULL here?
> +
> + if (flags & F2FS_TRIM_FILE_DISCARD)
> + ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
> + blk_queue_secure_erase(q) ?
> + BLKDEV_DISCARD_SECURE : 0);
> +
> + if (!ret && flags & F2FS_TRIM_FILE_ZEROOUT)
> + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
> +
> + return ret;
> +}
> +
> +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> +{
> + struct inode *inode = file_inode(filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct address_space *mapping = inode->i_mapping;
> + struct block_device *prev_bdev = NULL;
> + loff_t file_size;
> + pgoff_t index, pg_start = 0, pg_end;
> + block_t prev_block = 0, len = 0;
> + u32 flags;
> + int ret = 0;
> +
> + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
> + f2fs_compressed_file(inode))
> + return -EINVAL;
Is it valid to check f2fs_is_atomic_file() and f2fs_compressed_file() outside of
inode_lock()?
> +
> + if (f2fs_readonly(sbi->sb))
> + return -EROFS;
Isn't this redundant with mnt_want_write_file()?
Also, shouldn't write access to the file be required, i.e.
(filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
mnt_want_write_file() checks would be unnecessary.
> +
> + if (f2fs_lfs_mode(sbi))
> + return -EOPNOTSUPP;
Doesn't this check have to be serialized with remount?
> +
> + if (get_user(flags, (u32 __user *)arg))
> + return -EFAULT;
> + if (!(flags & F2FS_TRIM_FILE_MASK))
> + return -EINVAL;
Need to reject unknown flags:
if (flags & ~F2FS_TRIM_FILE_MASK)
return -EINVAL;
> +
> + if (flags & F2FS_TRIM_FILE_DISCARD && !f2fs_hw_support_discard(sbi))
> + return -EOPNOTSUPP;
> +
> + ret = mnt_want_write_file(filp);
> + if (ret)
> + return ret;
> +
> + inode_lock(inode);
> +
> + file_size = i_size_read(inode);
> + if (!file_size)
> + goto err;
->i_size is stable while holding inode_lock(). So just use ->i_size instead of
i_size_read().
> + pg_end = (pgoff_t)round_up(file_size, PAGE_SIZE) >> PAGE_SHIFT;
This can be simplified to:
pg_end = DIV_ROUND_UP(file_size, PAGE_SIZE);
- Eric
> > Added a new ioctl to send discard commands or/and zero out
> > to whole data area of a regular file for security reason.
>
> With this ioctl available, what is the exact procedure to write and then later
> securely erase a file on f2fs? In particular, how can the user prevent f2fs
> from making multiple copies of file data blocks as part of garbage collection?
>
To prevent the file data from garbage collecting, the user needs to
use pinfile ioctl and fallocate system call after creating the file.
The sequence is like below.
1. create an empty file
2. pinfile
3. fallocate()
> > +static int f2fs_secure_erase(struct block_device *bdev, block_t block,
> > + block_t len, u32 flags)
> > +{
> > + struct request_queue *q = bdev_get_queue(bdev);
> > + sector_t sector = SECTOR_FROM_BLOCK(block);
> > + sector_t nr_sects = SECTOR_FROM_BLOCK(len);
> > + int ret = 0;
> > +
> > + if (!q)
> > + return -ENXIO;
>
> Why can the request_queue be NULL here?
>
This check is copied from __blkdev_issue_discard(), even if
bdev_get_queue() says it doesn't return NULL value.
__blkdev_issue_discard() does the same thing.
> > +
> > + if (flags & F2FS_TRIM_FILE_DISCARD)
> > + ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
> > + blk_queue_secure_erase(q) ?
> > + BLKDEV_DISCARD_SECURE : 0);
> > +
> > + if (!ret && flags & F2FS_TRIM_FILE_ZEROOUT)
> > + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
> > +
> > + return ret;
> > +}
> > +
> > +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> > +{
> > + struct inode *inode = file_inode(filp);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct address_space *mapping = inode->i_mapping;
> > + struct block_device *prev_bdev = NULL;
> > + loff_t file_size;
> > + pgoff_t index, pg_start = 0, pg_end;
> > + block_t prev_block = 0, len = 0;
> > + u32 flags;
> > + int ret = 0;
> > +
> > + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
> > + f2fs_compressed_file(inode))
> > + return -EINVAL;
>
> Is it valid to check f2fs_is_atomic_file() and f2fs_compressed_file() outside of
> inode_lock()?
>
Right, it's better to move the check after inode_lock().
> > +
> > + if (f2fs_readonly(sbi->sb))
> > + return -EROFS;
>
> Isn't this redundant with mnt_want_write_file()?
>
> Also, shouldn't write access to the file be required, i.e.
> (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> mnt_want_write_file() checks would be unnecessary.
>
Using FMODE_WRITE is more proper for this case, since we're going to
modify the data. But I think mnt_want_write_file() is still required
to prevent the filesystem from freezing or something else.
> > +
> > + if (f2fs_lfs_mode(sbi))
> > + return -EOPNOTSUPP;
>
> Doesn't this check have to be serialized with remount?
Need to do that, thanks.
>
> > +
> > + if (get_user(flags, (u32 __user *)arg))
> > + return -EFAULT;
> > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > + return -EINVAL;
>
> Need to reject unknown flags:
>
> if (flags & ~F2FS_TRIM_FILE_MASK)
> return -EINVAL;
I needed a different thing here. This was to check neither discard nor
zeroing out are not here. But we still need to check unknown flags,
too.
The below might be better.
if (!flags || flags & ~F2FS_TRIM_FILE_MASK)
return -EINVAL;
>
> > +
> > + if (flags & F2FS_TRIM_FILE_DISCARD && !f2fs_hw_support_discard(sbi))
> > + return -EOPNOTSUPP;
> > +
> > + ret = mnt_want_write_file(filp);
> > + if (ret)
> > + return ret;
> > +
> > + inode_lock(inode);
> > +
> > + file_size = i_size_read(inode);
> > + if (!file_size)
> > + goto err;
>
> ->i_size is stable while holding inode_lock(). So just use ->i_size instead of
> i_size_read().
Yes.
>
> > + pg_end = (pgoff_t)round_up(file_size, PAGE_SIZE) >> PAGE_SHIFT;
>
> This can be simplified to:
>
> pg_end = DIV_ROUND_UP(file_size, PAGE_SIZE);
Cool~ :)
>
>
> - Eric
2020년 6월 10일 (수) 오전 1:51, Eric Biggers <[email protected]>님이 작성:
>
> On Tue, Jun 09, 2020 at 03:01:37PM +0900, Daeho Jeong wrote:
> > From: Daeho Jeong <[email protected]>
> >
> > Added a new ioctl to send discard commands or/and zero out
> > to whole data area of a regular file for security reason.
>
> With this ioctl available, what is the exact procedure to write and then later
> securely erase a file on f2fs? In particular, how can the user prevent f2fs
> from making multiple copies of file data blocks as part of garbage collection?
>
> > +static int f2fs_secure_erase(struct block_device *bdev, block_t block,
> > + block_t len, u32 flags)
> > +{
> > + struct request_queue *q = bdev_get_queue(bdev);
> > + sector_t sector = SECTOR_FROM_BLOCK(block);
> > + sector_t nr_sects = SECTOR_FROM_BLOCK(len);
> > + int ret = 0;
> > +
> > + if (!q)
> > + return -ENXIO;
>
> Why can the request_queue be NULL here?
>
> > +
> > + if (flags & F2FS_TRIM_FILE_DISCARD)
> > + ret = blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOFS,
> > + blk_queue_secure_erase(q) ?
> > + BLKDEV_DISCARD_SECURE : 0);
> > +
> > + if (!ret && flags & F2FS_TRIM_FILE_ZEROOUT)
> > + ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS, 0);
> > +
> > + return ret;
> > +}
> > +
> > +static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
> > +{
> > + struct inode *inode = file_inode(filp);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + struct address_space *mapping = inode->i_mapping;
> > + struct block_device *prev_bdev = NULL;
> > + loff_t file_size;
> > + pgoff_t index, pg_start = 0, pg_end;
> > + block_t prev_block = 0, len = 0;
> > + u32 flags;
> > + int ret = 0;
> > +
> > + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode) ||
> > + f2fs_compressed_file(inode))
> > + return -EINVAL;
>
> Is it valid to check f2fs_is_atomic_file() and f2fs_compressed_file() outside of
> inode_lock()?
>
> > +
> > + if (f2fs_readonly(sbi->sb))
> > + return -EROFS;
>
> Isn't this redundant with mnt_want_write_file()?
>
> Also, shouldn't write access to the file be required, i.e.
> (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> mnt_want_write_file() checks would be unnecessary.
>
> > +
> > + if (f2fs_lfs_mode(sbi))
> > + return -EOPNOTSUPP;
>
> Doesn't this check have to be serialized with remount?
>
> > +
> > + if (get_user(flags, (u32 __user *)arg))
> > + return -EFAULT;
> > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > + return -EINVAL;
>
> Need to reject unknown flags:
>
> if (flags & ~F2FS_TRIM_FILE_MASK)
> return -EINVAL;
>
> > +
> > + if (flags & F2FS_TRIM_FILE_DISCARD && !f2fs_hw_support_discard(sbi))
> > + return -EOPNOTSUPP;
> > +
> > + ret = mnt_want_write_file(filp);
> > + if (ret)
> > + return ret;
> > +
> > + inode_lock(inode);
> > +
> > + file_size = i_size_read(inode);
> > + if (!file_size)
> > + goto err;
>
> ->i_size is stable while holding inode_lock(). So just use ->i_size instead of
> i_size_read().
>
> > + pg_end = (pgoff_t)round_up(file_size, PAGE_SIZE) >> PAGE_SHIFT;
>
> This can be simplified to:
>
> pg_end = DIV_ROUND_UP(file_size, PAGE_SIZE);
>
>
> - Eric
On Wed, Jun 10, 2020 at 11:05:46AM +0900, Daeho Jeong wrote:
> > > Added a new ioctl to send discard commands or/and zero out
> > > to whole data area of a regular file for security reason.
> >
> > With this ioctl available, what is the exact procedure to write and then later
> > securely erase a file on f2fs? In particular, how can the user prevent f2fs
> > from making multiple copies of file data blocks as part of garbage collection?
> >
>
> To prevent the file data from garbage collecting, the user needs to
> use pinfile ioctl and fallocate system call after creating the file.
> The sequence is like below.
> 1. create an empty file
> 2. pinfile
> 3. fallocate()
Is that persistent? So the file will never be moved afterwards?
Is there a place where this is (or should be) documented?
> > > +
> > > + if (f2fs_readonly(sbi->sb))
> > > + return -EROFS;
> >
> > Isn't this redundant with mnt_want_write_file()?
> >
> > Also, shouldn't write access to the file be required, i.e.
> > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > mnt_want_write_file() checks would be unnecessary.
> >
>
> Using FMODE_WRITE is more proper for this case, since we're going to
> modify the data. But I think mnt_want_write_file() is still required
> to prevent the filesystem from freezing or something else.
Right, the freezing check is actually still necessary. But getting write access
to the mount is not necessary. I think you should use file_start_write() and
file_end_write(), like vfs_write() does.
> >
> > > +
> > > + if (get_user(flags, (u32 __user *)arg))
> > > + return -EFAULT;
> > > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > > + return -EINVAL;
> >
> > Need to reject unknown flags:
> >
> > if (flags & ~F2FS_TRIM_FILE_MASK)
> > return -EINVAL;
>
> I needed a different thing here. This was to check neither discard nor
> zeroing out are not here. But we still need to check unknown flags,
> too.
> The below might be better.
> if (!flags || flags & ~F2FS_TRIM_FILE_MASK)
> return -EINVAL;
Sure, but please put parentheses around the second clause:
if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
return -EINVAL;
- Eric
> >
> > To prevent the file data from garbage collecting, the user needs to
> > use pinfile ioctl and fallocate system call after creating the file.
> > The sequence is like below.
> > 1. create an empty file
> > 2. pinfile
> > 3. fallocate()
>
> Is that persistent? So the file will never be moved afterwards?
>
> Is there a place where this is (or should be) documented?
Yes, this is persistent. F2FS_IOC_SET_PIN_FILE ioctl is to prevent
file data from moving and being garbage collected, and further update
to the file will be handled in in-place update manner.
I don't see any document on this, but you can find the below in
Documentation/filesystems/f2fs.rst
However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
zero or random data, which is useful to the below scenario where:
1. create(fd)
2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
3. fallocate(fd, 0, 0, size)
4. address = fibmap(fd, offset)
5. open(blkdev)
6. write(blkdev, address)
> Right, the freezing check is actually still necessary. But getting write access
> to the mount is not necessary. I think you should use file_start_write() and
> file_end_write(), like vfs_write() does.
Yes, agreed.
2020년 6월 10일 (수) 오후 12:15, Eric Biggers <[email protected]>님이 작성:
>
> On Wed, Jun 10, 2020 at 11:05:46AM +0900, Daeho Jeong wrote:
> > > > Added a new ioctl to send discard commands or/and zero out
> > > > to whole data area of a regular file for security reason.
> > >
> > > With this ioctl available, what is the exact procedure to write and then later
> > > securely erase a file on f2fs? In particular, how can the user prevent f2fs
> > > from making multiple copies of file data blocks as part of garbage collection?
> > >
> >
> > To prevent the file data from garbage collecting, the user needs to
> > use pinfile ioctl and fallocate system call after creating the file.
> > The sequence is like below.
> > 1. create an empty file
> > 2. pinfile
> > 3. fallocate()
>
> Is that persistent? So the file will never be moved afterwards?
>
> Is there a place where this is (or should be) documented?
>
> > > > +
> > > > + if (f2fs_readonly(sbi->sb))
> > > > + return -EROFS;
> > >
> > > Isn't this redundant with mnt_want_write_file()?
> > >
> > > Also, shouldn't write access to the file be required, i.e.
> > > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > > mnt_want_write_file() checks would be unnecessary.
> > >
> >
> > Using FMODE_WRITE is more proper for this case, since we're going to
> > modify the data. But I think mnt_want_write_file() is still required
> > to prevent the filesystem from freezing or something else.
>
> Right, the freezing check is actually still necessary. But getting write access
> to the mount is not necessary. I think you should use file_start_write() and
> file_end_write(), like vfs_write() does.
>
> > >
> > > > +
> > > > + if (get_user(flags, (u32 __user *)arg))
> > > > + return -EFAULT;
> > > > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > > > + return -EINVAL;
> > >
> > > Need to reject unknown flags:
> > >
> > > if (flags & ~F2FS_TRIM_FILE_MASK)
> > > return -EINVAL;
> >
> > I needed a different thing here. This was to check neither discard nor
> > zeroing out are not here. But we still need to check unknown flags,
> > too.
> > The below might be better.
> > if (!flags || flags & ~F2FS_TRIM_FILE_MASK)
> > return -EINVAL;
>
> Sure, but please put parentheses around the second clause:
>
> if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> return -EINVAL;
>
> - Eric
> > > > > +
> > > > > + if (f2fs_readonly(sbi->sb))
> > > > > + return -EROFS;
> > > >
> > > > Isn't this redundant with mnt_want_write_file()?
> > > >
> > > > Also, shouldn't write access to the file be required, i.e.
> > > > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > > > mnt_want_write_file() checks would be unnecessary.
> > > >
> > >
> > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > modify the data. But I think mnt_want_write_file() is still required
> > > to prevent the filesystem from freezing or something else.
> >
> > Right, the freezing check is actually still necessary. But getting write access
> > to the mount is not necessary. I think you should use file_start_write() and
> > file_end_write(), like vfs_write() does.
I've checked this again.
But I think mnt_want_write_file() looks better than the combination of
checking FMODE_WRITE and file_start_write(), because
mnt_want_write_file() handles all the things we need.
It checks FMODE_WRITER, which is set in do_dentry_open() when
FMODE_WRITE is already set, and does the stuff that file_start_write()
is doing. This is why the other filesystem system calls use it.
What do you think?
2020년 6월 10일 (수) 오후 12:55, Daeho Jeong <[email protected]>님이 작성:
>
> > >
> > > To prevent the file data from garbage collecting, the user needs to
> > > use pinfile ioctl and fallocate system call after creating the file.
> > > The sequence is like below.
> > > 1. create an empty file
> > > 2. pinfile
> > > 3. fallocate()
> >
> > Is that persistent? So the file will never be moved afterwards?
> >
> > Is there a place where this is (or should be) documented?
>
> Yes, this is persistent. F2FS_IOC_SET_PIN_FILE ioctl is to prevent
> file data from moving and being garbage collected, and further update
> to the file will be handled in in-place update manner.
> I don't see any document on this, but you can find the below in
> Documentation/filesystems/f2fs.rst
>
> However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
> fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
> zero or random data, which is useful to the below scenario where:
>
> 1. create(fd)
> 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
> 3. fallocate(fd, 0, 0, size)
> 4. address = fibmap(fd, offset)
> 5. open(blkdev)
> 6. write(blkdev, address)
>
> > Right, the freezing check is actually still necessary. But getting write access
> > to the mount is not necessary. I think you should use file_start_write() and
> > file_end_write(), like vfs_write() does.
>
> Yes, agreed.
>
> 2020년 6월 10일 (수) 오후 12:15, Eric Biggers <[email protected]>님이 작성:
> >
> > On Wed, Jun 10, 2020 at 11:05:46AM +0900, Daeho Jeong wrote:
> > > > > Added a new ioctl to send discard commands or/and zero out
> > > > > to whole data area of a regular file for security reason.
> > > >
> > > > With this ioctl available, what is the exact procedure to write and then later
> > > > securely erase a file on f2fs? In particular, how can the user prevent f2fs
> > > > from making multiple copies of file data blocks as part of garbage collection?
> > > >
> > >
> > > To prevent the file data from garbage collecting, the user needs to
> > > use pinfile ioctl and fallocate system call after creating the file.
> > > The sequence is like below.
> > > 1. create an empty file
> > > 2. pinfile
> > > 3. fallocate()
> >
> > Is that persistent? So the file will never be moved afterwards?
> >
> > Is there a place where this is (or should be) documented?
> >
> > > > > +
> > > > > + if (f2fs_readonly(sbi->sb))
> > > > > + return -EROFS;
> > > >
> > > > Isn't this redundant with mnt_want_write_file()?
> > > >
> > > > Also, shouldn't write access to the file be required, i.e.
> > > > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > > > mnt_want_write_file() checks would be unnecessary.
> > > >
> > >
> > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > modify the data. But I think mnt_want_write_file() is still required
> > > to prevent the filesystem from freezing or something else.
> >
> > Right, the freezing check is actually still necessary. But getting write access
> > to the mount is not necessary. I think you should use file_start_write() and
> > file_end_write(), like vfs_write() does.
> >
> > > >
> > > > > +
> > > > > + if (get_user(flags, (u32 __user *)arg))
> > > > > + return -EFAULT;
> > > > > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > > > > + return -EINVAL;
> > > >
> > > > Need to reject unknown flags:
> > > >
> > > > if (flags & ~F2FS_TRIM_FILE_MASK)
> > > > return -EINVAL;
> > >
> > > I needed a different thing here. This was to check neither discard nor
> > > zeroing out are not here. But we still need to check unknown flags,
> > > too.
> > > The below might be better.
> > > if (!flags || flags & ~F2FS_TRIM_FILE_MASK)
> > > return -EINVAL;
> >
> > Sure, but please put parentheses around the second clause:
> >
> > if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> > return -EINVAL;
> >
> > - Eric
> > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > modify the data. But I think mnt_want_write_file() is still required
> > > > to prevent the filesystem from freezing or something else.
> > >
> > > Right, the freezing check is actually still necessary. But getting write access
> > > to the mount is not necessary. I think you should use file_start_write() and
> > > file_end_write(), like vfs_write() does.
>
> I've checked this again.
>
> But I think mnt_want_write_file() looks better than the combination of
> checking FMODE_WRITE and file_start_write(), because
> mnt_want_write_file() handles all the things we need.
> It checks FMODE_WRITER, which is set in do_dentry_open() when
> FMODE_WRITE is already set, and does the stuff that file_start_write()
> is doing. This is why the other filesystem system calls use it.
>
> What do you think?
Hmm, we still need FMODE_WRITE check.
But mnt_want_write_file() looks better, because it'll call
mnt_clone_write() internally, if the file is open for write already.
in ext4/ioctl.c
case EXT4_IOC_SWAP_BOOT:
{
int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
err = mnt_want_write_file(filp);
if (err)
return err;2020년 6월 11일 (목) 오전 8:31, Daeho
Jeong <[email protected]>님이 작성:
>
> > > > > > +
> > > > > > + if (f2fs_readonly(sbi->sb))
> > > > > > + return -EROFS;
> > > > >
> > > > > Isn't this redundant with mnt_want_write_file()?
> > > > >
> > > > > Also, shouldn't write access to the file be required, i.e.
> > > > > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > > > > mnt_want_write_file() checks would be unnecessary.
> > > > >
> > > >
> > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > modify the data. But I think mnt_want_write_file() is still required
> > > > to prevent the filesystem from freezing or something else.
> > >
> > > Right, the freezing check is actually still necessary. But getting write access
> > > to the mount is not necessary. I think you should use file_start_write() and
> > > file_end_write(), like vfs_write() does.
>
> I've checked this again.
>
> But I think mnt_want_write_file() looks better than the combination of
> checking FMODE_WRITE and file_start_write(), because
> mnt_want_write_file() handles all the things we need.
> It checks FMODE_WRITER, which is set in do_dentry_open() when
> FMODE_WRITE is already set, and does the stuff that file_start_write()
> is doing. This is why the other filesystem system calls use it.
>
> What do you think?
>
> 2020년 6월 10일 (수) 오후 12:55, Daeho Jeong <[email protected]>님이 작성:
> >
> > > >
> > > > To prevent the file data from garbage collecting, the user needs to
> > > > use pinfile ioctl and fallocate system call after creating the file.
> > > > The sequence is like below.
> > > > 1. create an empty file
> > > > 2. pinfile
> > > > 3. fallocate()
> > >
> > > Is that persistent? So the file will never be moved afterwards?
> > >
> > > Is there a place where this is (or should be) documented?
> >
> > Yes, this is persistent. F2FS_IOC_SET_PIN_FILE ioctl is to prevent
> > file data from moving and being garbage collected, and further update
> > to the file will be handled in in-place update manner.
> > I don't see any document on this, but you can find the below in
> > Documentation/filesystems/f2fs.rst
> >
> > However, once F2FS receives ioctl(fd, F2FS_IOC_SET_PIN_FILE) in prior to
> > fallocate(fd, DEFAULT_MODE), it allocates on-disk blocks addresses having
> > zero or random data, which is useful to the below scenario where:
> >
> > 1. create(fd)
> > 2. ioctl(fd, F2FS_IOC_SET_PIN_FILE)
> > 3. fallocate(fd, 0, 0, size)
> > 4. address = fibmap(fd, offset)
> > 5. open(blkdev)
> > 6. write(blkdev, address)
> >
> > > Right, the freezing check is actually still necessary. But getting write access
> > > to the mount is not necessary. I think you should use file_start_write() and
> > > file_end_write(), like vfs_write() does.
> >
> > Yes, agreed.
> >
> > 2020년 6월 10일 (수) 오후 12:15, Eric Biggers <[email protected]>님이 작성:
> > >
> > > On Wed, Jun 10, 2020 at 11:05:46AM +0900, Daeho Jeong wrote:
> > > > > > Added a new ioctl to send discard commands or/and zero out
> > > > > > to whole data area of a regular file for security reason.
> > > > >
> > > > > With this ioctl available, what is the exact procedure to write and then later
> > > > > securely erase a file on f2fs? In particular, how can the user prevent f2fs
> > > > > from making multiple copies of file data blocks as part of garbage collection?
> > > > >
> > > >
> > > > To prevent the file data from garbage collecting, the user needs to
> > > > use pinfile ioctl and fallocate system call after creating the file.
> > > > The sequence is like below.
> > > > 1. create an empty file
> > > > 2. pinfile
> > > > 3. fallocate()
> > >
> > > Is that persistent? So the file will never be moved afterwards?
> > >
> > > Is there a place where this is (or should be) documented?
> > >
> > > > > > +
> > > > > > + if (f2fs_readonly(sbi->sb))
> > > > > > + return -EROFS;
> > > > >
> > > > > Isn't this redundant with mnt_want_write_file()?
> > > > >
> > > > > Also, shouldn't write access to the file be required, i.e.
> > > > > (filp->f_mode & FMODE_WRITE)? Then the f2fs_readonly() and
> > > > > mnt_want_write_file() checks would be unnecessary.
> > > > >
> > > >
> > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > modify the data. But I think mnt_want_write_file() is still required
> > > > to prevent the filesystem from freezing or something else.
> > >
> > > Right, the freezing check is actually still necessary. But getting write access
> > > to the mount is not necessary. I think you should use file_start_write() and
> > > file_end_write(), like vfs_write() does.
> > >
> > > > >
> > > > > > +
> > > > > > + if (get_user(flags, (u32 __user *)arg))
> > > > > > + return -EFAULT;
> > > > > > + if (!(flags & F2FS_TRIM_FILE_MASK))
> > > > > > + return -EINVAL;
> > > > >
> > > > > Need to reject unknown flags:
> > > > >
> > > > > if (flags & ~F2FS_TRIM_FILE_MASK)
> > > > > return -EINVAL;
> > > >
> > > > I needed a different thing here. This was to check neither discard nor
> > > > zeroing out are not here. But we still need to check unknown flags,
> > > > too.
> > > > The below might be better.
> > > > if (!flags || flags & ~F2FS_TRIM_FILE_MASK)
> > > > return -EINVAL;
> > >
> > > Sure, but please put parentheses around the second clause:
> > >
> > > if (flags == 0 || (flags & ~F2FS_TRIM_FILE_MASK))
> > > return -EINVAL;
> > >
> > > - Eric
On Thu, Jun 11, 2020 at 08:53:10AM +0900, Daeho Jeong wrote:
> > > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > > modify the data. But I think mnt_want_write_file() is still required
> > > > > to prevent the filesystem from freezing or something else.
> > > >
> > > > Right, the freezing check is actually still necessary. But getting write access
> > > > to the mount is not necessary. I think you should use file_start_write() and
> > > > file_end_write(), like vfs_write() does.
> >
> > I've checked this again.
> >
> > But I think mnt_want_write_file() looks better than the combination of
> > checking FMODE_WRITE and file_start_write(), because
> > mnt_want_write_file() handles all the things we need.
> > It checks FMODE_WRITER, which is set in do_dentry_open() when
> > FMODE_WRITE is already set, and does the stuff that file_start_write()
> > is doing. This is why the other filesystem system calls use it.
> >
> > What do you think?
>
> Hmm, we still need FMODE_WRITE check.
> But mnt_want_write_file() looks better, because it'll call
> mnt_clone_write() internally, if the file is open for write already.
There's no need to get write access to the mount if you already have a writable
fd. You just need file_start_write() for the freeze protection. Again, see
vfs_write().
- Eric
Yes, I saw the implementation in vfs_write().
But if we use mnt_want_write_file() here, it'll call mnt_clone_write()
internally if the file is already open in write mode.
Don't you think the below thing is needed? We can increase the counter
each of them, open and ioctl, like other filesystems such as ext4.
int mnt_clone_write(struct vfsmount *mnt)
{
/* superblock may be r/o */
if (__mnt_is_readonly(mnt))
return -EROFS;
preempt_disable();
mnt_inc_writers(real_mount(mnt));
preempt_enable();
return 0;
}
2020년 6월 11일 (목) 오전 9:00, Eric Biggers <[email protected]>님이 작성:
>
> On Thu, Jun 11, 2020 at 08:53:10AM +0900, Daeho Jeong wrote:
> > > > > > Using FMODE_WRITE is more proper for this case, since we're going to
> > > > > > modify the data. But I think mnt_want_write_file() is still required
> > > > > > to prevent the filesystem from freezing or something else.
> > > > >
> > > > > Right, the freezing check is actually still necessary. But getting write access
> > > > > to the mount is not necessary. I think you should use file_start_write() and
> > > > > file_end_write(), like vfs_write() does.
> > >
> > > I've checked this again.
> > >
> > > But I think mnt_want_write_file() looks better than the combination of
> > > checking FMODE_WRITE and file_start_write(), because
> > > mnt_want_write_file() handles all the things we need.
> > > It checks FMODE_WRITER, which is set in do_dentry_open() when
> > > FMODE_WRITE is already set, and does the stuff that file_start_write()
> > > is doing. This is why the other filesystem system calls use it.
> > >
> > > What do you think?
> >
> > Hmm, we still need FMODE_WRITE check.
> > But mnt_want_write_file() looks better, because it'll call
> > mnt_clone_write() internally, if the file is open for write already.
>
> There's no need to get write access to the mount if you already have a writable
> fd. You just need file_start_write() for the freeze protection. Again, see
> vfs_write().
>
> - Eric
On Thu, Jun 11, 2020 at 09:23:23AM +0900, Daeho Jeong wrote:
> Yes, I saw the implementation in vfs_write().
> But if we use mnt_want_write_file() here, it'll call mnt_clone_write()
> internally if the file is already open in write mode.
> Don't you think the below thing is needed? We can increase the counter
> each of them, open and ioctl, like other filesystems such as ext4.
>
> int mnt_clone_write(struct vfsmount *mnt)
> {
> /* superblock may be r/o */
> if (__mnt_is_readonly(mnt))
> return -EROFS;
> preempt_disable();
> mnt_inc_writers(real_mount(mnt));
> preempt_enable();
> return 0;
> }
No, this seems to be left over from when mnt_want_write_file() was paired with
mnt_drop_write() instead of mnt_drop_write_file(). I sent a patch to remove it:
https://lkml.kernel.org/r/[email protected]
- Eric
Ok, I got it. Thanks for quick response~ :)
2020년 6월 11일 (목) 오전 10:56, Eric Biggers <[email protected]>님이 작성:
>
> On Thu, Jun 11, 2020 at 09:23:23AM +0900, Daeho Jeong wrote:
> > Yes, I saw the implementation in vfs_write().
> > But if we use mnt_want_write_file() here, it'll call mnt_clone_write()
> > internally if the file is already open in write mode.
> > Don't you think the below thing is needed? We can increase the counter
> > each of them, open and ioctl, like other filesystems such as ext4.
> >
> > int mnt_clone_write(struct vfsmount *mnt)
> > {
> > /* superblock may be r/o */
> > if (__mnt_is_readonly(mnt))
> > return -EROFS;
> > preempt_disable();
> > mnt_inc_writers(real_mount(mnt));
> > preempt_enable();
> > return 0;
> > }
>
> No, this seems to be left over from when mnt_want_write_file() was paired with
> mnt_drop_write() instead of mnt_drop_write_file(). I sent a patch to remove it:
> https://lkml.kernel.org/r/[email protected]
>
> - Eric