From: Daeho Jeong <[email protected]>
We need to make sure i_size doesn't change until the atomic write commit
succeeds, and restore the original i_size when the commit fails.
Signed-off-by: Daeho Jeong <[email protected]>
---
v6: change FI_ATOMIC_COMMIT to FI_ATOMIC_COMMITTED
v5: prevent only i_size update for atomic files when dirtying inode
v4: move i_size update after clearing atomic file flag in
f2fs_abort_atomic_write()
v3: make sure inode is clean while atomic writing
---
fs/f2fs/f2fs.h | 8 ++++++++
fs/f2fs/file.c | 18 +++++++++++-------
fs/f2fs/inode.c | 5 ++++-
fs/f2fs/segment.c | 14 ++++++++++----
4 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e7e750e6b332..e29f9adf60ca 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -764,6 +764,7 @@ enum {
FI_COMPRESS_RELEASED, /* compressed blocks were released */
FI_ALIGNED_WRITE, /* enable aligned write */
FI_COW_FILE, /* indicate COW file */
+ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
FI_MAX, /* max flag, never be used */
};
@@ -822,6 +823,7 @@ struct f2fs_inode_info {
unsigned int i_cluster_size; /* cluster size */
unsigned int atomic_write_cnt;
+ loff_t original_i_size; /* original i_size before atomic write */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -3072,6 +3074,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
set_inode_flag(inode, FI_AUTO_RECOVER);
}
+static inline bool f2fs_is_atomic_file(struct inode *inode);
+
static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
{
bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
@@ -3081,6 +3085,10 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
return;
i_size_write(inode, i_size);
+
+ if (f2fs_is_atomic_file(inode))
+ return;
+
f2fs_mark_inode_dirty_sync(inode, true);
if (clean || recover)
set_inode_flag(inode, FI_AUTO_RECOVER);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ec9ee0f6d502..7ce629c95f4a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1991,6 +1991,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode *pinode;
+ loff_t isize;
int ret;
if (!inode_owner_or_capable(mnt_userns, inode))
@@ -2049,7 +2050,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
goto out;
}
- f2fs_i_size_write(fi->cow_inode, i_size_read(inode));
+
+ f2fs_write_inode(inode, NULL);
+
+ isize = i_size_read(inode);
+ fi->original_i_size = isize;
+ f2fs_i_size_write(fi->cow_inode, isize);
stat_inc_atomic_inode(inode);
@@ -2087,16 +2093,14 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
ret = f2fs_commit_atomic_write(inode);
- if (ret)
- goto unlock_out;
-
- ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret)
- f2fs_abort_atomic_write(inode, false);
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+
+ f2fs_abort_atomic_write(inode, ret);
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
-unlock_out:
+
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9f0d3864d9f1..577f109b4e1d 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -621,9 +621,12 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri->i_uid = cpu_to_le32(i_uid_read(inode));
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
- ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
+ if (!f2fs_is_atomic_file(inode) ||
+ is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
+ ri->i_size = cpu_to_le64(i_size_read(inode));
+
if (et) {
read_lock(&et->lock);
set_raw_extent(&et->largest, &ri->i_ext);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 289bcb7ca300..9cbf88092c78 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -192,14 +192,18 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
if (!f2fs_is_atomic_file(inode))
return;
- if (clean)
- truncate_inode_pages_final(inode->i_mapping);
clear_inode_flag(fi->cow_inode, FI_COW_FILE);
iput(fi->cow_inode);
fi->cow_inode = NULL;
release_atomic_write_cnt(inode);
+ clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
clear_inode_flag(inode, FI_ATOMIC_FILE);
stat_dec_atomic_inode(inode);
+
+ if (clean) {
+ truncate_inode_pages_final(inode->i_mapping);
+ f2fs_i_size_write(inode, fi->original_i_size);
+ }
}
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
@@ -335,10 +339,12 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
}
out:
- if (ret)
+ if (ret) {
sbi->revoked_atomic_block += fi->atomic_write_cnt;
- else
+ } else {
sbi->committed_atomic_block += fi->atomic_write_cnt;
+ set_inode_flag(inode, FI_ATOMIC_COMMITTED);
+ }
__complete_revoke_list(inode, &revoke_list, ret ? true : false);
--
2.38.1.273.g43a17bfeac-goog
From: Daeho Jeong <[email protected]>
Introduce a new ioctl to replace the whole content of a file atomically,
which means it performs the truncate and the content update at the same time.
We can start it with F2FS_IOC_START_ATOMIC_REPLACE and complete it with
F2FS_IOC_COMMIT_ATOMIC_WRITE, or abort it with
F2FS_IOC_ABORT_ATOMIC_WRITE.
Signed-off-by: Daeho Jeong <[email protected]>
---
v3: move i_size change after setting atomic write flag
v2: add undefined ioctl number reported by <[email protected]>
---
fs/f2fs/data.c | 3 +++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 20 ++++++++++++++------
fs/f2fs/segment.c | 14 +++++++++++++-
include/uapi/linux/f2fs.h | 1 +
5 files changed, 32 insertions(+), 7 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 5f895ddcd64a..bce4dcc3ad78 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3457,6 +3457,9 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
else if (*blk_addr != NULL_ADDR)
return 0;
+ if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
+ goto reserve_block;
+
/* Look for the block in the original inode */
err = __find_data_block(inode, index, &ori_blk_addr);
if (err)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e29f9adf60ca..d513ecd17550 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -765,6 +765,7 @@ enum {
FI_ALIGNED_WRITE, /* enable aligned write */
FI_COW_FILE, /* indicate COW file */
FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
+ FI_ATOMIC_REPLACE, /* indicate atomic replace */
FI_MAX, /* max flag, never be used */
};
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7ce629c95f4a..7b2d956bc52f 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1984,7 +1984,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
return put_user(inode->i_generation, (int __user *)arg);
}
-static int f2fs_ioc_start_atomic_write(struct file *filp)
+static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
{
struct inode *inode = file_inode(filp);
struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
@@ -2053,10 +2053,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
f2fs_write_inode(inode, NULL);
- isize = i_size_read(inode);
- fi->original_i_size = isize;
- f2fs_i_size_write(fi->cow_inode, isize);
-
stat_inc_atomic_inode(inode);
set_inode_flag(inode, FI_ATOMIC_FILE);
@@ -2064,6 +2060,16 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+ isize = i_size_read(inode);
+ fi->original_i_size = isize;
+ if (truncate) {
+ set_inode_flag(inode, FI_ATOMIC_REPLACE);
+ truncate_inode_pages_final(inode->i_mapping);
+ f2fs_i_size_write(inode, 0);
+ isize = 0;
+ }
+ f2fs_i_size_write(fi->cow_inode, isize);
+
f2fs_update_time(sbi, REQ_TIME);
fi->atomic_write_task = current;
stat_update_max_atomic_write(inode);
@@ -4089,7 +4095,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FS_IOC_GETVERSION:
return f2fs_ioc_getversion(filp, arg);
case F2FS_IOC_START_ATOMIC_WRITE:
- return f2fs_ioc_start_atomic_write(filp);
+ return f2fs_ioc_start_atomic_write(filp, false);
+ case F2FS_IOC_START_ATOMIC_REPLACE:
+ return f2fs_ioc_start_atomic_write(filp, true);
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
return f2fs_ioc_commit_atomic_write(filp);
case F2FS_IOC_ABORT_ATOMIC_WRITE:
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9cbf88092c78..7f62118cbe6e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -261,14 +261,26 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head,
bool revoke)
{
struct revoke_entry *cur, *tmp;
+ pgoff_t start_index = 0;
+ bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
list_for_each_entry_safe(cur, tmp, head, list) {
- if (revoke)
+ if (revoke) {
__replace_atomic_write_block(inode, cur->index,
cur->old_addr, NULL, true);
+ } else if (truncate) {
+ f2fs_truncate_hole(inode, start_index, cur->index);
+ start_index = cur->index + 1;
+ }
+
list_del(&cur->list);
kmem_cache_free(revoke_entry_slab, cur);
}
+
+ if (!revoke && truncate) {
+ f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
+ clear_inode_flag(inode, FI_ATOMIC_REPLACE);
+ }
}
static int __f2fs_commit_atomic_write(struct inode *inode)
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 3121d127d5aa..955d440be104 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -42,6 +42,7 @@
struct f2fs_comp_option)
#define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23)
#define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
+#define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
/*
* should be same as XFS_IOC_GOINGDOWN.
--
2.38.1.273.g43a17bfeac-goog
On 2022/11/1 3:24, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> We need to make sure i_size doesn't change until atomic write commit is
> successful and restore it when commit is failed.
>
> Signed-off-by: Daeho Jeong <[email protected]>
Reviewed-by: Chao Yu <[email protected]>
Thanks,
On 2022/11/1 3:24, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> introduce a new ioctl to replace the whole content of a file atomically,
> which means it induces truncate and content update at the same time.
> We can start it with F2FS_IOC_START_ATOMIC_REPLACE and complete it with
> F2FS_IOC_COMMIT_ATOMIC_WRITE. Or abort it with
> F2FS_IOC_ABORT_ATOMIC_WRITE.
>
> Signed-off-by: Daeho Jeong <[email protected]>
> ---
> v3: move i_size change after setting atomic write flag
> v2: add undefined ioctl number reported by <[email protected]>
> ---
> fs/f2fs/data.c | 3 +++
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/file.c | 20 ++++++++++++++------
> fs/f2fs/segment.c | 14 +++++++++++++-
> include/uapi/linux/f2fs.h | 1 +
> 5 files changed, 32 insertions(+), 7 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 5f895ddcd64a..bce4dcc3ad78 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3457,6 +3457,9 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
> else if (*blk_addr != NULL_ADDR)
> return 0;
>
> + if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
> + goto reserve_block;
> +
> /* Look for the block in the original inode */
> err = __find_data_block(inode, index, &ori_blk_addr);
> if (err)
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index e29f9adf60ca..d513ecd17550 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -765,6 +765,7 @@ enum {
> FI_ALIGNED_WRITE, /* enable aligned write */
> FI_COW_FILE, /* indicate COW file */
> FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
> + FI_ATOMIC_REPLACE, /* indicate atomic replace */
Do we need to clear FI_ATOMIC_REPLACE in f2fs_abort_atomic_write() for the
case where atomic_commit is not called after atomic_replace?
> FI_MAX, /* max flag, never be used */
> };
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 7ce629c95f4a..7b2d956bc52f 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1984,7 +1984,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
> return put_user(inode->i_generation, (int __user *)arg);
> }
>
> -static int f2fs_ioc_start_atomic_write(struct file *filp)
> +static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
> {
> struct inode *inode = file_inode(filp);
> struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
> @@ -2053,10 +2053,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
>
> f2fs_write_inode(inode, NULL);
>
> - isize = i_size_read(inode);
> - fi->original_i_size = isize;
> - f2fs_i_size_write(fi->cow_inode, isize);
> -
> stat_inc_atomic_inode(inode);
>
> set_inode_flag(inode, FI_ATOMIC_FILE);
> @@ -2064,6 +2060,16 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
> clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
> f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
>
> + isize = i_size_read(inode);
> + fi->original_i_size = isize;
> + if (truncate) {
> + set_inode_flag(inode, FI_ATOMIC_REPLACE);
> + truncate_inode_pages_final(inode->i_mapping);
> + f2fs_i_size_write(inode, 0);
> + isize = 0;
> + }
> + f2fs_i_size_write(fi->cow_inode, isize);
Should the above operations be covered by fi->i_gc_rwsem[WRITE] to avoid
racing with background GC?
Thanks,
> +
> f2fs_update_time(sbi, REQ_TIME);
> fi->atomic_write_task = current;
> stat_update_max_atomic_write(inode);
> @@ -4089,7 +4095,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> case FS_IOC_GETVERSION:
> return f2fs_ioc_getversion(filp, arg);
> case F2FS_IOC_START_ATOMIC_WRITE:
> - return f2fs_ioc_start_atomic_write(filp);
> + return f2fs_ioc_start_atomic_write(filp, false);
> + case F2FS_IOC_START_ATOMIC_REPLACE:
> + return f2fs_ioc_start_atomic_write(filp, true);
> case F2FS_IOC_COMMIT_ATOMIC_WRITE:
> return f2fs_ioc_commit_atomic_write(filp);
> case F2FS_IOC_ABORT_ATOMIC_WRITE:
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 9cbf88092c78..7f62118cbe6e 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -261,14 +261,26 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head,
> bool revoke)
> {
> struct revoke_entry *cur, *tmp;
> + pgoff_t start_index = 0;
> + bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
>
> list_for_each_entry_safe(cur, tmp, head, list) {
> - if (revoke)
> + if (revoke) {
> __replace_atomic_write_block(inode, cur->index,
> cur->old_addr, NULL, true);
> + } else if (truncate) {
> + f2fs_truncate_hole(inode, start_index, cur->index);
> + start_index = cur->index + 1;
> + }
> +
> list_del(&cur->list);
> kmem_cache_free(revoke_entry_slab, cur);
> }
> +
> + if (!revoke && truncate) {
> + f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
> + clear_inode_flag(inode, FI_ATOMIC_REPLACE);
> + }
> }
>
> static int __f2fs_commit_atomic_write(struct inode *inode)
> diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
> index 3121d127d5aa..955d440be104 100644
> --- a/include/uapi/linux/f2fs.h
> +++ b/include/uapi/linux/f2fs.h
> @@ -42,6 +42,7 @@
> struct f2fs_comp_option)
> #define F2FS_IOC_DECOMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 23)
> #define F2FS_IOC_COMPRESS_FILE _IO(F2FS_IOCTL_MAGIC, 24)
> +#define F2FS_IOC_START_ATOMIC_REPLACE _IO(F2FS_IOCTL_MAGIC, 25)
>
> /*
> * should be same as XFS_IOC_GOINGDOWN.