2021-11-16 21:45:20

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/6] f2fs: rework write preallocations

From: Eric Biggers <[email protected]>

f2fs_write_begin() assumes that all blocks were preallocated by
default unless FI_NO_PREALLOC is explicitly set. This invites data
corruption, as there are cases in which not all blocks are preallocated.
Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing
buffered_io") fixed one case, but there are others remaining.

Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
only gets set if all blocks for the current write were preallocated.

Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
handle some of the logic that was previously in write_iter() directly.

Jaegeuk:
DIO writes into holes are turned into buffered IO in f2fs_direct_IO, so we
should not preallocate blocks for them.

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 55 ++-------------------
fs/f2fs/f2fs.h | 3 +-
fs/f2fs/file.c | 130 +++++++++++++++++++++++++++++++------------------
3 files changed, 87 insertions(+), 101 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9f754aaef558..3b27fb7daa8b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
return 0;
}

-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct f2fs_map_blocks map;
- int flag;
- int err = 0;
- bool direct_io = iocb->ki_flags & IOCB_DIRECT;
-
- map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
- map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
- if (map.m_len > map.m_lblk)
- map.m_len -= map.m_lblk;
- else
- map.m_len = 0;
-
- map.m_next_pgofs = NULL;
- map.m_next_extent = NULL;
- map.m_seg_type = NO_CHECK_TYPE;
- map.m_may_create = true;
-
- if (direct_io) {
- map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
- flag = f2fs_force_buffered_io(inode, iocb, from) ?
- F2FS_GET_BLOCK_PRE_AIO :
- F2FS_GET_BLOCK_PRE_DIO;
- goto map_blocks;
- }
- if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
- }
- if (f2fs_has_inline_data(inode))
- return err;
-
- flag = F2FS_GET_BLOCK_PRE_AIO;
-
-map_blocks:
- err = f2fs_map_blocks(inode, &map, 1, flag);
- if (map.m_len > 0 && err == -ENOSPC) {
- if (!direct_io)
- set_inode_flag(inode, FI_NO_PREALLOC);
- err = 0;
- }
- return err;
-}
-
void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
if (flag == F2FS_GET_BLOCK_PRE_AIO) {
@@ -3339,12 +3292,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
int flag;

/*
- * we already allocated all the blocks, so we don't need to get
- * the block addresses when there is no need to fill the page.
+ * If a whole page is being written and we already preallocated all the
+ * blocks, then there is no need to get a block address now.
*/
- if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
- !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
- !f2fs_verity_in_progress(inode))
+ if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
return 0;

/* f2fs_lock_op avoids race between write CP and convert_inline_page */
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ce9fc9f13000..be871a79c634 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -715,7 +715,7 @@ enum {
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
FI_HOT_DATA, /* indicate file is hot */
FI_EXTRA_ATTR, /* indicate file has extra attribute */
FI_PROJ_INHERIT, /* indicate file inherits projectid */
@@ -3614,7 +3614,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
int f2fs_reserve_new_block(struct dnode_of_data *dn);
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
-int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
int op_flags, bool for_write);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 92ec2699bc85..4bf77a5bf998 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4235,10 +4235,76 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}

+/*
+ * Preallocate blocks for a write request, if it is possible and helpful to do
+ * so. Returns a positive number if blocks may have been preallocated, 0 if no
+ * blocks were preallocated, or a negative errno value if something went
+ * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
+ * requested blocks (not just some of them) have been allocated.
+ */
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(iter);
+ struct f2fs_map_blocks map = {};
+ bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
+ !f2fs_force_buffered_io(inode, iocb, iter);
+ int flag;
+ int ret;
+
+ /* If it will be an out-of-place direct write, don't bother. */
+ if (dio && f2fs_lfs_mode(sbi))
+ return 0;
+
+ /* No-wait I/O can't allocate blocks. */
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return 0;
+
+ /* If it will be a short write, don't bother. */
+ if (fault_in_iov_iter_readable(iter, count))
+ return 0;
+
+ if (f2fs_has_inline_data(inode)) {
+ /* If the data will fit inline, don't bother. */
+ if (pos + count <= MAX_INLINE_DATA(inode))
+ return 0;
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+
+ map.m_lblk = F2FS_BLK_ALIGN(pos);
+ map.m_len = F2FS_BYTES_TO_BLK(pos + count);
+ if (map.m_len > map.m_lblk)
+ map.m_len -= map.m_lblk;
+ else
+ map.m_len = 0;
+ map.m_may_create = true;
+ if (dio) {
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ flag = F2FS_GET_BLOCK_PRE_DIO;
+ } else {
+ map.m_seg_type = NO_CHECK_TYPE;
+ flag = F2FS_GET_BLOCK_PRE_AIO;
+ }
+
+ ret = f2fs_map_blocks(inode, &map, 1, flag);
+ /* -ENOSPC is only a fatal error if no blocks could be allocated. */
+ if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
+ return ret;
+ if (ret == 0)
+ set_inode_flag(inode, FI_PREALLOCATED_ALL);
+ return map.m_len;
+}
+
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ loff_t target_size;
+ int preallocated;
ssize_t ret;

if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4262,84 +4328,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)

if (unlikely(IS_IMMUTABLE(inode))) {
ret = -EPERM;
- goto unlock;
+ goto out_unlock;
}

if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
ret = -EPERM;
- goto unlock;
+ goto out_unlock;
}

ret = generic_write_checks(iocb, from);
if (ret > 0) {
- bool preallocated = false;
- size_t target_size = 0;
- int err;
-
- if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
- set_inode_flag(inode, FI_NO_PREALLOC);
-
- if ((iocb->ki_flags & IOCB_NOWAIT)) {
+ if (iocb->ki_flags & IOCB_NOWAIT) {
if (!f2fs_overwrite_io(inode, iocb->ki_pos,
iov_iter_count(from)) ||
f2fs_has_inline_data(inode) ||
f2fs_force_buffered_io(inode, iocb, from)) {
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
ret = -EAGAIN;
- goto out;
+ goto out_unlock;
}
- goto write;
}
-
- if (is_inode_flag_set(inode, FI_NO_PREALLOC))
- goto write;
-
if (iocb->ki_flags & IOCB_DIRECT) {
- /*
- * Convert inline data for Direct I/O before entering
- * f2fs_direct_IO().
- */
- err = f2fs_convert_inline_inode(inode);
- if (err)
- goto out_err;
- /*
- * If force_buffere_io() is true, we have to allocate
- * blocks all the time, since f2fs_direct_IO will fall
- * back to buffered IO.
- */
- if (!f2fs_force_buffered_io(inode, iocb, from) &&
- f2fs_lfs_mode(F2FS_I_SB(inode)))
- goto write;
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out_unlock;
}
- preallocated = true;
+ /* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
-
- err = f2fs_preallocate_blocks(iocb, from);
- if (err) {
-out_err:
- clear_inode_flag(inode, FI_NO_PREALLOC);
- inode_unlock(inode);
- ret = err;
- goto out;
+ preallocated = f2fs_preallocate_blocks(iocb, from);
+ if (preallocated < 0) {
+ ret = preallocated;
+ goto out_unlock;
}
-write:
+
ret = __generic_file_write_iter(iocb, from);
- clear_inode_flag(inode, FI_NO_PREALLOC);

- /* if we couldn't write data, we should deallocate blocks. */
- if (preallocated && i_size_read(inode) < target_size) {
+ /* Don't leave any preallocated blocks around past i_size. */
+ if (preallocated > 0 && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
f2fs_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
+ clear_inode_flag(inode, FI_PREALLOCATED_ALL);

if (ret > 0)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
}
-unlock:
+out_unlock:
inode_unlock(inode);
out:
trace_f2fs_file_write_iter(inode, iocb->ki_pos,
--
2.34.0.rc1.387.gb447b232ab-goog



2021-11-16 21:45:22

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

DIO preallocates physical blocks before writing data, but if an error occurs
or a power cut happens, stale block contents from the disk can become visible
to the user. This patch tries to fix it by 1) falling back to buffered writes
for DIO into holes, and 2) truncating unwritten blocks left behind by an error
or power cut.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 5 ++++-
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/file.c | 24 +++++++++++++++++++++++-
3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3b27fb7daa8b..7ac1a39fcad2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index be871a79c634..14bea669f87e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -654,6 +654,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80

#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)

@@ -681,6 +682,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)

+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0

enum {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 4bf77a5bf998..ec8de0662437 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);

+ /*
+ * Truncate stale preallocated blocks used by the previous DIO.
+ */
+ if (file_should_truncate(inode)) {
+ err = f2fs_truncate(inode);
+ if (err)
+ goto out_unlock;
+ file_dont_truncate(inode);
+ }
+
truncate_setsize(inode, attr->ia_size);

if (attr->ia_size <= old_size)
err = f2fs_truncate(inode);
+out_unlock:
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
@@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
/* If it will be an out-of-place direct write, don't bother. */
if (dio && f2fs_lfs_mode(sbi))
return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;

/* No-wait I/O can't allocate blocks. */
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (preallocated > 0 && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}
+
clear_inode_flag(inode, FI_PREALLOCATED_ALL);

if (ret > 0)
--
2.34.0.rc1.387.gb447b232ab-goog


2021-11-16 21:45:26

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/6] f2fs: reduce indentation in f2fs_file_write_iter()

From: Eric Biggers <[email protected]>

Replace 'if (ret > 0)' with 'if (ret <= 0) goto out_unlock;'.
No change in behavior.

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/file.c | 69 ++++++++++++++++++++++++++------------------------
1 file changed, 36 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ec8de0662437..fdc440d212c3 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4355,48 +4355,51 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}

ret = generic_write_checks(iocb, from);
- if (ret > 0) {
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
+ if (ret <= 0)
+ goto out_unlock;
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!f2fs_overwrite_io(inode, iocb->ki_pos,
+ iov_iter_count(from)) ||
f2fs_has_inline_data(inode) ||
f2fs_force_buffered_io(inode, iocb, from)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
- }
- if (iocb->ki_flags & IOCB_DIRECT) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- goto out_unlock;
- }
- /* Possibly preallocate the blocks for the write. */
- target_size = iocb->ki_pos + iov_iter_count(from);
- preallocated = f2fs_preallocate_blocks(iocb, from);
- if (preallocated < 0) {
- ret = preallocated;
+ ret = -EAGAIN;
goto out_unlock;
}
+ }

- ret = __generic_file_write_iter(iocb, from);
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out_unlock;
+ }
+ /* Possibly preallocate the blocks for the write. */
+ target_size = iocb->ki_pos + iov_iter_count(from);
+ preallocated = f2fs_preallocate_blocks(iocb, from);
+ if (preallocated < 0) {
+ ret = preallocated;
+ goto out_unlock;
+ }

- /* Don't leave any preallocated blocks around past i_size. */
- if (preallocated > 0 && i_size_read(inode) < target_size) {
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- filemap_invalidate_lock(inode->i_mapping);
- if (!f2fs_truncate(inode))
- file_dont_truncate(inode);
- filemap_invalidate_unlock(inode->i_mapping);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
- } else {
+ ret = __generic_file_write_iter(iocb, from);
+
+ /* Don't leave any preallocated blocks around past i_size. */
+ if (preallocated > 0 && i_size_read(inode) < target_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ filemap_invalidate_lock(inode->i_mapping);
+ if (!f2fs_truncate(inode))
file_dont_truncate(inode);
- }
+ filemap_invalidate_unlock(inode->i_mapping);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
+ }

- clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+ clear_inode_flag(inode, FI_PREALLOCATED_ALL);
+
+ if (ret > 0)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);

- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
- }
out_unlock:
inode_unlock(inode);
out:
--
2.34.0.rc1.387.gb447b232ab-goog


2021-11-16 21:45:28

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 4/6] f2fs: fix the f2fs_file_write_iter tracepoint

From: Eric Biggers <[email protected]>

Pass in the original position and count rather than the position and
count that were updated by the write. Also use the correct types for
all arguments, in particular the file offset which was being truncated
to 32 bits on 32-bit platforms.

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/file.c | 5 +++--
include/trace/events/f2fs.h | 12 ++++++------
2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fdc440d212c3..709fa893d832 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4321,6 +4321,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ const loff_t orig_pos = iocb->ki_pos;
+ const size_t orig_count = iov_iter_count(from);
loff_t target_size;
int preallocated;
ssize_t ret;
@@ -4403,8 +4405,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
out_unlock:
inode_unlock(inode);
out:
- trace_f2fs_file_write_iter(inode, iocb->ki_pos,
- iov_iter_count(from), ret);
+ trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index f8cb916f3595..dcb94d740e12 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes,

TRACE_EVENT(f2fs_file_write_iter,

- TP_PROTO(struct inode *inode, unsigned long offset,
- unsigned long length, int ret),
+ TP_PROTO(struct inode *inode, loff_t offset, size_t length,
+ ssize_t ret),

TP_ARGS(inode, offset, length, ret),

TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
- __field(unsigned long, offset)
- __field(unsigned long, length)
- __field(int, ret)
+ __field(loff_t, offset)
+ __field(size_t, length)
+ __field(ssize_t, ret)
),

TP_fast_assign(
@@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter,
),

TP_printk("dev = (%d,%d), ino = %lu, "
- "offset = %lu, length = %lu, written(err) = %d",
+ "offset = %lld, length = %zu, written(err) = %zd",
show_dev_ino(__entry),
__entry->offset,
__entry->length,
--
2.34.0.rc1.387.gb447b232ab-goog


2021-11-16 21:45:29

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 5/6] f2fs: implement iomap operations

From: Eric Biggers <[email protected]>

Implement 'struct iomap_ops' for f2fs, in preparation for making f2fs
use iomap for direct I/O.

Note that this may be used for other things besides direct I/O in the
future; however, for now I've only tested it for direct I/O.

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/Kconfig | 1 +
fs/f2fs/data.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/f2fs/f2fs.h | 1 +
3 files changed, 60 insertions(+)

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7eea3cfd894d..f46a7339d6cf 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -7,6 +7,7 @@ config F2FS_FS
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+ select FS_IOMAP
select LZ4_COMPRESS if F2FS_FS_LZ4
select LZ4_DECOMPRESS if F2FS_FS_LZ4
select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7ac1a39fcad2..43b3ca7cabe0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -21,6 +21,7 @@
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
+#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
@@ -4236,3 +4237,60 @@ void f2fs_destroy_bio_entry_cache(void)
{
kmem_cache_destroy(bio_entry_slab);
}
+
+static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+ unsigned int flags, struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct f2fs_map_blocks map = {};
+ pgoff_t next_pgofs = 0;
+ int err;
+
+ map.m_lblk = bytes_to_blks(inode, offset);
+ map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+ map.m_next_pgofs = &next_pgofs;
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ if (flags & IOMAP_WRITE)
+ map.m_may_create = true;
+
+ err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
+ F2FS_GET_BLOCK_DIO);
+ if (err)
+ return err;
+
+ iomap->offset = blks_to_bytes(inode, map.m_lblk);
+
+ if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
+ iomap->length = blks_to_bytes(inode, map.m_len);
+ if (map.m_flags & F2FS_MAP_MAPPED) {
+ iomap->type = IOMAP_MAPPED;
+ iomap->flags |= IOMAP_F_MERGED;
+ } else {
+ iomap->type = IOMAP_UNWRITTEN;
+ }
+ if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
+ return -EINVAL;
+ iomap->addr = blks_to_bytes(inode, map.m_pblk);
+
+ if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
+ return -EINVAL;
+ iomap->bdev = inode->i_sb->s_bdev;
+ } else {
+ iomap->length = blks_to_bytes(inode, next_pgofs) -
+ iomap->offset;
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ }
+
+ if (map.m_flags & F2FS_MAP_NEW)
+ iomap->flags |= IOMAP_F_NEW;
+ if ((inode->i_state & I_DIRTY_DATASYNC) ||
+ offset + length > i_size_read(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
+
+ return 0;
+}
+
+const struct iomap_ops f2fs_iomap_ops = {
+ .iomap_begin = f2fs_iomap_begin,
+};
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 14bea669f87e..0d199e8f2c1d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3654,6 +3654,7 @@ int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
+extern const struct iomap_ops f2fs_iomap_ops;

/*
* gc.c
--
2.34.0.rc1.387.gb447b232ab-goog


2021-11-16 21:45:31

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 6/6] f2fs: use iomap for direct I/O

From: Eric Biggers <[email protected]>

Make f2fs_file_read_iter() and f2fs_file_write_iter() use the iomap
direct I/O implementation instead of the fs/direct-io.c one.

The iomap implementation is more efficient, and it also avoids the need
to add new features and optimizations to the old implementation.

This new implementation also eliminates the need for f2fs to hook bio
submission and completion and to allocate memory per-bio. This is
because it's possible to correctly update f2fs's in-flight DIO counters
using __iomap_dio_rw() in combination with an implementation of
iomap_dio_ops::end_io() (as suggested by Christoph Hellwig).

When possible, this new implementation preserves existing f2fs behavior
such as the conditions for falling back to buffered I/O.

This patch has been tested with xfstests by running 'gce-xfstests -c
f2fs -g auto -X generic/017' with and without this patch; no regressions
were seen. (Some tests fail both before and after. generic/017 hangs
both before and after, so it had to be excluded.)

Signed-off-by: Eric Biggers <[email protected]>
Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 205 +---------------------------
fs/f2fs/f2fs.h | 8 +-
fs/f2fs/file.c | 343 +++++++++++++++++++++++++++++++++++++++++------
fs/f2fs/iostat.c | 15 +--
4 files changed, 311 insertions(+), 260 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 43b3ca7cabe0..a9124d94a5d1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1377,11 +1377,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
f2fs_invalidate_compress_page(sbi, old_blkaddr);
}
f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
-
- /*
- * i_size will be updated by direct_IO. Otherwise, we'll get stale
- * data from unwritten block via dio_read.
- */
return 0;
}

@@ -1743,50 +1738,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
return (blks << inode->i_blkbits);
}

-static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create, int flag,
- pgoff_t *next_pgofs, int seg_type, bool may_write)
-{
- struct f2fs_map_blocks map;
- int err;
-
- map.m_lblk = iblock;
- map.m_len = bytes_to_blks(inode, bh->b_size);
- map.m_next_pgofs = next_pgofs;
- map.m_next_extent = NULL;
- map.m_seg_type = seg_type;
- map.m_may_create = may_write;
-
- err = f2fs_map_blocks(inode, &map, create, flag);
- if (!err) {
- map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
- bh->b_size = blks_to_bytes(inode, map.m_len);
-
- if (map.m_multidev_dio)
- bh->b_bdev = map.m_bdev;
- }
- return err;
-}
-
-static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- true);
-}
-
-static int get_data_block_dio(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO, NULL,
- f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- false);
-}
-
static int f2fs_xattr_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo)
{
@@ -3262,7 +3213,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
FS_CP_DATA_IO : FS_DATA_IO);
}

-static void f2fs_write_failed(struct inode *inode, loff_t to)
+void f2fs_write_failed(struct inode *inode, loff_t to)
{
loff_t i_size = i_size_read(inode);

@@ -3550,158 +3501,6 @@ static int f2fs_write_end(struct file *file,
return copied;
}

-static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
- loff_t offset)
-{
- unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
- unsigned blkbits = i_blkbits;
- unsigned blocksize_mask = (1 << blkbits) - 1;
- unsigned long align = offset | iov_iter_alignment(iter);
- struct block_device *bdev = inode->i_sb->s_bdev;
-
- if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
- return 1;
-
- if (align & blocksize_mask) {
- if (bdev)
- blkbits = blksize_bits(bdev_logical_block_size(bdev));
- blocksize_mask = (1 << blkbits) - 1;
- if (align & blocksize_mask)
- return -EINVAL;
- return 1;
- }
- return 0;
-}
-
-static void f2fs_dio_end_io(struct bio *bio)
-{
- struct f2fs_private_dio *dio = bio->bi_private;
-
- dec_page_count(F2FS_I_SB(dio->inode),
- dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- bio->bi_private = dio->orig_private;
- bio->bi_end_io = dio->orig_end_io;
-
- kfree(dio);
-
- bio_endio(bio);
-}
-
-static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
- loff_t file_offset)
-{
- struct f2fs_private_dio *dio;
- bool write = (bio_op(bio) == REQ_OP_WRITE);
-
- dio = f2fs_kzalloc(F2FS_I_SB(inode),
- sizeof(struct f2fs_private_dio), GFP_NOFS);
- if (!dio)
- goto out;
-
- dio->inode = inode;
- dio->orig_end_io = bio->bi_end_io;
- dio->orig_private = bio->bi_private;
- dio->write = write;
-
- bio->bi_end_io = f2fs_dio_end_io;
- bio->bi_private = dio;
-
- inc_page_count(F2FS_I_SB(inode),
- write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
-
- submit_bio(bio);
- return;
-out:
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
-}
-
-static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
- struct inode *inode = mapping->host;
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct f2fs_inode_info *fi = F2FS_I(inode);
- size_t count = iov_iter_count(iter);
- loff_t offset = iocb->ki_pos;
- int rw = iov_iter_rw(iter);
- int err;
- enum rw_hint hint = iocb->ki_hint;
- int whint_mode = F2FS_OPTION(sbi).whint_mode;
- bool do_opu;
-
- err = check_direct_IO(inode, iter, offset);
- if (err)
- return err < 0 ? err : 0;
-
- if (f2fs_force_buffered_io(inode, iocb, iter))
- return 0;
-
- do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
-
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-
- if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = WRITE_LIFE_NOT_SET;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
- up_read(&fi->i_gc_rwsem[rw]);
- iocb->ki_hint = hint;
- err = -EAGAIN;
- goto out;
- }
- } else {
- down_read(&fi->i_gc_rwsem[rw]);
- if (do_opu)
- down_read(&fi->i_gc_rwsem[READ]);
- }
-
- err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, rw == WRITE ? get_data_block_dio_write :
- get_data_block_dio, NULL, f2fs_dio_submit_bio,
- rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
- DIO_SKIP_HOLES);
-
- if (do_opu)
- up_read(&fi->i_gc_rwsem[READ]);
-
- up_read(&fi->i_gc_rwsem[rw]);
-
- if (rw == WRITE) {
- if (whint_mode == WHINT_MODE_OFF)
- iocb->ki_hint = hint;
- if (err > 0) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- err);
- if (!do_opu)
- set_inode_flag(inode, FI_UPDATE_WRITE);
- } else if (err == -EIOCBQUEUED) {
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
- count - iov_iter_count(iter));
- } else if (err < 0) {
- f2fs_write_failed(inode, offset + count);
- }
- } else {
- if (err > 0)
- f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
- else if (err == -EIOCBQUEUED)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
- count - iov_iter_count(iter));
- }
-
-out:
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
-
- return err;
-}
-
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length)
{
@@ -4157,7 +3956,7 @@ const struct address_space_operations f2fs_dblock_aops = {
.set_page_dirty = f2fs_set_data_page_dirty,
.invalidatepage = f2fs_invalidate_page,
.releasepage = f2fs_release_page,
- .direct_IO = f2fs_direct_IO,
+ .direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 0d199e8f2c1d..26e92799ac6c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1806,13 +1806,6 @@ struct f2fs_sb_info {
#endif
};

-struct f2fs_private_dio {
- struct inode *inode;
- void *orig_private;
- bio_end_io_t *orig_end_io;
- bool write;
-};
-
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(sbi, type) \
printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
@@ -3641,6 +3634,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
struct writeback_control *wbc,
enum iostat_type io_type,
int compr_blocks, bool allow_balance);
+void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_page(struct page *page, unsigned int offset,
unsigned int length);
int f2fs_release_page(struct page *page, gfp_t wait);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 709fa893d832..60e5e2f1c1e8 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -24,6 +24,7 @@
#include <linux/sched/signal.h>
#include <linux/fileattr.h>
#include <linux/fadvise.h>
+#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
@@ -4229,23 +4230,145 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return __f2fs_ioctl(filp, cmd, arg);
}

-static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+/*
+ * Return %true if the given read or write request should use direct I/O, or
+ * %false if it should use buffered I/O.
+ */
+static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ unsigned int align;
+
+ if (!(iocb->ki_flags & IOCB_DIRECT))
+ return false;
+
+ if (f2fs_force_buffered_io(inode, iocb, iter))
+ return false;
+
+ /*
+ * Direct I/O not aligned to the disk's logical_block_size will be
+ * attempted, but will fail with -EINVAL.
+ *
+ * f2fs additionally requires that direct I/O be aligned to the
+ * filesystem block size, which is often a stricter requirement.
+ * However, f2fs traditionally falls back to buffered I/O on requests
+ * that are logical_block_size-aligned but not fs-block aligned.
+ *
+ * The below logic implements this behavior.
+ */
+ align = iocb->ki_pos | iov_iter_alignment(iter);
+ if (!IS_ALIGNED(align, i_blocksize(inode)) &&
+ IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
+ return false;
+
+ return true;
+}
+
+static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_READ);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
+ .end_io = f2fs_dio_read_end_io,
+};
+
+static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- int ret;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ const loff_t pos = iocb->ki_pos;
+ const size_t count = iov_iter_count(to);
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ if (count == 0)
+ return 0; /* skip atime update */
+
+ trace_f2fs_direct_IO_enter(inode, pos, count, READ);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_READ counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_READ);
+ dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_read_ops, 0, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_READ);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ up_read(&fi->i_gc_rwsem[READ]);
+
+ file_accessed(file);
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ ssize_t ret;

if (!f2fs_is_compress_backend_ready(inode))
return -EOPNOTSUPP;

- ret = generic_file_read_iter(iocb, iter);
+ if (f2fs_should_use_dio(inode, iocb, to))
+ return f2fs_dio_read_iter(iocb, to);

+ ret = filemap_read(iocb, to, 0);
if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
-
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
return ret;
}

+static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t count;
+ int err;
+
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
+ return -EPERM;
+
+ count = generic_write_checks(iocb, from);
+ if (count <= 0)
+ return count;
+
+ err = file_modified(file);
+ if (err)
+ return err;
+ return count;
+}
+
/*
* Preallocate blocks for a write request, if it is possible and helpful to do
* so. Returns a positive number if blocks may have been preallocated, 0 if no
@@ -4253,15 +4376,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
* requested blocks (not just some of them) have been allocated.
*/
-static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
+static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
+ bool dio)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
const loff_t pos = iocb->ki_pos;
const size_t count = iov_iter_count(iter);
struct f2fs_map_blocks map = {};
- bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
- !f2fs_force_buffered_io(inode, iocb, iter);
int flag;
int ret;

@@ -4317,13 +4439,174 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
return map.m_len;
}

-static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
+ struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ current->backing_dev_info = inode_to_bdi(inode);
+ ret = generic_perform_write(file, from, iocb->ki_pos);
+ current->backing_dev_info = NULL;
+
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
+ }
+ return ret;
+}
+
+static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
+ unsigned int flags)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
+
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ if (error)
+ return error;
+ f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
+ return 0;
+}
+
+static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
+ .end_io = f2fs_dio_write_end_io,
+};
+
+static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
+ bool *may_need_sync)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ const bool do_opu = f2fs_lfs_mode(sbi);
+ const int whint_mode = F2FS_OPTION(sbi).whint_mode;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
+ const enum rw_hint hint = iocb->ki_hint;
+ unsigned int dio_flags;
+ struct iomap_dio *dio;
+ ssize_t ret;
+
+ trace_f2fs_direct_IO_enter(inode, pos, count, WRITE);
+
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* f2fs_convert_inline_inode() and block allocation can block */
+ if (f2fs_has_inline_data(inode) ||
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
+ up_read(&fi->i_gc_rwsem[WRITE]);
+ ret = -EAGAIN;
+ goto out;
+ }
+ } else {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ goto out;
+
+ down_read(&fi->i_gc_rwsem[WRITE]);
+ if (do_opu)
+ down_read(&fi->i_gc_rwsem[READ]);
+ }
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = WRITE_LIFE_NOT_SET;
+
+ /*
+ * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
+ * the higher-level function iomap_dio_rw() in order to ensure that the
+ * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
+ */
+ inc_page_count(sbi, F2FS_DIO_WRITE);
+ dio_flags = 0;
+ if (pos + count > inode->i_size)
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
+ &f2fs_iomap_dio_write_ops, dio_flags, 0);
+ if (IS_ERR_OR_NULL(dio)) {
+ ret = PTR_ERR_OR_ZERO(dio);
+ if (ret == -ENOTBLK)
+ ret = 0;
+ if (ret != -EIOCBQUEUED)
+ dec_page_count(sbi, F2FS_DIO_WRITE);
+ } else {
+ ret = iomap_dio_complete(dio);
+ }
+
+ if (whint_mode == WHINT_MODE_OFF)
+ iocb->ki_hint = hint;
+ if (do_opu)
+ up_read(&fi->i_gc_rwsem[READ]);
+ up_read(&fi->i_gc_rwsem[WRITE]);
+
+ if (ret < 0)
+ goto out;
+ if (pos + ret > inode->i_size)
+ f2fs_i_size_write(inode, pos + ret);
+ if (!do_opu)
+ set_inode_flag(inode, FI_UPDATE_WRITE);
+
+ if (iov_iter_count(from)) {
+ ssize_t ret2;
+ loff_t bufio_start_pos = iocb->ki_pos;
+
+ /*
+ * The direct write was partial, so we need to fall back to a
+ * buffered write for the remainder.
+ */
+
+ ret2 = f2fs_buffered_write_iter(iocb, from);
+ if (iov_iter_count(from))
+ f2fs_write_failed(inode, iocb->ki_pos);
+ if (ret2 < 0)
+ goto out;
+
+ /*
+ * Ensure that the pagecache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ if (ret2 > 0) {
+ loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
+
+ ret += ret2;
+
+ ret2 = filemap_write_and_wait_range(file->f_mapping,
+ bufio_start_pos,
+ bufio_end_pos);
+ if (ret2 < 0)
+ goto out;
+ invalidate_mapping_pages(file->f_mapping,
+ bufio_start_pos >> PAGE_SHIFT,
+ bufio_end_pos >> PAGE_SHIFT);
+ }
+ } else {
+ /* iomap_dio_rw() already handled the generic_write_sync(). */
+ *may_need_sync = false;
+ }
+out:
+ trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
+ return ret;
+}
+
+static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
const loff_t orig_pos = iocb->ki_pos;
const size_t orig_count = iov_iter_count(from);
loff_t target_size;
+ bool dio;
+ bool may_need_sync = true;
int preallocated;
ssize_t ret;

@@ -4346,44 +4629,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}

- if (unlikely(IS_IMMUTABLE(inode))) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- ret = generic_write_checks(iocb, from);
+ ret = f2fs_write_checks(iocb, from);
if (ret <= 0)
goto out_unlock;

- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!f2fs_overwrite_io(inode, iocb->ki_pos,
- iov_iter_count(from)) ||
- f2fs_has_inline_data(inode) ||
- f2fs_force_buffered_io(inode, iocb, from)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
- }
+ /* Determine whether we will do a direct write or a buffered write. */
+ dio = f2fs_should_use_dio(inode, iocb, from);

- if (iocb->ki_flags & IOCB_DIRECT) {
- ret = f2fs_convert_inline_inode(inode);
- if (ret)
- goto out_unlock;
- }
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
- preallocated = f2fs_preallocate_blocks(iocb, from);
+ preallocated = f2fs_preallocate_blocks(iocb, from, dio);
if (preallocated < 0) {
ret = preallocated;
goto out_unlock;
}

- ret = __generic_file_write_iter(iocb, from);
+ /* Do the actual write. */
+ if (dio)
+ ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
+ else
+ ret = f2fs_buffered_write_iter(iocb, from);

/* Don't leave any preallocated blocks around past i_size. */
if (preallocated > 0 && i_size_read(inode) < target_size) {
@@ -4398,15 +4663,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}

clear_inode_flag(inode, FI_PREALLOCATED_ALL);
-
- if (ret > 0)
- f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
-
out_unlock:
inode_unlock(inode);
out:
trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
- if (ret > 0)
+ if (ret > 0 && may_need_sync)
ret = generic_write_sync(iocb, ret);
return ret;
}
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index cdcf54ae0db8..b911ea73c21a 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -166,15 +166,12 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_lock(&sbi->iostat_lock);
sbi->rw_iostat[type] += io_bytes;

- if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
- sbi->rw_iostat[APP_BUFFERED_IO] =
- sbi->rw_iostat[APP_WRITE_IO] -
- sbi->rw_iostat[APP_DIRECT_IO];
-
- if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
- sbi->rw_iostat[APP_BUFFERED_READ_IO] =
- sbi->rw_iostat[APP_READ_IO] -
- sbi->rw_iostat[APP_DIRECT_READ_IO];
+ if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
+ sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
+
+ if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
+ sbi->rw_iostat[APP_READ_IO] += io_bytes;
+
spin_unlock(&sbi->iostat_lock);

f2fs_record_iostat(sbi);
--
2.34.0.rc1.387.gb447b232ab-goog


2021-11-17 13:32:02

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/6] f2fs: rework write preallocations

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> From: Eric Biggers <[email protected]>
>
> f2fs_write_begin() assumes that all blocks were preallocated by
> default unless FI_NO_PREALLOC is explicitly set. This invites data
> corruption, as there are cases in which not all blocks are preallocated.
> Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing
> buffered_io") fixed one case, but there are others remaining.
>
> Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
> only gets set if all blocks for the current write were preallocated.
>
> Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
> handle some of the logic that was previously in write_iter() directly.
>
> Jaegeuk:
> DIO to holes are turning into buffered IO in f2fs_direct_IO, so we should
> not preallocate blocks.
>
> Signed-off-by: Eric Biggers <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 55 ++-------------------
> fs/f2fs/f2fs.h | 3 +-
> fs/f2fs/file.c | 130 +++++++++++++++++++++++++++++++------------------
> 3 files changed, 87 insertions(+), 101 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 9f754aaef558..3b27fb7daa8b 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
> return 0;
> }
>
> -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> -{
> - struct inode *inode = file_inode(iocb->ki_filp);
> - struct f2fs_map_blocks map;
> - int flag;
> - int err = 0;
> - bool direct_io = iocb->ki_flags & IOCB_DIRECT;
> -
> - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
> - map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
> - if (map.m_len > map.m_lblk)
> - map.m_len -= map.m_lblk;
> - else
> - map.m_len = 0;
> -
> - map.m_next_pgofs = NULL;
> - map.m_next_extent = NULL;
> - map.m_seg_type = NO_CHECK_TYPE;
> - map.m_may_create = true;
> -
> - if (direct_io) {
> - map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
> - flag = f2fs_force_buffered_io(inode, iocb, from) ?
> - F2FS_GET_BLOCK_PRE_AIO :
> - F2FS_GET_BLOCK_PRE_DIO;
> - goto map_blocks;
> - }
> - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
> - err = f2fs_convert_inline_inode(inode);
> - if (err)
> - return err;
> - }
> - if (f2fs_has_inline_data(inode))
> - return err;
> -
> - flag = F2FS_GET_BLOCK_PRE_AIO;
> -
> -map_blocks:
> - err = f2fs_map_blocks(inode, &map, 1, flag);
> - if (map.m_len > 0 && err == -ENOSPC) {
> - if (!direct_io)
> - set_inode_flag(inode, FI_NO_PREALLOC);
> - err = 0;
> - }
> - return err;
> -}
> -
> void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> {
> if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> @@ -3339,12 +3292,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> int flag;
>
> /*
> - * we already allocated all the blocks, so we don't need to get
> - * the block addresses when there is no need to fill the page.
> + * If a whole page is being written and we already preallocated all the
> + * blocks, then there is no need to get a block address now.
> */
> - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
> - !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
> - !f2fs_verity_in_progress(inode))
> + if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
> return 0;
>
> /* f2fs_lock_op avoids race between write CP and convert_inline_page */
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ce9fc9f13000..be871a79c634 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -715,7 +715,7 @@ enum {
> FI_INLINE_DOTS, /* indicate inline dot dentries */
> FI_DO_DEFRAG, /* indicate defragment is running */
> FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
> FI_HOT_DATA, /* indicate file is hot */
> FI_EXTRA_ATTR, /* indicate file has extra attribute */
> FI_PROJ_INHERIT, /* indicate file inherits projectid */
> @@ -3614,7 +3614,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
> int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
> int f2fs_reserve_new_block(struct dnode_of_data *dn);
> int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
> -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
> int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
> struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
> int op_flags, bool for_write);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 92ec2699bc85..4bf77a5bf998 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4235,10 +4235,76 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> return ret;
> }
>
> +/*
> + * Preallocate blocks for a write request, if it is possible and helpful to do
> + * so. Returns a positive number if blocks may have been preallocated, 0 if no
> + * blocks were preallocated, or a negative errno value if something went
> + * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
> + * requested blocks (not just some of them) have been allocated.
> + */
> +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> +{
> + struct inode *inode = file_inode(iocb->ki_filp);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + const loff_t pos = iocb->ki_pos;
> + const size_t count = iov_iter_count(iter);
> + struct f2fs_map_blocks map = {};
> + bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
> + !f2fs_force_buffered_io(inode, iocb, iter);
> + int flag;
> + int ret;
> +
> + /* If it will be an out-of-place direct write, don't bother. */
> + if (dio && f2fs_lfs_mode(sbi))
> + return 0;
> +
> + /* No-wait I/O can't allocate blocks. */
> + if (iocb->ki_flags & IOCB_NOWAIT)
> + return 0;
> +
> + /* If it will be a short write, don't bother. */
> + if (fault_in_iov_iter_readable(iter, count))
> + return 0;
> +
> + if (f2fs_has_inline_data(inode)) {
> + /* If the data will fit inline, don't bother. */
> + if (pos + count <= MAX_INLINE_DATA(inode))
> + return 0;
> + ret = f2fs_convert_inline_inode(inode);
> + if (ret)
> + return ret;
> + }
> +

/* do not preallocate block which is partially written */

Otherwise, it looks good to me.

Reviewed-by: Chao Yu <[email protected]>

Thanks,

> + map.m_lblk = F2FS_BLK_ALIGN(pos);
> + map.m_len = F2FS_BYTES_TO_BLK(pos + count);
> + if (map.m_len > map.m_lblk)
> + map.m_len -= map.m_lblk;
> + else
> + map.m_len = 0;
> + map.m_may_create = true;
> + if (dio) {
> + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
> + flag = F2FS_GET_BLOCK_PRE_DIO;
> + } else {
> + map.m_seg_type = NO_CHECK_TYPE;
> + flag = F2FS_GET_BLOCK_PRE_AIO;
> + }
> +
> + ret = f2fs_map_blocks(inode, &map, 1, flag);
> + /* -ENOSPC is only a fatal error if no blocks could be allocated. */
> + if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
> + return ret;
> + if (ret == 0)
> + set_inode_flag(inode, FI_PREALLOCATED_ALL);
> + return map.m_len;
> +}
> +
> static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> {
> struct file *file = iocb->ki_filp;
> struct inode *inode = file_inode(file);
> + loff_t target_size;
> + int preallocated;
> ssize_t ret;
>
> if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
> @@ -4262,84 +4328,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>
> if (unlikely(IS_IMMUTABLE(inode))) {
> ret = -EPERM;
> - goto unlock;
> + goto out_unlock;
> }
>
> if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
> ret = -EPERM;
> - goto unlock;
> + goto out_unlock;
> }
>
> ret = generic_write_checks(iocb, from);
> if (ret > 0) {
> - bool preallocated = false;
> - size_t target_size = 0;
> - int err;
> -
> - if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
> - set_inode_flag(inode, FI_NO_PREALLOC);
> -
> - if ((iocb->ki_flags & IOCB_NOWAIT)) {
> + if (iocb->ki_flags & IOCB_NOWAIT) {
> if (!f2fs_overwrite_io(inode, iocb->ki_pos,
> iov_iter_count(from)) ||
> f2fs_has_inline_data(inode) ||
> f2fs_force_buffered_io(inode, iocb, from)) {
> - clear_inode_flag(inode, FI_NO_PREALLOC);
> - inode_unlock(inode);
> ret = -EAGAIN;
> - goto out;
> + goto out_unlock;
> }
> - goto write;
> }
> -
> - if (is_inode_flag_set(inode, FI_NO_PREALLOC))
> - goto write;
> -
> if (iocb->ki_flags & IOCB_DIRECT) {
> - /*
> - * Convert inline data for Direct I/O before entering
> - * f2fs_direct_IO().
> - */
> - err = f2fs_convert_inline_inode(inode);
> - if (err)
> - goto out_err;
> - /*
> - * If force_buffere_io() is true, we have to allocate
> - * blocks all the time, since f2fs_direct_IO will fall
> - * back to buffered IO.
> - */
> - if (!f2fs_force_buffered_io(inode, iocb, from) &&
> - f2fs_lfs_mode(F2FS_I_SB(inode)))
> - goto write;
> + ret = f2fs_convert_inline_inode(inode);
> + if (ret)
> + goto out_unlock;
> }
> - preallocated = true;
> + /* Possibly preallocate the blocks for the write. */
> target_size = iocb->ki_pos + iov_iter_count(from);
> -
> - err = f2fs_preallocate_blocks(iocb, from);
> - if (err) {
> -out_err:
> - clear_inode_flag(inode, FI_NO_PREALLOC);
> - inode_unlock(inode);
> - ret = err;
> - goto out;
> + preallocated = f2fs_preallocate_blocks(iocb, from);
> + if (preallocated < 0) {
> + ret = preallocated;
> + goto out_unlock;
> }
> -write:
> +
> ret = __generic_file_write_iter(iocb, from);
> - clear_inode_flag(inode, FI_NO_PREALLOC);
>
> - /* if we couldn't write data, we should deallocate blocks. */
> - if (preallocated && i_size_read(inode) < target_size) {
> + /* Don't leave any preallocated blocks around past i_size. */
> + if (preallocated > 0 && i_size_read(inode) < target_size) {
> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
> f2fs_truncate(inode);
> filemap_invalidate_unlock(inode->i_mapping);
> up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> }
> + clear_inode_flag(inode, FI_PREALLOCATED_ALL);
>
> if (ret > 0)
> f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
> }
> -unlock:
> +out_unlock:
> inode_unlock(inode);
> out:
> trace_f2fs_file_write_iter(inode, iocb->ki_pos,
>

2021-11-17 15:17:02

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> DIO preallocates physical blocks before writing data, but if an error occurs
> or power-cut happens, we can see block contents from the disk. This patch tries
> to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> unwritten blocks from error or power-cut.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 5 ++++-
> fs/f2fs/f2fs.h | 5 +++++
> fs/f2fs/file.c | 24 +++++++++++++++++++++++-
> 3 files changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3b27fb7daa8b..7ac1a39fcad2 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> flag != F2FS_GET_BLOCK_DIO);
> err = __allocate_data_block(&dn,
> map->m_seg_type);
> - if (!err)
> + if (!err) {
> + if (flag == F2FS_GET_BLOCK_PRE_DIO)
> + file_need_truncate(inode);

This will leave the FADVISE_TRUNC_BIT flag set in the call path below; is that expected?

- fallocate
- expand_inode_data
- f2fs_map_blocks(F2FS_GET_BLOCK_PRE_DIO)

Thanks,

> set_inode_flag(inode, FI_APPEND_WRITE);
> + }
> }
> if (err)
> goto sync_out;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index be871a79c634..14bea669f87e 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -654,6 +654,7 @@ enum {
> #define FADVISE_KEEP_SIZE_BIT 0x10
> #define FADVISE_HOT_BIT 0x20
> #define FADVISE_VERITY_BIT 0x40
> +#define FADVISE_TRUNC_BIT 0x80
>
> #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
>
> @@ -681,6 +682,10 @@ enum {
> #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
> #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
>
> +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
> +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
> +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
> +
> #define DEF_DIR_LEVEL 0
>
> enum {
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 4bf77a5bf998..ec8de0662437 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> + /*
> + * Truncate stale preallocated blocks used by the previous DIO.
> + */
> + if (file_should_truncate(inode)) {
> + err = f2fs_truncate(inode);
> + if (err)
> + goto out_unlock;
> + file_dont_truncate(inode);
> + }
> +
> truncate_setsize(inode, attr->ia_size);
>
> if (attr->ia_size <= old_size)
> err = f2fs_truncate(inode);
> +out_unlock:
> /*
> * do not trim all blocks after i_size if target size is
> * larger than i_size.
> @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> /* If it will be an out-of-place direct write, don't bother. */
> if (dio && f2fs_lfs_mode(sbi))
> return 0;
> + /*
> + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
> + * buffered IO, if DIO meets any holes.
> + */
> + if (dio && i_size_read(inode) &&
> + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
> + return 0;
>
> /* No-wait I/O can't allocate blocks. */
> if (iocb->ki_flags & IOCB_NOWAIT)
> @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> if (preallocated > 0 && i_size_read(inode) < target_size) {
> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
> - f2fs_truncate(inode);
> + if (!f2fs_truncate(inode))
> + file_dont_truncate(inode);
> filemap_invalidate_unlock(inode->i_mapping);
> up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> + } else {
> + file_dont_truncate(inode);
> }
> +
> clear_inode_flag(inode, FI_PREALLOCATED_ALL);
>
> if (ret > 0)
>

2021-11-17 19:27:56

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/6] f2fs: rework write preallocations

On 11/17, Chao Yu wrote:
> On 2021/11/17 5:45, Jaegeuk Kim wrote:
> > From: Eric Biggers <[email protected]>
> >
> > f2fs_write_begin() assumes that all blocks were preallocated by
> > default unless FI_NO_PREALLOC is explicitly set. This invites data
> > corruption, as there are cases in which not all blocks are preallocated.
> > Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing
> > buffered_io") fixed one case, but there are others remaining.
> >
> > Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
> > only gets set if all blocks for the current write were preallocated.
> >
> > Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
> > handle some of the logic that was previously in write_iter() directly.
> >
> > Jaegeuk:
> > DIO to holes are turning into buffered IO in f2fs_direct_IO, so we should
> > not preallocate blocks.
> >
> > Signed-off-by: Eric Biggers <[email protected]>
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 55 ++-------------------
> > fs/f2fs/f2fs.h | 3 +-
> > fs/f2fs/file.c | 130 +++++++++++++++++++++++++++++++------------------
> > 3 files changed, 87 insertions(+), 101 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 9f754aaef558..3b27fb7daa8b 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
> > return 0;
> > }
> > -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> > -{
> > - struct inode *inode = file_inode(iocb->ki_filp);
> > - struct f2fs_map_blocks map;
> > - int flag;
> > - int err = 0;
> > - bool direct_io = iocb->ki_flags & IOCB_DIRECT;
> > -
> > - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
> > - map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
> > - if (map.m_len > map.m_lblk)
> > - map.m_len -= map.m_lblk;
> > - else
> > - map.m_len = 0;
> > -
> > - map.m_next_pgofs = NULL;
> > - map.m_next_extent = NULL;
> > - map.m_seg_type = NO_CHECK_TYPE;
> > - map.m_may_create = true;
> > -
> > - if (direct_io) {
> > - map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
> > - flag = f2fs_force_buffered_io(inode, iocb, from) ?
> > - F2FS_GET_BLOCK_PRE_AIO :
> > - F2FS_GET_BLOCK_PRE_DIO;
> > - goto map_blocks;
> > - }
> > - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
> > - err = f2fs_convert_inline_inode(inode);
> > - if (err)
> > - return err;
> > - }
> > - if (f2fs_has_inline_data(inode))
> > - return err;
> > -
> > - flag = F2FS_GET_BLOCK_PRE_AIO;
> > -
> > -map_blocks:
> > - err = f2fs_map_blocks(inode, &map, 1, flag);
> > - if (map.m_len > 0 && err == -ENOSPC) {
> > - if (!direct_io)
> > - set_inode_flag(inode, FI_NO_PREALLOC);
> > - err = 0;
> > - }
> > - return err;
> > -}
> > -
> > void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
> > {
> > if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> > @@ -3339,12 +3292,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> > int flag;
> > /*
> > - * we already allocated all the blocks, so we don't need to get
> > - * the block addresses when there is no need to fill the page.
> > + * If a whole page is being written and we already preallocated all the
> > + * blocks, then there is no need to get a block address now.
> > */
> > - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
> > - !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
> > - !f2fs_verity_in_progress(inode))
> > + if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
> > return 0;
> > /* f2fs_lock_op avoids race between write CP and convert_inline_page */
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index ce9fc9f13000..be871a79c634 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -715,7 +715,7 @@ enum {
> > FI_INLINE_DOTS, /* indicate inline dot dentries */
> > FI_DO_DEFRAG, /* indicate defragment is running */
> > FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> > - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
> > + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
> > FI_HOT_DATA, /* indicate file is hot */
> > FI_EXTRA_ATTR, /* indicate file has extra attribute */
> > FI_PROJ_INHERIT, /* indicate file inherits projectid */
> > @@ -3614,7 +3614,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
> > int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
> > int f2fs_reserve_new_block(struct dnode_of_data *dn);
> > int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
> > -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
> > int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
> > struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
> > int op_flags, bool for_write);
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 92ec2699bc85..4bf77a5bf998 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -4235,10 +4235,76 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> > return ret;
> > }
> > +/*
> > + * Preallocate blocks for a write request, if it is possible and helpful to do
> > + * so. Returns a positive number if blocks may have been preallocated, 0 if no
> > + * blocks were preallocated, or a negative errno value if something went
> > + * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
> > + * requested blocks (not just some of them) have been allocated.
> > + */
> > +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> > +{
> > + struct inode *inode = file_inode(iocb->ki_filp);
> > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > + const loff_t pos = iocb->ki_pos;
> > + const size_t count = iov_iter_count(iter);
> > + struct f2fs_map_blocks map = {};
> > + bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
> > + !f2fs_force_buffered_io(inode, iocb, iter);
> > + int flag;
> > + int ret;
> > +
> > + /* If it will be an out-of-place direct write, don't bother. */
> > + if (dio && f2fs_lfs_mode(sbi))
> > + return 0;
> > +
> > + /* No-wait I/O can't allocate blocks. */
> > + if (iocb->ki_flags & IOCB_NOWAIT)
> > + return 0;
> > +
> > + /* If it will be a short write, don't bother. */
> > + if (fault_in_iov_iter_readable(iter, count))
> > + return 0;
> > +
> > + if (f2fs_has_inline_data(inode)) {
> > + /* If the data will fit inline, don't bother. */
> > + if (pos + count <= MAX_INLINE_DATA(inode))
> > + return 0;
> > + ret = f2fs_convert_inline_inode(inode);
> > + if (ret)
> > + return ret;
> > + }
> > +
>
> /* do not preallocate block which is partially written */
>
> Otherwise, it looks good to me.

Applied with this.
/* Do not preallocate blocks that will be written partially in 4KB. */

>
> Reviewed-by: Chao Yu <[email protected]>
>
> Thanks,
>
> > + map.m_lblk = F2FS_BLK_ALIGN(pos);
> > + map.m_len = F2FS_BYTES_TO_BLK(pos + count);
> > + if (map.m_len > map.m_lblk)
> > + map.m_len -= map.m_lblk;
> > + else
> > + map.m_len = 0;
> > + map.m_may_create = true;
> > + if (dio) {
> > + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
> > + flag = F2FS_GET_BLOCK_PRE_DIO;
> > + } else {
> > + map.m_seg_type = NO_CHECK_TYPE;
> > + flag = F2FS_GET_BLOCK_PRE_AIO;
> > + }
> > +
> > + ret = f2fs_map_blocks(inode, &map, 1, flag);
> > + /* -ENOSPC is only a fatal error if no blocks could be allocated. */
> > + if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
> > + return ret;
> > + if (ret == 0)
> > + set_inode_flag(inode, FI_PREALLOCATED_ALL);
> > + return map.m_len;
> > +}
> > +
> > static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > {
> > struct file *file = iocb->ki_filp;
> > struct inode *inode = file_inode(file);
> > + loff_t target_size;
> > + int preallocated;
> > ssize_t ret;
> > if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
> > @@ -4262,84 +4328,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > if (unlikely(IS_IMMUTABLE(inode))) {
> > ret = -EPERM;
> > - goto unlock;
> > + goto out_unlock;
> > }
> > if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
> > ret = -EPERM;
> > - goto unlock;
> > + goto out_unlock;
> > }
> > ret = generic_write_checks(iocb, from);
> > if (ret > 0) {
> > - bool preallocated = false;
> > - size_t target_size = 0;
> > - int err;
> > -
> > - if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
> > - set_inode_flag(inode, FI_NO_PREALLOC);
> > -
> > - if ((iocb->ki_flags & IOCB_NOWAIT)) {
> > + if (iocb->ki_flags & IOCB_NOWAIT) {
> > if (!f2fs_overwrite_io(inode, iocb->ki_pos,
> > iov_iter_count(from)) ||
> > f2fs_has_inline_data(inode) ||
> > f2fs_force_buffered_io(inode, iocb, from)) {
> > - clear_inode_flag(inode, FI_NO_PREALLOC);
> > - inode_unlock(inode);
> > ret = -EAGAIN;
> > - goto out;
> > + goto out_unlock;
> > }
> > - goto write;
> > }
> > -
> > - if (is_inode_flag_set(inode, FI_NO_PREALLOC))
> > - goto write;
> > -
> > if (iocb->ki_flags & IOCB_DIRECT) {
> > - /*
> > - * Convert inline data for Direct I/O before entering
> > - * f2fs_direct_IO().
> > - */
> > - err = f2fs_convert_inline_inode(inode);
> > - if (err)
> > - goto out_err;
> > - /*
> > - * If force_buffere_io() is true, we have to allocate
> > - * blocks all the time, since f2fs_direct_IO will fall
> > - * back to buffered IO.
> > - */
> > - if (!f2fs_force_buffered_io(inode, iocb, from) &&
> > - f2fs_lfs_mode(F2FS_I_SB(inode)))
> > - goto write;
> > + ret = f2fs_convert_inline_inode(inode);
> > + if (ret)
> > + goto out_unlock;
> > }
> > - preallocated = true;
> > + /* Possibly preallocate the blocks for the write. */
> > target_size = iocb->ki_pos + iov_iter_count(from);
> > -
> > - err = f2fs_preallocate_blocks(iocb, from);
> > - if (err) {
> > -out_err:
> > - clear_inode_flag(inode, FI_NO_PREALLOC);
> > - inode_unlock(inode);
> > - ret = err;
> > - goto out;
> > + preallocated = f2fs_preallocate_blocks(iocb, from);
> > + if (preallocated < 0) {
> > + ret = preallocated;
> > + goto out_unlock;
> > }
> > -write:
> > +
> > ret = __generic_file_write_iter(iocb, from);
> > - clear_inode_flag(inode, FI_NO_PREALLOC);
> > - /* if we couldn't write data, we should deallocate blocks. */
> > - if (preallocated && i_size_read(inode) < target_size) {
> > + /* Don't leave any preallocated blocks around past i_size. */
> > + if (preallocated > 0 && i_size_read(inode) < target_size) {
> > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > filemap_invalidate_lock(inode->i_mapping);
> > f2fs_truncate(inode);
> > filemap_invalidate_unlock(inode->i_mapping);
> > up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > }
> > + clear_inode_flag(inode, FI_PREALLOCATED_ALL);
> > if (ret > 0)
> > f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
> > }
> > -unlock:
> > +out_unlock:
> > inode_unlock(inode);
> > out:
> > trace_f2fs_file_write_iter(inode, iocb->ki_pos,
> >

2021-11-17 19:32:56

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 11/17, Chao Yu wrote:
> On 2021/11/17 5:45, Jaegeuk Kim wrote:
> > DIO preallocates physical blocks before writing data, but if an error occurs
> > or power-cut happens, we can see block contents from the disk. This patch tries
> > to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> > unwritten blocks from error or power-cut.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 5 ++++-
> > fs/f2fs/f2fs.h | 5 +++++
> > fs/f2fs/file.c | 24 +++++++++++++++++++++++-
> > 3 files changed, 32 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 3b27fb7daa8b..7ac1a39fcad2 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> > flag != F2FS_GET_BLOCK_DIO);
> > err = __allocate_data_block(&dn,
> > map->m_seg_type);
> > - if (!err)
> > + if (!err) {
> > + if (flag == F2FS_GET_BLOCK_PRE_DIO)
> > + file_need_truncate(inode);
>
> We will leave the FADVISE_TRUNC_BIT flag set in the call path below; is this as expected?
>
> - fallocate
> - expand_inode_data
> - f2fs_map_blocks(F2FS_GET_BLOCK_PRE_DIO)

I missed it. :)

--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1698,6 +1698,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);

up_write(&sbi->pin_sem);

>
> Thanks,
>
> > set_inode_flag(inode, FI_APPEND_WRITE);
> > + }
> > }
> > if (err)
> > goto sync_out;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index be871a79c634..14bea669f87e 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -654,6 +654,7 @@ enum {
> > #define FADVISE_KEEP_SIZE_BIT 0x10
> > #define FADVISE_HOT_BIT 0x20
> > #define FADVISE_VERITY_BIT 0x40
> > +#define FADVISE_TRUNC_BIT 0x80
> > #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
> > @@ -681,6 +682,10 @@ enum {
> > #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
> > #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
> > +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
> > +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
> > +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
> > +
> > #define DEF_DIR_LEVEL 0
> > enum {
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 4bf77a5bf998..ec8de0662437 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > filemap_invalidate_lock(inode->i_mapping);
> > + /*
> > + * Truncate stale preallocated blocks used by the previous DIO.
> > + */
> > + if (file_should_truncate(inode)) {
> > + err = f2fs_truncate(inode);
> > + if (err)
> > + goto out_unlock;
> > + file_dont_truncate(inode);
> > + }
> > +
> > truncate_setsize(inode, attr->ia_size);
> > if (attr->ia_size <= old_size)
> > err = f2fs_truncate(inode);
> > +out_unlock:
> > /*
> > * do not trim all blocks after i_size if target size is
> > * larger than i_size.
> > @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> > /* If it will be an out-of-place direct write, don't bother. */
> > if (dio && f2fs_lfs_mode(sbi))
> > return 0;
> > + /*
> > + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
> > + * buffered IO, if DIO meets any holes.
> > + */
> > + if (dio && i_size_read(inode) &&
> > + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
> > + return 0;
> > /* No-wait I/O can't allocate blocks. */
> > if (iocb->ki_flags & IOCB_NOWAIT)
> > @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > if (preallocated > 0 && i_size_read(inode) < target_size) {
> > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > filemap_invalidate_lock(inode->i_mapping);
> > - f2fs_truncate(inode);
> > + if (!f2fs_truncate(inode))
> > + file_dont_truncate(inode);
> > filemap_invalidate_unlock(inode->i_mapping);
> > up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > + } else {
> > + file_dont_truncate(inode);
> > }
> > +
> > clear_inode_flag(inode, FI_PREALLOCATED_ALL);
> > if (ret > 0)
> >

2021-11-18 06:47:53

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/6] f2fs: rework write preallocations

On 2021/11/18 3:27, Jaegeuk Kim wrote:
>> /* do not preallocate block which is partially written */
>>
>> Otherwise, it looks good to me.
>
> Applied with this.
> /* Do not preallocate blocks that will be written partially in 4KB. */

Better. :)

Thanks,

2021-11-23 06:59:34

by kernel test robot

[permalink] [raw]
Subject: [f2fs] e029ce2460: aim7.jobs-per-min -35.8% regression



Greeting,

FYI, we noticed a -35.8% regression of aim7.jobs-per-min due to commit:


commit: e029ce24606afa6d6f2955f335eacd9ece4b1aa5 ("[PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO")
url: https://github.com/0day-ci/linux/commits/Jaegeuk-Kim/f2fs-rework-write-preallocations/20211117-054644
base: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git 8ab774587903771821b59471cc723bba6d893942
patch link: https://lore.kernel.org/lkml/[email protected]

in testcase: aim7
on test machine: 96 threads 2 sockets Ice Lake with 256G memory
with following parameters:

disk: 1BRD_48G
fs: f2fs
test: disk_cp
load: 3000
cpufreq_governor: performance
ucode: 0xb000280

test-description: AIM7 is a traditional UNIX system-level benchmark suite which is used to test and measure the performance of multiuser systems.
test-url: https://sourceforge.net/projects/aimbench/files/aim-suite7/



If you fix the issue, kindly add the following tag
Reported-by: kernel test robot <[email protected]>


Details are as below:
-------------------------------------------------------------------------------------------------->


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file

# if you come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.

=========================================================================================
compiler/cpufreq_governor/disk/fs/kconfig/load/rootfs/tbox_group/test/testcase/ucode:
gcc-9/performance/1BRD_48G/f2fs/x86_64-rhel-8.3/3000/debian-10.4-x86_64-20200603.cgz/lkp-icl-2sp1/disk_cp/aim7/0xb000280

commit:
b68dc3fea3 ("f2fs: rework write preallocations")
e029ce2460 ("f2fs: do not expose unwritten blocks to user by DIO")

b68dc3fea39693dd e029ce24606afa6d6f2955f335e
---------------- ---------------------------
%stddev %change %stddev
\ | \
87100 -35.8% 55923 aim7.jobs-per-min
206.86 +55.7% 322.12 aim7.time.elapsed_time
206.86 +55.7% 322.12 aim7.time.elapsed_time.max
1516775 +167.3% 4054678 ± 8% aim7.time.involuntary_context_switches
236791 +13.6% 268918 aim7.time.minor_page_faults
18616 +58.3% 29460 aim7.time.system_time
277681 ± 3% +821.5% 2558836 ±170% aim7.time.voluntary_context_switches
2422605 +45.2% 3517069 ± 53% cpuidle..usage
257.78 +44.6% 372.82 uptime.boot
6.49 ± 2% -30.2% 4.53 ± 12% iostat.cpu.idle
93.26 +2.2% 95.29 iostat.cpu.system
5.62 ± 2% -1.7 3.96 ± 13% mpstat.cpu.all.idle%
0.81 ± 4% -0.1 0.75 ± 3% mpstat.cpu.all.irq%
0.25 -0.1 0.17 ± 14% mpstat.cpu.all.usr%
5843 -34.1% 3853 vmstat.io.bo
208.17 ± 2% +64.0% 341.33 ± 3% vmstat.procs.r
10397 +139.4% 24897 ± 76% vmstat.system.cs
12826 ± 23% +120.5% 28277 ± 24% turbostat.C1
1587177 ± 31% +101.7% 3200802 ± 62% turbostat.C1E
3.33 ± 60% -2.6 0.75 ±145% turbostat.C6%
43052753 +53.5% 66076019 ± 2% turbostat.IRQ
50351 ? 5% +108.1% 104795 ? 40% numa-meminfo.node0.Dirty
362998 ? 3% +14.3% 414897 ? 7% numa-meminfo.node0.Inactive
50846 ? 8% +106.7% 105114 ? 42% numa-meminfo.node0.Inactive(file)
50880 ? 2% +107.7% 105657 ? 42% numa-meminfo.node1.Dirty
157330 ? 6% +62.0% 254842 ? 71% numa-meminfo.node1.Inactive
50014 ? 3% +108.8% 104440 ? 43% numa-meminfo.node1.Inactive(file)
12385 ? 4% +112.2% 26288 ? 41% numa-vmstat.node0.nr_dirty
12513 ? 7% +110.7% 26363 ? 43% numa-vmstat.node0.nr_inactive_file
12512 ? 7% +110.7% 26363 ? 43% numa-vmstat.node0.nr_zone_inactive_file
12388 ? 4% +112.2% 26290 ? 41% numa-vmstat.node0.nr_zone_write_pending
12521 +112.9% 26655 ? 42% numa-vmstat.node1.nr_dirty
12286 ? 2% +114.5% 26357 ? 43% numa-vmstat.node1.nr_inactive_file
12286 ? 2% +114.5% 26358 ? 43% numa-vmstat.node1.nr_zone_inactive_file
12522 +112.9% 26656 ? 42% numa-vmstat.node1.nr_zone_write_pending
424488 +89.0% 802172 ? 67% meminfo.Active
419205 +89.8% 795834 ? 68% meminfo.Active(anon)
5282 ? 5% +20.0% 6337 ? 14% meminfo.Active(file)
158198 +11.0% 175605 meminfo.AnonHugePages
102671 ? 3% +106.8% 212340 ? 40% meminfo.Dirty
521120 +28.9% 671691 ? 31% meminfo.Inactive
102220 ? 4% +106.9% 211456 ? 42% meminfo.Inactive(file)
255.83 ?223% +266.5% 937.67 ? 94% meminfo.Mlocked
468411 +89.2% 886050 ? 74% meminfo.Shmem
104722 +90.1% 199047 ? 68% proc-vmstat.nr_active_anon
1318 ? 5% +20.0% 1582 ? 13% proc-vmstat.nr_active_file
25488 ? 3% +107.2% 52815 ? 40% proc-vmstat.nr_dirty
785899 +16.8% 917769 ? 20% proc-vmstat.nr_file_pages
25386 ? 4% +107.1% 52576 ? 42% proc-vmstat.nr_inactive_file
64.33 ?223% +264.2% 234.33 ? 94% proc-vmstat.nr_mlock
117180 +89.0% 221498 ? 74% proc-vmstat.nr_shmem
306281 +2.2% 312981 proc-vmstat.nr_written
104722 +90.1% 199047 ? 68% proc-vmstat.nr_zone_active_anon
1318 ? 5% +20.0% 1582 ? 13% proc-vmstat.nr_zone_active_file
25386 ? 4% +107.1% 52576 ? 42% proc-vmstat.nr_zone_inactive_file
25490 ? 3% +107.2% 52817 ? 40% proc-vmstat.nr_zone_write_pending
85819 ? 7% +71.6% 147301 ? 50% proc-vmstat.numa_hint_faults
56269 ? 8% +59.2% 89607 ? 40% proc-vmstat.numa_hint_faults_local
210173 ? 4% +34.5% 282647 ? 29% proc-vmstat.numa_pte_updates
183300 +50.9% 276540 ? 65% proc-vmstat.pgactivate
1055131 +31.8% 1390380 ? 7% proc-vmstat.pgfault
55211 +46.4% 80822 proc-vmstat.pgreuse
1674 +9.1% 1826 proc-vmstat.unevictable_pgs_culled
13.13 ? 15% -19.3% 10.60 ? 8% perf-stat.i.MPKI
3.601e+09 -16.9% 2.991e+09 perf-stat.i.branch-instructions
0.98 ? 18% -0.3 0.63 ? 14% perf-stat.i.branch-miss-rate%
25420567 ? 3% -31.2% 17484559 ? 4% perf-stat.i.branch-misses
39.14 +1.8 40.92 perf-stat.i.cache-miss-rate%
70685250 -19.0% 57224802 ? 4% perf-stat.i.cache-misses
1.76e+08 -21.1% 1.389e+08 ? 7% perf-stat.i.cache-references
10406 +141.1% 25086 ? 76% perf-stat.i.context-switches
17.03 +26.9% 21.61 perf-stat.i.cpi
2.888e+11 +1.7% 2.938e+11 perf-stat.i.cpu-cycles
971.96 +251.9% 3420 ?149% perf-stat.i.cpu-migrations
4351 ? 3% +20.3% 5234 ? 3% perf-stat.i.cycles-between-cache-misses
4.523e+09 -19.9% 3.623e+09 ? 2% perf-stat.i.dTLB-loads
1.683e+09 -30.1% 1.176e+09 ? 4% perf-stat.i.dTLB-stores
1.645e+10 -18.7% 1.337e+10 ? 2% perf-stat.i.instructions
0.07 ? 5% -19.4% 0.06 ? 4% perf-stat.i.ipc
3.01 +1.7% 3.06 perf-stat.i.metric.GHz
407.78 ? 2% -21.6% 319.59 ? 5% perf-stat.i.metric.K/sec
103.94 -20.6% 82.57 ? 2% perf-stat.i.metric.M/sec
4471 -13.2% 3881 ? 7% perf-stat.i.minor-faults
30.85 +1.4 32.25 perf-stat.i.node-store-miss-rate%
8782535 -17.8% 7220567 ? 4% perf-stat.i.node-store-misses
19917112 -23.8% 15183368 ? 3% perf-stat.i.node-stores
4483 -13.2% 3891 ? 7% perf-stat.i.page-faults
0.71 ? 2% -0.1 0.58 ? 3% perf-stat.overall.branch-miss-rate%
17.56 +25.2% 22.00 ? 2% perf-stat.overall.cpi
4086 +25.9% 5144 ? 4% perf-stat.overall.cycles-between-cache-misses
0.01 ? 10% +0.0 0.01 ? 13% perf-stat.overall.dTLB-store-miss-rate%
0.06 -20.1% 0.05 ? 2% perf-stat.overall.ipc
30.60 +1.6 32.22 perf-stat.overall.node-store-miss-rate%
3.586e+09 -16.8% 2.983e+09 perf-stat.ps.branch-instructions
25294275 ? 3% -31.1% 17429845 ? 4% perf-stat.ps.branch-misses
70387551 -18.9% 57063161 ? 4% perf-stat.ps.cache-misses
1.753e+08 -21.0% 1.385e+08 ? 7% perf-stat.ps.cache-references
10353 +141.2% 24967 ? 76% perf-stat.ps.context-switches
2.876e+11 +1.9% 2.93e+11 perf-stat.ps.cpu-cycles
966.20 +251.5% 3396 ?148% perf-stat.ps.cpu-migrations
4.504e+09 -19.8% 3.613e+09 ? 2% perf-stat.ps.dTLB-loads
1.676e+09 -30.0% 1.172e+09 ? 4% perf-stat.ps.dTLB-stores
1.638e+10 -18.6% 1.333e+10 ? 2% perf-stat.ps.instructions
4431 -12.9% 3861 ? 7% perf-stat.ps.minor-faults
8744668 -17.7% 7199843 ? 4% perf-stat.ps.node-store-misses
19833140 -23.7% 15141433 ? 3% perf-stat.ps.node-stores
3.399e+12 +26.6% 4.303e+12 ? 2% perf-stat.total.instructions
0.46 ? 30% -59.4% 0.19 ? 60% perf-sched.sch_delay.avg.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.do_syscall_64
0.00 ?123% +5366.7% 0.19 ? 92% perf-sched.sch_delay.avg.ms.exit_to_user_mode_prepare.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
0.07 ? 50% +707.6% 0.55 ? 51% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.__filemap_get_folio.pagecache_get_page.__get_node_page
0.00 ?182% +65300.0% 0.65 ? 93% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.__filemap_get_folio.pagecache_get_page.f2fs_get_read_data_page
0.00 ?152% +40900.0% 0.20 ?195% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.down_read.__submit_merged_write_cond.f2fs_wait_on_page_writeback
0.02 ?197% +2522.5% 0.45 ? 58% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_get_node_info.f2fs_convert_inline_page
0.01 ?195% +3829.7% 0.48 ? 58% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.down_write.f2fs_drop_nlink.f2fs_unlink
0.02 ?221% +1954.7% 0.44 ? 67% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.dput.do_unlinkat.__x64_sys_unlink
0.09 ? 15% +829.5% 0.85 ? 42% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
8.46 ? 16% -77.2% 1.93 ? 79% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.kmem_cache_alloc.__alloc_nat_entry.constprop
0.00 ?124% +37786.4% 1.39 ?152% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.rmap_walk_anon.remove_migration_ptes.migrate_pages
11.73 ? 8% -63.0% 4.34 ? 46% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.stop_one_cpu.sched_exec.bprm_execve
0.00 ?223% +5.4e+05% 0.91 ? 54% perf-sched.sch_delay.avg.ms.preempt_schedule_common.__cond_resched.truncate_inode_pages_range.f2fs_evict_inode.evict
0.21 ?223% +428.0% 1.12 ? 41% perf-sched.sch_delay.avg.ms.rwsem_down_read_slowpath.f2fs_do_truncate_blocks.f2fs_truncate.f2fs_evict_inode
0.00 ? 86% +8753.8% 0.38 ? 34% perf-sched.sch_delay.avg.ms.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
0.00 ?141% +920.0% 0.02 ? 10% perf-sched.sch_delay.avg.ms.rwsem_down_write_slowpath.block_operations.f2fs_write_checkpoint.__write_checkpoint_sync
2.11 ? 9% -81.2% 0.40 ?125% perf-sched.sch_delay.avg.ms.rwsem_down_write_slowpath.set_node_addr.f2fs_new_node_page.f2fs_new_inode_page
1.17 ? 7% -85.1% 0.17 ? 53% perf-sched.sch_delay.avg.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
0.04 ? 6% -25.4% 0.03 ? 26% perf-sched.sch_delay.avg.ms.schedule_timeout.rcu_gp_fqs_loop.rcu_gp_kthread.kthread
0.02 ? 7% -34.5% 0.01 ? 25% perf-sched.sch_delay.avg.ms.smpboot_thread_fn.kthread.ret_from_fork
3.60 ? 12% -63.9% 1.30 ? 57% perf-sched.sch_delay.avg.ms.wait_for_partner.fifo_open.do_dentry_open.do_open.isra
0.03 ?119% +13967.6% 4.41 ?108% perf-sched.sch_delay.max.ms.exit_to_user_mode_prepare.irqentry_exit_to_user_mode.asm_sysvec_call_function_single.[unknown]
0.01 ?187% +60658.5% 4.15 ?104% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.__filemap_get_folio.pagecache_get_page.f2fs_get_read_data_page
0.01 ?142% +19116.4% 2.15 ?185% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.down_read.__submit_merged_write_cond.f2fs_wait_on_page_writeback
0.40 ?202% +946.4% 4.16 ? 57% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.down_write.f2fs_drop_nlink.f2fs_unlink
1.49 ?219% +411.7% 7.64 ? 60% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.dput.do_unlinkat.__x64_sys_unlink
22.55 ? 16% -66.6% 7.54 ? 96% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.kmem_cache_alloc.__alloc_nat_entry.constprop
47.78 ? 18% -65.9% 16.28 ? 53% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.stop_one_cpu.sched_exec.bprm_execve
0.02 ?201% +68657.9% 10.89 ? 49% perf-sched.sch_delay.max.ms.preempt_schedule_common.__cond_resched.truncate_inode_pages_range.f2fs_evict_inode.evict
2.14 ?223% +968.9% 22.90 ? 41% perf-sched.sch_delay.max.ms.rwsem_down_read_slowpath.f2fs_do_truncate_blocks.f2fs_truncate.f2fs_evict_inode
2.10 ?223% +887.7% 20.74 ? 51% perf-sched.sch_delay.max.ms.rwsem_down_read_slowpath.f2fs_evict_inode.evict.__dentry_kill
5.02 ?144% +19328.9% 976.11 ?215% perf-sched.sch_delay.max.ms.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
0.02 ?155% +12554.5% 2.13 ? 78% perf-sched.sch_delay.max.ms.rwsem_down_write_slowpath.block_operations.f2fs_write_checkpoint.__write_checkpoint_sync
20.30 ? 11% -65.0% 7.11 ? 53% perf-sched.sch_delay.max.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
21.40 ? 14% -38.3% 13.19 ? 48% perf-sched.sch_delay.max.ms.wait_for_partner.fifo_open.do_dentry_open.do_open.isra
175.61 ? 3% -49.4% 88.94 ? 38% perf-sched.total_wait_and_delay.average.ms
100978 ? 5% +416.1% 521131 ?161% perf-sched.total_wait_and_delay.count.ms
175.45 ? 3% -49.8% 88.12 ? 38% perf-sched.total_wait_time.average.ms
177.38 ? 5% -60.7% 69.71 ? 39% perf-sched.wait_and_delay.avg.ms.preempt_schedule_common.__cond_resched.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
1041 ? 29% -86.1% 144.59 ?115% perf-sched.wait_and_delay.avg.ms.preempt_schedule_common.__cond_resched.stop_one_cpu.migrate_task_to.task_numa_migrate
363.34 ? 29% -72.4% 100.46 ?100% perf-sched.wait_and_delay.avg.ms.preempt_schedule_common.__cond_resched.wait_for_completion.stop_two_cpus.migrate_swap
17.79 ? 33% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
8.42 ?180% +1003.1% 92.89 ? 38% perf-sched.wait_and_delay.avg.ms.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
814.45 ? 14% -60.0% 326.13 ? 87% perf-sched.wait_and_delay.avg.ms.rwsem_down_write_slowpath.set_node_addr.f2fs_new_node_page.f2fs_new_inode_page
19.03 ? 3% -100.0% 0.00 perf-sched.wait_and_delay.avg.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
2521 ? 6% +628.9% 18377 ? 56% perf-sched.wait_and_delay.count.exit_to_user_mode_prepare.syscall_exit_to_user_mode.do_syscall_64.entry_SYSCALL_64_after_hwframe
73522 ? 8% -66.0% 24985 ? 65% perf-sched.wait_and_delay.count.preempt_schedule_common.__cond_resched.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
245.67 ? 9% -61.4% 94.83 ? 86% perf-sched.wait_and_delay.count.preempt_schedule_common.__cond_resched.wait_for_completion.stop_two_cpus.migrate_swap
157.17 ? 15% -87.1% 20.33 ? 55% perf-sched.wait_and_delay.count.rwsem_down_read_slowpath.f2fs_get_node_info.f2fs_convert_inline_page.f2fs_convert_inline_inode
2833 ? 12% -100.0% 0.00 perf-sched.wait_and_delay.count.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
206.83 ?143% +2.1e+05% 427837 ?192% perf-sched.wait_and_delay.count.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
4989 ? 3% -30.5% 3466 ? 44% perf-sched.wait_and_delay.count.rwsem_down_write_slowpath.path_openat.do_filp_open.do_sys_openat2
114.67 ? 13% -87.2% 14.67 ? 75% perf-sched.wait_and_delay.count.rwsem_down_write_slowpath.set_node_addr.f2fs_new_node_page.f2fs_new_inode_page
2693 ? 12% -100.0% 0.00 perf-sched.wait_and_delay.count.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
2453 -25.1% 1837 ? 6% perf-sched.wait_and_delay.count.smpboot_thread_fn.kthread.ret_from_fork
3247 ? 40% -55.0% 1459 ? 46% perf-sched.wait_and_delay.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.do_syscall_64
3476 ? 69% -100.0% 0.00 perf-sched.wait_and_delay.max.ms.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
848.44 ?220% +473.4% 4864 ? 44% perf-sched.wait_and_delay.max.ms.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
96.54 ? 11% -100.0% 0.00 perf-sched.wait_and_delay.max.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
0.85 ?185% +1434.2% 13.09 ? 17% perf-sched.wait_time.avg.ms.io_schedule.__filemap_get_folio.pagecache_get_page.__get_node_page
256.79 ? 39% -82.9% 43.98 ?200% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.__alloc_pages.folio_alloc.__filemap_get_folio
122.03 ? 37% -70.7% 35.70 ?108% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.__filemap_get_folio.pagecache_get_page.__get_node_page
16.14 ? 10% -75.2% 4.00 ? 69% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.__invalidate_mapping_pages.truncate_node.f2fs_remove_inode_page
186.06 ? 23% -80.3% 36.57 ?129% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.copy_page_to_iter.filemap_read.f2fs_file_read_iter
16.03 ? 9% -72.1% 4.47 ? 80% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_evict_inode.evict
16.03 ? 26% -85.2% 2.38 ?118% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_getxattr.__f2fs_get_acl
12.79 ? 63% -98.3% 0.21 ?203% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_new_inode.f2fs_create
175.40 ? 27% -97.8% 3.92 ? 79% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.down_write.f2fs_file_write_iter.new_sync_write
3.50 ? 20% -70.5% 1.03 ? 30% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.dput.__fput.task_work_run
181.38 ? 51% -97.9% 3.85 ? 91% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.filemap_read.f2fs_file_read_iter.new_sync_read
177.29 ? 5% -61.2% 68.86 ? 39% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
16.56 ? 12% -75.5% 4.05 ? 68% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.kmem_cache_alloc.__alloc_nat_entry.constprop
12.69 ? 10% -100.0% 0.00 perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.mutex_lock.f2fs_build_free_nids.f2fs_balance_fs_bg
14.75 ? 16% -77.8% 3.27 ? 88% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.rmap_walk_anon.remove_migration_ptes.migrate_pages
1040 ? 29% -84.7% 159.61 ? 96% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.stop_one_cpu.migrate_task_to.task_numa_migrate
0.02 ?223% +3459.6% 0.56 ? 94% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.stop_one_cpu.sched_exec.bprm_execve
15.96 ? 6% -75.6% 3.90 ? 93% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.truncate_inode_pages_range.f2fs_evict_inode.evict
170.31 ? 25% -78.2% 37.20 ?193% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.vfs_write.ksys_write.do_syscall_64
362.45 ? 30% -72.2% 100.68 ? 98% perf-sched.wait_time.avg.ms.preempt_schedule_common.__cond_resched.wait_for_completion.stop_two_cpus.migrate_swap
0.35 ?220% +1140.5% 4.36 ? 78% perf-sched.wait_time.avg.ms.rwsem_down_read_slowpath.f2fs_do_truncate_blocks.f2fs_truncate.f2fs_evict_inode
0.57 ?222% +924.5% 5.84 ? 5% perf-sched.wait_time.avg.ms.rwsem_down_read_slowpath.f2fs_evict_inode.evict.__dentry_kill
17.79 ? 33% -69.6% 5.41 ? 21% perf-sched.wait_time.avg.ms.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
8.17 ?185% +1024.3% 91.82 ? 38% perf-sched.wait_time.avg.ms.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
0.04 ?202% +3988.7% 1.51 ? 86% perf-sched.wait_time.avg.ms.rwsem_down_write_slowpath.block_operations.f2fs_write_checkpoint.__write_checkpoint_sync
812.34 ? 14% -59.6% 327.90 ? 86% perf-sched.wait_time.avg.ms.rwsem_down_write_slowpath.set_node_addr.f2fs_new_node_page.f2fs_new_inode_page
17.86 ? 4% -78.1% 3.92 ? 24% perf-sched.wait_time.avg.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
1.98 ? 14% -87.1% 0.25 ? 90% perf-sched.wait_time.avg.ms.wait_for_partner.fifo_open.do_dentry_open.do_open.isra
3247 ? 40% -55.1% 1459 ? 46% perf-sched.wait_time.max.ms.do_task_dead.do_exit.do_group_exit.__x64_sys_exit_group.do_syscall_64
7.31 ?155% +66037.9% 4832 ? 44% perf-sched.wait_time.max.ms.io_schedule.__filemap_get_folio.pagecache_get_page.__get_node_page
49.14 ? 32% -62.9% 18.22 ? 15% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.__invalidate_mapping_pages.truncate_node.f2fs_remove_inode_page
46.96 ? 38% -56.1% 20.61 ? 36% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_evict_inode.evict
23.17 ? 26% -79.4% 4.78 ?121% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_getxattr.__f2fs_get_acl
15.37 ? 58% -97.5% 0.38 ?212% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.down_read.f2fs_new_inode.f2fs_create
5177 ? 2% -99.7% 15.57 ? 33% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.down_write.f2fs_file_write_iter.new_sync_write
4424 ? 44% -99.6% 16.06 ? 17% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.filemap_read.f2fs_file_read_iter.new_sync_read
42.99 ? 24% -71.6% 12.20 ? 74% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.kmem_cache_alloc.__alloc_nat_entry.constprop
20.98 ? 13% -100.0% 0.00 perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.mutex_lock.f2fs_build_free_nids.f2fs_balance_fs_bg
29.08 ? 22% -70.5% 8.58 ? 77% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.rmap_walk_anon.remove_migration_ptes.migrate_pages
55.12 ? 16% -74.8% 13.89 ? 13% perf-sched.wait_time.max.ms.preempt_schedule_common.__cond_resched.truncate_inode_pages_range.f2fs_evict_inode.evict
3.52 ?221% +695.4% 28.03 ? 21% perf-sched.wait_time.max.ms.rwsem_down_read_slowpath.f2fs_do_truncate_blocks.f2fs_truncate.f2fs_evict_inode
2.33 ?222% +1266.3% 31.90 ? 33% perf-sched.wait_time.max.ms.rwsem_down_read_slowpath.f2fs_evict_inode.evict.__dentry_kill
3476 ? 69% -99.4% 22.05 ? 34% perf-sched.wait_time.max.ms.rwsem_down_read_slowpath.f2fs_get_node_info.truncate_node.f2fs_remove_inode_page
846.34 ?220% +474.4% 4861 ? 44% perf-sched.wait_time.max.ms.rwsem_down_read_slowpath.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
0.85 ?217% +2854.3% 25.17 ? 32% perf-sched.wait_time.max.ms.rwsem_down_write_slowpath.block_operations.f2fs_write_checkpoint.__write_checkpoint_sync
96.49 ? 11% -80.7% 18.61 ? 27% perf-sched.wait_time.max.ms.rwsem_down_write_slowpath.set_node_addr.truncate_node.f2fs_remove_inode_page
21.00 ? 32% -62.0% 7.98 ? 63% perf-sched.wait_time.max.ms.wait_for_partner.fifo_open.do_dentry_open.do_open.isra
95.85 -36.9 58.96 perf-profile.calltrace.cycles-pp.__generic_file_write_iter.f2fs_file_write_iter.new_sync_write.vfs_write.ksys_write
95.79 -36.9 58.92 perf-profile.calltrace.cycles-pp.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter.new_sync_write.vfs_write
63.02 -24.4 38.60 perf-profile.calltrace.cycles-pp.f2fs_write_end.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter.new_sync_write
62.22 -24.1 38.12 perf-profile.calltrace.cycles-pp.f2fs_mark_inode_dirty_sync.f2fs_write_end.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
62.21 -24.1 38.12 perf-profile.calltrace.cycles-pp.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_write_end.generic_perform_write.__generic_file_write_iter
62.19 -24.1 38.10 perf-profile.calltrace.cycles-pp._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_write_end.generic_perform_write
61.92 -24.0 37.95 perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_write_end
32.48 -12.4 20.14 perf-profile.calltrace.cycles-pp.f2fs_write_begin.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter.new_sync_write
31.29 -11.9 19.37 perf-profile.calltrace.cycles-pp.f2fs_get_block.f2fs_write_begin.generic_perform_write.__generic_file_write_iter.f2fs_file_write_iter
31.23 -11.9 19.33 perf-profile.calltrace.cycles-pp.f2fs_reserve_block.f2fs_get_block.f2fs_write_begin.generic_perform_write.__generic_file_write_iter
31.22 -11.9 19.33 perf-profile.calltrace.cycles-pp.f2fs_reserve_new_blocks.f2fs_reserve_block.f2fs_get_block.f2fs_write_begin.generic_perform_write
15.60 -6.0 9.63 perf-profile.calltrace.cycles-pp.__mark_inode_dirty.f2fs_reserve_new_blocks.f2fs_reserve_block.f2fs_get_block.f2fs_write_begin
15.49 -5.9 9.56 perf-profile.calltrace.cycles-pp.f2fs_inode_dirtied.__mark_inode_dirty.f2fs_reserve_new_blocks.f2fs_reserve_block.f2fs_get_block
15.48 -5.9 9.56 perf-profile.calltrace.cycles-pp._raw_spin_lock.f2fs_inode_dirtied.__mark_inode_dirty.f2fs_reserve_new_blocks.f2fs_reserve_block
15.42 -5.9 9.52 perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.f2fs_inode_dirtied.__mark_inode_dirty.f2fs_reserve_new_blocks
15.49 -5.9 9.63 perf-profile.calltrace.cycles-pp.f2fs_mark_inode_dirty_sync.f2fs_reserve_new_blocks.f2fs_reserve_block.f2fs_get_block.f2fs_write_begin
15.48 -5.9 9.63 perf-profile.calltrace.cycles-pp.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_reserve_new_blocks.f2fs_reserve_block.f2fs_get_block
15.48 -5.8 9.63 perf-profile.calltrace.cycles-pp._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_reserve_new_blocks.f2fs_reserve_block
15.46 -5.8 9.62 perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_reserve_new_blocks
0.78 -0.5 0.26 ?100% perf-profile.calltrace.cycles-pp.ksys_read.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.83 -0.4 0.44 ? 44% perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.read
0.82 -0.4 0.44 ? 44% perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.read
0.92 -0.3 0.59 ? 3% perf-profile.calltrace.cycles-pp.read
96.75 +1.7 98.44 perf-profile.calltrace.cycles-pp.write
96.64 +1.7 98.37 perf-profile.calltrace.cycles-pp.entry_SYSCALL_64_after_hwframe.write
96.62 +1.7 98.35 perf-profile.calltrace.cycles-pp.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
96.57 +1.8 98.32 perf-profile.calltrace.cycles-pp.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
96.53 +1.8 98.30 perf-profile.calltrace.cycles-pp.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe.write
96.46 +1.8 98.25 perf-profile.calltrace.cycles-pp.new_sync_write.vfs_write.ksys_write.do_syscall_64.entry_SYSCALL_64_after_hwframe
96.42 +1.8 98.24 perf-profile.calltrace.cycles-pp.f2fs_file_write_iter.new_sync_write.vfs_write.ksys_write.do_syscall_64
0.00 +38.6 38.60 perf-profile.calltrace.cycles-pp.native_queued_spin_lock_slowpath._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_file_write_iter
0.00 +38.7 38.69 perf-profile.calltrace.cycles-pp._raw_spin_lock.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_file_write_iter.new_sync_write
0.00 +38.7 38.70 perf-profile.calltrace.cycles-pp.f2fs_inode_dirtied.f2fs_mark_inode_dirty_sync.f2fs_file_write_iter.new_sync_write.vfs_write
0.00 +38.7 38.70 perf-profile.calltrace.cycles-pp.f2fs_mark_inode_dirty_sync.f2fs_file_write_iter.new_sync_write.vfs_write.ksys_write
95.87 -36.9 58.97 perf-profile.children.cycles-pp.__generic_file_write_iter
95.81 -36.9 58.94 perf-profile.children.cycles-pp.generic_perform_write
63.03 -24.4 38.60 perf-profile.children.cycles-pp.f2fs_write_end
32.49 -12.4 20.14 perf-profile.children.cycles-pp.f2fs_write_begin
31.35 -11.9 19.40 perf-profile.children.cycles-pp.f2fs_reserve_new_blocks
31.36 -11.9 19.41 perf-profile.children.cycles-pp.f2fs_reserve_block
31.29 -11.9 19.37 perf-profile.children.cycles-pp.f2fs_get_block
15.85 -6.1 9.78 perf-profile.children.cycles-pp.__mark_inode_dirty
1.34 ? 7% -1.0 0.34 ? 14% perf-profile.children.cycles-pp.rwsem_down_write_slowpath
1.00 ? 9% -0.8 0.19 ? 22% perf-profile.children.cycles-pp.osq_lock
0.99 ? 4% -0.7 0.32 ? 9% perf-profile.children.cycles-pp.unlink
0.99 ? 4% -0.7 0.32 ? 9% perf-profile.children.cycles-pp.__x64_sys_unlink
0.99 ? 4% -0.7 0.32 ? 9% perf-profile.children.cycles-pp.do_unlinkat
0.80 ? 6% -0.5 0.30 ? 9% perf-profile.children.cycles-pp.creat64
0.81 ? 6% -0.5 0.31 ? 8% perf-profile.children.cycles-pp.do_sys_open
0.81 ? 6% -0.5 0.31 ? 8% perf-profile.children.cycles-pp.do_sys_openat2
0.81 ? 6% -0.5 0.31 ? 8% perf-profile.children.cycles-pp.do_filp_open
0.81 ? 6% -0.5 0.31 ? 8% perf-profile.children.cycles-pp.path_openat
0.94 -0.3 0.60 ? 2% perf-profile.children.cycles-pp.read
0.78 -0.3 0.50 ? 3% perf-profile.children.cycles-pp.ksys_read
0.74 -0.3 0.47 ? 3% perf-profile.children.cycles-pp.vfs_read
0.69 -0.3 0.44 ? 3% perf-profile.children.cycles-pp.new_sync_read
0.66 -0.2 0.42 ? 3% perf-profile.children.cycles-pp.f2fs_file_read_iter
0.64 -0.2 0.41 ? 3% perf-profile.children.cycles-pp.filemap_read
0.52 -0.2 0.32 ? 3% perf-profile.children.cycles-pp.folio_unlock
0.51 -0.2 0.32 ? 2% perf-profile.children.cycles-pp.syscall_exit_to_user_mode
0.32 ? 2% -0.2 0.14 ? 3% perf-profile.children.cycles-pp.rwsem_spin_on_owner
0.47 -0.2 0.30 perf-profile.children.cycles-pp.exit_to_user_mode_prepare
0.46 -0.2 0.28 perf-profile.children.cycles-pp.__close
0.45 -0.2 0.28 ? 2% perf-profile.children.cycles-pp.evict
0.46 -0.2 0.28 perf-profile.children.cycles-pp.task_work_run
0.46 -0.2 0.28 ? 2% perf-profile.children.cycles-pp.__fput
0.46 -0.2 0.28 ? 2% perf-profile.children.cycles-pp.__dentry_kill
0.45 -0.2 0.28 ? 2% perf-profile.children.cycles-pp.f2fs_evict_inode
0.46 -0.2 0.28 perf-profile.children.cycles-pp.dput
0.39 -0.1 0.26 ? 2% perf-profile.children.cycles-pp.pagecache_get_page
0.37 ? 4% -0.1 0.24 ? 3% perf-profile.children.cycles-pp.filemap_get_pages
0.35 ? 3% -0.1 0.23 ? 3% perf-profile.children.cycles-pp.filemap_get_read_batch
0.28 -0.1 0.16 ? 4% perf-profile.children.cycles-pp.folio_mark_dirty
0.32 -0.1 0.20 perf-profile.children.cycles-pp.f2fs_convert_inline_inode
0.38 ? 2% -0.1 0.26 ? 3% perf-profile.children.cycles-pp.__filemap_get_folio
0.32 -0.1 0.20 ? 2% perf-profile.children.cycles-pp.f2fs_convert_inline_page
0.26 -0.1 0.15 ? 3% perf-profile.children.cycles-pp.f2fs_set_data_page_dirty
0.26 -0.1 0.16 ? 2% perf-profile.children.cycles-pp.f2fs_create
0.25 ? 2% -0.1 0.15 ? 3% perf-profile.children.cycles-pp.f2fs_add_regular_entry
0.25 ? 2% -0.1 0.16 ? 3% perf-profile.children.cycles-pp.f2fs_do_add_link
0.25 ? 2% -0.1 0.16 ? 3% perf-profile.children.cycles-pp.f2fs_add_dentry
0.18 ? 2% -0.1 0.10 ? 4% perf-profile.children.cycles-pp.f2fs_update_dirty_page
0.20 ? 2% -0.1 0.12 ? 3% perf-profile.children.cycles-pp.f2fs_truncate
0.21 ? 3% -0.1 0.14 ? 2% perf-profile.children.cycles-pp.f2fs_lookup_extent_cache
0.20 -0.1 0.13 ? 2% perf-profile.children.cycles-pp.__get_node_page
0.18 ? 2% -0.1 0.11 ? 4% perf-profile.children.cycles-pp.vfs_unlink
0.18 ? 2% -0.1 0.11 ? 4% perf-profile.children.cycles-pp.f2fs_unlink
0.18 -0.1 0.11 perf-profile.children.cycles-pp.truncate_inode_pages_range
0.18 -0.1 0.11 ? 3% perf-profile.children.cycles-pp.f2fs_inode_synced
0.18 ? 2% -0.1 0.11 ? 3% perf-profile.children.cycles-pp.f2fs_update_parent_metadata
0.16 ? 2% -0.1 0.10 ? 4% perf-profile.children.cycles-pp.copy_page_from_iter_atomic
0.14 ? 3% -0.1 0.08 ? 5% perf-profile.children.cycles-pp.f2fs_do_truncate_blocks
0.12 ? 3% -0.0 0.07 ? 5% perf-profile.children.cycles-pp.f2fs_update_inode
0.13 -0.0 0.08 ? 5% perf-profile.children.cycles-pp.f2fs_truncate_data_blocks_range
0.08 -0.0 0.03 ? 70% perf-profile.children.cycles-pp.truncate_cleanup_page
0.12 ? 3% -0.0 0.08 ? 6% perf-profile.children.cycles-pp.copy_page_to_iter
0.14 ? 3% -0.0 0.09 ? 6% perf-profile.children.cycles-pp.copy_user_enhanced_fast_string
0.07 -0.0 0.02 ? 99% perf-profile.children.cycles-pp.fault_in_iov_iter_readable
0.13 ? 2% -0.0 0.09 ? 5% perf-profile.children.cycles-pp.filemap_add_folio
0.08 ? 4% -0.0 0.05 perf-profile.children.cycles-pp.__entry_text_start
0.09 ? 4% -0.0 0.07 ? 7% perf-profile.children.cycles-pp.__filemap_add_folio
0.08 -0.0 0.06 ? 6% perf-profile.children.cycles-pp.copyin
0.07 ? 5% -0.0 0.05 perf-profile.children.cycles-pp.xas_load
99.76 +0.1 99.83 perf-profile.children.cycles-pp.entry_SYSCALL_64_after_hwframe
99.73 +0.1 99.82 perf-profile.children.cycles-pp.do_syscall_64
96.78 +1.7 98.46 perf-profile.children.cycles-pp.write
96.58 +1.8 98.33 perf-profile.children.cycles-pp.ksys_write
96.55 +1.8 98.31 perf-profile.children.cycles-pp.vfs_write
96.47 +1.8 98.26 perf-profile.children.cycles-pp.new_sync_write
96.43 +1.8 98.24 perf-profile.children.cycles-pp.f2fs_file_write_iter
94.52 +2.3 96.81 perf-profile.children.cycles-pp._raw_spin_lock
94.05 +2.4 96.47 perf-profile.children.cycles-pp.native_queued_spin_lock_slowpath
94.21 +2.4 96.65 perf-profile.children.cycles-pp.f2fs_inode_dirtied
78.48 +8.5 86.94 perf-profile.children.cycles-pp.f2fs_mark_inode_dirty_sync
0.99 ? 9% -0.8 0.19 ? 22% perf-profile.self.cycles-pp.osq_lock
0.52 -0.2 0.31 ? 2% perf-profile.self.cycles-pp.folio_unlock
0.32 ? 2% -0.2 0.14 ? 3% perf-profile.self.cycles-pp.rwsem_spin_on_owner
0.47 ? 2% -0.1 0.34 ? 3% perf-profile.self.cycles-pp._raw_spin_lock
0.33 ? 3% -0.1 0.22 ? 3% perf-profile.self.cycles-pp.filemap_get_read_batch
0.19 ? 3% -0.1 0.13 ? 2% perf-profile.self.cycles-pp.f2fs_lookup_extent_cache
0.14 ? 3% -0.0 0.09 ? 6% perf-profile.self.cycles-pp.copy_user_enhanced_fast_string
0.12 ? 4% -0.0 0.07 perf-profile.self.cycles-pp.f2fs_write_begin
0.10 ? 4% -0.0 0.06 perf-profile.self.cycles-pp.f2fs_update_dirty_page
0.11 -0.0 0.07 ? 5% perf-profile.self.cycles-pp.__mark_inode_dirty
0.07 ? 7% -0.0 0.02 ? 99% perf-profile.self.cycles-pp.__get_node_page
0.08 -0.0 0.04 ? 44% perf-profile.self.cycles-pp.filemap_read
0.11 ? 6% -0.0 0.07 perf-profile.self.cycles-pp.f2fs_write_end
0.10 ? 3% -0.0 0.06 ? 6% perf-profile.self.cycles-pp.__filemap_get_folio
0.08 ? 6% -0.0 0.05 perf-profile.self.cycles-pp.copy_page_from_iter_atomic
0.14 ? 3% +0.2 0.32 perf-profile.self.cycles-pp.f2fs_file_write_iter
93.73 +2.4 96.13 perf-profile.self.cycles-pp.native_queued_spin_lock_slowpath
10239 ? 6% +120.6% 22584 ? 14% softirqs.CPU0.RCU
10013 ? 6% +131.6% 23186 ? 7% softirqs.CPU1.RCU
7384 ? 12% +70.1% 12560 ? 44% softirqs.CPU1.SCHED
10089 ? 16% +136.5% 23864 ? 8% softirqs.CPU10.RCU
6343 ? 16% +98.8% 12608 ? 45% softirqs.CPU10.SCHED
10300 ? 12% +127.1% 23391 ? 8% softirqs.CPU11.RCU
7009 ? 14% +76.5% 12371 ? 55% softirqs.CPU11.SCHED
9322 ? 18% +156.3% 23890 ? 6% softirqs.CPU12.RCU
9434 ? 3% +150.4% 23625 ? 6% softirqs.CPU13.RCU
5910 ? 11% +91.7% 11327 ? 56% softirqs.CPU13.SCHED
9223 ? 3% +155.4% 23556 ? 7% softirqs.CPU14.RCU
5696 ? 4% +98.1% 11281 ? 57% softirqs.CPU14.SCHED
9000 ? 5% +159.2% 23329 ? 6% softirqs.CPU15.RCU
5820 ? 8% +84.6% 10742 ? 60% softirqs.CPU15.SCHED
8547 ? 5% +186.6% 24501 ? 8% softirqs.CPU16.RCU
5797 ? 6% +91.7% 11113 ? 54% softirqs.CPU16.SCHED
8860 ? 6% +176.7% 24516 ? 8% softirqs.CPU17.RCU
5585 ? 12% +96.5% 10975 ? 58% softirqs.CPU17.SCHED
8679 ? 8% +181.7% 24447 ? 7% softirqs.CPU18.RCU
5469 ? 7% +100.2% 10948 ? 57% softirqs.CPU18.SCHED
8855 ? 7% +182.5% 25012 ? 9% softirqs.CPU19.RCU
5572 ? 5% +94.0% 10809 ? 58% softirqs.CPU19.SCHED
9919 ? 16% +131.2% 22931 ? 6% softirqs.CPU2.RCU
6794 ? 11% +70.8% 11604 ? 54% softirqs.CPU2.SCHED
8635 ? 7% +183.0% 24434 ? 8% softirqs.CPU20.RCU
5303 ? 5% +101.9% 10710 ? 59% softirqs.CPU20.SCHED
8974 ? 8% +175.4% 24713 ? 8% softirqs.CPU21.RCU
5207 ? 7% +106.4% 10745 ? 60% softirqs.CPU21.SCHED
8767 ? 8% +179.7% 24523 ? 7% softirqs.CPU22.RCU
5211 ? 7% +111.5% 11023 ? 58% softirqs.CPU22.SCHED
9013 ? 8% +180.0% 25234 ? 7% softirqs.CPU23.RCU
5130 ? 10% +107.3% 10635 ? 62% softirqs.CPU23.SCHED
8507 ? 6% +150.3% 21293 ? 8% softirqs.CPU24.RCU
4914 ? 9% +109.4% 10289 ? 66% softirqs.CPU24.SCHED
8702 ? 13% +144.8% 21304 ? 7% softirqs.CPU25.RCU
8485 ? 6% +150.4% 21249 ? 7% softirqs.CPU26.RCU
8363 ? 6% +152.1% 21081 ? 7% softirqs.CPU27.RCU
4905 ? 5% +107.7% 10189 ? 64% softirqs.CPU27.SCHED
8278 ? 6% +153.3% 20967 ? 7% softirqs.CPU28.RCU
5045 ? 9% +99.5% 10067 ? 65% softirqs.CPU28.SCHED
8224 ? 7% +154.2% 20907 ? 7% softirqs.CPU29.RCU
5307 ? 16% +93.2% 10251 ? 66% softirqs.CPU29.SCHED
9017 ? 6% +154.7% 22972 ? 18% softirqs.CPU3.RCU
6197 ? 14% +80.6% 11189 ? 53% softirqs.CPU3.SCHED
8160 ? 8% +159.3% 21156 ? 7% softirqs.CPU30.RCU
5119 ? 14% +99.6% 10217 ? 65% softirqs.CPU30.SCHED
8385 ? 7% +152.9% 21205 ? 8% softirqs.CPU31.RCU
4901 ? 8% +108.5% 10220 ? 64% softirqs.CPU31.SCHED
8256 ? 6% +171.0% 22372 ? 6% softirqs.CPU32.RCU
4843 ? 2% +108.3% 10087 ? 65% softirqs.CPU32.SCHED
8429 ? 8% +158.4% 21786 ? 17% softirqs.CPU33.RCU
4863 ? 6% +116.7% 10539 ? 61% softirqs.CPU33.SCHED
8484 ? 6% +166.5% 22607 ? 6% softirqs.CPU34.RCU
8556 ? 8% +162.2% 22431 ? 6% softirqs.CPU35.RCU
5229 ? 17% +95.0% 10195 ? 65% softirqs.CPU35.SCHED
8830 ? 7% +157.8% 22762 ? 7% softirqs.CPU36.RCU
5181 ? 10% +99.9% 10356 ? 66% softirqs.CPU36.SCHED
8675 ? 6% +164.6% 22958 ? 6% softirqs.CPU37.RCU
4962 ? 8% +105.2% 10183 ? 64% softirqs.CPU37.SCHED
8350 ? 8% +166.4% 22246 ? 5% softirqs.CPU38.RCU
4833 ? 5% +111.9% 10239 ? 66% softirqs.CPU38.SCHED
8578 ? 8% +177.2% 23781 ? 13% softirqs.CPU39.RCU
4892 ? 6% +104.6% 10012 ? 63% softirqs.CPU39.SCHED
9393 ? 13% +127.3% 21351 ? 14% softirqs.CPU4.RCU
5994 ? 7% +89.1% 11334 ? 52% softirqs.CPU4.SCHED
8595 ? 8% +166.4% 22900 ? 6% softirqs.CPU40.RCU
4918 ? 5% +104.7% 10071 ? 65% softirqs.CPU40.SCHED
8259 ? 6% +167.8% 22117 ? 6% softirqs.CPU41.RCU
4983 ? 9% +103.4% 10133 ? 67% softirqs.CPU41.SCHED
8703 ? 9% +164.7% 23040 ? 7% softirqs.CPU42.RCU
5280 ? 12% +93.1% 10195 ? 65% softirqs.CPU42.SCHED
8651 ? 7% +163.6% 22803 ? 7% softirqs.CPU43.RCU
5042 ? 10% +104.5% 10310 ? 68% softirqs.CPU43.SCHED
8708 ? 8% +164.0% 22989 ? 5% softirqs.CPU44.RCU
5066 ? 10% +99.9% 10128 ? 65% softirqs.CPU44.SCHED
8703 ? 7% +165.8% 23136 ? 7% softirqs.CPU45.RCU
4876 ? 11% +110.8% 10280 ? 66% softirqs.CPU45.SCHED
8597 ? 7% +163.5% 22654 ? 6% softirqs.CPU46.RCU
4903 ? 7% +111.1% 10351 ? 67% softirqs.CPU46.SCHED
8601 ? 7% +167.9% 23040 ? 7% softirqs.CPU47.RCU
4869 ? 6% +108.9% 10172 ? 67% softirqs.CPU47.SCHED
9029 ? 2% +135.8% 21296 ? 10% softirqs.CPU48.RCU
4506 ? 12% +118.4% 9842 ? 66% softirqs.CPU48.SCHED
8747 ? 7% +164.3% 23124 ? 6% softirqs.CPU49.RCU
5024 ? 10% +136.0% 11860 ? 56% softirqs.CPU49.SCHED
8833 ? 5% +156.3% 22637 ? 6% softirqs.CPU5.RCU
5965 ? 9% +87.0% 11158 ? 52% softirqs.CPU5.SCHED
8867 ? 3% +156.6% 22757 ? 5% softirqs.CPU50.RCU
5206 ? 8% +110.7% 10971 ? 57% softirqs.CPU50.SCHED
9109 ? 8% +138.0% 21679 ? 16% softirqs.CPU51.RCU
5685 ? 13% +89.9% 10795 ? 52% softirqs.CPU51.SCHED
8814 ? 4% +146.8% 21752 ? 8% softirqs.CPU52.RCU
5280 ? 8% +100.0% 10563 ? 62% softirqs.CPU52.SCHED
9058 ? 6% +145.8% 22263 ? 6% softirqs.CPU53.RCU
5520 ? 7% +93.2% 10668 ? 60% softirqs.CPU53.SCHED
8735 ? 8% +157.1% 22455 ? 5% softirqs.CPU54.RCU
5597 ? 18% +88.6% 10557 ? 59% softirqs.CPU54.SCHED
8882 ? 4% +156.2% 22760 ? 6% softirqs.CPU55.RCU
5302 ? 11% +105.2% 10882 ? 62% softirqs.CPU55.SCHED
9029 ? 6% +151.0% 22660 ? 6% softirqs.CPU56.RCU
5796 ? 9% +82.0% 10550 ? 62% softirqs.CPU56.SCHED
8831 ? 5% +151.2% 22182 ? 6% softirqs.CPU57.RCU
5677 ? 8% +83.7% 10428 ? 64% softirqs.CPU57.SCHED
9145 ? 5% +135.1% 21499 ? 14% softirqs.CPU58.RCU
5583 ? 10% +93.8% 10821 ? 60% softirqs.CPU58.SCHED
8968 ? 5% +151.7% 22574 ? 5% softirqs.CPU59.RCU
5776 ? 13% +83.1% 10578 ? 61% softirqs.CPU59.SCHED
8984 ? 6% +154.5% 22861 ? 6% softirqs.CPU6.RCU
5813 ? 10% +88.4% 10953 ? 60% softirqs.CPU6.SCHED
8530 ? 12% +170.9% 23110 ? 6% softirqs.CPU60.RCU
9702 ? 15% +140.3% 23313 ? 6% softirqs.CPU61.RCU
5438 ? 6% +98.0% 10767 ? 59% softirqs.CPU61.SCHED
9116 ? 4% +153.7% 23129 ? 5% softirqs.CPU62.RCU
5657 ? 9% +84.3% 10425 ? 63% softirqs.CPU62.SCHED
9204 ? 6% +152.8% 23272 ? 5% softirqs.CPU63.RCU
5507 ? 8% +101.9% 11120 ? 56% softirqs.CPU63.SCHED
8385 ? 6% +175.6% 23105 ? 8% softirqs.CPU64.RCU
5555 ? 10% +90.8% 10596 ? 61% softirqs.CPU64.SCHED
8768 ? 6% +168.5% 23542 ? 8% softirqs.CPU65.RCU
8614 ? 8% +182.1% 24306 ? 18% softirqs.CPU66.RCU
5770 ? 5% +89.1% 10909 ? 59% softirqs.CPU66.SCHED
8643 ? 7% +173.0% 23597 ? 9% softirqs.CPU67.RCU
5777 ? 13% +87.3% 10822 ? 57% softirqs.CPU67.SCHED
8694 ? 7% +169.9% 23468 ? 8% softirqs.CPU68.RCU
5822 ? 13% +92.3% 11195 ? 54% softirqs.CPU68.SCHED
8910 ? 4% +164.6% 23579 ? 8% softirqs.CPU69.RCU
5772 ? 11% +90.9% 11016 ? 56% softirqs.CPU69.SCHED
8919 ? 5% +154.2% 22671 ? 7% softirqs.CPU7.RCU
5705 ? 10% +93.3% 11027 ? 59% softirqs.CPU7.SCHED
10213 ? 27% +129.7% 23454 ? 7% softirqs.CPU70.RCU
10733 ? 26% +131.6% 24855 ? 8% softirqs.CPU71.RCU
5826 ? 3% +92.7% 11227 ? 53% softirqs.CPU71.SCHED
8033 ? 6% +160.9% 20957 ? 8% softirqs.CPU72.RCU
4853 ? 5% +111.5% 10263 ? 67% softirqs.CPU72.SCHED
7997 ? 8% +159.7% 20774 ? 7% softirqs.CPU73.RCU
4878 ? 7% +110.0% 10242 ? 68% softirqs.CPU73.SCHED
8052 ? 6% +155.0% 20535 ? 7% softirqs.CPU74.RCU
4909 ? 7% +106.8% 10151 ? 68% softirqs.CPU74.SCHED
8158 ? 6% +154.8% 20783 ? 7% softirqs.CPU75.RCU
4901 ? 9% +106.4% 10115 ? 69% softirqs.CPU75.SCHED
8019 ? 7% +160.3% 20874 ? 7% softirqs.CPU76.RCU
4894 ? 8% +105.3% 10050 ? 66% softirqs.CPU76.SCHED
7934 ? 8% +159.2% 20568 ? 6% softirqs.CPU77.RCU
4909 ? 9% +105.2% 10074 ? 66% softirqs.CPU77.SCHED
7943 ? 7% +161.4% 20765 ? 7% softirqs.CPU78.RCU
4876 ? 7% +108.3% 10155 ? 67% softirqs.CPU78.SCHED
8068 ? 7% +159.6% 20944 ? 6% softirqs.CPU79.RCU
5066 ? 13% +100.1% 10136 ? 67% softirqs.CPU79.SCHED
8791 ? 5% +161.6% 23002 ? 7% softirqs.CPU8.RCU
5609 ? 6% +106.5% 11582 ? 51% softirqs.CPU8.SCHED
8189 ? 7% +177.4% 22718 ? 5% softirqs.CPU80.RCU
4940 ? 9% +105.4% 10149 ? 68% softirqs.CPU80.SCHED
8648 ? 4% +147.9% 21437 ? 11% softirqs.CPU81.RCU
5175 ? 16% +94.4% 10060 ? 65% softirqs.CPU81.SCHED
8445 ? 9% +170.2% 22818 ? 5% softirqs.CPU82.RCU
4996 ? 11% +105.1% 10245 ? 68% softirqs.CPU82.SCHED
8520 ? 8% +163.7% 22469 ? 5% softirqs.CPU83.RCU
5153 ? 11% +99.1% 10259 ? 67% softirqs.CPU83.SCHED
8598 ? 8% +169.1% 23142 ? 7% softirqs.CPU84.RCU
4863 ? 14% +106.3% 10032 ? 65% softirqs.CPU84.SCHED
8537 ? 8% +171.6% 23188 ? 5% softirqs.CPU85.RCU
8316 ? 8% +169.0% 22368 ? 5% softirqs.CPU86.RCU
5356 ? 16% +109.7% 11233 ? 57% softirqs.CPU86.SCHED
8517 ? 8% +170.1% 23005 ? 3% softirqs.CPU87.RCU
5122 ? 11% +100.3% 10260 ? 65% softirqs.CPU87.SCHED
8623 ? 9% +169.8% 23267 ? 5% softirqs.CPU88.RCU
4965 ? 12% +119.2% 10884 ? 69% softirqs.CPU88.SCHED
8197 ? 9% +175.3% 22566 ? 6% softirqs.CPU89.RCU
4993 ? 10% +101.6% 10066 ? 66% softirqs.CPU89.SCHED
10575 ? 14% +124.4% 23727 ? 8% softirqs.CPU9.RCU
8524 ? 8% +166.9% 22751 ? 5% softirqs.CPU90.RCU
4967 ? 10% +104.0% 10132 ? 67% softirqs.CPU90.SCHED
8501 ? 9% +169.1% 22875 ? 5% softirqs.CPU91.RCU
4961 ? 10% +102.6% 10052 ? 66% softirqs.CPU91.SCHED
8570 ? 8% +166.7% 22861 ? 5% softirqs.CPU92.RCU
4952 ? 9% +108.8% 10339 ? 71% softirqs.CPU92.SCHED
8730 ? 9% +162.2% 22886 ? 5% softirqs.CPU93.RCU
4922 ? 10% +106.6% 10170 ? 66% softirqs.CPU93.SCHED
8496 ? 7% +169.1% 22861 ? 4% softirqs.CPU94.RCU
5301 ? 18% +103.9% 10806 ? 60% softirqs.CPU94.SCHED
8496 ? 6% +174.3% 23304 ? 5% softirqs.CPU95.RCU
4745 ? 19% +107.5% 9847 ? 72% softirqs.CPU95.SCHED
843358 ? 5% +158.5% 2180256 ? 6% softirqs.RCU
525101 ? 6% +95.8% 1028170 ? 61% softirqs.SCHED
51024 ? 2% +38.3% 70551 ? 2% softirqs.TIMER
416.67 +55.4% 647.67 interrupts.9:IR-IO-APIC.9-fasteoi.acpi
147615 ? 19% +434.5% 789026 ?152% interrupts.CAL:Function_call_interrupts
1513 ? 20% +482.1% 8810 ?167% interrupts.CPU0.CAL:Function_call_interrupts
416443 +55.4% 647304 interrupts.CPU0.LOC:Local_timer_interrupts
1557 ? 4% +103.9% 3175 ? 57% interrupts.CPU0.RES:Rescheduling_interrupts
416.67 +55.4% 647.67 interrupts.CPU1.9:IR-IO-APIC.9-fasteoi.acpi
416539 +55.4% 647230 interrupts.CPU1.LOC:Local_timer_interrupts
1191 ? 4% +106.2% 2457 ? 70% interrupts.CPU1.RES:Rescheduling_interrupts
1533 ? 18% +481.8% 8919 ?166% interrupts.CPU10.CAL:Function_call_interrupts
416528 +55.4% 647290 interrupts.CPU10.LOC:Local_timer_interrupts
1118 ? 3% +127.2% 2540 ? 67% interrupts.CPU10.RES:Rescheduling_interrupts
1567 ? 21% +453.1% 8670 ?168% interrupts.CPU11.CAL:Function_call_interrupts
416502 +55.4% 647272 interrupts.CPU11.LOC:Local_timer_interrupts
1123 ? 6% +110.2% 2361 ? 71% interrupts.CPU11.RES:Rescheduling_interrupts
416661 +55.3% 647236 interrupts.CPU12.LOC:Local_timer_interrupts
1132 ? 3% +120.9% 2502 ? 66% interrupts.CPU12.RES:Rescheduling_interrupts
416545 +55.4% 647264 interrupts.CPU13.LOC:Local_timer_interrupts
1126 ? 4% +128.3% 2571 ? 67% interrupts.CPU13.RES:Rescheduling_interrupts
1532 ? 17% +471.4% 8756 ?168% interrupts.CPU14.CAL:Function_call_interrupts
416535 +55.4% 647201 interrupts.CPU14.LOC:Local_timer_interrupts
1104 ? 4% +121.1% 2441 ? 71% interrupts.CPU14.RES:Rescheduling_interrupts
1500 ? 19% +482.1% 8733 ?169% interrupts.CPU15.CAL:Function_call_interrupts
416548 +55.4% 647272 interrupts.CPU15.LOC:Local_timer_interrupts
1134 ? 5% +115.4% 2442 ? 73% interrupts.CPU15.RES:Rescheduling_interrupts
416441 +55.4% 647158 interrupts.CPU16.LOC:Local_timer_interrupts
1181 ? 11% +103.9% 2408 ? 70% interrupts.CPU16.RES:Rescheduling_interrupts
1450 ? 21% +505.7% 8785 ?167% interrupts.CPU17.CAL:Function_call_interrupts
416541 +55.3% 647039 interrupts.CPU17.LOC:Local_timer_interrupts
1105 ? 5% +123.4% 2470 ? 70% interrupts.CPU17.RES:Rescheduling_interrupts
1480 ? 19% +488.1% 8708 ?164% interrupts.CPU18.CAL:Function_call_interrupts
416402 +55.4% 647187 interrupts.CPU18.LOC:Local_timer_interrupts
1137 ? 5% +116.8% 2464 ? 69% interrupts.CPU18.RES:Rescheduling_interrupts
1469 ? 20% +496.7% 8766 ?167% interrupts.CPU19.CAL:Function_call_interrupts
416566 +55.3% 647031 interrupts.CPU19.LOC:Local_timer_interrupts
1141 ? 5% +119.2% 2501 ? 70% interrupts.CPU19.RES:Rescheduling_interrupts
416569 +55.3% 647121 interrupts.CPU2.LOC:Local_timer_interrupts
1099 ? 3% +123.8% 2461 ? 72% interrupts.CPU2.RES:Rescheduling_interrupts
1499 ? 20% +476.1% 8637 ?166% interrupts.CPU20.CAL:Function_call_interrupts
416542 +55.4% 647239 interrupts.CPU20.LOC:Local_timer_interrupts
1159 ? 7% +114.1% 2482 ? 69% interrupts.CPU20.RES:Rescheduling_interrupts
1486 ? 19% +481.4% 8642 ?166% interrupts.CPU21.CAL:Function_call_interrupts
416490 +55.4% 647191 interrupts.CPU21.LOC:Local_timer_interrupts
1112 ? 3% +124.5% 2496 ? 69% interrupts.CPU21.RES:Rescheduling_interrupts
1464 ? 18% +500.3% 8788 ?166% interrupts.CPU22.CAL:Function_call_interrupts
416519 +55.4% 647134 interrupts.CPU22.LOC:Local_timer_interrupts
1177 ? 8% +109.1% 2462 ? 69% interrupts.CPU22.RES:Rescheduling_interrupts
1511 ? 19% +473.8% 8670 ?167% interrupts.CPU23.CAL:Function_call_interrupts
416450 +55.4% 647182 interrupts.CPU23.LOC:Local_timer_interrupts
1193 ? 12% +108.5% 2487 ? 68% interrupts.CPU23.RES:Rescheduling_interrupts
1626 ? 20% +375.7% 7738 ?135% interrupts.CPU24.CAL:Function_call_interrupts
416433 +55.4% 647133 interrupts.CPU24.LOC:Local_timer_interrupts
1546 ? 2% +122.4% 3439 ? 65% interrupts.CPU24.RES:Rescheduling_interrupts
1661 ? 17% +368.0% 7774 ?135% interrupts.CPU25.CAL:Function_call_interrupts
416452 +55.4% 647153 interrupts.CPU25.LOC:Local_timer_interrupts
1087 ? 3% +127.6% 2474 ?102% interrupts.CPU25.RES:Rescheduling_interrupts
1641 ? 17% +371.6% 7740 ?134% interrupts.CPU26.CAL:Function_call_interrupts
416476 +55.3% 646895 interrupts.CPU26.LOC:Local_timer_interrupts
1072 ? 7% +127.7% 2442 ?108% interrupts.CPU26.RES:Rescheduling_interrupts
1669 ? 17% +371.3% 7865 ?134% interrupts.CPU27.CAL:Function_call_interrupts
416486 +55.4% 647137 interrupts.CPU27.LOC:Local_timer_interrupts
1050 ? 3% +132.0% 2436 ?101% interrupts.CPU27.RES:Rescheduling_interrupts
1610 ? 19% +380.3% 7733 ?137% interrupts.CPU28.CAL:Function_call_interrupts
416432 +55.4% 647040 interrupts.CPU28.LOC:Local_timer_interrupts
1062 ? 8% +126.7% 2407 ?106% interrupts.CPU28.RES:Rescheduling_interrupts
1618 ? 17% +383.8% 7831 ?137% interrupts.CPU29.CAL:Function_call_interrupts
416462 +55.4% 647132 interrupts.CPU29.LOC:Local_timer_interrupts
1039 ? 4% +137.1% 2465 ? 98% interrupts.CPU29.RES:Rescheduling_interrupts
416516 +55.4% 647305 interrupts.CPU3.LOC:Local_timer_interrupts
1082 ? 5% +124.4% 2428 ? 69% interrupts.CPU3.RES:Rescheduling_interrupts
1643 ? 17% +375.0% 7809 ?135% interrupts.CPU30.CAL:Function_call_interrupts
416520 +55.4% 647101 interrupts.CPU30.LOC:Local_timer_interrupts
1079 ? 4% +124.8% 2427 ?104% interrupts.CPU30.RES:Rescheduling_interrupts
1660 ? 18% +375.1% 7887 ?133% interrupts.CPU31.CAL:Function_call_interrupts
416438 +55.4% 647080 interrupts.CPU31.LOC:Local_timer_interrupts
1120 ? 4% +115.8% 2417 ?104% interrupts.CPU31.RES:Rescheduling_interrupts
1588 ? 18% +380.0% 7626 ?134% interrupts.CPU32.CAL:Function_call_interrupts
416446 +55.4% 647139 interrupts.CPU32.LOC:Local_timer_interrupts
1050 ? 4% +131.8% 2434 ?104% interrupts.CPU32.RES:Rescheduling_interrupts
1605 ? 18% +386.4% 7808 ?136% interrupts.CPU33.CAL:Function_call_interrupts
416461 +55.4% 647145 interrupts.CPU33.LOC:Local_timer_interrupts
1066 ? 5% +126.2% 2412 ?103% interrupts.CPU33.RES:Rescheduling_interrupts
1596 ? 18% +386.4% 7767 ?136% interrupts.CPU34.CAL:Function_call_interrupts
416485 +55.4% 647067 interrupts.CPU34.LOC:Local_timer_interrupts
1045 ? 5% +128.6% 2390 ?104% interrupts.CPU34.RES:Rescheduling_interrupts
1586 ? 19% +391.5% 7794 ?136% interrupts.CPU35.CAL:Function_call_interrupts
416440 +55.4% 647187 interrupts.CPU35.LOC:Local_timer_interrupts
1058 ? 3% +131.7% 2452 ?104% interrupts.CPU35.RES:Rescheduling_interrupts
1592 ? 17% +385.0% 7721 ?135% interrupts.CPU36.CAL:Function_call_interrupts
416432 +55.4% 647094 interrupts.CPU36.LOC:Local_timer_interrupts
1071 ? 7% +125.5% 2416 ?105% interrupts.CPU36.RES:Rescheduling_interrupts
1572 ? 19% +388.4% 7680 ?136% interrupts.CPU37.CAL:Function_call_interrupts
416436 +55.4% 647105 interrupts.CPU37.LOC:Local_timer_interrupts
1052 ? 2% +130.1% 2421 ?102% interrupts.CPU37.RES:Rescheduling_interrupts
1597 ? 16% +388.2% 7801 ?137% interrupts.CPU38.CAL:Function_call_interrupts
416466 +55.4% 647148 interrupts.CPU38.LOC:Local_timer_interrupts
1104 ? 5% +118.0% 2408 ?105% interrupts.CPU38.RES:Rescheduling_interrupts
1559 ? 19% +386.9% 7593 ?134% interrupts.CPU39.CAL:Function_call_interrupts
416415 +55.4% 647151 interrupts.CPU39.LOC:Local_timer_interrupts
1041 ? 2% +131.5% 2410 ?100% interrupts.CPU39.RES:Rescheduling_interrupts
1556 ? 18% +443.7% 8462 ?165% interrupts.CPU4.CAL:Function_call_interrupts
416490 +55.4% 647057 interrupts.CPU4.LOC:Local_timer_interrupts
1134 ? 7% +118.5% 2479 ? 70% interrupts.CPU4.RES:Rescheduling_interrupts
1552 ? 19% +398.8% 7745 ?136% interrupts.CPU40.CAL:Function_call_interrupts
416549 +55.4% 647513 interrupts.CPU40.LOC:Local_timer_interrupts
1085 ? 4% +123.5% 2425 ?102% interrupts.CPU40.RES:Rescheduling_interrupts
1607 ? 21% +379.7% 7710 ?137% interrupts.CPU41.CAL:Function_call_interrupts
416440 +55.4% 647180 interrupts.CPU41.LOC:Local_timer_interrupts
1062 ? 2% +125.7% 2397 ?104% interrupts.CPU41.RES:Rescheduling_interrupts
1563 ? 20% +397.9% 7784 ?135% interrupts.CPU42.CAL:Function_call_interrupts
416461 +55.4% 647109 interrupts.CPU42.LOC:Local_timer_interrupts
1088 ? 5% +126.5% 2464 ?100% interrupts.CPU42.RES:Rescheduling_interrupts
1531 ? 19% +400.2% 7660 ?136% interrupts.CPU43.CAL:Function_call_interrupts
416452 +55.5% 647414 interrupts.CPU43.LOC:Local_timer_interrupts
1100 ? 4% +116.0% 2377 ?103% interrupts.CPU43.RES:Rescheduling_interrupts
1575 ? 18% +395.7% 7809 ?136% interrupts.CPU44.CAL:Function_call_interrupts
416447 +55.4% 647078 interrupts.CPU44.LOC:Local_timer_interrupts
1072 ? 4% +124.7% 2410 ?104% interrupts.CPU44.RES:Rescheduling_interrupts
1560 ? 19% +400.2% 7805 ?136% interrupts.CPU45.CAL:Function_call_interrupts
416458 +55.4% 647081 interrupts.CPU45.LOC:Local_timer_interrupts
1066 ? 2% +126.3% 2413 ?102% interrupts.CPU45.RES:Rescheduling_interrupts
1556 ? 21% +399.9% 7782 ?135% interrupts.CPU46.CAL:Function_call_interrupts
416461 +55.4% 647104 interrupts.CPU46.LOC:Local_timer_interrupts
1069 +128.5% 2443 ?103% interrupts.CPU46.RES:Rescheduling_interrupts
1608 ? 17% +382.2% 7755 ?137% interrupts.CPU47.CAL:Function_call_interrupts
416508 +55.4% 647178 interrupts.CPU47.LOC:Local_timer_interrupts
1107 ? 6% +117.6% 2410 ?103% interrupts.CPU47.RES:Rescheduling_interrupts
416518 +55.4% 647258 interrupts.CPU48.LOC:Local_timer_interrupts
1166 ? 7% +111.4% 2465 ? 72% interrupts.CPU48.RES:Rescheduling_interrupts
1543 ? 20% +468.2% 8767 ?165% interrupts.CPU49.CAL:Function_call_interrupts
416446 +55.4% 647073 interrupts.CPU49.LOC:Local_timer_interrupts
1138 ? 3% +121.9% 2525 ? 67% interrupts.CPU49.RES:Rescheduling_interrupts
1591 ? 20% +446.4% 8697 ?165% interrupts.CPU5.CAL:Function_call_interrupts
416566 +55.4% 647261 interrupts.CPU5.LOC:Local_timer_interrupts
1121 ? 7% +126.5% 2539 ? 69% interrupts.CPU5.RES:Rescheduling_interrupts
416515 +55.4% 647220 interrupts.CPU50.LOC:Local_timer_interrupts
1185 ? 7% +109.9% 2488 ? 67% interrupts.CPU50.RES:Rescheduling_interrupts
1447 ? 20% +417.1% 7486 ?158% interrupts.CPU51.CAL:Function_call_interrupts
416413 +55.4% 647242 interrupts.CPU51.LOC:Local_timer_interrupts
1182 ? 7% +114.7% 2538 ? 69% interrupts.CPU51.RES:Rescheduling_interrupts
1451 ? 19% +490.6% 8569 ?166% interrupts.CPU52.CAL:Function_call_interrupts
416472 +55.4% 647221 interrupts.CPU52.LOC:Local_timer_interrupts
1109 ? 5% +125.1% 2497 ? 71% interrupts.CPU52.RES:Rescheduling_interrupts
416553 +55.4% 647279 interrupts.CPU53.LOC:Local_timer_interrupts
1225 ? 7% +104.5% 2504 ? 67% interrupts.CPU53.RES:Rescheduling_interrupts
1426 ? 20% +500.1% 8559 ?166% interrupts.CPU54.CAL:Function_call_interrupts
416586 +55.4% 647224 interrupts.CPU54.LOC:Local_timer_interrupts
1123 ? 4% +117.5% 2442 ? 70% interrupts.CPU54.RES:Rescheduling_interrupts
1501 ? 20% +490.0% 8857 ?166% interrupts.CPU55.CAL:Function_call_interrupts
416520 +55.4% 647262 interrupts.CPU55.LOC:Local_timer_interrupts
1100 ? 5% +135.6% 2591 ? 68% interrupts.CPU55.RES:Rescheduling_interrupts
416533 +55.4% 647171 interrupts.CPU56.LOC:Local_timer_interrupts
1145 ? 10% +117.3% 2489 ? 68% interrupts.CPU56.RES:Rescheduling_interrupts
1457 ? 19% +501.0% 8760 ?168% interrupts.CPU57.CAL:Function_call_interrupts
416478 +55.4% 647123 interrupts.CPU57.LOC:Local_timer_interrupts
1126 ? 2% +122.3% 2503 ? 68% interrupts.CPU57.RES:Rescheduling_interrupts
1489 ? 20% +497.6% 8900 ?167% interrupts.CPU58.CAL:Function_call_interrupts
416529 +55.4% 647196 interrupts.CPU58.LOC:Local_timer_interrupts
1196 ? 11% +108.4% 2493 ? 70% interrupts.CPU58.RES:Rescheduling_interrupts
1462 ? 18% +498.1% 8749 ?166% interrupts.CPU59.CAL:Function_call_interrupts
416588 +55.4% 647321 interrupts.CPU59.LOC:Local_timer_interrupts
1156 ? 11% +116.7% 2505 ? 71% interrupts.CPU59.RES:Rescheduling_interrupts
416339 +55.4% 647144 interrupts.CPU6.LOC:Local_timer_interrupts
1104 ? 3% +120.5% 2435 ? 69% interrupts.CPU6.RES:Rescheduling_interrupts
1460 ? 18% +502.7% 8799 ?167% interrupts.CPU60.CAL:Function_call_interrupts
416524 +55.4% 647095 interrupts.CPU60.LOC:Local_timer_interrupts
1149 ? 13% +113.6% 2455 ? 68% interrupts.CPU60.RES:Rescheduling_interrupts
1462 ? 20% +514.6% 8988 ?166% interrupts.CPU61.CAL:Function_call_interrupts
416443 +55.4% 647335 interrupts.CPU61.LOC:Local_timer_interrupts
1119 ? 4% +127.7% 2548 ? 68% interrupts.CPU61.RES:Rescheduling_interrupts
1437 ? 21% +514.1% 8825 ?167% interrupts.CPU62.CAL:Function_call_interrupts
416525 +55.4% 647258 interrupts.CPU62.LOC:Local_timer_interrupts
12300 ? 20% +21.6% 14963 ? 13% interrupts.CPU62.NMI:Non-maskable_interrupts
12300 ? 20% +21.6% 14963 ? 13% interrupts.CPU62.PMI:Performance_monitoring_interrupts
1139 ? 3% +120.3% 2511 ? 71% interrupts.CPU62.RES:Rescheduling_interrupts
1502 ? 21% +484.9% 8789 ?166% interrupts.CPU63.CAL:Function_call_interrupts
416523 +55.4% 647244 interrupts.CPU63.LOC:Local_timer_interrupts
1156 ? 5% +117.5% 2516 ? 69% interrupts.CPU63.RES:Rescheduling_interrupts
1491 ? 23% +489.6% 8792 ?166% interrupts.CPU64.CAL:Function_call_interrupts
416420 +55.4% 647313 interrupts.CPU64.LOC:Local_timer_interrupts
1148 ? 4% +117.3% 2496 ? 71% interrupts.CPU64.RES:Rescheduling_interrupts
1465 ? 19% +489.4% 8637 ?167% interrupts.CPU65.CAL:Function_call_interrupts
416459 +55.4% 647257 interrupts.CPU65.LOC:Local_timer_interrupts
1139 ? 8% +120.9% 2517 ? 73% interrupts.CPU65.RES:Rescheduling_interrupts
416424 +55.4% 647244 interrupts.CPU66.LOC:Local_timer_interrupts
1100 ? 5% +126.7% 2494 ? 68% interrupts.CPU66.RES:Rescheduling_interrupts
1528 ? 22% +465.6% 8647 ?163% interrupts.CPU67.CAL:Function_call_interrupts
416471 +55.4% 647153 interrupts.CPU67.LOC:Local_timer_interrupts
1160 ? 4% +115.3% 2499 ? 67% interrupts.CPU67.RES:Rescheduling_interrupts
1473 ? 21% +480.5% 8553 ?164% interrupts.CPU68.CAL:Function_call_interrupts
416589 +55.4% 647222 interrupts.CPU68.LOC:Local_timer_interrupts
1150 ? 6% +121.4% 2548 ? 69% interrupts.CPU68.RES:Rescheduling_interrupts
1470 ? 18% +491.7% 8698 ?167% interrupts.CPU69.CAL:Function_call_interrupts
416472 +55.4% 647218 interrupts.CPU69.LOC:Local_timer_interrupts
1145 ? 10% +112.9% 2439 ? 72% interrupts.CPU69.RES:Rescheduling_interrupts
416362 +55.5% 647275 interrupts.CPU7.LOC:Local_timer_interrupts
1117 ? 4% +124.0% 2502 ? 70% interrupts.CPU7.RES:Rescheduling_interrupts
1515 ? 19% +472.2% 8672 ?167% interrupts.CPU70.CAL:Function_call_interrupts
416478 +55.4% 647269 interrupts.CPU70.LOC:Local_timer_interrupts
1156 ? 7% +113.0% 2464 ? 66% interrupts.CPU70.RES:Rescheduling_interrupts
1517 ? 21% +480.4% 8805 ?167% interrupts.CPU71.CAL:Function_call_interrupts
416343 +55.4% 647006 interrupts.CPU71.LOC:Local_timer_interrupts
1151 ? 10% +124.3% 2582 ? 68% interrupts.CPU71.RES:Rescheduling_interrupts
1503 ? 21% +404.4% 7581 ?139% interrupts.CPU72.CAL:Function_call_interrupts
416446 +55.4% 647167 interrupts.CPU72.LOC:Local_timer_interrupts
1038 ? 3% +123.7% 2322 ?108% interrupts.CPU72.RES:Rescheduling_interrupts
1548 ? 13% +403.5% 7793 ?138% interrupts.CPU73.CAL:Function_call_interrupts
416543 +55.4% 647122 interrupts.CPU73.LOC:Local_timer_interrupts
1069 ? 8% +122.3% 2377 ?105% interrupts.CPU73.RES:Rescheduling_interrupts
1536 ? 20% +406.2% 7774 ?137% interrupts.CPU74.CAL:Function_call_interrupts
416547 +55.4% 647126 interrupts.CPU74.LOC:Local_timer_interrupts
1074 ? 2% +123.1% 2396 ?104% interrupts.CPU74.RES:Rescheduling_interrupts
1582 ? 16% +381.4% 7618 ?136% interrupts.CPU75.CAL:Function_call_interrupts
416481 +55.4% 647193 interrupts.CPU75.LOC:Local_timer_interrupts
11425 ? 17% +30.9% 14959 ? 13% interrupts.CPU75.NMI:Non-maskable_interrupts
11425 ? 17% +30.9% 14959 ? 13% interrupts.CPU75.PMI:Performance_monitoring_interrupts
1114 ? 5% +113.0% 2373 ?102% interrupts.CPU75.RES:Rescheduling_interrupts
1573 ? 19% +394.1% 7774 ?135% interrupts.CPU76.CAL:Function_call_interrupts
416497 +55.4% 647201 interrupts.CPU76.LOC:Local_timer_interrupts
12306 ? 20% +28.7% 15839 interrupts.CPU76.NMI:Non-maskable_interrupts
12306 ? 20% +28.7% 15839 interrupts.CPU76.PMI:Performance_monitoring_interrupts
1095 ? 5% +119.5% 2404 ?106% interrupts.CPU76.RES:Rescheduling_interrupts
1535 ? 20% +399.6% 7668 ?135% interrupts.CPU77.CAL:Function_call_interrupts
416525 +55.4% 647160 interrupts.CPU77.LOC:Local_timer_interrupts
1053 ? 2% +126.4% 2384 ?103% interrupts.CPU77.RES:Rescheduling_interrupts
1564 ? 18% +398.4% 7796 ?139% interrupts.CPU78.CAL:Function_call_interrupts
416532 +55.4% 647161 interrupts.CPU78.LOC:Local_timer_interrupts
1104 ? 4% +120.1% 2430 ?106% interrupts.CPU78.RES:Rescheduling_interrupts
1518 ? 18% +420.0% 7896 ?135% interrupts.CPU79.CAL:Function_call_interrupts
416530 +55.4% 647216 interrupts.CPU79.LOC:Local_timer_interrupts
1077 ? 3% +134.0% 2521 ? 97% interrupts.CPU79.RES:Rescheduling_interrupts
1518 ? 18% +474.0% 8716 ?163% interrupts.CPU8.CAL:Function_call_interrupts
416466 +55.4% 647247 interrupts.CPU8.LOC:Local_timer_interrupts
1168 ? 14% +111.6% 2472 ? 67% interrupts.CPU8.RES:Rescheduling_interrupts
1570 ? 18% +384.6% 7610 ?136% interrupts.CPU80.CAL:Function_call_interrupts
416488 +55.4% 647236 interrupts.CPU80.LOC:Local_timer_interrupts
1069 ? 4% +124.7% 2402 ?103% interrupts.CPU80.RES:Rescheduling_interrupts
1543 ? 21% +392.8% 7608 ?138% interrupts.CPU81.CAL:Function_call_interrupts
416539 +55.4% 647193 interrupts.CPU81.LOC:Local_timer_interrupts
1069 ? 4% +128.8% 2446 ?101% interrupts.CPU81.RES:Rescheduling_interrupts
1563 ? 20% +396.0% 7754 ?137% interrupts.CPU82.CAL:Function_call_interrupts
416581 +55.4% 647221 interrupts.CPU82.LOC:Local_timer_interrupts
1045 ? 3% +136.1% 2467 ?103% interrupts.CPU82.RES:Rescheduling_interrupts
1519 ? 20% +415.9% 7838 ?137% interrupts.CPU83.CAL:Function_call_interrupts
416568 +55.4% 647195 interrupts.CPU83.LOC:Local_timer_interrupts
1056 ? 4% +137.7% 2511 ?101% interrupts.CPU83.RES:Rescheduling_interrupts
1553 ? 19% +398.9% 7748 ?136% interrupts.CPU84.CAL:Function_call_interrupts
416538 +55.4% 647223 interrupts.CPU84.LOC:Local_timer_interrupts
1079 ? 3% +124.5% 2422 ?102% interrupts.CPU84.RES:Rescheduling_interrupts
1594 ? 17% +385.9% 7746 ?137% interrupts.CPU85.CAL:Function_call_interrupts
416624 +55.3% 647208 interrupts.CPU85.LOC:Local_timer_interrupts
1052 ? 2% +130.2% 2422 ?106% interrupts.CPU85.RES:Rescheduling_interrupts
1551 ? 19% +402.6% 7797 ?138% interrupts.CPU86.CAL:Function_call_interrupts
416566 +55.4% 647183 interrupts.CPU86.LOC:Local_timer_interrupts
1083 ? 3% +128.2% 2472 ?104% interrupts.CPU86.RES:Rescheduling_interrupts
1468 ? 9% +421.0% 7649 ?136% interrupts.CPU87.CAL:Function_call_interrupts
416596 +55.3% 647160 interrupts.CPU87.LOC:Local_timer_interrupts
1067 ? 4% +125.3% 2404 ?103% interrupts.CPU87.RES:Rescheduling_interrupts
1559 ? 20% +394.9% 7719 ?137% interrupts.CPU88.CAL:Function_call_interrupts
416507 +55.4% 647127 interrupts.CPU88.LOC:Local_timer_interrupts
1059 ? 2% +126.5% 2399 ?103% interrupts.CPU88.RES:Rescheduling_interrupts
1522 ? 21% +410.1% 7768 ?136% interrupts.CPU89.CAL:Function_call_interrupts
416610 +55.3% 647164 interrupts.CPU89.LOC:Local_timer_interrupts
1095 ? 3% +122.1% 2432 ?102% interrupts.CPU89.RES:Rescheduling_interrupts
416550 +55.4% 647335 interrupts.CPU9.LOC:Local_timer_interrupts
1088 ? 3% +128.2% 2482 ? 72% interrupts.CPU9.RES:Rescheduling_interrupts
1573 ? 18% +389.4% 7697 ?137% interrupts.CPU90.CAL:Function_call_interrupts
416545 +55.4% 647169 interrupts.CPU90.LOC:Local_timer_interrupts
1063 ? 4% +127.2% 2416 ?103% interrupts.CPU90.RES:Rescheduling_interrupts
1525 ? 21% +409.8% 7774 ?137% interrupts.CPU91.CAL:Function_call_interrupts
416600 +55.4% 647198 interrupts.CPU91.LOC:Local_timer_interrupts
1057 ? 5% +127.5% 2404 ?104% interrupts.CPU91.RES:Rescheduling_interrupts
1538 ? 20% +402.4% 7728 ?135% interrupts.CPU92.CAL:Function_call_interrupts
416603 +55.4% 647215 interrupts.CPU92.LOC:Local_timer_interrupts
1059 ? 3% +128.8% 2423 ?104% interrupts.CPU92.RES:Rescheduling_interrupts
1563 ? 19% +393.8% 7719 ?135% interrupts.CPU93.CAL:Function_call_interrupts
416547 +55.4% 647217 interrupts.CPU93.LOC:Local_timer_interrupts
1105 ? 4% +120.6% 2439 ?101% interrupts.CPU93.RES:Rescheduling_interrupts
1517 ? 20% +412.7% 7778 ?134% interrupts.CPU94.CAL:Function_call_interrupts
416529 +55.4% 647210 interrupts.CPU94.LOC:Local_timer_interrupts
1066 ? 5% +131.2% 2464 ?103% interrupts.CPU94.RES:Rescheduling_interrupts
1630 ? 19% +383.1% 7876 ?136% interrupts.CPU95.CAL:Function_call_interrupts
416491 +55.4% 647205 interrupts.CPU95.LOC:Local_timer_interrupts
1187 ? 2% +113.1% 2530 ? 97% interrupts.CPU95.RES:Rescheduling_interrupts
39984058 +55.4% 62130055 interrupts.LOC:Local_timer_interrupts
107229 ? 2% +121.6% 237657 ? 85% interrupts.RES:Rescheduling_interrupts
69.17 ? 62% +236.9% 233.00 ? 42% interrupts.TLB:TLB_shootdowns



aim7.jobs-per-min

95000 +-------------------------------------------------------------------+
| +. ++. + |
90000 |+.+++.+ + .+++.+ + +.++.++ :+. + .+ ++.++ .+++.+ .+ + .+|
85000 |-+ ++ + +.+ + + +.+ + ++ +.+ + |
| |
80000 |-+ |
75000 |-+ |
| |
70000 |-+ |
65000 |-+ |
| |
60000 |-+ |
55000 |O+OOO OO O O OOO O O OOO OO OOO O |
| O O O |
50000 +-------------------------------------------------------------------+


perf-sched.total_wait_time.average.ms

200 +---------------------------------------------------------------------+
180 |+.+ +. +.+ .+ .+ +. + +.+ +.+ .+ .+ + .+|
| +.++ + ++. + ++. + + + + +.++.+++.+ ++.+ ++ + + + + |
160 |-+ + + ++ + |
140 |-+ |
| |
120 |-+ O O |
100 |-+O OO OO OO O O O O OO OO |
80 |-+ |
| |
60 |-+ |
40 |-+ |
| |
20 |O+ O O O O O O O |
0 +---------------------------------------------------------------------+


perf-sched.total_wait_and_delay.count.ms

2.5e+06 +-----------------------------------------------------------------+
|O O O O |
| |
2e+06 |-+ |
| |
| |
1.5e+06 |-+ |
| |
1e+06 |-+ |
| |
| |
500000 |-+ |
| |
|+.+++.+++.+++.++++.+++.+++.+++.+++.+++.+++.+++.++++.+++.+++.+++.+|
0 +-----------------------------------------------------------------+


perf-sched.total_wait_and_delay.average.ms

200 +---------------------------------------------------------------------+
180 |+.+ +. +.+ .+ .+ +. +. +.+ +.+ .+ .+ + .+|
| +.++ + ++. + ++. + + + + +.++.+++.+ ++.+ ++ + + + + |
160 |-+ + + + + |
140 |-+ |
| |
120 |-+ O O O |
100 |-+O OO OO OO O O O O O OO |
80 |-+ |
| |
60 |-+ |
40 |-+ |
| |
20 |O+ O O O O O O O |
0 +---------------------------------------------------------------------+




45000 +-------------------------------------------------------------------+
| O |
40000 |O+ O O O |
35000 |-+ O O O |
| |
30000 |-+ |
25000 |-+ |
| |
20000 |-+ |
15000 |-+ O O O |
| O O O OO OO OO OO OO O |
10000 |-+ |
5000 |-+ + |
|+.+++.++.+++.+++.+++.+++.++.+++.+ +.+++.++.+++.+++.+++.+++.++.+++.+|
0 +-------------------------------------------------------------------+




5 +---------------------------------------------------------------------+
4.5 |O+ O O |
| O O O O O |
4 |-+ |
3.5 |-+ + |
| : |
3 |-+ : |
2.5 |-+ :: |
2 |-+ :: |
| :: |
1.5 |-+ : : |
1 |-+O OO OO OO O O O O OO OO : : |
| O O : : |
0.5 |-.+ : : .+ .+ .+|
0 +---------------------------------------------------------------------+




6000 +--------------------------------------------------------------------+
| |
5000 |-+ + |
| : + |
| : : |
4000 |-+ :: : |
| :: : |
3000 |-+ :: : |
| :: : |
2000 |-+ : : : : |
| : : : : |
| : : : : |
1000 |-+ : : : : |
| : : : : |
0 +--------------------------------------------------------------------+




25 +----------------------------------------------------------------------+
| + +. +. |
| :+ ++ .+ : + : + |
20 |-: + +. + + + .++. +++ : + : :.+ : :.+ + |
|:: ++.+++.+ + +. + +++ + + +. + +: + : + + .+ + +.+|
|+ + + + + + + + |
15 |-+ |
| |
10 |-+ |
| |
| |
5 |-+ OOO O O O O O |
| O O O O OO O O O |
|O O O O O O O O |
0 +----------------------------------------------------------------------+




14 +----------------------------------------------------------------------+
| + : :: + + + ++.+ + + + + + |
12 |-+ : :.+ .++. : : .+ + ::+ :+ : .++.+ +. + + + ++ .+ + + |
|+.+ + ++ ++ ++ + + + ++ + ++ ++ + + + |
10 |-+ +|
| |
8 |-+ |
| |
6 |-+O O O OO O O |
| O O O O O O O |
4 |-+ O O O |
| |
2 |-+ |
| |
0 +----------------------------------------------------------------------+




200 +---------------------------------------------------------------------+
180 |-+ + +. : +|
| + +.+ +. : ++ +. + .+: : ::|
160 |+. + :+. : + +.: +.+ ++ : +.++ +. +. ++ + : : |
140 |:++ + + + : : + : : +. : + : + + :+ |
| : + : + + : +.+ : + + |
120 |-+ :+ + :: :: |
100 |-+ + + + |
80 |-+ |
| |
60 |-+ |
40 |-+ O |
| O OO O O O O O |
20 |-+ OO O O OO OO |
0 +---------------------------------------------------------------------+


aim7.time.system_time

32000 +-------------------------------------------------------------------+
| O O O |
30000 |O+OO OO O O O O O OO |
28000 |-+ O OO O OO OO O O |
| |
26000 |-+ |
| |
24000 |-+ |
| |
22000 |-+ |
20000 |-+ |
| + + +. .+ +. +.+ +. + |
18000 |+.+++.++.+ +.+++.+ +.+ +.++.++ +++ + + ++.++ +++.+ +.++.+++.+|
| + |
16000 +-------------------------------------------------------------------+


aim7.time.elapsed_time

340 +---------------------------------------------------------------------+
|O OO O OO O OO O O |
320 |-+ O O OO OO O OO OOO O O |
300 |-+ |
| |
280 |-+ |
| |
260 |-+ |
| |
240 |-+ |
220 |-+ |
| +.+ .+ +.+ .+ .+ +.+ .+ .+ ++. |
200 |+.++.+++.+ ++.++. ++ +.+++.+ ++ + + +.+++ +.+++ +.+ ++.+|
| + |
180 +---------------------------------------------------------------------+


aim7.time.elapsed_time.max

340 +---------------------------------------------------------------------+
|O OO O OO O OO O O |
320 |-+ O O OO OO O OO OOO O O |
300 |-+ |
| |
280 |-+ |
| |
260 |-+ |
| |
240 |-+ |
220 |-+ |
| +.+ .+ +.+ .+ .+ +.+ .+ .+ ++. |
200 |+.++.+++.+ ++.++. ++ +.+++.+ ++ + + +.+++ +.+++ +.+ ++.+|
| + |
180 +---------------------------------------------------------------------+


aim7.time.involuntary_context_switches

5e+06 +-----------------------------------------------------------------+
|O O O O O O O |
4.5e+06 |-+ O |
4e+06 |-+ O O O O |
| O OO O O O OO OO OO O |
3.5e+06 |-+ |
| |
3e+06 |-+ |
| |
2.5e+06 |-+ |
2e+06 |-+ |
| |
1.5e+06 |+. .+ .++ +. +. ++.+ .+ +.+++.+ ++ +.+++.+++.+|
| +++ ++ +.++++.++ ++ + ++ + ++.+ +.++ |
1e+06 +-----------------------------------------------------------------+




160 +---------------------------------------------------------------------+
|+ + + |
140 |:+ + + :+ :+ + |
120 |:++ + :.+ :+ + : + : + + :: .+|
| : + + : : + +.++ + +. :+ : : : : .+ + : : + |
100 |-+ :.++ ++ : : : :+ +.+ : + +++.+ :+.+ :+ + + :: |
| + +: ++.+ + + + + + |
80 |-+ + |
| |
60 |-+ |
40 |-+ |
| |
20 |-+ O O O OO OO |
| O O O |
0 +---------------------------------------------------------------------+




3400 +--------------------------------------------------------------------+
| + |
3200 |-+ + + + : |
|+ : : : + + +: |
3000 |:+ :: : :: : : : : |
|:: : : : : : :: :: : : .+|
2800 |-: +: :+ :: + : : + + + : : : : + + : + :|
| : :+ + + :: : + : ::.+ : : : : : : ::+ : : |
2600 |-++ : ++ : : : : ::+ : : : : + + + + :+ : : |
| : : :: + :: : : : : :: : + : : : : : |
2400 |-+ : : :: +: :: + : : : + + : : : : : |
| + : : + :: +.+ + + + : + : + |
2200 |-+ +: + :: +: +: |
| + : + + |
2000 +--------------------------------------------------------------------+




40 +----------------------------------------------------------------------+
|+ + + |
35 |:+ : : |
|: : : |
|:: : : |
30 |:: + :: : + |
|:: : :: : : |
25 |:: + :: : : :: :: |
| : : + :: : : : : :: |
20 |-: : : : : : : : : : :: |
| : + : :: + + : : : : : : + : : +|
| :+. : : .+ : :+.+ :+ :+.: :+: :.+ : :.+ .+ ::: + : |
15 |-++ + + ++.+ +. + + :: : : + + ::: + +.: + + + :: :: |
| :+ + + + +.+ + + + + + |
10 +----------------------------------------------------------------------+




3200 +--------------------------------------------------------------------+
| : |
3000 |-+ + + + +: |
|+ : : : + + : : |
2800 |:: :: : :: : :: : : |
|:: + : :+ :: + : : + + : : :: + + : .+|
2600 |-+: :+ ::: :: : : : ::.+ : + : : : : ::+ : + |
| + : + : + :: : + : :+ : : : : + : + :+ : : |
2400 |-+: : +: : + :: : : : : :: : ++ : + : : :: |
| : : :: +: :: + : : :: + + : : : : : |
2200 |-+ + : :: + :: +.+ : + + : + : + |
| +: : :: + +: +: |
2000 |-+ + + : + + |
| + |
1800 +--------------------------------------------------------------------+




5500 +--------------------------------------------------------------------+
5000 |-+ + |
| : + |
4500 |-+ : : |
4000 |-+ : : |
3500 |-+ :: : |
3000 |-+ :: : |
| :: : |
2500 |-+ : : :: |
2000 |-+ : : : : |
1500 |-+ : : : : |
1000 |-+ : : : : |
| : : : : |
500 |-+ : : : : |
0 +--------------------------------------------------------------------+




24 +----------------------------------------------------------------------+
| : :: :: |
23 |-+:: :: :: |
22 |-+:+ + : + : + |
| : : :+ .+ : : : : |
21 |-: : : : +++ : + : : + : : + |
| : : + : : + : : : : : : : : : |
20 |-: : :+ + : : : +. : + : :: :::: :: : + + |
| : : +.+ + + + :+ : + : : + :: :::: :: + :: :: |
19 |:: ++. + : : + + : : : : :: + : + : + :: : |
18 |:+ + :: : + :: : :: + +|
|: + + : : :: |
17 |:+ : : + |
|+ + + |
16 +----------------------------------------------------------------------+


[*] bisect-good sample
[O] bisect-bad sample



Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.


---
0DAY/LKP+ Test Infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation

Thanks,
Oliver Sang


Attachments:
(No filename) (109.44 kB)
config-5.16.0-rc1-00005-ge029ce24606a (169.41 kB)
job-script (8.18 kB)
job.yaml (5.44 kB)
reproduce (693.00 B)
Download all attachments

2021-12-02 02:39:46

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> DIO preallocates physical blocks before writing data, but if an error occurs
> or power-cut happens, we can see block contents from the disk. This patch tries
> to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> unwritten blocks from error or power-cut.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 5 ++++-
> fs/f2fs/f2fs.h | 5 +++++
> fs/f2fs/file.c | 24 +++++++++++++++++++++++-
> 3 files changed, 32 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 3b27fb7daa8b..7ac1a39fcad2 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> flag != F2FS_GET_BLOCK_DIO);
> err = __allocate_data_block(&dn,
> map->m_seg_type);
> - if (!err)
> + if (!err) {
> + if (flag == F2FS_GET_BLOCK_PRE_DIO)
> + file_need_truncate(inode);
> set_inode_flag(inode, FI_APPEND_WRITE);
> + }
> }
> if (err)
> goto sync_out;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index be871a79c634..14bea669f87e 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -654,6 +654,7 @@ enum {
> #define FADVISE_KEEP_SIZE_BIT 0x10
> #define FADVISE_HOT_BIT 0x20
> #define FADVISE_VERITY_BIT 0x40
> +#define FADVISE_TRUNC_BIT 0x80
>
> #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
>
> @@ -681,6 +682,10 @@ enum {
> #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
> #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
>
> +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
> +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
> +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
> +
> #define DEF_DIR_LEVEL 0
>
> enum {
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 4bf77a5bf998..ec8de0662437 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
>
> + /*
> + * Truncate stale preallocated blocks used by the previous DIO.
> + */
> + if (file_should_truncate(inode)) {
> + err = f2fs_truncate(inode);
> + if (err)
> + goto out_unlock;
> + file_dont_truncate(inode);
> + }
> +
> truncate_setsize(inode, attr->ia_size);
>
> if (attr->ia_size <= old_size)
> err = f2fs_truncate(inode);
> +out_unlock:
> /*
> * do not trim all blocks after i_size if target size is
> * larger than i_size.
> @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> /* If it will be an out-of-place direct write, don't bother. */
> if (dio && f2fs_lfs_mode(sbi))
> return 0;
> + /*
> + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
> + * buffered IO, if DIO meets any holes.
> + */
> + if (dio && i_size_read(inode) &&
> + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
> + return 0;
>
> /* No-wait I/O can't allocate blocks. */
> if (iocb->ki_flags & IOCB_NOWAIT)
> @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> if (preallocated > 0 && i_size_read(inode) < target_size) {
> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> filemap_invalidate_lock(inode->i_mapping);
> - f2fs_truncate(inode);
> + if (!f2fs_truncate(inode))
> + file_dont_truncate(inode);
> filemap_invalidate_unlock(inode->i_mapping);
> up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> + } else {
> + file_dont_truncate(inode);

How about this case:

- touch file
- DIO write [0, 8kb] to file
- preallocate 2 physical blocks
- set FADVISE_TRUNC_BIT
- SPO (sudden power-off)
- BUFIO write [0, 4kb] to file
- file_dont_truncate -- it leaks unwritten [4kb, 8kb] to user after
truncating file to 8kb

Thanks,

> }
> +
> clear_inode_flag(inode, FI_PREALLOCATED_ALL);
>
> if (ret > 0)
>

2021-12-02 02:58:49

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 3/6] f2fs: reduce indentation in f2fs_file_write_iter()

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> From: Eric Biggers <[email protected]>
>
> Replace 'if (ret > 0)' with 'if (ret <= 0) goto out_unlock;'.
> No change in behavior.
>
> Signed-off-by: Eric Biggers <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2021-12-02 02:59:31

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/6] f2fs: fix the f2fs_file_write_iter tracepoint

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> From: Eric Biggers <[email protected]>
>
> Pass in the original position and count rather than the position and
> count that were updated by the write. Also use the correct types for
> all arguments, in particular the file offset which was being truncated
> to 32 bits on 32-bit platforms.
>
> Signed-off-by: Eric Biggers <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

Thanks,

2021-12-02 03:10:49

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> From: Eric Biggers <[email protected]>
>
> Implement 'struct iomap_ops' for f2fs, in preparation for making f2fs
> use iomap for direct I/O.
>
> Note that this may be used for other things besides direct I/O in the
> future; however, for now I've only tested it for direct I/O.
>
> Signed-off-by: Eric Biggers <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/Kconfig | 1 +
> fs/f2fs/data.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++
> fs/f2fs/f2fs.h | 1 +
> 3 files changed, 60 insertions(+)
>
> diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
> index 7eea3cfd894d..f46a7339d6cf 100644
> --- a/fs/f2fs/Kconfig
> +++ b/fs/f2fs/Kconfig
> @@ -7,6 +7,7 @@ config F2FS_FS
> select CRYPTO_CRC32
> select F2FS_FS_XATTR if FS_ENCRYPTION
> select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
> + select FS_IOMAP
> select LZ4_COMPRESS if F2FS_FS_LZ4
> select LZ4_DECOMPRESS if F2FS_FS_LZ4
> select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 7ac1a39fcad2..43b3ca7cabe0 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -21,6 +21,7 @@
> #include <linux/cleancache.h>
> #include <linux/sched/signal.h>
> #include <linux/fiemap.h>
> +#include <linux/iomap.h>
>
> #include "f2fs.h"
> #include "node.h"
> @@ -4236,3 +4237,60 @@ void f2fs_destroy_bio_entry_cache(void)
> {
> kmem_cache_destroy(bio_entry_slab);
> }
> +
> +static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> + unsigned int flags, struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + struct f2fs_map_blocks map = {};
> + pgoff_t next_pgofs = 0;
> + int err;
> +
> + map.m_lblk = bytes_to_blks(inode, offset);
> + map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
> + map.m_next_pgofs = &next_pgofs;
> + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
> + if (flags & IOMAP_WRITE)
> + map.m_may_create = true;
> +
> + err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
> + F2FS_GET_BLOCK_DIO);
> + if (err)
> + return err;
> +
> + iomap->offset = blks_to_bytes(inode, map.m_lblk);
> +
> + if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
> + iomap->length = blks_to_bytes(inode, map.m_len);
> + if (map.m_flags & F2FS_MAP_MAPPED) {
> + iomap->type = IOMAP_MAPPED;
> + iomap->flags |= IOMAP_F_MERGED;
> + } else {
> + iomap->type = IOMAP_UNWRITTEN;
> + }
> + if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> + return -EINVAL;
> + iomap->addr = blks_to_bytes(inode, map.m_pblk);
> +
> + if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> + return -EINVAL;

Why not relocate this check before f2fs_map_blocks()?

Thanks,

> + iomap->bdev = inode->i_sb->s_bdev;
> + } else {
> + iomap->length = blks_to_bytes(inode, next_pgofs) -
> + iomap->offset;
> + iomap->type = IOMAP_HOLE;
> + iomap->addr = IOMAP_NULL_ADDR;
> + }
> +
> + if (map.m_flags & F2FS_MAP_NEW)
> + iomap->flags |= IOMAP_F_NEW;
> + if ((inode->i_state & I_DIRTY_DATASYNC) ||
> + offset + length > i_size_read(inode))
> + iomap->flags |= IOMAP_F_DIRTY;
> +
> + return 0;
> +}
> +
> +const struct iomap_ops f2fs_iomap_ops = {
> + .iomap_begin = f2fs_iomap_begin,
> +};
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 14bea669f87e..0d199e8f2c1d 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -3654,6 +3654,7 @@ int f2fs_init_post_read_processing(void);
> void f2fs_destroy_post_read_processing(void);
> int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
> +extern const struct iomap_ops f2fs_iomap_ops;
>
> /*
> * gc.c
>

2021-12-02 04:16:25

by Eric Biggers

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > +static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > + unsigned int flags, struct iomap *iomap,
> > + struct iomap *srcmap)
> > +{
> > + struct f2fs_map_blocks map = {};
> > + pgoff_t next_pgofs = 0;
> > + int err;
> > +
> > + map.m_lblk = bytes_to_blks(inode, offset);
> > + map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
> > + map.m_next_pgofs = &next_pgofs;
> > + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
> > + if (flags & IOMAP_WRITE)
> > + map.m_may_create = true;
> > +
> > + err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
> > + F2FS_GET_BLOCK_DIO);
> > + if (err)
> > + return err;
> > +
> > + iomap->offset = blks_to_bytes(inode, map.m_lblk);
> > +
> > + if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
> > + iomap->length = blks_to_bytes(inode, map.m_len);
> > + if (map.m_flags & F2FS_MAP_MAPPED) {
> > + iomap->type = IOMAP_MAPPED;
> > + iomap->flags |= IOMAP_F_MERGED;
> > + } else {
> > + iomap->type = IOMAP_UNWRITTEN;
> > + }
> > + if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> > + return -EINVAL;
> > + iomap->addr = blks_to_bytes(inode, map.m_pblk);
> > +
> > + if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> > + return -EINVAL;
>
> Why not relocate this check before f2fs_map_blocks()?
>
> Thanks,
>
> > + iomap->bdev = inode->i_sb->s_bdev;

Are you talking about the check for !f2fs_is_multi_device()? It could go in
either location, but I think it makes the most sense to have it right before the
line that uses 'inode->i_sb->s_bdev', since that is the place which makes the
assumption that the filesystem has only one block device.

- Eric

2021-12-02 14:04:21

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 2021/12/2 12:15, Eric Biggers wrote:
> On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
>> Why not relocate this check before f2fs_map_blocks()?

Wait, f2fs supports DIO on multi-device images since commit 71f2c8206202
("f2fs: multidevice: support direct IO"). How about checking with
f2fs_allow_multi_device_dio() instead?

Thanks,

>>
>> Thanks,
>>
>>> + iomap->bdev = inode->i_sb->s_bdev;
>
> Are you talking about the check for !f2fs_is_multi_device()? It could go in
> either location, but I think it makes the most sense to have it right before the
> line that uses 'inode->i_sb->s_bdev', since that is the place which makes the
> assumption that the filesystem has only one block device.
>
> - Eric
>

2021-12-02 18:13:14

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 12/02, Chao Yu wrote:
> On 2021/11/17 5:45, Jaegeuk Kim wrote:
> > DIO preallocates physical blocks before writing data, but if an error occurs
> > or power-cut happens, we can see block contents from the disk. This patch tries
> > to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> > unwritten blocks from error or power-cut.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/data.c | 5 ++++-
> > fs/f2fs/f2fs.h | 5 +++++
> > fs/f2fs/file.c | 24 +++++++++++++++++++++++-
> > 3 files changed, 32 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 3b27fb7daa8b..7ac1a39fcad2 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> > flag != F2FS_GET_BLOCK_DIO);
> > err = __allocate_data_block(&dn,
> > map->m_seg_type);
> > - if (!err)
> > + if (!err) {
> > + if (flag == F2FS_GET_BLOCK_PRE_DIO)
> > + file_need_truncate(inode);
> > set_inode_flag(inode, FI_APPEND_WRITE);
> > + }
> > }
> > if (err)
> > goto sync_out;
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index be871a79c634..14bea669f87e 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -654,6 +654,7 @@ enum {
> > #define FADVISE_KEEP_SIZE_BIT 0x10
> > #define FADVISE_HOT_BIT 0x20
> > #define FADVISE_VERITY_BIT 0x40
> > +#define FADVISE_TRUNC_BIT 0x80
> > #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
> > @@ -681,6 +682,10 @@ enum {
> > #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
> > #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
> > +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
> > +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
> > +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
> > +
> > #define DEF_DIR_LEVEL 0
> > enum {
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 4bf77a5bf998..ec8de0662437 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > filemap_invalidate_lock(inode->i_mapping);
> > + /*
> > + * Truncate stale preallocated blocks used by the previous DIO.
> > + */
> > + if (file_should_truncate(inode)) {
> > + err = f2fs_truncate(inode);
> > + if (err)
> > + goto out_unlock;
> > + file_dont_truncate(inode);
> > + }
> > +
> > truncate_setsize(inode, attr->ia_size);
> > if (attr->ia_size <= old_size)
> > err = f2fs_truncate(inode);
> > +out_unlock:
> > /*
> > * do not trim all blocks after i_size if target size is
> > * larger than i_size.
> > @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> > /* If it will be an out-of-place direct write, don't bother. */
> > if (dio && f2fs_lfs_mode(sbi))
> > return 0;
> > + /*
> > + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
> > + * buffered IO, if DIO meets any holes.
> > + */
> > + if (dio && i_size_read(inode) &&
> > + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
> > + return 0;
> > /* No-wait I/O can't allocate blocks. */
> > if (iocb->ki_flags & IOCB_NOWAIT)
> > @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > if (preallocated > 0 && i_size_read(inode) < target_size) {
> > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > filemap_invalidate_lock(inode->i_mapping);
> > - f2fs_truncate(inode);
> > + if (!f2fs_truncate(inode))
> > + file_dont_truncate(inode);
> > filemap_invalidate_unlock(inode->i_mapping);
> > up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > + } else {
> > + file_dont_truncate(inode);
>
> How about this case:
>
> - touch file
> - DIO write [0, 8kb] to file
> - preallocate 2 physical blocks
> - set FADVISE_TRUNC_BIT
> - SPO
> - BUFIO write [0, 4kb] to file
> - file_dont_truncate -- it leaks unwritten [4kb, 8kb] to user after
> truncating file to 8kb

i_size should be 4kb, no?

>
> Thanks,
>
> > }
> > +
> > clear_inode_flag(inode, FI_PREALLOCATED_ALL);
> > if (ret > 0)
> >

2021-12-02 18:25:56

by Eric Biggers

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> On 2021/12/2 12:15, Eric Biggers wrote:
> > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > Why not relocating this check before f2fs_map_blocks()?
>
> Wait, it supports DIO in multi-device image after commit 71f2c8206202
> ("f2fs: multidevice: support direct IO"), how about
> checking with f2fs_allow_multi_device_dio()?
>
> Thanks,
>

Okay, that was not the case when I sent this patch originally. We'll need to
update this to support multiple devices.

- Eric

2021-12-02 18:58:00

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 12/02, Eric Biggers wrote:
> On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > On 2021/12/2 12:15, Eric Biggers wrote:
> > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > Why not relocating this check before f2fs_map_blocks()?
> >
> > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > ("f2fs: multidevice: support direct IO"), how about
> > checking with f2fs_allow_multi_device_dio()?
> >
> > Thanks,
> >
>
> Okay, that was not the case when I sent this patch originally. We'll need to
> update this to support multiple devices.

Chao/Eric, does this make sense?

--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
}
if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
return -EINVAL;
- iomap->addr = blks_to_bytes(inode, map.m_pblk);

- if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
- return -EINVAL;
- iomap->bdev = inode->i_sb->s_bdev;
+ iomap->bdev = map->m_multidev_dio ? map.m_bdev :
+ inode->i_sb->s_bdev;
+ iomap->addr = blks_to_bytes(inode, map.m_pblk);
} else {
iomap->length = blks_to_bytes(inode, next_pgofs) -
iomap->offset;

>
> - Eric

2021-12-02 19:00:54

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 12/02, Jaegeuk Kim wrote:
> On 12/02, Eric Biggers wrote:
> > On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > > On 2021/12/2 12:15, Eric Biggers wrote:
> > > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > > Why not relocating this check before f2fs_map_blocks()?
> > >
> > > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > > ("f2fs: multidevice: support direct IO"), how about
> > > checking with f2fs_allow_multi_device_dio()?
> > >
> > > Thanks,
> > >
> >
> > Okay, that was not the case when I sent this patch originally. We'll need to
> > update this to support multiple devices.
>
> Chao/Eric, does this make sense?
>
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> }
> if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> return -EINVAL;
> - iomap->addr = blks_to_bytes(inode, map.m_pblk);
>
> - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> - return -EINVAL;
> - iomap->bdev = inode->i_sb->s_bdev;
> + iomap->bdev = map->m_multidev_dio ? map.m_bdev :

correction: map.m_multidev_dio

> + inode->i_sb->s_bdev;
> + iomap->addr = blks_to_bytes(inode, map.m_pblk);
> } else {
> iomap->length = blks_to_bytes(inode, next_pgofs) -
> iomap->offset;
>
> >
> > - Eric
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2021-12-02 20:58:13

by Eric Biggers

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
> On 12/02, Jaegeuk Kim wrote:
> > On 12/02, Eric Biggers wrote:
> > > On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > > > On 2021/12/2 12:15, Eric Biggers wrote:
> > > > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > > > Why not relocating this check before f2fs_map_blocks()?
> > > >
> > > > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > > > ("f2fs: multidevice: support direct IO"), how about
> > > > checking with f2fs_allow_multi_device_dio()?
> > > >
> > > > Thanks,
> > > >
> > >
> > > Okay, that was not the case when I sent this patch originally. We'll need to
> > > update this to support multiple devices.
> >
> > Chao/Eric, does this make sense?
> >
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > }
> > if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> > return -EINVAL;
> > - iomap->addr = blks_to_bytes(inode, map.m_pblk);
> >
> > - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> > - return -EINVAL;
> > - iomap->bdev = inode->i_sb->s_bdev;
> > + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
>
> correction: map.m_multidev_dio
>

I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
correct block device? What is the point of m_multidev_dio?

- Eric

2021-12-02 21:19:53

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 12/02, Eric Biggers wrote:
> On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
> > On 12/02, Jaegeuk Kim wrote:
> > > On 12/02, Eric Biggers wrote:
> > > > On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > > > > On 2021/12/2 12:15, Eric Biggers wrote:
> > > > > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > > > > Why not relocating this check before f2fs_map_blocks()?
> > > > >
> > > > > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > > > > ("f2fs: multidevice: support direct IO"), how about
> > > > > checking with f2fs_allow_multi_device_dio()?
> > > > >
> > > > > Thanks,
> > > > >
> > > >
> > > > Okay, that was not the case when I sent this patch originally. We'll need to
> > > > update this to support multiple devices.
> > >
> > > Chao/Eric, does this make sense?
> > >
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > > }
> > > if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> > > return -EINVAL;
> > > - iomap->addr = blks_to_bytes(inode, map.m_pblk);
> > >
> > > - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> > > - return -EINVAL;
> > > - iomap->bdev = inode->i_sb->s_bdev;
> > > + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
> >
> > correction: map.m_multidev_dio
> >
>
> I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
> correct block device? What is the point of m_multidev_dio?

It seems we can simply assign iomap->bdev = map.m_bdev, and remove
map->m_multidev_dio.

>
> - Eric

2021-12-02 21:28:43

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 12/02, Jaegeuk Kim wrote:
> On 12/02, Eric Biggers wrote:
> > On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
> > > On 12/02, Jaegeuk Kim wrote:
> > > > On 12/02, Eric Biggers wrote:
> > > > > On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > > > > > On 2021/12/2 12:15, Eric Biggers wrote:
> > > > > > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > > > > > Why not relocating this check before f2fs_map_blocks()?
> > > > > >
> > > > > > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > > > > > ("f2fs: multidevice: support direct IO"), how about
> > > > > > checking with f2fs_allow_multi_device_dio()?
> > > > > >
> > > > > > Thanks,
> > > > > >
> > > > >
> > > > > Okay, that was not the case when I sent this patch originally. We'll need to
> > > > > update this to support multiple devices.
> > > >
> > > > Chao/Eric, does this make sense?
> > > >
> > > > --- a/fs/f2fs/data.c
> > > > +++ b/fs/f2fs/data.c
> > > > @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > > > }
> > > > if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> > > > return -EINVAL;
> > > > - iomap->addr = blks_to_bytes(inode, map.m_pblk);
> > > >
> > > > - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> > > > - return -EINVAL;
> > > > - iomap->bdev = inode->i_sb->s_bdev;
> > > > + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
> > >
> > > correction: map.m_multidev_dio
> > >
> >
> > I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
> > correct block device? What is the point of m_multidev_dio?
>
> It seems we can simply assign iomap->bdev = map.m_bdev, and remove
> map->m_multidev_dio.

Ok, it was used in the previous get_block flow, but I think it's worth keeping
so that it is shown by the f2fs_map_blocks tracepoint.

>
> >
> > - Eric
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2021-12-03 06:43:15

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 2021/12/3 2:13, Jaegeuk Kim wrote:
> On 12/02, Chao Yu wrote:
>> On 2021/11/17 5:45, Jaegeuk Kim wrote:
>>> DIO preallocates physical blocks before writing data, but if an error occurrs
>>> or power-cut happens, we can see block contents from the disk. This patch tries
>>> to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
>>> unwritten blocks from error or power-cut.
>>>
>>> Signed-off-by: Jaegeuk Kim <[email protected]>
>>> ---
>>> fs/f2fs/data.c | 5 ++++-
>>> fs/f2fs/f2fs.h | 5 +++++
>>> fs/f2fs/file.c | 24 +++++++++++++++++++++++-
>>> 3 files changed, 32 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index 3b27fb7daa8b..7ac1a39fcad2 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>>> flag != F2FS_GET_BLOCK_DIO);
>>> err = __allocate_data_block(&dn,
>>> map->m_seg_type);
>>> - if (!err)
>>> + if (!err) {
>>> + if (flag == F2FS_GET_BLOCK_PRE_DIO)
>>> + file_need_truncate(inode);
>>> set_inode_flag(inode, FI_APPEND_WRITE);
>>> + }
>>> }
>>> if (err)
>>> goto sync_out;
>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>> index be871a79c634..14bea669f87e 100644
>>> --- a/fs/f2fs/f2fs.h
>>> +++ b/fs/f2fs/f2fs.h
>>> @@ -654,6 +654,7 @@ enum {
>>> #define FADVISE_KEEP_SIZE_BIT 0x10
>>> #define FADVISE_HOT_BIT 0x20
>>> #define FADVISE_VERITY_BIT 0x40
>>> +#define FADVISE_TRUNC_BIT 0x80
>>> #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
>>> @@ -681,6 +682,10 @@ enum {
>>> #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
>>> #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
>>> +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
>>> +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
>>> +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
>>> +
>>> #define DEF_DIR_LEVEL 0
>>> enum {
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 4bf77a5bf998..ec8de0662437 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
>>> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>> filemap_invalidate_lock(inode->i_mapping);
>>> + /*
>>> + * Truncate stale preallocated blocks used by the previous DIO.
>>> + */
>>> + if (file_should_truncate(inode)) {
>>> + err = f2fs_truncate(inode);
>>> + if (err)
>>> + goto out_unlock;
>>> + file_dont_truncate(inode);
>>> + }
>>> +
>>> truncate_setsize(inode, attr->ia_size);
>>> if (attr->ia_size <= old_size)
>>> err = f2fs_truncate(inode);
>>> +out_unlock:
>>> /*
>>> * do not trim all blocks after i_size if target size is
>>> * larger than i_size.
>>> @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
>>> /* If it will be an out-of-place direct write, don't bother. */
>>> if (dio && f2fs_lfs_mode(sbi))
>>> return 0;
>>> + /*
>>> + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
>>> + * buffered IO, if DIO meets any holes.
>>> + */
>>> + if (dio && i_size_read(inode) &&
>>> + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
>>> + return 0;
>>> /* No-wait I/O can't allocate blocks. */
>>> if (iocb->ki_flags & IOCB_NOWAIT)
>>> @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>> if (preallocated > 0 && i_size_read(inode) < target_size) {
>>> down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>> filemap_invalidate_lock(inode->i_mapping);
>>> - f2fs_truncate(inode);
>>> + if (!f2fs_truncate(inode))
>>> + file_dont_truncate(inode);
>>> filemap_invalidate_unlock(inode->i_mapping);
>>> up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>>> + } else {
>>> + file_dont_truncate(inode);
>>
>> How about this case:
>>
>> - touch file
>> - DIO write [0, 8kb] to file
>> - preallocate 2 physical blocks
>> - set FADVISE_TRUNC_BIT
>> - SPO
>> - BUFIO write [0, 4kb] to file
>> - file_dont_truncate -- it leaks unwritten [4kb, 8kb] to user after
>> truncating file to 8kb
>
> i_size should be 4kb, no?

- BUFIO write [0, 4kb] to file
- file_dont_truncate()
i_size should be 4kb, w/ FADVISE_TRUNC_BIT

- setattr 8kb
i_size should be 8kb, last [4kb, 8kb] is unwritten

Thanks,

>
>>
>> Thanks,
>>
>>> }
>>> +
>>> clear_inode_flag(inode, FI_PREALLOCATED_ALL);
>>> if (ret > 0)
>>>

2021-12-03 06:51:24

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 2021/12/3 5:28, Jaegeuk Kim wrote:
> On 12/02, Jaegeuk Kim wrote:
>> On 12/02, Eric Biggers wrote:
>>> On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
>>>> On 12/02, Jaegeuk Kim wrote:
>>>>> On 12/02, Eric Biggers wrote:
>>>>>> On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
>>>>>>> On 2021/12/2 12:15, Eric Biggers wrote:
>>>>>>>> On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
>>>>>>>>> Why not relocating this check before f2fs_map_blocks()?
>>>>>>>
>>>>>>> Wait, it supports DIO in multi-device image after commit 71f2c8206202
>>>>>>> ("f2fs: multidevice: support direct IO"), how about
>>>>>>> checking with f2fs_allow_multi_device_dio()?
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>
>>>>>> Okay, that was not the case when I sent this patch originally. We'll need to
>>>>>> update this to support multiple devices.
>>>>>
>>>>> Chao/Eric, does this make sense?
>>>>>
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>>>>> }
>>>>> if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
>>>>> return -EINVAL;
>>>>> - iomap->addr = blks_to_bytes(inode, map.m_pblk);
>>>>>
>>>>> - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
>>>>> - return -EINVAL;
>>>>> - iomap->bdev = inode->i_sb->s_bdev;
>>>>> + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
>>>>
>>>> correction: map.m_multidev_dio
>>>>
>>>
>>> I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
>>> correct block device? What is the point of m_multidev_dio?
>>
>> It seems we can simply assign iomap->bdev = map.m_bdev, and remove
>> map->m_multidev_dio.
>
> Ok, it was used in previous get_block flow, but I think it'd be worth to keep it
> to show by f2fs_map_blocks tracepoint.

Yes, it was added for that.

Thanks,

>
>>
>>>
>>> - Eric
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>

2021-12-03 22:54:04

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6] f2fs: do not expose unwritten blocks to user by DIO

On 12/03, Chao Yu wrote:
> On 2021/12/3 2:13, Jaegeuk Kim wrote:
> > On 12/02, Chao Yu wrote:
> > > On 2021/11/17 5:45, Jaegeuk Kim wrote:
> > > > DIO preallocates physical blocks before writing data, but if an error occurrs
> > > > or power-cut happens, we can see block contents from the disk. This patch tries
> > > > to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
> > > > unwritten blocks from error or power-cut.
> > > >
> > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > ---
> > > > fs/f2fs/data.c | 5 ++++-
> > > > fs/f2fs/f2fs.h | 5 +++++
> > > > fs/f2fs/file.c | 24 +++++++++++++++++++++++-
> > > > 3 files changed, 32 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > index 3b27fb7daa8b..7ac1a39fcad2 100644
> > > > --- a/fs/f2fs/data.c
> > > > +++ b/fs/f2fs/data.c
> > > > @@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
> > > > flag != F2FS_GET_BLOCK_DIO);
> > > > err = __allocate_data_block(&dn,
> > > > map->m_seg_type);
> > > > - if (!err)
> > > > + if (!err) {
> > > > + if (flag == F2FS_GET_BLOCK_PRE_DIO)
> > > > + file_need_truncate(inode);
> > > > set_inode_flag(inode, FI_APPEND_WRITE);
> > > > + }
> > > > }
> > > > if (err)
> > > > goto sync_out;
> > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > index be871a79c634..14bea669f87e 100644
> > > > --- a/fs/f2fs/f2fs.h
> > > > +++ b/fs/f2fs/f2fs.h
> > > > @@ -654,6 +654,7 @@ enum {
> > > > #define FADVISE_KEEP_SIZE_BIT 0x10
> > > > #define FADVISE_HOT_BIT 0x20
> > > > #define FADVISE_VERITY_BIT 0x40
> > > > +#define FADVISE_TRUNC_BIT 0x80
> > > > #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
> > > > @@ -681,6 +682,10 @@ enum {
> > > > #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
> > > > #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
> > > > +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
> > > > +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
> > > > +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
> > > > +
> > > > #define DEF_DIR_LEVEL 0
> > > > enum {
> > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > index 4bf77a5bf998..ec8de0662437 100644
> > > > --- a/fs/f2fs/file.c
> > > > +++ b/fs/f2fs/file.c
> > > > @@ -960,10 +960,21 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
> > > > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > > > filemap_invalidate_lock(inode->i_mapping);
> > > > + /*
> > > > + * Truncate stale preallocated blocks used by the previous DIO.
> > > > + */
> > > > + if (file_should_truncate(inode)) {
> > > > + err = f2fs_truncate(inode);
> > > > + if (err)
> > > > + goto out_unlock;
> > > > + file_dont_truncate(inode);
> > > > + }
> > > > +
> > > > truncate_setsize(inode, attr->ia_size);
> > > > if (attr->ia_size <= old_size)
> > > > err = f2fs_truncate(inode);
> > > > +out_unlock:
> > > > /*
> > > > * do not trim all blocks after i_size if target size is
> > > > * larger than i_size.
> > > > @@ -4257,6 +4268,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> > > > /* If it will be an out-of-place direct write, don't bother. */
> > > > if (dio && f2fs_lfs_mode(sbi))
> > > > return 0;
> > > > + /*
> > > > + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
> > > > + * buffered IO, if DIO meets any holes.
> > > > + */
> > > > + if (dio && i_size_read(inode) &&
> > > > + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
> > > > + return 0;
> > > > /* No-wait I/O can't allocate blocks. */
> > > > if (iocb->ki_flags & IOCB_NOWAIT)
> > > > @@ -4366,10 +4384,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > > > if (preallocated > 0 && i_size_read(inode) < target_size) {
> > > > down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > > > filemap_invalidate_lock(inode->i_mapping);
> > > > - f2fs_truncate(inode);
> > > > + if (!f2fs_truncate(inode))
> > > > + file_dont_truncate(inode);
> > > > filemap_invalidate_unlock(inode->i_mapping);
> > > > up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
> > > > + } else {
> > > > + file_dont_truncate(inode);
> > >
> > > How about this case:
> > >
> > > - touch file
> > > - DIO write [0, 8kb] to file
> > > - preallocate 2 physical blocks
> > > - set FADVISE_TRUNC_BIT
> > > - SPO
> > > - BUFIO write [0, 4kb] to file
> > > - file_dont_truncate -- it leaks unwritten [4kb, 8kb] to user after
> > > truncating file to 8kb
> >
> > i_size should be 4kb, no?
>
> - BUFIO write [0, 4kb] to file
> - file_dont_truncate()
> i_size should be 4kb, w/ FADVISE_TRUNC_BIT
>
> - setattr 8kb
> i_size should be 8kb, last [4kb, 8kb] is unwritten

How about this, which truncates it in f2fs_iget() instead of f2fs_setattr()?

DIO preallocates physical blocks before writing data, but if an error occurs
or power-cut happens, we can see block contents from the disk. This patch tries
to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
unwritten blocks from error or power-cut.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 5 ++++-
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/file.c | 14 +++++++++++++-
fs/f2fs/inode.c | 8 ++++++++
4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3db0f3049b90..9c867de1ec29 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6f196621f772..d7435fcb9658 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -654,6 +654,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80

#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)

@@ -681,6 +682,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)

+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0

enum {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index fc87d0f5b82b..689720b612f7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1687,6 +1687,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);

up_write(&sbi->pin_sem);

@@ -4257,6 +4258,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
/* If it will be an out-of-place direct write, don't bother. */
if (dio && f2fs_lfs_mode(sbi))
return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;

/* No-wait I/O can't allocate blocks. */
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -4367,10 +4375,14 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (preallocated > 0 && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}
+
clear_inode_flag(inode, FI_PREALLOCATED_ALL);

if (ret > 0)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f8b2df3e1e0..6998eb1d6bdb 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -544,6 +544,14 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
--
2.34.1.400.ga245620fadb-goog


2021-12-04 19:01:45

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 2/6 v2] f2fs: do not expose unwritten blocks to user by DIO

DIO preallocates physical blocks before writing data, but if an error occurs
or power-cut happens, we can see block contents from the disk. This patch tries
to fix it by 1) turning to buffered writes for DIO into holes, 2) truncating
unwritten blocks from error or power-cut.

Signed-off-by: Jaegeuk Kim <[email protected]>
---

Change log from v1:
- fix quota xfstests failure
- cover missing case

fs/f2fs/data.c | 5 ++++-
fs/f2fs/f2fs.h | 5 +++++
fs/f2fs/file.c | 27 ++++++++++++++++++---------
fs/f2fs/inode.c | 8 ++++++++
4 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3db0f3049b90..9c867de1ec29 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1543,8 +1543,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
flag != F2FS_GET_BLOCK_DIO);
err = __allocate_data_block(&dn,
map->m_seg_type);
- if (!err)
+ if (!err) {
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ file_need_truncate(inode);
set_inode_flag(inode, FI_APPEND_WRITE);
+ }
}
if (err)
goto sync_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6f196621f772..d7435fcb9658 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -654,6 +654,7 @@ enum {
#define FADVISE_KEEP_SIZE_BIT 0x10
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40
+#define FADVISE_TRUNC_BIT 0x80

#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)

@@ -681,6 +682,10 @@ enum {
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)

+#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT)
+#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT)
+#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT)
+
#define DEF_DIR_LEVEL 0

enum {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 808a7c24d993..e1445cf915ea 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1687,6 +1687,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,

map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+ file_dont_truncate(inode);

up_write(&sbi->pin_sem);

@@ -4257,6 +4258,13 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
/* If it will be an out-of-place direct write, don't bother. */
if (dio && f2fs_lfs_mode(sbi))
return 0;
+ /*
+ * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into
+ * buffered IO, if DIO meets any holes.
+ */
+ if (dio && i_size_read(inode) &&
+ (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
+ return 0;

/* No-wait I/O can't allocate blocks. */
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -4292,8 +4300,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
}

ret = f2fs_map_blocks(inode, &map, 1, flag);
- /* -ENOSPC is only a fatal error if no blocks could be allocated. */
- if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
+ /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
+ if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
return ret;
if (ret == 0)
set_inode_flag(inode, FI_PREALLOCATED_ALL);
@@ -4359,20 +4367,21 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
preallocated = f2fs_preallocate_blocks(iocb, from);
- if (preallocated < 0) {
+ if (preallocated < 0)
ret = preallocated;
- goto out_unlock;
- }
-
- ret = __generic_file_write_iter(iocb, from);
+ else
+ ret = __generic_file_write_iter(iocb, from);

/* Don't leave any preallocated blocks around past i_size. */
- if (preallocated > 0 && i_size_read(inode) < target_size) {
+ if (preallocated && i_size_read(inode) < target_size) {
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
- f2fs_truncate(inode);
+ if (!f2fs_truncate(inode))
+ file_dont_truncate(inode);
filemap_invalidate_unlock(inode->i_mapping);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ } else {
+ file_dont_truncate(inode);
}

clear_inode_flag(inode, FI_PREALLOCATED_ALL);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0f8b2df3e1e0..6998eb1d6bdb 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -544,6 +544,14 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
goto bad_inode;
}
f2fs_set_inode_flags(inode);
+
+ if (file_should_truncate(inode)) {
+ ret = f2fs_truncate(inode);
+ if (ret)
+ goto bad_inode;
+ file_dont_truncate(inode);
+ }
+
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
--
2.34.1.400.ga245620fadb-goog


2021-12-10 23:58:08

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 6/6] f2fs: use iomap for direct I/O

On 11/16, Jaegeuk Kim wrote:
> From: Eric Biggers <[email protected]>
>
> Make f2fs_file_read_iter() and f2fs_file_write_iter() use the iomap
> direct I/O implementation instead of the fs/direct-io.c one.
>
> The iomap implementation is more efficient, and it also avoids the need
> to add new features and optimizations to the old implementation.
>
> This new implementation also eliminates the need for f2fs to hook bio
> submission and completion and to allocate memory per-bio. This is
> because it's possible to correctly update f2fs's in-flight DIO counters
> using __iomap_dio_rw() in combination with an implementation of
> iomap_dio_ops::end_io() (as suggested by Christoph Hellwig).
>
> When possible, this new implementation preserves existing f2fs behavior
> such as the conditions for falling back to buffered I/O.
>
> This patch has been tested with xfstests by running 'gce-xfstests -c
> f2fs -g auto -X generic/017' with and without this patch; no regressions
> were seen. (Some tests fail both before and after. generic/017 hangs
> both before and after, so it had to be excluded.)
>
> Signed-off-by: Eric Biggers <[email protected]>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 205 +---------------------------
> fs/f2fs/f2fs.h | 8 +-
> fs/f2fs/file.c | 343 +++++++++++++++++++++++++++++++++++++++++------
> fs/f2fs/iostat.c | 15 +--
> 4 files changed, 311 insertions(+), 260 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 43b3ca7cabe0..a9124d94a5d1 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1377,11 +1377,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
> f2fs_invalidate_compress_page(sbi, old_blkaddr);
> }
> f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
> -
> - /*
> - * i_size will be updated by direct_IO. Otherwise, we'll get stale
> - * data from unwritten block via dio_read.
> - */
> return 0;
> }
>
> @@ -1743,50 +1738,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
> return (blks << inode->i_blkbits);
> }
>
> -static int __get_data_block(struct inode *inode, sector_t iblock,
> - struct buffer_head *bh, int create, int flag,
> - pgoff_t *next_pgofs, int seg_type, bool may_write)
> -{
> - struct f2fs_map_blocks map;
> - int err;
> -
> - map.m_lblk = iblock;
> - map.m_len = bytes_to_blks(inode, bh->b_size);
> - map.m_next_pgofs = next_pgofs;
> - map.m_next_extent = NULL;
> - map.m_seg_type = seg_type;
> - map.m_may_create = may_write;
> -
> - err = f2fs_map_blocks(inode, &map, create, flag);
> - if (!err) {
> - map_bh(bh, inode->i_sb, map.m_pblk);
> - bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
> - bh->b_size = blks_to_bytes(inode, map.m_len);
> -
> - if (map.m_multidev_dio)
> - bh->b_bdev = map.m_bdev;
> - }
> - return err;
> -}
> -
> -static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
> - struct buffer_head *bh_result, int create)
> -{
> - return __get_data_block(inode, iblock, bh_result, create,
> - F2FS_GET_BLOCK_DIO, NULL,
> - f2fs_rw_hint_to_seg_type(inode->i_write_hint),
> - true);
> -}
> -
> -static int get_data_block_dio(struct inode *inode, sector_t iblock,
> - struct buffer_head *bh_result, int create)
> -{
> - return __get_data_block(inode, iblock, bh_result, create,
> - F2FS_GET_BLOCK_DIO, NULL,
> - f2fs_rw_hint_to_seg_type(inode->i_write_hint),
> - false);
> -}
> -
> static int f2fs_xattr_fiemap(struct inode *inode,
> struct fiemap_extent_info *fieinfo)
> {
> @@ -3262,7 +3213,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
> FS_CP_DATA_IO : FS_DATA_IO);
> }
>
> -static void f2fs_write_failed(struct inode *inode, loff_t to)
> +void f2fs_write_failed(struct inode *inode, loff_t to)
> {
> loff_t i_size = i_size_read(inode);
>
> @@ -3550,158 +3501,6 @@ static int f2fs_write_end(struct file *file,
> return copied;
> }
>
> -static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
> - loff_t offset)
> -{
> - unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
> - unsigned blkbits = i_blkbits;
> - unsigned blocksize_mask = (1 << blkbits) - 1;
> - unsigned long align = offset | iov_iter_alignment(iter);
> - struct block_device *bdev = inode->i_sb->s_bdev;
> -
> - if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
> - return 1;
> -
> - if (align & blocksize_mask) {
> - if (bdev)
> - blkbits = blksize_bits(bdev_logical_block_size(bdev));
> - blocksize_mask = (1 << blkbits) - 1;
> - if (align & blocksize_mask)
> - return -EINVAL;
> - return 1;
> - }
> - return 0;
> -}
> -
> -static void f2fs_dio_end_io(struct bio *bio)
> -{
> - struct f2fs_private_dio *dio = bio->bi_private;
> -
> - dec_page_count(F2FS_I_SB(dio->inode),
> - dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> - bio->bi_private = dio->orig_private;
> - bio->bi_end_io = dio->orig_end_io;
> -
> - kfree(dio);
> -
> - bio_endio(bio);
> -}
> -
> -static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
> - loff_t file_offset)
> -{
> - struct f2fs_private_dio *dio;
> - bool write = (bio_op(bio) == REQ_OP_WRITE);
> -
> - dio = f2fs_kzalloc(F2FS_I_SB(inode),
> - sizeof(struct f2fs_private_dio), GFP_NOFS);
> - if (!dio)
> - goto out;
> -
> - dio->inode = inode;
> - dio->orig_end_io = bio->bi_end_io;
> - dio->orig_private = bio->bi_private;
> - dio->write = write;
> -
> - bio->bi_end_io = f2fs_dio_end_io;
> - bio->bi_private = dio;
> -
> - inc_page_count(F2FS_I_SB(inode),
> - write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
> -
> - submit_bio(bio);
> - return;
> -out:
> - bio->bi_status = BLK_STS_IOERR;
> - bio_endio(bio);
> -}
> -
> -static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> -{
> - struct address_space *mapping = iocb->ki_filp->f_mapping;
> - struct inode *inode = mapping->host;
> - struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> - struct f2fs_inode_info *fi = F2FS_I(inode);
> - size_t count = iov_iter_count(iter);
> - loff_t offset = iocb->ki_pos;
> - int rw = iov_iter_rw(iter);
> - int err;
> - enum rw_hint hint = iocb->ki_hint;
> - int whint_mode = F2FS_OPTION(sbi).whint_mode;
> - bool do_opu;
> -
> - err = check_direct_IO(inode, iter, offset);
> - if (err)
> - return err < 0 ? err : 0;
> -
> - if (f2fs_force_buffered_io(inode, iocb, iter))
> - return 0;
> -
> - do_opu = rw == WRITE && f2fs_lfs_mode(sbi);
> -
> - trace_f2fs_direct_IO_enter(inode, offset, count, rw);
> -
> - if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
> - iocb->ki_hint = WRITE_LIFE_NOT_SET;
> -
> - if (iocb->ki_flags & IOCB_NOWAIT) {
> - if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
> - iocb->ki_hint = hint;
> - err = -EAGAIN;
> - goto out;
> - }
> - if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> - up_read(&fi->i_gc_rwsem[rw]);
> - iocb->ki_hint = hint;
> - err = -EAGAIN;
> - goto out;
> - }
> - } else {
> - down_read(&fi->i_gc_rwsem[rw]);
> - if (do_opu)
> - down_read(&fi->i_gc_rwsem[READ]);
> - }
> -
> - err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
> - iter, rw == WRITE ? get_data_block_dio_write :
> - get_data_block_dio, NULL, f2fs_dio_submit_bio,
> - rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
> - DIO_SKIP_HOLES);
> -
> - if (do_opu)
> - up_read(&fi->i_gc_rwsem[READ]);
> -
> - up_read(&fi->i_gc_rwsem[rw]);
> -
> - if (rw == WRITE) {
> - if (whint_mode == WHINT_MODE_OFF)
> - iocb->ki_hint = hint;
> - if (err > 0) {
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
> - err);
> - if (!do_opu)
> - set_inode_flag(inode, FI_UPDATE_WRITE);
> - } else if (err == -EIOCBQUEUED) {
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
> - count - iov_iter_count(iter));
> - } else if (err < 0) {
> - f2fs_write_failed(inode, offset + count);
> - }
> - } else {
> - if (err > 0)
> - f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
> - else if (err == -EIOCBQUEUED)
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
> - count - iov_iter_count(iter));
> - }
> -
> -out:
> - trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
> -
> - return err;
> -}
> -
> void f2fs_invalidate_page(struct page *page, unsigned int offset,
> unsigned int length)
> {
> @@ -4157,7 +3956,7 @@ const struct address_space_operations f2fs_dblock_aops = {
> .set_page_dirty = f2fs_set_data_page_dirty,
> .invalidatepage = f2fs_invalidate_page,
> .releasepage = f2fs_release_page,
> - .direct_IO = f2fs_direct_IO,
> + .direct_IO = noop_direct_IO,
> .bmap = f2fs_bmap,
> .swap_activate = f2fs_swap_activate,
> .swap_deactivate = f2fs_swap_deactivate,
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 0d199e8f2c1d..26e92799ac6c 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1806,13 +1806,6 @@ struct f2fs_sb_info {
> #endif
> };
>
> -struct f2fs_private_dio {
> - struct inode *inode;
> - void *orig_private;
> - bio_end_io_t *orig_end_io;
> - bool write;
> -};
> -
> #ifdef CONFIG_F2FS_FAULT_INJECTION
> #define f2fs_show_injection_info(sbi, type) \
> printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
> @@ -3641,6 +3634,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
> struct writeback_control *wbc,
> enum iostat_type io_type,
> int compr_blocks, bool allow_balance);
> +void f2fs_write_failed(struct inode *inode, loff_t to);
> void f2fs_invalidate_page(struct page *page, unsigned int offset,
> unsigned int length);
> int f2fs_release_page(struct page *page, gfp_t wait);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 709fa893d832..60e5e2f1c1e8 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -24,6 +24,7 @@
> #include <linux/sched/signal.h>
> #include <linux/fileattr.h>
> #include <linux/fadvise.h>
> +#include <linux/iomap.h>
>
> #include "f2fs.h"
> #include "node.h"
> @@ -4229,23 +4230,145 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> return __f2fs_ioctl(filp, cmd, arg);
> }
>
> -static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> +/*
> + * Return %true if the given read or write request should use direct I/O, or
> + * %false if it should use buffered I/O.
> + */
> +static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
> + struct iov_iter *iter)
> +{
> + unsigned int align;
> +
> + if (!(iocb->ki_flags & IOCB_DIRECT))
> + return false;
> +
> + if (f2fs_force_buffered_io(inode, iocb, iter))
> + return false;
> +
> + /*
> + * Direct I/O not aligned to the disk's logical_block_size will be
> + * attempted, but will fail with -EINVAL.
> + *
> + * f2fs additionally requires that direct I/O be aligned to the
> + * filesystem block size, which is often a stricter requirement.
> + * However, f2fs traditionally falls back to buffered I/O on requests
> + * that are logical_block_size-aligned but not fs-block aligned.
> + *
> + * The below logic implements this behavior.
> + */
> + align = iocb->ki_pos | iov_iter_alignment(iter);
> + if (!IS_ALIGNED(align, i_blocksize(inode)) &&
> + IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
> + return false;
> +
> + return true;
> +}
> +
> +static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
> + unsigned int flags)
> +{
> + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> +
> + dec_page_count(sbi, F2FS_DIO_READ);
> + if (error)
> + return error;
> + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);

I hit a deadlock here: f2fs_update_iostat() takes iostat_lock with spin_lock(),
and it ended up being called in softirq context. I replaced it with spin_lock_bh().

> + return 0;
> +}
> +
> +static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
> + .end_io = f2fs_dio_read_end_io,
> +};
> +
> +static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
> {
> struct file *file = iocb->ki_filp;
> struct inode *inode = file_inode(file);
> - int ret;
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_inode_info *fi = F2FS_I(inode);
> + const loff_t pos = iocb->ki_pos;
> + const size_t count = iov_iter_count(to);
> + struct iomap_dio *dio;
> + ssize_t ret;
> +
> + if (count == 0)
> + return 0; /* skip atime update */
> +
> + trace_f2fs_direct_IO_enter(inode, pos, count, READ);
> +
> + if (iocb->ki_flags & IOCB_NOWAIT) {
> + if (!down_read_trylock(&fi->i_gc_rwsem[READ])) {
> + ret = -EAGAIN;
> + goto out;
> + }
> + } else {
> + down_read(&fi->i_gc_rwsem[READ]);
> + }
> +
> + /*
> + * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
> + * the higher-level function iomap_dio_rw() in order to ensure that the
> + * F2FS_DIO_READ counter will be decremented correctly in all cases.
> + */
> + inc_page_count(sbi, F2FS_DIO_READ);
> + dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
> + &f2fs_iomap_dio_read_ops, 0, 0);
> + if (IS_ERR_OR_NULL(dio)) {
> + ret = PTR_ERR_OR_ZERO(dio);
> + if (ret != -EIOCBQUEUED)
> + dec_page_count(sbi, F2FS_DIO_READ);
> + } else {
> + ret = iomap_dio_complete(dio);
> + }
> +
> + up_read(&fi->i_gc_rwsem[READ]);
> +
> + file_accessed(file);
> +out:
> + trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
> + return ret;
> +}
> +
> +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> +{
> + struct inode *inode = file_inode(iocb->ki_filp);
> + ssize_t ret;
>
> if (!f2fs_is_compress_backend_ready(inode))
> return -EOPNOTSUPP;
>
> - ret = generic_file_read_iter(iocb, iter);
> + if (f2fs_should_use_dio(inode, iocb, to))
> + return f2fs_dio_read_iter(iocb, to);
>
> + ret = filemap_read(iocb, to, 0);
> if (ret > 0)
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret);
> -
> + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
> return ret;
> }
>
> +static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
> +{
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file_inode(file);
> + ssize_t count;
> + int err;
> +
> + if (IS_IMMUTABLE(inode))
> + return -EPERM;
> +
> + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
> + return -EPERM;
> +
> + count = generic_write_checks(iocb, from);
> + if (count <= 0)
> + return count;
> +
> + err = file_modified(file);
> + if (err)
> + return err;
> + return count;
> +}
> +
> /*
> * Preallocate blocks for a write request, if it is possible and helpful to do
> * so. Returns a positive number if blocks may have been preallocated, 0 if no
> @@ -4253,15 +4376,14 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
> * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the
> * requested blocks (not just some of them) have been allocated.
> */
> -static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> + bool dio)
> {
> struct inode *inode = file_inode(iocb->ki_filp);
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> const loff_t pos = iocb->ki_pos;
> const size_t count = iov_iter_count(iter);
> struct f2fs_map_blocks map = {};
> - bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
> - !f2fs_force_buffered_io(inode, iocb, iter);
> int flag;
> int ret;
>
> @@ -4317,13 +4439,174 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> return map.m_len;
> }
>
> -static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
> + struct iov_iter *from)
> +{
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file_inode(file);
> + ssize_t ret;
> +
> + if (iocb->ki_flags & IOCB_NOWAIT)
> + return -EOPNOTSUPP;
> +
> + current->backing_dev_info = inode_to_bdi(inode);
> + ret = generic_perform_write(file, from, iocb->ki_pos);
> + current->backing_dev_info = NULL;
> +
> + if (ret > 0) {
> + iocb->ki_pos += ret;
> + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
> + }
> + return ret;
> +}
> +
> +static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
> + unsigned int flags)
> +{
> + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
> +
> + dec_page_count(sbi, F2FS_DIO_WRITE);
> + if (error)
> + return error;
> + f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
> + return 0;
> +}
> +
> +static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
> + .end_io = f2fs_dio_write_end_io,
> +};
> +
> +static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
> + bool *may_need_sync)
> {
> struct file *file = iocb->ki_filp;
> struct inode *inode = file_inode(file);
> + struct f2fs_inode_info *fi = F2FS_I(inode);
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + const bool do_opu = f2fs_lfs_mode(sbi);
> + const int whint_mode = F2FS_OPTION(sbi).whint_mode;
> + const loff_t pos = iocb->ki_pos;
> + const ssize_t count = iov_iter_count(from);
> + const enum rw_hint hint = iocb->ki_hint;
> + unsigned int dio_flags;
> + struct iomap_dio *dio;
> + ssize_t ret;
> +
> + trace_f2fs_direct_IO_enter(inode, pos, count, WRITE);
> +
> + if (iocb->ki_flags & IOCB_NOWAIT) {
> + /* f2fs_convert_inline_inode() and block allocation can block */
> + if (f2fs_has_inline_data(inode) ||
> + !f2fs_overwrite_io(inode, pos, count)) {
> + ret = -EAGAIN;
> + goto out;
> + }
> +
> + if (!down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
> + ret = -EAGAIN;
> + goto out;
> + }
> + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
> + up_read(&fi->i_gc_rwsem[WRITE]);
> + ret = -EAGAIN;
> + goto out;
> + }
> + } else {
> + ret = f2fs_convert_inline_inode(inode);
> + if (ret)
> + goto out;
> +
> + down_read(&fi->i_gc_rwsem[WRITE]);
> + if (do_opu)
> + down_read(&fi->i_gc_rwsem[READ]);
> + }
> + if (whint_mode == WHINT_MODE_OFF)
> + iocb->ki_hint = WRITE_LIFE_NOT_SET;
> +
> + /*
> + * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
> + * the higher-level function iomap_dio_rw() in order to ensure that the
> + * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
> + */
> + inc_page_count(sbi, F2FS_DIO_WRITE);
> + dio_flags = 0;
> + if (pos + count > inode->i_size)
> + dio_flags |= IOMAP_DIO_FORCE_WAIT;
> + dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
> + &f2fs_iomap_dio_write_ops, dio_flags, 0);
> + if (IS_ERR_OR_NULL(dio)) {
> + ret = PTR_ERR_OR_ZERO(dio);
> + if (ret == -ENOTBLK)
> + ret = 0;
> + if (ret != -EIOCBQUEUED)
> + dec_page_count(sbi, F2FS_DIO_WRITE);
> + } else {
> + ret = iomap_dio_complete(dio);
> + }
> +
> + if (whint_mode == WHINT_MODE_OFF)
> + iocb->ki_hint = hint;
> + if (do_opu)
> + up_read(&fi->i_gc_rwsem[READ]);
> + up_read(&fi->i_gc_rwsem[WRITE]);
> +
> + if (ret < 0)
> + goto out;
> + if (pos + ret > inode->i_size)
> + f2fs_i_size_write(inode, pos + ret);
> + if (!do_opu)
> + set_inode_flag(inode, FI_UPDATE_WRITE);
> +
> + if (iov_iter_count(from)) {
> + ssize_t ret2;
> + loff_t bufio_start_pos = iocb->ki_pos;
> +
> + /*
> + * The direct write was partial, so we need to fall back to a
> + * buffered write for the remainder.
> + */
> +
> + ret2 = f2fs_buffered_write_iter(iocb, from);
> + if (iov_iter_count(from))
> + f2fs_write_failed(inode, iocb->ki_pos);
> + if (ret2 < 0)
> + goto out;
> +
> + /*
> + * Ensure that the pagecache pages are written to disk and
> + * invalidated to preserve the expected O_DIRECT semantics.
> + */
> + if (ret2 > 0) {
> + loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
> +
> + ret += ret2;
> +
> + ret2 = filemap_write_and_wait_range(file->f_mapping,
> + bufio_start_pos,
> + bufio_end_pos);
> + if (ret2 < 0)
> + goto out;
> + invalidate_mapping_pages(file->f_mapping,
> + bufio_start_pos >> PAGE_SHIFT,
> + bufio_end_pos >> PAGE_SHIFT);
> + }
> + } else {
> + /* iomap_dio_rw() already handled the generic_write_sync(). */
> + *may_need_sync = false;
> + }
> +out:
> + trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
> + return ret;
> +}
> +
> +static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> +{
> + struct inode *inode = file_inode(iocb->ki_filp);
> const loff_t orig_pos = iocb->ki_pos;
> const size_t orig_count = iov_iter_count(from);
> loff_t target_size;
> + bool dio;
> + bool may_need_sync = true;
> int preallocated;
> ssize_t ret;
>
> @@ -4346,44 +4629,26 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> inode_lock(inode);
> }
>
> - if (unlikely(IS_IMMUTABLE(inode))) {
> - ret = -EPERM;
> - goto out_unlock;
> - }
> -
> - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
> - ret = -EPERM;
> - goto out_unlock;
> - }
> -
> - ret = generic_write_checks(iocb, from);
> + ret = f2fs_write_checks(iocb, from);
> if (ret <= 0)
> goto out_unlock;
>
> - if (iocb->ki_flags & IOCB_NOWAIT) {
> - if (!f2fs_overwrite_io(inode, iocb->ki_pos,
> - iov_iter_count(from)) ||
> - f2fs_has_inline_data(inode) ||
> - f2fs_force_buffered_io(inode, iocb, from)) {
> - ret = -EAGAIN;
> - goto out_unlock;
> - }
> - }
> + /* Determine whether we will do a direct write or a buffered write. */
> + dio = f2fs_should_use_dio(inode, iocb, from);
>
> - if (iocb->ki_flags & IOCB_DIRECT) {
> - ret = f2fs_convert_inline_inode(inode);
> - if (ret)
> - goto out_unlock;
> - }
> /* Possibly preallocate the blocks for the write. */
> target_size = iocb->ki_pos + iov_iter_count(from);
> - preallocated = f2fs_preallocate_blocks(iocb, from);
> + preallocated = f2fs_preallocate_blocks(iocb, from, dio);
> if (preallocated < 0) {
> ret = preallocated;
> goto out_unlock;
> }
>
> - ret = __generic_file_write_iter(iocb, from);
> + /* Do the actual write. */
> + if (dio)
> + ret = f2fs_dio_write_iter(iocb, from, &may_need_sync);
> + else
> + ret = f2fs_buffered_write_iter(iocb, from);
>
> /* Don't leave any preallocated blocks around past i_size. */
> if (preallocated > 0 && i_size_read(inode) < target_size) {
> @@ -4398,15 +4663,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
> }
>
> clear_inode_flag(inode, FI_PREALLOCATED_ALL);
> -
> - if (ret > 0)
> - f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
> -
> out_unlock:
> inode_unlock(inode);
> out:
> trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
> - if (ret > 0)
> + if (ret > 0 && may_need_sync)
> ret = generic_write_sync(iocb, ret);
> return ret;
> }
> diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
> index cdcf54ae0db8..b911ea73c21a 100644
> --- a/fs/f2fs/iostat.c
> +++ b/fs/f2fs/iostat.c
> @@ -166,15 +166,12 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
> spin_lock(&sbi->iostat_lock);
> sbi->rw_iostat[type] += io_bytes;
>
> - if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
> - sbi->rw_iostat[APP_BUFFERED_IO] =
> - sbi->rw_iostat[APP_WRITE_IO] -
> - sbi->rw_iostat[APP_DIRECT_IO];
> -
> - if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
> - sbi->rw_iostat[APP_BUFFERED_READ_IO] =
> - sbi->rw_iostat[APP_READ_IO] -
> - sbi->rw_iostat[APP_DIRECT_READ_IO];
> + if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
> + sbi->rw_iostat[APP_WRITE_IO] += io_bytes;
> +
> + if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
> + sbi->rw_iostat[APP_READ_IO] += io_bytes;
> +
> spin_unlock(&sbi->iostat_lock);
>
> f2fs_record_iostat(sbi);
> --
> 2.34.0.rc1.387.gb447b232ab-goog

2021-12-30 06:43:28

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 2021/12/3 5:28, Jaegeuk Kim wrote:
> On 12/02, Jaegeuk Kim wrote:
>> On 12/02, Eric Biggers wrote:
>>> On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
>>>> On 12/02, Jaegeuk Kim wrote:
>>>>> On 12/02, Eric Biggers wrote:
>>>>>> On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
>>>>>>> On 2021/12/2 12:15, Eric Biggers wrote:
>>>>>>>> On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
>>>>>>>>> Why not relocating this check before f2fs_map_blocks()?
>>>>>>>
>>>>>>> Wait, it supports DIO in multi-device image after commit 71f2c8206202
>>>>>>> ("f2fs: multidevice: support direct IO"), how about
>>>>>>> checking with f2fs_allow_multi_device_dio()?
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>
>>>>>> Okay, that was not the case when I sent this patch originally. We'll need to
>>>>>> update this to support multiple devices.
>>>>>
>>>>> Chao/Eric, does this make sense?
>>>>>
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>>>>> }
>>>>> if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
>>>>> return -EINVAL;
>>>>> - iomap->addr = blks_to_bytes(inode, map.m_pblk);
>>>>>
>>>>> - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
>>>>> - return -EINVAL;
>>>>> - iomap->bdev = inode->i_sb->s_bdev;
>>>>> + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
>>>>
>>>> correction: map.m_multidev_dio
>>>>
>>>
>>> I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
>>> correct block device? What is the point of m_multidev_dio?
>>
>> It seems we can simply assign iomap->bdev = map.m_bdev, and remove
>> map->m_multidev_dio.
>
> Ok, it was used in previous get_block flow, but I think it'd be worth to keep it
> to show by f2fs_map_blocks tracepoint.

Wasn't the last version sent to the mailing list?

Thanks,

>
>>
>>>
>>> - Eric
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2022-01-04 21:15:15

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 5/6] f2fs: implement iomap operations

On 12/30, Chao Yu wrote:
> On 2021/12/3 5:28, Jaegeuk Kim wrote:
> > On 12/02, Jaegeuk Kim wrote:
> > > On 12/02, Eric Biggers wrote:
> > > > On Thu, Dec 02, 2021 at 11:00:47AM -0800, Jaegeuk Kim wrote:
> > > > > On 12/02, Jaegeuk Kim wrote:
> > > > > > On 12/02, Eric Biggers wrote:
> > > > > > > On Thu, Dec 02, 2021 at 10:04:11PM +0800, Chao Yu wrote:
> > > > > > > > On 2021/12/2 12:15, Eric Biggers wrote:
> > > > > > > > > On Thu, Dec 02, 2021 at 11:10:41AM +0800, Chao Yu wrote:
> > > > > > > > > > Why not relocating this check before f2fs_map_blocks()?
> > > > > > > >
> > > > > > > > Wait, it supports DIO in multi-device image after commit 71f2c8206202
> > > > > > > > ("f2fs: multidevice: support direct IO"), how about
> > > > > > > > checking with f2fs_allow_multi_device_dio()?
> > > > > > > >
> > > > > > > > Thanks,
> > > > > > > >
> > > > > > >
> > > > > > > Okay, that was not the case when I sent this patch originally. We'll need to
> > > > > > > update this to support multiple devices.
> > > > > >
> > > > > > Chao/Eric, does this make sense?
> > > > > >
> > > > > > --- a/fs/f2fs/data.c
> > > > > > +++ b/fs/f2fs/data.c
> > > > > > @@ -4070,11 +4070,10 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > > > > > }
> > > > > > if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
> > > > > > return -EINVAL;
> > > > > > - iomap->addr = blks_to_bytes(inode, map.m_pblk);
> > > > > >
> > > > > > - if (WARN_ON_ONCE(f2fs_is_multi_device(F2FS_I_SB(inode))))
> > > > > > - return -EINVAL;
> > > > > > - iomap->bdev = inode->i_sb->s_bdev;
> > > > > > + iomap->bdev = map->m_multidev_dio ? map.m_bdev :
> > > > >
> > > > > correction: map.m_multidev_dio
> > > > >
> > > >
> > > > I guess so, but why doesn't f2fs_map_blocks() just always set m_bdev to the
> > > > correct block device? What is the point of m_multidev_dio?
> > >
> > > It seems we can simply assign iomap->bdev = map.m_bdev, and remove
> > > map->m_multidev_dio.
> >
> > Ok, it was used in previous get_block flow, but I think it'd be worth to keep it
> > to show by f2fs_map_blocks tracepoint.
>
> Wasn't the last version sent to the mailing list?

No, will post it again soon.

>
> Thanks,
>
> >
> > >
> > > >
> > > > - Eric
> > >
> > >
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> >
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel