From: Daeho Jeong <[email protected]>
Whenever we notice some sluggish issues on our machines, we are always
curious about how well all types of I/O in the f2fs filesystem are
handled. But, it's hard to get this kind of real data. First of all,
we need to reproduce the issue while turning on the profiling tool like
blktrace, but the issue doesn't happen again easily. Second, with the
intervention of any tools, the overall timing of the issue will be
slightly changed and it sometimes makes us hard to figure it out.
So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
IO latency statistics tracepoint events which are minimal things to
understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
node on, we can get this statistics info in a periodic way and it
would cause the least overhead.
[samples]
f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
wr_async_node [0/0/0], wr_async_meta [0/0/0]
f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
wr_async_node [0/0/0], wr_async_meta [0/0/0]
Signed-off-by: Daeho Jeong <[email protected]>
---
v2: clean up with wrappers and fix a build breakage reported by
kernel test robot <[email protected]>
---
fs/f2fs/Kconfig | 9 +++
fs/f2fs/data.c | 108 ++++++++++++++++++++++++++++++++++--
fs/f2fs/f2fs.h | 86 ++++++++++++++++++++++++++++
fs/f2fs/super.c | 11 +++-
fs/f2fs/sysfs.c | 44 +++++++++++++++
include/trace/events/f2fs.h | 88 +++++++++++++++++++++++++++++
6 files changed, 341 insertions(+), 5 deletions(-)
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 7669de7b49ce..28dc5463bc87 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -135,3 +135,12 @@ config F2FS_FS_LZORLE
default y
help
Support LZO-RLE compress algorithm, if unsure, say Y.
+
+config F2FS_IOSTAT_IO_LATENCY
+ bool "F2FS IO statistics IO latency information"
+ depends on F2FS_FS
+ default n
+ help
+ Support printing out periodic IO latency statistics tracepoint
+ events. With this, you have to turn on "iostat_enable" sysfs
+ node to print this out.
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 948083c88d17..a2690f995b47 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -34,6 +34,12 @@ static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+#define NUM_PREALLOC_IOSTAT_CTXS 128
+static struct kmem_cache *bio_iostat_ctx_cache;
+static mempool_t *bio_iostat_ctx_pool;
+#endif
+
#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
int __init f2fs_init_bioset(void)
@@ -267,10 +273,67 @@ static void f2fs_post_read_work(struct work_struct *work)
f2fs_verify_and_finish_bio(ctx->bio);
}
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+ int sync = bio->bi_opf & REQ_SYNC ? 0 : 1;
+
+ if (rw == 0)
+ bio->bi_private = iostat_ctx->post_read_ctx;
+ else
+ bio->bi_private = iostat_ctx->sbi;
+ __update_iostat_latency(iostat_ctx, rw, sync);
+ mempool_free(iostat_ctx, bio_iostat_ctx_pool);
+}
+
+static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx)
+{
+ struct bio_iostat_ctx *iostat_ctx;
+ /* Due to the mempool, this never fails. */
+ iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
+ iostat_ctx->sbi = sbi;
+ iostat_ctx->submit_ts = 0;
+ iostat_ctx->type = 0;
+ iostat_ctx->post_read_ctx = ctx;
+ bio->bi_private = iostat_ctx;
+}
+
+static inline void __update_submit_iostat_ctx(struct bio *bio,
+ enum page_type type)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+ iostat_ctx->submit_ts = jiffies;
+ iostat_ctx->type = type;
+}
+
+static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+
+ return iostat_ctx->post_read_ctx;
+}
+#else
+static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw) {}
+static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx) {}
+static inline void __update_submit_iostat_ctx(struct bio *bio,
+ enum page_type type) {}
+static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
+{
+ return bio->bi_private;
+}
+#endif
+
static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
- struct bio_post_read_ctx *ctx = bio->bi_private;
+ struct bio_post_read_ctx *ctx;
+
+ __update_and_unbind_iostat_ctx(bio, 0);
+ ctx = bio->bi_private;
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
@@ -292,10 +355,13 @@ static void f2fs_read_end_io(struct bio *bio)
static void f2fs_write_end_io(struct bio *bio)
{
- struct f2fs_sb_info *sbi = bio->bi_private;
+ struct f2fs_sb_info *sbi;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
+ __update_and_unbind_iostat_ctx(bio, 1);
+ sbi = bio->bi_private;
+
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
bio->bi_status = BLK_STS_IOERR;
@@ -399,6 +465,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
fio->type, fio->temp);
}
+ __alloc_and_bind_iostat_ctx(sbi, bio, NULL);
+
if (fio->io_wbc)
wbc_init_bio(fio->io_wbc, bio);
@@ -480,6 +548,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
else
trace_f2fs_submit_write_bio(sbi->sb, type, bio);
+
+ __update_submit_iostat_ctx(bio, type);
+
submit_bio(bio);
}
@@ -971,7 +1042,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
- struct bio_post_read_ctx *ctx;
+ struct bio_post_read_ctx *ctx = NULL;
unsigned int post_read_steps = 0;
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
@@ -1007,6 +1078,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
ctx->fs_blkaddr = blkaddr;
bio->bi_private = ctx;
}
+ __alloc_and_bind_iostat_ctx(sbi, bio, ctx);
return bio;
}
@@ -2231,7 +2303,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
- ctx = bio->bi_private;
+ ctx = __get_post_read_ctx(bio);
ctx->enabled_steps |= STEP_DECOMPRESS;
refcount_inc(&dic->refcnt);
@@ -4132,6 +4204,34 @@ void f2fs_destroy_post_read_processing(void)
kmem_cache_destroy(bio_post_read_ctx_cache);
}
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+int __init f2fs_init_iostat_processing(void)
+{
+ bio_iostat_ctx_cache =
+ kmem_cache_create("f2fs_bio_iostat_ctx",
+ sizeof(struct bio_iostat_ctx), 0, 0, NULL);
+ if (!bio_iostat_ctx_cache)
+ goto fail;
+ bio_iostat_ctx_pool =
+ mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
+ bio_iostat_ctx_cache);
+ if (!bio_iostat_ctx_pool)
+ goto fail_free_cache;
+ return 0;
+
+fail_free_cache:
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+fail:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_iostat_processing(void)
+{
+ mempool_destroy(bio_iostat_ctx_pool);
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+}
+#endif
+
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
if (!f2fs_sb_has_encrypt(sbi) &&
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 10a94ed839b1..fccee18ab776 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1755,6 +1755,17 @@ struct f2fs_sb_info {
unsigned int compress_watermark; /* cache page watermark */
atomic_t compress_page_hit; /* cache hit count */
#endif
+
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+ /* for io latency related statistics info in one iostat period */
+ spinlock_t iostat_lat_lock;
+ unsigned long rd_sum_lat[NR_PAGE_TYPE]; /* sum of read io latencies */
+ unsigned long rd_peak_lat[NR_PAGE_TYPE]; /* peak read io latency */
+ unsigned int rd_bio_cnt[NR_PAGE_TYPE]; /* read bio count */
+ unsigned long wr_sum_lat[2][NR_PAGE_TYPE]; /* sum of write io latencies (sync/async) */
+ unsigned long wr_peak_lat[2][NR_PAGE_TYPE]; /* peak write io latency (sync/async) */
+ unsigned int wr_bio_cnt[2][NR_PAGE_TYPE]; /* write bio count (sync/async) */
+#endif
};
struct f2fs_private_dio {
@@ -3233,6 +3244,26 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
sbi->prev_rw_iostat[i] = 0;
}
spin_unlock(&sbi->iostat_lock);
+
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ for (i = 0; i < NR_PAGE_TYPE; i++) {
+ sbi->rd_sum_lat[i] = 0;
+ sbi->rd_peak_lat[i] = 0;
+ sbi->rd_bio_cnt[i] = 0;
+ }
+
+ for (i = 0; i < 2; i++) {
+ int iotype;
+
+ for (iotype = 0; iotype < NR_PAGE_TYPE; iotype++) {
+ sbi->wr_sum_lat[i][iotype] = 0;
+ sbi->wr_peak_lat[i][iotype] = 0;
+ sbi->wr_bio_cnt[i][iotype] = 0;
+ }
+ }
+ spin_unlock_irq(&sbi->iostat_lat_lock);
+#endif
}
extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
@@ -3259,6 +3290,54 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
f2fs_record_iostat(sbi);
}
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+
+struct bio_post_read_ctx;
+
+struct bio_iostat_ctx {
+ struct f2fs_sb_info *sbi;
+ unsigned long submit_ts;
+ enum page_type type;
+ struct bio_post_read_ctx *post_read_ctx;
+};
+
+struct f2fs_iostat_latency {
+ unsigned int peak_lat;
+ unsigned int avg_lat;
+ unsigned int cnt;
+};
+
+static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
+ int rw, int sync)
+{
+ unsigned long ts_diff;
+ unsigned int iotype = iostat_ctx->type;
+ unsigned long flags;
+ struct f2fs_sb_info *sbi = iostat_ctx->sbi;
+
+ if (!sbi->iostat_enable)
+ return;
+
+ ts_diff = jiffies - iostat_ctx->submit_ts;
+ if (iotype >= META_FLUSH)
+ iotype = META;
+
+ spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+ if (rw == 0) {
+ sbi->rd_sum_lat[iotype] += ts_diff;
+ sbi->rd_bio_cnt[iotype]++;
+ if (ts_diff > sbi->rd_peak_lat[iotype])
+ sbi->rd_peak_lat[iotype] = ts_diff;
+ } else {
+ sbi->wr_sum_lat[sync][iotype] += ts_diff;
+ sbi->wr_bio_cnt[sync][iotype]++;
+ if (ts_diff > sbi->wr_peak_lat[sync][iotype])
+ sbi->wr_peak_lat[sync][iotype] = ts_diff;
+ }
+ spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+}
+#endif
+
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
@@ -3645,6 +3724,13 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
void f2fs_clear_page_cache_dirty_tag(struct page *page);
int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+int f2fs_init_iostat_processing(void);
+void f2fs_destroy_iostat_processing(void);
+#else
+static inline int f2fs_init_iostat_processing(void) { return 0; }
+static inline void f2fs_destroy_iostat_processing(void) {}
+#endif
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 04aeb7c0df7d..9ead6d2e703b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3905,6 +3905,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+ spin_lock_init(&sbi->iostat_lat_lock);
+#endif
sbi->iostat_enable = false;
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
@@ -4415,9 +4418,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
- err = f2fs_init_bio_entry_cache();
+ err = f2fs_init_iostat_processing();
if (err)
goto free_post_read;
+ err = f2fs_init_bio_entry_cache();
+ if (err)
+ goto free_iostat;
err = f2fs_init_bioset();
if (err)
goto free_bio_enrty_cache;
@@ -4439,6 +4445,8 @@ static int __init init_f2fs_fs(void)
f2fs_destroy_bioset();
free_bio_enrty_cache:
f2fs_destroy_bio_entry_cache();
+free_iostat:
+ f2fs_destroy_iostat_processing();
free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
@@ -4473,6 +4481,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
+ f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f3a3aecbe111..4cb0561ec89d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1074,6 +1074,48 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
return 0;
}
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
+{
+ int io, sync, idx = 0;
+ unsigned int cnt;
+ struct f2fs_iostat_latency iostat_lat[3][NR_PAGE_TYPE];
+
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ for (io = 0; io < NR_PAGE_TYPE; io++) {
+ cnt = sbi->rd_bio_cnt[io];
+ iostat_lat[idx][io].peak_lat =
+ jiffies_to_msecs(sbi->rd_peak_lat[io]);
+ iostat_lat[idx][io].cnt = cnt;
+ iostat_lat[idx][io].avg_lat = cnt ?
+ jiffies_to_msecs(sbi->rd_sum_lat[io]) / cnt : 0;
+ sbi->rd_sum_lat[io] = 0;
+ sbi->rd_peak_lat[io] = 0;
+ sbi->rd_bio_cnt[io] = 0;
+ }
+
+ for (sync = 0; sync < 2; sync++) {
+ idx++;
+ for (io = 0; io < NR_PAGE_TYPE; io++) {
+ cnt = sbi->wr_bio_cnt[sync][io];
+ iostat_lat[idx][io].peak_lat =
+ jiffies_to_msecs(sbi->wr_peak_lat[sync][io]);
+ iostat_lat[idx][io].cnt = cnt;
+ iostat_lat[idx][io].avg_lat = cnt ?
+ jiffies_to_msecs(sbi->wr_sum_lat[sync][io]) / cnt : 0;
+ sbi->wr_sum_lat[sync][io] = 0;
+ sbi->wr_peak_lat[sync][io] = 0;
+ sbi->wr_bio_cnt[sync][io] = 0;
+ }
+ }
+ spin_unlock_irq(&sbi->iostat_lat_lock);
+
+ trace_f2fs_iostat_latency(sbi, iostat_lat);
+}
+#else
+static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) {}
+#endif
+
void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
@@ -1099,6 +1141,8 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
+
+ __record_iostat_latency(sbi);
}
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6a..b9ede1b9207d 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1894,6 +1894,94 @@ TRACE_EVENT(f2fs_iostat,
__entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
);
+#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
+TRACE_EVENT(f2fs_iostat_latency,
+
+ TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
+
+ TP_ARGS(sbi, iostat_lat),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, d_rd_peak)
+ __field(unsigned int, d_rd_avg)
+ __field(unsigned int, d_rd_cnt)
+ __field(unsigned int, n_rd_peak)
+ __field(unsigned int, n_rd_avg)
+ __field(unsigned int, n_rd_cnt)
+ __field(unsigned int, m_rd_peak)
+ __field(unsigned int, m_rd_avg)
+ __field(unsigned int, m_rd_cnt)
+ __field(unsigned int, d_wr_s_peak)
+ __field(unsigned int, d_wr_s_avg)
+ __field(unsigned int, d_wr_s_cnt)
+ __field(unsigned int, n_wr_s_peak)
+ __field(unsigned int, n_wr_s_avg)
+ __field(unsigned int, n_wr_s_cnt)
+ __field(unsigned int, m_wr_s_peak)
+ __field(unsigned int, m_wr_s_avg)
+ __field(unsigned int, m_wr_s_cnt)
+ __field(unsigned int, d_wr_as_peak)
+ __field(unsigned int, d_wr_as_avg)
+ __field(unsigned int, d_wr_as_cnt)
+ __field(unsigned int, n_wr_as_peak)
+ __field(unsigned int, n_wr_as_avg)
+ __field(unsigned int, n_wr_as_cnt)
+ __field(unsigned int, m_wr_as_peak)
+ __field(unsigned int, m_wr_as_avg)
+ __field(unsigned int, m_wr_as_cnt)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sbi->sb->s_dev;
+ __entry->d_rd_peak = iostat_lat[0][DATA].peak_lat;
+ __entry->d_rd_avg = iostat_lat[0][DATA].avg_lat;
+ __entry->d_rd_cnt = iostat_lat[0][DATA].cnt;
+ __entry->n_rd_peak = iostat_lat[0][NODE].peak_lat;
+ __entry->n_rd_avg = iostat_lat[0][NODE].avg_lat;
+ __entry->n_rd_cnt = iostat_lat[0][NODE].cnt;
+ __entry->m_rd_peak = iostat_lat[0][META].peak_lat;
+ __entry->m_rd_avg = iostat_lat[0][META].avg_lat;
+ __entry->m_rd_cnt = iostat_lat[0][META].cnt;
+ __entry->d_wr_s_peak = iostat_lat[1][DATA].peak_lat;
+ __entry->d_wr_s_avg = iostat_lat[1][DATA].avg_lat;
+ __entry->d_wr_s_cnt = iostat_lat[1][DATA].cnt;
+ __entry->n_wr_s_peak = iostat_lat[1][NODE].peak_lat;
+ __entry->n_wr_s_avg = iostat_lat[1][NODE].avg_lat;
+ __entry->n_wr_s_cnt = iostat_lat[1][NODE].cnt;
+ __entry->m_wr_s_peak = iostat_lat[1][META].peak_lat;
+ __entry->m_wr_s_avg = iostat_lat[1][META].avg_lat;
+ __entry->m_wr_s_cnt = iostat_lat[1][META].cnt;
+ __entry->d_wr_as_peak = iostat_lat[2][DATA].peak_lat;
+ __entry->d_wr_as_avg = iostat_lat[2][DATA].avg_lat;
+ __entry->d_wr_as_cnt = iostat_lat[2][DATA].cnt;
+ __entry->n_wr_as_peak = iostat_lat[2][NODE].peak_lat;
+ __entry->n_wr_as_avg = iostat_lat[2][NODE].avg_lat;
+ __entry->n_wr_as_cnt = iostat_lat[2][NODE].cnt;
+ __entry->m_wr_as_peak = iostat_lat[2][META].peak_lat;
+ __entry->m_wr_as_avg = iostat_lat[2][META].avg_lat;
+ __entry->m_wr_as_cnt = iostat_lat[2][META].cnt;
+ ),
+
+ TP_printk("dev = (%d,%d), "
+ "iotype [peak lat.(ms)/avg lat.(ms)/count], "
+ "rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
+ "wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
+ "wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
+ "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
+ show_dev(__entry->dev),
+ __entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
+ __entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
+ __entry->m_rd_peak, __entry->m_rd_avg, __entry->m_rd_cnt,
+ __entry->d_wr_s_peak, __entry->d_wr_s_avg, __entry->d_wr_s_cnt,
+ __entry->n_wr_s_peak, __entry->n_wr_s_avg, __entry->n_wr_s_cnt,
+ __entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
+ __entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
+ __entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
+ __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
+);
+#endif
+
TRACE_EVENT(f2fs_bmap,
TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock),
--
2.32.0.554.ge1b32706d8-goog
Hi Daeho,
On 2021/8/4 6:55, Daeho Jeong wrote:
> From: Daeho Jeong <[email protected]>
>
> Whenever we notice some sluggish issues on our machines, we are always
> curious about how well all types of I/O in the f2fs filesystem are
> handled. But, it's hard to get this kind of real data. First of all,
> we need to reproduce the issue while turning on the profiling tool like
> blktrace, but the issue doesn't happen again easily. Second, with the
> intervention of any tools, the overall timing of the issue will be
> slightly changed and it sometimes makes us hard to figure it out.
>
> So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
> IO latency statistics tracepoint events which are minimal things to
> understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
> node on, we can get this statistics info in a periodic way and it
> would cause the least overhead.
>
> [samples]
> f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
> wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
> wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>
> f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
> wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
> wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>
> Signed-off-by: Daeho Jeong <[email protected]>
>
> ---
> v2: clean up with wrappers and fix a build breakage reported by
> kernel test robot <[email protected]>
> ---
> fs/f2fs/Kconfig | 9 +++
I try to apply this patch in my local dev branch, but it failed due to
conflicting with below commit, it needs to rebase this patch to last dev
branch.
https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev&id=6b3ba1e77d8913ee6ffb3972e889bc35550ed95c
> fs/f2fs/data.c | 108 ++++++++++++++++++++++++++++++++++--
> fs/f2fs/f2fs.h | 86 ++++++++++++++++++++++++++++
> fs/f2fs/super.c | 11 +++-
> fs/f2fs/sysfs.c | 44 +++++++++++++++
> include/trace/events/f2fs.h | 88 +++++++++++++++++++++++++++++
> 6 files changed, 341 insertions(+), 5 deletions(-)
>
> diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
> index 7669de7b49ce..28dc5463bc87 100644
> --- a/fs/f2fs/Kconfig
> +++ b/fs/f2fs/Kconfig
> @@ -135,3 +135,12 @@ config F2FS_FS_LZORLE
> default y
> help
> Support LZO-RLE compress algorithm, if unsure, say Y.
> +
> +config F2FS_IOSTAT_IO_LATENCY
> + bool "F2FS IO statistics IO latency information"
> + depends on F2FS_FS
> + default n
> + help
> + Support printing out periodic IO latency statistics tracepoint
> + events. With this, you have to turn on "iostat_enable" sysfs
> + node to print this out.
This functionality looks independent, how about introuducing iostat.h
and iostat.c (not sure, maybe trace.[hc])to include newly added structure
and functions for dispersive codes cleanup.
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 948083c88d17..a2690f995b47 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -34,6 +34,12 @@ static struct kmem_cache *bio_entry_slab;
> static mempool_t *bio_post_read_ctx_pool;
> static struct bio_set f2fs_bioset;
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +#define NUM_PREALLOC_IOSTAT_CTXS 128
> +static struct kmem_cache *bio_iostat_ctx_cache;
> +static mempool_t *bio_iostat_ctx_pool;
> +#endif
> +
> #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
>
> int __init f2fs_init_bioset(void)
> @@ -267,10 +273,67 @@ static void f2fs_post_read_work(struct work_struct *work)
> f2fs_verify_and_finish_bio(ctx->bio);
> }
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> + int sync = bio->bi_opf & REQ_SYNC ? 0 : 1;
int sync = bio->bi_opf & REQ_SYNC ? 1 : 0;
> +
> + if (rw == 0)
> + bio->bi_private = iostat_ctx->post_read_ctx;
> + else
> + bio->bi_private = iostat_ctx->sbi;
> + __update_iostat_latency(iostat_ctx, rw, sync);
> + mempool_free(iostat_ctx, bio_iostat_ctx_pool);
> +}
> +
> +static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
> + struct bio *bio, struct bio_post_read_ctx *ctx)
> +{
> + struct bio_iostat_ctx *iostat_ctx;
> + /* Due to the mempool, this never fails. */
> + iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
> + iostat_ctx->sbi = sbi;
> + iostat_ctx->submit_ts = 0;
> + iostat_ctx->type = 0;
> + iostat_ctx->post_read_ctx = ctx;
> + bio->bi_private = iostat_ctx;
> +}
> +
> +static inline void __update_submit_iostat_ctx(struct bio *bio,
> + enum page_type type)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> + iostat_ctx->submit_ts = jiffies;
> + iostat_ctx->type = type;
> +}
> +
> +static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
> +{
> + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> +
> + return iostat_ctx->post_read_ctx;
> +}
> +#else
> +static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw) {}
> +static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
> + struct bio *bio, struct bio_post_read_ctx *ctx) {}
> +static inline void __update_submit_iostat_ctx(struct bio *bio,
> + enum page_type type) {}
> +static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
> +{
> + return bio->bi_private;
> +}
> +#endif
> +
> static void f2fs_read_end_io(struct bio *bio)
> {
> struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
> - struct bio_post_read_ctx *ctx = bio->bi_private;
> + struct bio_post_read_ctx *ctx;
> +
> + __update_and_unbind_iostat_ctx(bio, 0);
> + ctx = bio->bi_private;
>
> if (time_to_inject(sbi, FAULT_READ_IO)) {
> f2fs_show_injection_info(sbi, FAULT_READ_IO);
> @@ -292,10 +355,13 @@ static void f2fs_read_end_io(struct bio *bio)
>
> static void f2fs_write_end_io(struct bio *bio)
> {
> - struct f2fs_sb_info *sbi = bio->bi_private;
> + struct f2fs_sb_info *sbi;
> struct bio_vec *bvec;
> struct bvec_iter_all iter_all;
>
> + __update_and_unbind_iostat_ctx(bio, 1);
> + sbi = bio->bi_private;
> +
> if (time_to_inject(sbi, FAULT_WRITE_IO)) {
> f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
> bio->bi_status = BLK_STS_IOERR;
> @@ -399,6 +465,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
> bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
> fio->type, fio->temp);
> }
> + __alloc_and_bind_iostat_ctx(sbi, bio, NULL);
> +
> if (fio->io_wbc)
> wbc_init_bio(fio->io_wbc, bio);
>
> @@ -480,6 +548,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
> trace_f2fs_submit_read_bio(sbi->sb, type, bio);
> else
> trace_f2fs_submit_write_bio(sbi->sb, type, bio);
> +
> + __update_submit_iostat_ctx(bio, type);
> +
> submit_bio(bio);
> }
>
> @@ -971,7 +1042,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> struct bio *bio;
> - struct bio_post_read_ctx *ctx;
> + struct bio_post_read_ctx *ctx = NULL;
> unsigned int post_read_steps = 0;
>
> bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
> @@ -1007,6 +1078,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> ctx->fs_blkaddr = blkaddr;
> bio->bi_private = ctx;
> }
> + __alloc_and_bind_iostat_ctx(sbi, bio, ctx);
>
> return bio;
> }
> @@ -2231,7 +2303,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> if (bio_add_page(bio, page, blocksize, 0) < blocksize)
> goto submit_and_realloc;
>
> - ctx = bio->bi_private;
> + ctx = __get_post_read_ctx(bio);
> ctx->enabled_steps |= STEP_DECOMPRESS;
> refcount_inc(&dic->refcnt);
>
> @@ -4132,6 +4204,34 @@ void f2fs_destroy_post_read_processing(void)
> kmem_cache_destroy(bio_post_read_ctx_cache);
> }
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +int __init f2fs_init_iostat_processing(void)
> +{
> + bio_iostat_ctx_cache =
> + kmem_cache_create("f2fs_bio_iostat_ctx",
> + sizeof(struct bio_iostat_ctx), 0, 0, NULL);
> + if (!bio_iostat_ctx_cache)
> + goto fail;
> + bio_iostat_ctx_pool =
> + mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
> + bio_iostat_ctx_cache);
> + if (!bio_iostat_ctx_pool)
> + goto fail_free_cache;
> + return 0;
> +
> +fail_free_cache:
> + kmem_cache_destroy(bio_iostat_ctx_cache);
> +fail:
> + return -ENOMEM;
> +}
> +
> +void f2fs_destroy_iostat_processing(void)
> +{
> + mempool_destroy(bio_iostat_ctx_pool);
> + kmem_cache_destroy(bio_iostat_ctx_cache);
> +}
> +#endif
> +
> int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
> {
> if (!f2fs_sb_has_encrypt(sbi) &&
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 10a94ed839b1..fccee18ab776 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1755,6 +1755,17 @@ struct f2fs_sb_info {
> unsigned int compress_watermark; /* cache page watermark */
> atomic_t compress_page_hit; /* cache hit count */
> #endif
> +
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> + /* for io latency related statistics info in one iostat period */
> + spinlock_t iostat_lat_lock;
> + unsigned long rd_sum_lat[NR_PAGE_TYPE]; /* sum of read io latencies */
> + unsigned long rd_peak_lat[NR_PAGE_TYPE]; /* peak read io latency */
> + unsigned int rd_bio_cnt[NR_PAGE_TYPE]; /* read bio count */
> + unsigned long wr_sum_lat[2][NR_PAGE_TYPE]; /* sum of write io latencies (sync/async) */
> + unsigned long wr_peak_lat[2][NR_PAGE_TYPE]; /* peak write io latency (sync/async) */
> + unsigned int wr_bio_cnt[2][NR_PAGE_TYPE]; /* write bio count (sync/async) */
How about wrapping them in a structure? and we can move this into iostat.h?
> +#endif
> };
>
> struct f2fs_private_dio {
> @@ -3233,6 +3244,26 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
> sbi->prev_rw_iostat[i] = 0;
> }
> spin_unlock(&sbi->iostat_lock);
> +
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> + spin_lock_irq(&sbi->iostat_lat_lock);
> + for (i = 0; i < NR_PAGE_TYPE; i++) {
> + sbi->rd_sum_lat[i] = 0;
> + sbi->rd_peak_lat[i] = 0;
> + sbi->rd_bio_cnt[i] = 0;
> + }
> +
> + for (i = 0; i < 2; i++) {
> + int iotype;
> +
> + for (iotype = 0; iotype < NR_PAGE_TYPE; iotype++) {
> + sbi->wr_sum_lat[i][iotype] = 0;
> + sbi->wr_peak_lat[i][iotype] = 0;
> + sbi->wr_bio_cnt[i][iotype] = 0;
> + }
> + }
> + spin_unlock_irq(&sbi->iostat_lat_lock);
> +#endif
> }
>
> extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
> @@ -3259,6 +3290,54 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
> f2fs_record_iostat(sbi);
> }
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +
> +struct bio_post_read_ctx;
> +
> +struct bio_iostat_ctx {
> + struct f2fs_sb_info *sbi;
> + unsigned long submit_ts;
> + enum page_type type;
> + struct bio_post_read_ctx *post_read_ctx;
> +};
> +
> +struct f2fs_iostat_latency {
> + unsigned int peak_lat;
> + unsigned int avg_lat;
> + unsigned int cnt;
> +};
> +
> +static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> + int rw, int sync)
> +{
> + unsigned long ts_diff;
> + unsigned int iotype = iostat_ctx->type;
> + unsigned long flags;
> + struct f2fs_sb_info *sbi = iostat_ctx->sbi;
> +
> + if (!sbi->iostat_enable)
> + return;
> +
> + ts_diff = jiffies - iostat_ctx->submit_ts;
> + if (iotype >= META_FLUSH)
> + iotype = META;
> +
> + spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
> + if (rw == 0) {
> + sbi->rd_sum_lat[iotype] += ts_diff;
> + sbi->rd_bio_cnt[iotype]++;
> + if (ts_diff > sbi->rd_peak_lat[iotype])
> + sbi->rd_peak_lat[iotype] = ts_diff;
> + } else {
> + sbi->wr_sum_lat[sync][iotype] += ts_diff;
> + sbi->wr_bio_cnt[sync][iotype]++;
> + if (ts_diff > sbi->wr_peak_lat[sync][iotype])
> + sbi->wr_peak_lat[sync][iotype] = ts_diff;
> + }
> + spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> +}
> +#endif
> +
> #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
>
> #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
> @@ -3645,6 +3724,13 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> void f2fs_clear_page_cache_dirty_tag(struct page *page);
> int f2fs_init_post_read_processing(void);
> void f2fs_destroy_post_read_processing(void);
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +int f2fs_init_iostat_processing(void);
> +void f2fs_destroy_iostat_processing(void);
> +#else
> +static inline int f2fs_init_iostat_processing(void) { return 0; }
> +static inline void f2fs_destroy_iostat_processing(void) {}
> +#endif
> int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 04aeb7c0df7d..9ead6d2e703b 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3905,6 +3905,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>
> /* init iostat info */
> spin_lock_init(&sbi->iostat_lock);
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> + spin_lock_init(&sbi->iostat_lat_lock);
> +#endif
> sbi->iostat_enable = false;
> sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
>
> @@ -4415,9 +4418,12 @@ static int __init init_f2fs_fs(void)
> err = f2fs_init_post_read_processing();
> if (err)
> goto free_root_stats;
> - err = f2fs_init_bio_entry_cache();
> + err = f2fs_init_iostat_processing();
> if (err)
> goto free_post_read;
> + err = f2fs_init_bio_entry_cache();
> + if (err)
> + goto free_iostat;
> err = f2fs_init_bioset();
> if (err)
> goto free_bio_enrty_cache;
> @@ -4439,6 +4445,8 @@ static int __init init_f2fs_fs(void)
> f2fs_destroy_bioset();
> free_bio_enrty_cache:
> f2fs_destroy_bio_entry_cache();
> +free_iostat:
> + f2fs_destroy_iostat_processing();
> free_post_read:
> f2fs_destroy_post_read_processing();
> free_root_stats:
> @@ -4473,6 +4481,7 @@ static void __exit exit_f2fs_fs(void)
> f2fs_destroy_compress_mempool();
> f2fs_destroy_bioset();
> f2fs_destroy_bio_entry_cache();
> + f2fs_destroy_iostat_processing();
> f2fs_destroy_post_read_processing();
> f2fs_destroy_root_stats();
> unregister_filesystem(&f2fs_fs_type);
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index f3a3aecbe111..4cb0561ec89d 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -1074,6 +1074,48 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
> return 0;
> }
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
> +{
> + int io, sync, idx = 0;
> + unsigned int cnt;
> + struct f2fs_iostat_latency iostat_lat[3][NR_PAGE_TYPE];
> +
> + spin_lock_irq(&sbi->iostat_lat_lock);
> + for (io = 0; io < NR_PAGE_TYPE; io++) {
> + cnt = sbi->rd_bio_cnt[io];
> + iostat_lat[idx][io].peak_lat =
> + jiffies_to_msecs(sbi->rd_peak_lat[io]);
> + iostat_lat[idx][io].cnt = cnt;
> + iostat_lat[idx][io].avg_lat = cnt ?
> + jiffies_to_msecs(sbi->rd_sum_lat[io]) / cnt : 0;
> + sbi->rd_sum_lat[io] = 0;
> + sbi->rd_peak_lat[io] = 0;
> + sbi->rd_bio_cnt[io] = 0;
> + }
> +
> + for (sync = 0; sync < 2; sync++) {
> + idx++;
> + for (io = 0; io < NR_PAGE_TYPE; io++) {
> + cnt = sbi->wr_bio_cnt[sync][io];
> + iostat_lat[idx][io].peak_lat =
> + jiffies_to_msecs(sbi->wr_peak_lat[sync][io]);
> + iostat_lat[idx][io].cnt = cnt;
> + iostat_lat[idx][io].avg_lat = cnt ?
> + jiffies_to_msecs(sbi->wr_sum_lat[sync][io]) / cnt : 0;
> + sbi->wr_sum_lat[sync][io] = 0;
> + sbi->wr_peak_lat[sync][io] = 0;
> + sbi->wr_bio_cnt[sync][io] = 0;
> + }
> + }
> + spin_unlock_irq(&sbi->iostat_lat_lock);
What not defining read/write stat variables into the same array? so codes in
__record_iostat_latency() and f2fs_reset_iostat() could be simplified.
enum {
READ_IO,
WRITE_ASYNC_IO,
WRITE_SYNC_IO,
MAX_IO_TYPE,
};
unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
Thanks,
> +
> + trace_f2fs_iostat_latency(sbi, iostat_lat);
> +}
> +#else
> +static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) {}
> +#endif
> +
> void f2fs_record_iostat(struct f2fs_sb_info *sbi)
> {
> unsigned long long iostat_diff[NR_IO_TYPE];
> @@ -1099,6 +1141,8 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi)
> spin_unlock(&sbi->iostat_lock);
>
> trace_f2fs_iostat(sbi, iostat_diff);
> +
> + __record_iostat_latency(sbi);
> }
>
> static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
> diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> index 56b113e3cd6a..b9ede1b9207d 100644
> --- a/include/trace/events/f2fs.h
> +++ b/include/trace/events/f2fs.h
> @@ -1894,6 +1894,94 @@ TRACE_EVENT(f2fs_iostat,
> __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
> );
>
> +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> +TRACE_EVENT(f2fs_iostat_latency,
> +
> + TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
> +
> + TP_ARGS(sbi, iostat_lat),
> +
> + TP_STRUCT__entry(
> + __field(dev_t, dev)
> + __field(unsigned int, d_rd_peak)
> + __field(unsigned int, d_rd_avg)
> + __field(unsigned int, d_rd_cnt)
> + __field(unsigned int, n_rd_peak)
> + __field(unsigned int, n_rd_avg)
> + __field(unsigned int, n_rd_cnt)
> + __field(unsigned int, m_rd_peak)
> + __field(unsigned int, m_rd_avg)
> + __field(unsigned int, m_rd_cnt)
> + __field(unsigned int, d_wr_s_peak)
> + __field(unsigned int, d_wr_s_avg)
> + __field(unsigned int, d_wr_s_cnt)
> + __field(unsigned int, n_wr_s_peak)
> + __field(unsigned int, n_wr_s_avg)
> + __field(unsigned int, n_wr_s_cnt)
> + __field(unsigned int, m_wr_s_peak)
> + __field(unsigned int, m_wr_s_avg)
> + __field(unsigned int, m_wr_s_cnt)
> + __field(unsigned int, d_wr_as_peak)
> + __field(unsigned int, d_wr_as_avg)
> + __field(unsigned int, d_wr_as_cnt)
> + __field(unsigned int, n_wr_as_peak)
> + __field(unsigned int, n_wr_as_avg)
> + __field(unsigned int, n_wr_as_cnt)
> + __field(unsigned int, m_wr_as_peak)
> + __field(unsigned int, m_wr_as_avg)
> + __field(unsigned int, m_wr_as_cnt)
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = sbi->sb->s_dev;
> + __entry->d_rd_peak = iostat_lat[0][DATA].peak_lat;
> + __entry->d_rd_avg = iostat_lat[0][DATA].avg_lat;
> + __entry->d_rd_cnt = iostat_lat[0][DATA].cnt;
> + __entry->n_rd_peak = iostat_lat[0][NODE].peak_lat;
> + __entry->n_rd_avg = iostat_lat[0][NODE].avg_lat;
> + __entry->n_rd_cnt = iostat_lat[0][NODE].cnt;
> + __entry->m_rd_peak = iostat_lat[0][META].peak_lat;
> + __entry->m_rd_avg = iostat_lat[0][META].avg_lat;
> + __entry->m_rd_cnt = iostat_lat[0][META].cnt;
> + __entry->d_wr_s_peak = iostat_lat[1][DATA].peak_lat;
> + __entry->d_wr_s_avg = iostat_lat[1][DATA].avg_lat;
> + __entry->d_wr_s_cnt = iostat_lat[1][DATA].cnt;
> + __entry->n_wr_s_peak = iostat_lat[1][NODE].peak_lat;
> + __entry->n_wr_s_avg = iostat_lat[1][NODE].avg_lat;
> + __entry->n_wr_s_cnt = iostat_lat[1][NODE].cnt;
> + __entry->m_wr_s_peak = iostat_lat[1][META].peak_lat;
> + __entry->m_wr_s_avg = iostat_lat[1][META].avg_lat;
> + __entry->m_wr_s_cnt = iostat_lat[1][META].cnt;
> + __entry->d_wr_as_peak = iostat_lat[2][DATA].peak_lat;
> + __entry->d_wr_as_avg = iostat_lat[2][DATA].avg_lat;
> + __entry->d_wr_as_cnt = iostat_lat[2][DATA].cnt;
> + __entry->n_wr_as_peak = iostat_lat[2][NODE].peak_lat;
> + __entry->n_wr_as_avg = iostat_lat[2][NODE].avg_lat;
> + __entry->n_wr_as_cnt = iostat_lat[2][NODE].cnt;
> + __entry->m_wr_as_peak = iostat_lat[2][META].peak_lat;
> + __entry->m_wr_as_avg = iostat_lat[2][META].avg_lat;
> + __entry->m_wr_as_cnt = iostat_lat[2][META].cnt;
> + ),
> +
> + TP_printk("dev = (%d,%d), "
> + "iotype [peak lat.(ms)/avg lat.(ms)/count], "
> + "rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
> + "wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
> + "wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
> + "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
> + show_dev(__entry->dev),
> + __entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
> + __entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
> + __entry->m_rd_peak, __entry->m_rd_avg, __entry->m_rd_cnt,
> + __entry->d_wr_s_peak, __entry->d_wr_s_avg, __entry->d_wr_s_cnt,
> + __entry->n_wr_s_peak, __entry->n_wr_s_avg, __entry->n_wr_s_cnt,
> + __entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
> + __entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
> + __entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
> + __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
> +);
> +#endif
> +
> TRACE_EVENT(f2fs_bmap,
>
> TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock),
>
On 08/11, Chao Yu wrote:
> Hi Daeho,
>
> On 2021/8/4 6:55, Daeho Jeong wrote:
> > From: Daeho Jeong <[email protected]>
> >
> > Whenever we notice some sluggish issues on our machines, we are always
> > curious about how well all types of I/O in the f2fs filesystem are
> > handled. But, it's hard to get this kind of real data. First of all,
> > we need to reproduce the issue while turning on the profiling tool like
> > blktrace, but the issue doesn't happen again easily. Second, with the
> > intervention of any tools, the overall timing of the issue will be
> > slightly changed and it sometimes makes us hard to figure it out.
> >
> > So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
> > IO latency statistics tracepoint events which are minimal things to
> > understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
> > node on, we can get this statistics info in a periodic way and it
> > would cause the least overhead.
> >
> > [samples]
> > f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
> > dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> > rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
> > wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
> > wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
> > wr_async_node [0/0/0], wr_async_meta [0/0/0]
> >
> > f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
> > dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> > rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
> > wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
> > wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
> > wr_async_node [0/0/0], wr_async_meta [0/0/0]
> >
> > Signed-off-by: Daeho Jeong <[email protected]>
> >
> > ---
> > v2: clean up with wrappers and fix a build breakage reported by
> > kernel test robot <[email protected]>
> > ---
> > fs/f2fs/Kconfig | 9 +++
>
> I try to apply this patch in my local dev branch, but it failed due to
> conflicting with below commit, it needs to rebase this patch to last dev
> branch.
I applied this in dev branch. Could you please check?
>
> https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git/commit/?h=dev&id=6b3ba1e77d8913ee6ffb3972e889bc35550ed95c
>
> > fs/f2fs/data.c | 108 ++++++++++++++++++++++++++++++++++--
> > fs/f2fs/f2fs.h | 86 ++++++++++++++++++++++++++++
> > fs/f2fs/super.c | 11 +++-
> > fs/f2fs/sysfs.c | 44 +++++++++++++++
> > include/trace/events/f2fs.h | 88 +++++++++++++++++++++++++++++
> > 6 files changed, 341 insertions(+), 5 deletions(-)
> >
> > diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
> > index 7669de7b49ce..28dc5463bc87 100644
> > --- a/fs/f2fs/Kconfig
> > +++ b/fs/f2fs/Kconfig
> > @@ -135,3 +135,12 @@ config F2FS_FS_LZORLE
> > default y
> > help
> > Support LZO-RLE compress algorithm, if unsure, say Y.
> > +
> > +config F2FS_IOSTAT_IO_LATENCY
> > + bool "F2FS IO statistics IO latency information"
> > + depends on F2FS_FS
> > + default n
> > + help
> > + Support printing out periodic IO latency statistics tracepoint
> > + events. With this, you have to turn on "iostat_enable" sysfs
> > + node to print this out.
>
> This functionality looks independent, how about introuducing iostat.h
> and iostat.c (not sure, maybe trace.[hc])to include newly added structure
> and functions for dispersive codes cleanup.
>
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 948083c88d17..a2690f995b47 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -34,6 +34,12 @@ static struct kmem_cache *bio_entry_slab;
> > static mempool_t *bio_post_read_ctx_pool;
> > static struct bio_set f2fs_bioset;
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +#define NUM_PREALLOC_IOSTAT_CTXS 128
> > +static struct kmem_cache *bio_iostat_ctx_cache;
> > +static mempool_t *bio_iostat_ctx_pool;
> > +#endif
> > +
> > #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
> > int __init f2fs_init_bioset(void)
> > @@ -267,10 +273,67 @@ static void f2fs_post_read_work(struct work_struct *work)
> > f2fs_verify_and_finish_bio(ctx->bio);
> > }
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw)
> > +{
> > + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > + int sync = bio->bi_opf & REQ_SYNC ? 0 : 1;
>
> int sync = bio->bi_opf & REQ_SYNC ? 1 : 0;
>
> > +
> > + if (rw == 0)
> > + bio->bi_private = iostat_ctx->post_read_ctx;
> > + else
> > + bio->bi_private = iostat_ctx->sbi;
> > + __update_iostat_latency(iostat_ctx, rw, sync);
> > + mempool_free(iostat_ctx, bio_iostat_ctx_pool);
> > +}
> > +
> > +static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
> > + struct bio *bio, struct bio_post_read_ctx *ctx)
> > +{
> > + struct bio_iostat_ctx *iostat_ctx;
> > + /* Due to the mempool, this never fails. */
> > + iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
> > + iostat_ctx->sbi = sbi;
> > + iostat_ctx->submit_ts = 0;
> > + iostat_ctx->type = 0;
> > + iostat_ctx->post_read_ctx = ctx;
> > + bio->bi_private = iostat_ctx;
> > +}
> > +
> > +static inline void __update_submit_iostat_ctx(struct bio *bio,
> > + enum page_type type)
> > +{
> > + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > +
> > + iostat_ctx->submit_ts = jiffies;
> > + iostat_ctx->type = type;
> > +}
> > +
> > +static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
> > +{
> > + struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
> > +
> > + return iostat_ctx->post_read_ctx;
> > +}
> > +#else
> > +static inline void __update_and_unbind_iostat_ctx(struct bio *bio, int rw) {}
> > +static inline void __alloc_and_bind_iostat_ctx(struct f2fs_sb_info *sbi,
> > + struct bio *bio, struct bio_post_read_ctx *ctx) {}
> > +static inline void __update_submit_iostat_ctx(struct bio *bio,
> > + enum page_type type) {}
> > +static inline struct bio_post_read_ctx *__get_post_read_ctx(struct bio *bio)
> > +{
> > + return bio->bi_private;
> > +}
> > +#endif
> > +
> > static void f2fs_read_end_io(struct bio *bio)
> > {
> > struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
> > - struct bio_post_read_ctx *ctx = bio->bi_private;
> > + struct bio_post_read_ctx *ctx;
> > +
> > + __update_and_unbind_iostat_ctx(bio, 0);
> > + ctx = bio->bi_private;
> > if (time_to_inject(sbi, FAULT_READ_IO)) {
> > f2fs_show_injection_info(sbi, FAULT_READ_IO);
> > @@ -292,10 +355,13 @@ static void f2fs_read_end_io(struct bio *bio)
> > static void f2fs_write_end_io(struct bio *bio)
> > {
> > - struct f2fs_sb_info *sbi = bio->bi_private;
> > + struct f2fs_sb_info *sbi;
> > struct bio_vec *bvec;
> > struct bvec_iter_all iter_all;
> > + __update_and_unbind_iostat_ctx(bio, 1);
> > + sbi = bio->bi_private;
> > +
> > if (time_to_inject(sbi, FAULT_WRITE_IO)) {
> > f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
> > bio->bi_status = BLK_STS_IOERR;
> > @@ -399,6 +465,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
> > bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
> > fio->type, fio->temp);
> > }
> > + __alloc_and_bind_iostat_ctx(sbi, bio, NULL);
> > +
> > if (fio->io_wbc)
> > wbc_init_bio(fio->io_wbc, bio);
> > @@ -480,6 +548,9 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
> > trace_f2fs_submit_read_bio(sbi->sb, type, bio);
> > else
> > trace_f2fs_submit_write_bio(sbi->sb, type, bio);
> > +
> > + __update_submit_iostat_ctx(bio, type);
> > +
> > submit_bio(bio);
> > }
> > @@ -971,7 +1042,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> > {
> > struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > struct bio *bio;
> > - struct bio_post_read_ctx *ctx;
> > + struct bio_post_read_ctx *ctx = NULL;
> > unsigned int post_read_steps = 0;
> > bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
> > @@ -1007,6 +1078,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
> > ctx->fs_blkaddr = blkaddr;
> > bio->bi_private = ctx;
> > }
> > + __alloc_and_bind_iostat_ctx(sbi, bio, ctx);
> > return bio;
> > }
> > @@ -2231,7 +2303,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
> > if (bio_add_page(bio, page, blocksize, 0) < blocksize)
> > goto submit_and_realloc;
> > - ctx = bio->bi_private;
> > + ctx = __get_post_read_ctx(bio);
> > ctx->enabled_steps |= STEP_DECOMPRESS;
> > refcount_inc(&dic->refcnt);
> > @@ -4132,6 +4204,34 @@ void f2fs_destroy_post_read_processing(void)
> > kmem_cache_destroy(bio_post_read_ctx_cache);
> > }
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +int __init f2fs_init_iostat_processing(void)
> > +{
> > + bio_iostat_ctx_cache =
> > + kmem_cache_create("f2fs_bio_iostat_ctx",
> > + sizeof(struct bio_iostat_ctx), 0, 0, NULL);
> > + if (!bio_iostat_ctx_cache)
> > + goto fail;
> > + bio_iostat_ctx_pool =
> > + mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
> > + bio_iostat_ctx_cache);
> > + if (!bio_iostat_ctx_pool)
> > + goto fail_free_cache;
> > + return 0;
> > +
> > +fail_free_cache:
> > + kmem_cache_destroy(bio_iostat_ctx_cache);
> > +fail:
> > + return -ENOMEM;
> > +}
> > +
> > +void f2fs_destroy_iostat_processing(void)
> > +{
> > + mempool_destroy(bio_iostat_ctx_pool);
> > + kmem_cache_destroy(bio_iostat_ctx_cache);
> > +}
> > +#endif
> > +
> > int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
> > {
> > if (!f2fs_sb_has_encrypt(sbi) &&
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 10a94ed839b1..fccee18ab776 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1755,6 +1755,17 @@ struct f2fs_sb_info {
> > unsigned int compress_watermark; /* cache page watermark */
> > atomic_t compress_page_hit; /* cache hit count */
> > #endif
> > +
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > + /* for io latency related statistics info in one iostat period */
> > + spinlock_t iostat_lat_lock;
> > + unsigned long rd_sum_lat[NR_PAGE_TYPE]; /* sum of read io latencies */
> > + unsigned long rd_peak_lat[NR_PAGE_TYPE]; /* peak read io latency */
> > + unsigned int rd_bio_cnt[NR_PAGE_TYPE]; /* read bio count */
> > + unsigned long wr_sum_lat[2][NR_PAGE_TYPE]; /* sum of write io latencies (sync/async) */
> > + unsigned long wr_peak_lat[2][NR_PAGE_TYPE]; /* peak write io latency (sync/async) */
> > + unsigned int wr_bio_cnt[2][NR_PAGE_TYPE]; /* write bio count (sync/async) */
>
> How about wrapping them in a structure? and we can move this into iostat.h?
>
> > +#endif
> > };
> > struct f2fs_private_dio {
> > @@ -3233,6 +3244,26 @@ static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
> > sbi->prev_rw_iostat[i] = 0;
> > }
> > spin_unlock(&sbi->iostat_lock);
> > +
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > + spin_lock_irq(&sbi->iostat_lat_lock);
> > + for (i = 0; i < NR_PAGE_TYPE; i++) {
> > + sbi->rd_sum_lat[i] = 0;
> > + sbi->rd_peak_lat[i] = 0;
> > + sbi->rd_bio_cnt[i] = 0;
> > + }
> > +
> > + for (i = 0; i < 2; i++) {
> > + int iotype;
> > +
> > + for (iotype = 0; iotype < NR_PAGE_TYPE; iotype++) {
> > + sbi->wr_sum_lat[i][iotype] = 0;
> > + sbi->wr_peak_lat[i][iotype] = 0;
> > + sbi->wr_bio_cnt[i][iotype] = 0;
> > + }
> > + }
> > + spin_unlock_irq(&sbi->iostat_lat_lock);
> > +#endif
> > }
> > extern void f2fs_record_iostat(struct f2fs_sb_info *sbi);
> > @@ -3259,6 +3290,54 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
> > f2fs_record_iostat(sbi);
> > }
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +
> > +struct bio_post_read_ctx;
> > +
> > +struct bio_iostat_ctx {
> > + struct f2fs_sb_info *sbi;
> > + unsigned long submit_ts;
> > + enum page_type type;
> > + struct bio_post_read_ctx *post_read_ctx;
> > +};
> > +
> > +struct f2fs_iostat_latency {
> > + unsigned int peak_lat;
> > + unsigned int avg_lat;
> > + unsigned int cnt;
> > +};
> > +
> > +static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
> > + int rw, int sync)
> > +{
> > + unsigned long ts_diff;
> > + unsigned int iotype = iostat_ctx->type;
> > + unsigned long flags;
> > + struct f2fs_sb_info *sbi = iostat_ctx->sbi;
> > +
> > + if (!sbi->iostat_enable)
> > + return;
> > +
> > + ts_diff = jiffies - iostat_ctx->submit_ts;
> > + if (iotype >= META_FLUSH)
> > + iotype = META;
> > +
> > + spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
> > + if (rw == 0) {
> > + sbi->rd_sum_lat[iotype] += ts_diff;
> > + sbi->rd_bio_cnt[iotype]++;
> > + if (ts_diff > sbi->rd_peak_lat[iotype])
> > + sbi->rd_peak_lat[iotype] = ts_diff;
> > + } else {
> > + sbi->wr_sum_lat[sync][iotype] += ts_diff;
> > + sbi->wr_bio_cnt[sync][iotype]++;
> > + if (ts_diff > sbi->wr_peak_lat[sync][iotype])
> > + sbi->wr_peak_lat[sync][iotype] = ts_diff;
> > + }
> > + spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
> > +}
> > +#endif
> > +
> > #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
> > #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
> > @@ -3645,6 +3724,13 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> > void f2fs_clear_page_cache_dirty_tag(struct page *page);
> > int f2fs_init_post_read_processing(void);
> > void f2fs_destroy_post_read_processing(void);
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +int f2fs_init_iostat_processing(void);
> > +void f2fs_destroy_iostat_processing(void);
> > +#else
> > +static inline int f2fs_init_iostat_processing(void) { return 0; }
> > +static inline void f2fs_destroy_iostat_processing(void) {}
> > +#endif
> > int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> > void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 04aeb7c0df7d..9ead6d2e703b 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -3905,6 +3905,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > /* init iostat info */
> > spin_lock_init(&sbi->iostat_lock);
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > + spin_lock_init(&sbi->iostat_lat_lock);
> > +#endif
> > sbi->iostat_enable = false;
> > sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
> > @@ -4415,9 +4418,12 @@ static int __init init_f2fs_fs(void)
> > err = f2fs_init_post_read_processing();
> > if (err)
> > goto free_root_stats;
> > - err = f2fs_init_bio_entry_cache();
> > + err = f2fs_init_iostat_processing();
> > if (err)
> > goto free_post_read;
> > + err = f2fs_init_bio_entry_cache();
> > + if (err)
> > + goto free_iostat;
> > err = f2fs_init_bioset();
> > if (err)
> > goto free_bio_enrty_cache;
> > @@ -4439,6 +4445,8 @@ static int __init init_f2fs_fs(void)
> > f2fs_destroy_bioset();
> > free_bio_enrty_cache:
> > f2fs_destroy_bio_entry_cache();
> > +free_iostat:
> > + f2fs_destroy_iostat_processing();
> > free_post_read:
> > f2fs_destroy_post_read_processing();
> > free_root_stats:
> > @@ -4473,6 +4481,7 @@ static void __exit exit_f2fs_fs(void)
> > f2fs_destroy_compress_mempool();
> > f2fs_destroy_bioset();
> > f2fs_destroy_bio_entry_cache();
> > + f2fs_destroy_iostat_processing();
> > f2fs_destroy_post_read_processing();
> > f2fs_destroy_root_stats();
> > unregister_filesystem(&f2fs_fs_type);
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index f3a3aecbe111..4cb0561ec89d 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -1074,6 +1074,48 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
> > return 0;
> > }
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
> > +{
> > + int io, sync, idx = 0;
> > + unsigned int cnt;
> > + struct f2fs_iostat_latency iostat_lat[3][NR_PAGE_TYPE];
> > +
> > + spin_lock_irq(&sbi->iostat_lat_lock);
> > + for (io = 0; io < NR_PAGE_TYPE; io++) {
> > + cnt = sbi->rd_bio_cnt[io];
> > + iostat_lat[idx][io].peak_lat =
> > + jiffies_to_msecs(sbi->rd_peak_lat[io]);
> > + iostat_lat[idx][io].cnt = cnt;
> > + iostat_lat[idx][io].avg_lat = cnt ?
> > + jiffies_to_msecs(sbi->rd_sum_lat[io]) / cnt : 0;
> > + sbi->rd_sum_lat[io] = 0;
> > + sbi->rd_peak_lat[io] = 0;
> > + sbi->rd_bio_cnt[io] = 0;
> > + }
> > +
> > + for (sync = 0; sync < 2; sync++) {
> > + idx++;
> > + for (io = 0; io < NR_PAGE_TYPE; io++) {
> > + cnt = sbi->wr_bio_cnt[sync][io];
> > + iostat_lat[idx][io].peak_lat =
> > + jiffies_to_msecs(sbi->wr_peak_lat[sync][io]);
> > + iostat_lat[idx][io].cnt = cnt;
> > + iostat_lat[idx][io].avg_lat = cnt ?
> > + jiffies_to_msecs(sbi->wr_sum_lat[sync][io]) / cnt : 0;
> > + sbi->wr_sum_lat[sync][io] = 0;
> > + sbi->wr_peak_lat[sync][io] = 0;
> > + sbi->wr_bio_cnt[sync][io] = 0;
> > + }
> > + }
> > + spin_unlock_irq(&sbi->iostat_lat_lock);
>
> What not defining read/write stat variables into the same array? so codes in
> __record_iostat_latency() and f2fs_reset_iostat() could be simplified.
>
> enum {
> READ_IO,
> WRITE_ASYNC_IO,
> WRITE_SYNC_IO,
> MAX_IO_TYPE,
> };
>
> unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
> unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
> unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
>
> Thanks,
>
> > +
> > + trace_f2fs_iostat_latency(sbi, iostat_lat);
> > +}
> > +#else
> > +static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) {}
> > +#endif
> > +
> > void f2fs_record_iostat(struct f2fs_sb_info *sbi)
> > {
> > unsigned long long iostat_diff[NR_IO_TYPE];
> > @@ -1099,6 +1141,8 @@ void f2fs_record_iostat(struct f2fs_sb_info *sbi)
> > spin_unlock(&sbi->iostat_lock);
> > trace_f2fs_iostat(sbi, iostat_diff);
> > +
> > + __record_iostat_latency(sbi);
> > }
> > static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 56b113e3cd6a..b9ede1b9207d 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -1894,6 +1894,94 @@ TRACE_EVENT(f2fs_iostat,
> > __entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
> > );
> > +#ifdef CONFIG_F2FS_IOSTAT_IO_LATENCY
> > +TRACE_EVENT(f2fs_iostat_latency,
> > +
> > + TP_PROTO(struct f2fs_sb_info *sbi, struct f2fs_iostat_latency (*iostat_lat)[NR_PAGE_TYPE]),
> > +
> > + TP_ARGS(sbi, iostat_lat),
> > +
> > + TP_STRUCT__entry(
> > + __field(dev_t, dev)
> > + __field(unsigned int, d_rd_peak)
> > + __field(unsigned int, d_rd_avg)
> > + __field(unsigned int, d_rd_cnt)
> > + __field(unsigned int, n_rd_peak)
> > + __field(unsigned int, n_rd_avg)
> > + __field(unsigned int, n_rd_cnt)
> > + __field(unsigned int, m_rd_peak)
> > + __field(unsigned int, m_rd_avg)
> > + __field(unsigned int, m_rd_cnt)
> > + __field(unsigned int, d_wr_s_peak)
> > + __field(unsigned int, d_wr_s_avg)
> > + __field(unsigned int, d_wr_s_cnt)
> > + __field(unsigned int, n_wr_s_peak)
> > + __field(unsigned int, n_wr_s_avg)
> > + __field(unsigned int, n_wr_s_cnt)
> > + __field(unsigned int, m_wr_s_peak)
> > + __field(unsigned int, m_wr_s_avg)
> > + __field(unsigned int, m_wr_s_cnt)
> > + __field(unsigned int, d_wr_as_peak)
> > + __field(unsigned int, d_wr_as_avg)
> > + __field(unsigned int, d_wr_as_cnt)
> > + __field(unsigned int, n_wr_as_peak)
> > + __field(unsigned int, n_wr_as_avg)
> > + __field(unsigned int, n_wr_as_cnt)
> > + __field(unsigned int, m_wr_as_peak)
> > + __field(unsigned int, m_wr_as_avg)
> > + __field(unsigned int, m_wr_as_cnt)
> > + ),
> > +
> > + TP_fast_assign(
> > + __entry->dev = sbi->sb->s_dev;
> > + __entry->d_rd_peak = iostat_lat[0][DATA].peak_lat;
> > + __entry->d_rd_avg = iostat_lat[0][DATA].avg_lat;
> > + __entry->d_rd_cnt = iostat_lat[0][DATA].cnt;
> > + __entry->n_rd_peak = iostat_lat[0][NODE].peak_lat;
> > + __entry->n_rd_avg = iostat_lat[0][NODE].avg_lat;
> > + __entry->n_rd_cnt = iostat_lat[0][NODE].cnt;
> > + __entry->m_rd_peak = iostat_lat[0][META].peak_lat;
> > + __entry->m_rd_avg = iostat_lat[0][META].avg_lat;
> > + __entry->m_rd_cnt = iostat_lat[0][META].cnt;
> > + __entry->d_wr_s_peak = iostat_lat[1][DATA].peak_lat;
> > + __entry->d_wr_s_avg = iostat_lat[1][DATA].avg_lat;
> > + __entry->d_wr_s_cnt = iostat_lat[1][DATA].cnt;
> > + __entry->n_wr_s_peak = iostat_lat[1][NODE].peak_lat;
> > + __entry->n_wr_s_avg = iostat_lat[1][NODE].avg_lat;
> > + __entry->n_wr_s_cnt = iostat_lat[1][NODE].cnt;
> > + __entry->m_wr_s_peak = iostat_lat[1][META].peak_lat;
> > + __entry->m_wr_s_avg = iostat_lat[1][META].avg_lat;
> > + __entry->m_wr_s_cnt = iostat_lat[1][META].cnt;
> > + __entry->d_wr_as_peak = iostat_lat[2][DATA].peak_lat;
> > + __entry->d_wr_as_avg = iostat_lat[2][DATA].avg_lat;
> > + __entry->d_wr_as_cnt = iostat_lat[2][DATA].cnt;
> > + __entry->n_wr_as_peak = iostat_lat[2][NODE].peak_lat;
> > + __entry->n_wr_as_avg = iostat_lat[2][NODE].avg_lat;
> > + __entry->n_wr_as_cnt = iostat_lat[2][NODE].cnt;
> > + __entry->m_wr_as_peak = iostat_lat[2][META].peak_lat;
> > + __entry->m_wr_as_avg = iostat_lat[2][META].avg_lat;
> > + __entry->m_wr_as_cnt = iostat_lat[2][META].cnt;
> > + ),
> > +
> > + TP_printk("dev = (%d,%d), "
> > + "iotype [peak lat.(ms)/avg lat.(ms)/count], "
> > + "rd_data [%u/%u/%u], rd_node [%u/%u/%u], rd_meta [%u/%u/%u], "
> > + "wr_sync_data [%u/%u/%u], wr_sync_node [%u/%u/%u], "
> > + "wr_sync_meta [%u/%u/%u], wr_async_data [%u/%u/%u], "
> > + "wr_async_node [%u/%u/%u], wr_async_meta [%u/%u/%u]",
> > + show_dev(__entry->dev),
> > + __entry->d_rd_peak, __entry->d_rd_avg, __entry->d_rd_cnt,
> > + __entry->n_rd_peak, __entry->n_rd_avg, __entry->n_rd_cnt,
> > + __entry->m_rd_peak, __entry->m_rd_avg, __entry->m_rd_cnt,
> > + __entry->d_wr_s_peak, __entry->d_wr_s_avg, __entry->d_wr_s_cnt,
> > + __entry->n_wr_s_peak, __entry->n_wr_s_avg, __entry->n_wr_s_cnt,
> > + __entry->m_wr_s_peak, __entry->m_wr_s_avg, __entry->m_wr_s_cnt,
> > + __entry->d_wr_as_peak, __entry->d_wr_as_avg, __entry->d_wr_as_cnt,
> > + __entry->n_wr_as_peak, __entry->n_wr_as_avg, __entry->n_wr_as_cnt,
> > + __entry->m_wr_as_peak, __entry->m_wr_as_avg, __entry->m_wr_as_cnt)
> > +);
> > +#endif
> > +
> > TRACE_EVENT(f2fs_bmap,
> > TP_PROTO(struct inode *inode, sector_t lblock, sector_t pblock),
> >
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2021/8/13 4:52, Jaegeuk Kim wrote:
> On 08/11, Chao Yu wrote:
>> Hi Daeho,
>>
>> On 2021/8/4 6:55, Daeho Jeong wrote:
>>> From: Daeho Jeong <[email protected]>
>>>
>>> Whenever we notice some sluggish issues on our machines, we are always
>>> curious about how well all types of I/O in the f2fs filesystem are
>>> handled. But, it's hard to get this kind of real data. First of all,
>>> we need to reproduce the issue while turning on the profiling tool like
>>> blktrace, but the issue doesn't happen again easily. Second, with the
>>> intervention of any tools, the overall timing of the issue will be
>>> slightly changed and it sometimes makes us hard to figure it out.
>>>
>>> So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
>>> IO latency statistics tracepoint events which are minimal things to
>>> understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
>>> node on, we can get this statistics info in a periodic way and it
>>> would cause the least overhead.
>>>
>>> [samples]
>>> f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
>>> rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
>>> wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
>>> wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>>>
>>> f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
>>> rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
>>> wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
>>> wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>>>
>>> Signed-off-by: Daeho Jeong <[email protected]>
>>>
>>> ---
>>> v2: clean up with wrappers and fix a build breakage reported by
>>> kernel test robot <[email protected]>
>>> ---
>>> fs/f2fs/Kconfig | 9 +++
>>
>> I try to apply this patch in my local dev branch, but it failed due to
>> conflicting with below commit, it needs to rebase this patch to last dev
>> branch.
>
> I applied this in dev branch. Could you please check?
Yeah, I see.
>>> +config F2FS_IOSTAT_IO_LATENCY
>>> + bool "F2FS IO statistics IO latency information"
>>> + depends on F2FS_FS
>>> + default n
>>> + help
>>> + Support printing out periodic IO latency statistics tracepoint
>>> + events. With this, you have to turn on "iostat_enable" sysfs
>>> + node to print this out.
>>
>> This functionality looks independent, how about introuducing iostat.h
>> and iostat.c (not sure, maybe trace.[hc])to include newly added structure
>> and functions for dispersive codes cleanup.
Thoughts? this also can avoid using CONFIG_F2FS_IOSTAT_IO_LATENCY in many places.
Thanks,
On 08/13, Chao Yu wrote:
> On 2021/8/13 4:52, Jaegeuk Kim wrote:
> > On 08/11, Chao Yu wrote:
> > > Hi Daeho,
> > >
> > > On 2021/8/4 6:55, Daeho Jeong wrote:
> > > > From: Daeho Jeong <[email protected]>
> > > >
> > > > Whenever we notice some sluggish issues on our machines, we are always
> > > > curious about how well all types of I/O in the f2fs filesystem are
> > > > handled. But, it's hard to get this kind of real data. First of all,
> > > > we need to reproduce the issue while turning on the profiling tool like
> > > > blktrace, but the issue doesn't happen again easily. Second, with the
> > > > intervention of any tools, the overall timing of the issue will be
> > > > slightly changed and it sometimes makes us hard to figure it out.
> > > >
> > > > So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
> > > > IO latency statistics tracepoint events which are minimal things to
> > > > understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
> > > > node on, we can get this statistics info in a periodic way and it
> > > > would cause the least overhead.
> > > >
> > > > [samples]
> > > > f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
> > > > dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> > > > rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
> > > > wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
> > > > wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
> > > > wr_async_node [0/0/0], wr_async_meta [0/0/0]
> > > >
> > > > f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
> > > > dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> > > > rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
> > > > wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
> > > > wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
> > > > wr_async_node [0/0/0], wr_async_meta [0/0/0]
> > > >
> > > > Signed-off-by: Daeho Jeong <[email protected]>
> > > >
> > > > ---
> > > > v2: clean up with wrappers and fix a build breakage reported by
> > > > kernel test robot <[email protected]>
> > > > ---
> > > > fs/f2fs/Kconfig | 9 +++
> > >
> > > I try to apply this patch in my local dev branch, but it failed due to
> > > conflicting with below commit, it needs to rebase this patch to last dev
> > > branch.
> >
> > I applied this in dev branch. Could you please check?
>
> Yeah, I see.
>
> > > > +config F2FS_IOSTAT_IO_LATENCY
> > > > + bool "F2FS IO statistics IO latency information"
> > > > + depends on F2FS_FS
> > > > + default n
> > > > + help
> > > > + Support printing out periodic IO latency statistics tracepoint
> > > > + events. With this, you have to turn on "iostat_enable" sysfs
> > > > + node to print this out.
> > >
> > > This functionality looks independent, how about introuducing iostat.h
> > > and iostat.c (not sure, maybe trace.[hc])to include newly added structure
> > > and functions for dispersive codes cleanup.
>
> Thoughts? this also can avoid using CONFIG_F2FS_IOSTAT_IO_LATENCY in many places.
It seems there's somewhat dependency with iostat which is done by default.
How about adding this by default as well in the existing iostat, and then
covering all together by F2FS_IOSTAT?
>
> Thanks,
On 2021/8/14 2:56, Jaegeuk Kim wrote:
> On 08/13, Chao Yu wrote:
>> On 2021/8/13 4:52, Jaegeuk Kim wrote:
>>> On 08/11, Chao Yu wrote:
>>>> Hi Daeho,
>>>>
>>>> On 2021/8/4 6:55, Daeho Jeong wrote:
>>>>> From: Daeho Jeong <[email protected]>
>>>>>
>>>>> Whenever we notice some sluggish issues on our machines, we are always
>>>>> curious about how well all types of I/O in the f2fs filesystem are
>>>>> handled. But, it's hard to get this kind of real data. First of all,
>>>>> we need to reproduce the issue while turning on the profiling tool like
>>>>> blktrace, but the issue doesn't happen again easily. Second, with the
>>>>> intervention of any tools, the overall timing of the issue will be
>>>>> slightly changed and it sometimes makes us hard to figure it out.
>>>>>
>>>>> So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
>>>>> IO latency statistics tracepoint events which are minimal things to
>>>>> understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
>>>>> node on, we can get this statistics info in a periodic way and it
>>>>> would cause the least overhead.
>>>>>
>>>>> [samples]
>>>>> f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
>>>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
>>>>> rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
>>>>> wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
>>>>> wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
>>>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>>>>>
>>>>> f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
>>>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
>>>>> rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
>>>>> wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
>>>>> wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
>>>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
>>>>>
>>>>> Signed-off-by: Daeho Jeong <[email protected]>
>>>>>
>>>>> ---
>>>>> v2: clean up with wrappers and fix a build breakage reported by
>>>>> kernel test robot <[email protected]>
>>>>> ---
>>>>> fs/f2fs/Kconfig | 9 +++
>>>>
>>>> I try to apply this patch in my local dev branch, but it failed due to
>>>> conflicting with below commit, it needs to rebase this patch to last dev
>>>> branch.
>>>
>>> I applied this in dev branch. Could you please check?
>>
>> Yeah, I see.
>>
>>>>> +config F2FS_IOSTAT_IO_LATENCY
>>>>> + bool "F2FS IO statistics IO latency information"
>>>>> + depends on F2FS_FS
>>>>> + default n
>>>>> + help
>>>>> + Support printing out periodic IO latency statistics tracepoint
>>>>> + events. With this, you have to turn on "iostat_enable" sysfs
>>>>> + node to print this out.
>>>>
>>>> This functionality looks independent, how about introuducing iostat.h
>>>> and iostat.c (not sure, maybe trace.[hc])to include newly added structure
>>>> and functions for dispersive codes cleanup.
>>
>> Thoughts? this also can avoid using CONFIG_F2FS_IOSTAT_IO_LATENCY in many places.
>
> It seems there's somewhat dependency with iostat which is done by default.
> How about adding this by default as well in the existing iostat, and then
> covering all together by F2FS_IOSTAT?
Agreed.
Any thoughts about using separated files to maintain these independent functionality
codes? like we did in trace.[hc] previously.
Thanks,
>
>>
>> Thanks,
Actually, I was working on it, after Chao pointed it out. :)
Merging them into F2FS_IOST and separating it from other files looks better.
On Fri, Aug 13, 2021 at 7:06 PM Chao Yu <[email protected]> wrote:
>
> On 2021/8/14 2:56, Jaegeuk Kim wrote:
> > On 08/13, Chao Yu wrote:
> >> On 2021/8/13 4:52, Jaegeuk Kim wrote:
> >>> On 08/11, Chao Yu wrote:
> >>>> Hi Daeho,
> >>>>
> >>>> On 2021/8/4 6:55, Daeho Jeong wrote:
> >>>>> From: Daeho Jeong <[email protected]>
> >>>>>
> >>>>> Whenever we notice some sluggish issues on our machines, we are always
> >>>>> curious about how well all types of I/O in the f2fs filesystem are
> >>>>> handled. But, it's hard to get this kind of real data. First of all,
> >>>>> we need to reproduce the issue while turning on the profiling tool like
> >>>>> blktrace, but the issue doesn't happen again easily. Second, with the
> >>>>> intervention of any tools, the overall timing of the issue will be
> >>>>> slightly changed and it sometimes makes us hard to figure it out.
> >>>>>
> >>>>> So, I added F2FS_IOSTAT_IO_LATENCY config option to support printing out
> >>>>> IO latency statistics tracepoint events which are minimal things to
> >>>>> understand filesystem's I/O related behaviors. With "iostat_enable" sysfs
> >>>>> node on, we can get this statistics info in a periodic way and it
> >>>>> would cause the least overhead.
> >>>>>
> >>>>> [samples]
> >>>>> f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency:
> >>>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> >>>>> rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4],
> >>>>> wr_sync_data [164/16/3331], wr_sync_node [152/3/648],
> >>>>> wr_sync_meta [160/2/4243], wr_async_data [24/13/15],
> >>>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
> >>>>>
> >>>>> f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency:
> >>>>> dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count],
> >>>>> rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1],
> >>>>> wr_sync_data [120/12/2285], wr_sync_node [88/5/428],
> >>>>> wr_sync_meta [52/6/2990], wr_async_data [4/1/3],
> >>>>> wr_async_node [0/0/0], wr_async_meta [0/0/0]
> >>>>>
> >>>>> Signed-off-by: Daeho Jeong <[email protected]>
> >>>>>
> >>>>> ---
> >>>>> v2: clean up with wrappers and fix a build breakage reported by
> >>>>> kernel test robot <[email protected]>
> >>>>> ---
> >>>>> fs/f2fs/Kconfig | 9 +++
> >>>>
> >>>> I try to apply this patch in my local dev branch, but it failed due to
> >>>> conflicting with below commit, it needs to rebase this patch to last dev
> >>>> branch.
> >>>
> >>> I applied this in dev branch. Could you please check?
> >>
> >> Yeah, I see.
> >>
> >>>>> +config F2FS_IOSTAT_IO_LATENCY
> >>>>> + bool "F2FS IO statistics IO latency information"
> >>>>> + depends on F2FS_FS
> >>>>> + default n
> >>>>> + help
> >>>>> + Support printing out periodic IO latency statistics tracepoint
> >>>>> + events. With this, you have to turn on "iostat_enable" sysfs
> >>>>> + node to print this out.
> >>>>
> >>>> This functionality looks independent, how about introuducing iostat.h
> >>>> and iostat.c (not sure, maybe trace.[hc])to include newly added structure
> >>>> and functions for dispersive codes cleanup.
> >>
> >> Thoughts? this also can avoid using CONFIG_F2FS_IOSTAT_IO_LATENCY in many places.
> >
> > It seems there's somewhat dependency with iostat which is done by default.
> > How about adding this by default as well in the existing iostat, and then
> > covering all together by F2FS_IOSTAT?
>
> Agreed.
>
> Any thoughts about using separated files to maintain these independent functionality
> codes? like we did in trace.[hc] previously.
>
> Thanks,
>
> >
> >>
> >> Thanks,
On 2021/8/15 12:27, Daeho Jeong wrote:
> Actually, I was working on it, after Chao pointed it out.:)
> Merging them into F2FS_IOST and separating it from other files looks better.
Cool, :)
Thanks,