2022-12-06 07:17:03

by 李扬韬

[permalink] [raw]
Subject: [PATCH] f2fs: introduce hot_data_age_threshold and warm_data_age_threshold mount opt

This patch adds support for parsing hot_data_age_threshold and
warm_data_age_threshold from the mount options, so that they no longer
have to be set through the sysfs nodes after the system has started.

Signed-off-by: Yangtao Li <[email protected]>
---
Documentation/filesystems/f2fs.rst | 6 +++++
fs/f2fs/debug.c | 3 ++-
fs/f2fs/extent_cache.c | 14 +++++++----
fs/f2fs/f2fs.h | 14 +++++++----
fs/f2fs/segment.c | 8 ++++---
fs/f2fs/super.c | 38 +++++++++++++++++++++++++++++-
fs/f2fs/sysfs.c | 16 +++++++++----
7 files changed, 81 insertions(+), 18 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 220f3e0d3f55..12a04d7cd634 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -351,6 +351,12 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
+hot_data_age_threshold=%u When age_extent_cache is on, it controls the age
+ threshold used to classify data blocks as hot. By default it is
+ initialized to 262144 blocks (equal to 1GB).
+warm_data_age_threshold=%u When age_extent_cache is on, it controls the age
+ threshold used to classify data blocks as warm. By default it is
+ initialized to 2621440 blocks (equal to 10GB).
======================== ============================================================

Debugfs Entries
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8f1ef742551f..5bf9c1ed7a2f 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -62,6 +62,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
#ifdef CONFIG_DEBUG_FS
static void update_general_status(struct f2fs_sb_info *sbi)
{
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
struct f2fs_stat_info *si = F2FS_STAT(sbi);
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
@@ -89,7 +90,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_total[EX_READ] += si->hit_largest;

/* block age extent_cache only */
- si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks);
+ si->allocated_data_blocks = atomic64_read(&fai->allocated_data_blocks);

/* validation check of the segment numbers */
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2fc675c45606..601659714aa9 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -883,9 +883,10 @@ static unsigned long long __calculate_block_age(unsigned long long new,
static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
loff_t f_size = i_size_read(inode);
unsigned long long cur_blocks =
- atomic64_read(&sbi->allocated_data_blocks);
+ atomic64_read(&fai->allocated_data_blocks);

/*
* When I/O is not aligned to a PAGE_SIZE, update will happen to the last
@@ -1216,13 +1217,18 @@ static void __init_extent_tree_info(struct extent_tree_info *eti)

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
+
__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

/* initialize for block age extents */
- atomic64_set(&sbi->allocated_data_blocks, 0);
- sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
- sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
+ atomic64_set(&fai->allocated_data_blocks, 0);
+
+ if (!fai->hot_data_age_threshold)
+ fai->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
+ if (!fai->warm_data_age_threshold)
+ fai->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
}

int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb71edcf70de..32a0bf2977bc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -64,6 +64,12 @@ enum {
FAULT_MAX,
};

+struct f2fs_age_extent_info {
+ atomic64_t allocated_data_blocks; /* for block age extent_cache */
+ unsigned int hot_data_age_threshold; /* The threshold used for hot data seperation*/
+ unsigned int warm_data_age_threshold; /* The threshold used for warm data seperation*/
+};
+
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)

@@ -148,6 +154,7 @@ struct f2fs_mount_info {
kgid_t s_resgid; /* reserved blocks for gid */
int active_logs; /* # of active logs */
int inline_xattr_size; /* inline xattr size */
+ struct f2fs_age_extent_info age_info; /* For block age extent */
#ifdef CONFIG_F2FS_FAULT_INJECTION
struct f2fs_fault_info fault_info; /* For fault injection */
#endif
@@ -173,6 +180,8 @@ struct f2fs_mount_info {
* unusable when disabling checkpoint
*/

+ /* For block age extent_cache */
+
/* For compression */
unsigned char compress_algorithm; /* algorithm type */
unsigned char compress_log_size; /* cluster log size */
@@ -1674,11 +1683,6 @@ struct f2fs_sb_info {

/* for extent tree cache */
struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
- atomic64_t allocated_data_blocks; /* for block age extent_cache */
-
- /* The threshold used for hot and warm data seperation*/
- unsigned int hot_data_age_threshold;
- unsigned int warm_data_age_threshold;

/* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index dee712f7225f..c9b779fd7041 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3159,14 +3159,15 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
struct extent_info ei;

if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
if (!ei.age)
return NO_CHECK_TYPE;
- if (ei.age <= sbi->hot_data_age_threshold)
+ if (ei.age <= fai->hot_data_age_threshold)
return CURSEG_HOT_DATA;
- if (ei.age <= sbi->warm_data_age_threshold)
+ if (ei.age <= fai->warm_data_age_threshold)
return CURSEG_WARM_DATA;
return CURSEG_COLD_DATA;
}
@@ -3242,6 +3243,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio)
{
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned long long old_mtime;
@@ -3316,7 +3318,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));

if (IS_DATASEG(type))
- atomic64_inc(&sbi->allocated_data_blocks);
+ atomic64_inc(&fai->allocated_data_blocks);

up_write(&sit_i->sentry_lock);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 5bdab376b852..feea2006b070 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -164,6 +164,8 @@ enum {
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
+ Opt_hot_data_age_threshold,
+ Opt_warm_data_age_threshold,
Opt_err,
};

@@ -243,6 +245,8 @@ static match_table_t f2fs_tokens = {
{Opt_discard_unit, "discard_unit=%s"},
{Opt_memory_mode, "memory=%s"},
{Opt_age_extent_cache, "age_extent_cache"},
+ {Opt_hot_data_age_threshold, "hot_data_age_threshold=%u"},
+ {Opt_warm_data_age_threshold, "warm_data_age_threshold=%u"},
{Opt_err, NULL},
};

@@ -658,6 +662,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
static int parse_options(struct super_block *sb, char *options, bool is_remount)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
substring_t args[MAX_OPT_ARGS];
#ifdef CONFIG_F2FS_FS_COMPRESSION
unsigned char (*ext)[F2FS_EXTENSION_LEN];
@@ -1262,6 +1267,32 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_age_extent_cache:
set_opt(sbi, AGE_EXTENT_CACHE);
break;
+ case Opt_hot_data_age_threshold:
+ if (!test_opt(sbi, AGE_EXTENT_CACHE)) {
+ f2fs_info(sbi, "age extent options not enabled");
+ break;
+ }
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg == 0 || arg > DEF_HOT_DATA_AGE_THRESHOLD) {
+ f2fs_err(sbi, "hot data age threshold is out of range");
+ return -EINVAL;
+ }
+ fai->hot_data_age_threshold = arg;
+ break;
+ case Opt_warm_data_age_threshold:
+ if (!test_opt(sbi, AGE_EXTENT_CACHE)) {
+ f2fs_info(sbi, "age extent options not enabled");
+ break;
+ }
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ if (arg == 0 || arg > DEF_WARM_DATA_AGE_THRESHOLD) {
+ f2fs_err(sbi, "warm data age threshold is out of range");
+ return -EINVAL;
+ }
+ fai->warm_data_age_threshold = arg;
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1963,8 +1994,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",read_extent_cache");
else
seq_puts(seq, ",no_read_extent_cache");
- if (test_opt(sbi, AGE_EXTENT_CACHE))
+ if (test_opt(sbi, AGE_EXTENT_CACHE)) {
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
+
seq_puts(seq, ",age_extent_cache");
+ seq_printf(seq, ",hot_data_age_threshold=%u", fai->hot_data_age_threshold);
+ seq_printf(seq, ",warm_data_age_threshold=%u", fai->warm_data_age_threshold);
+ }
if (test_opt(sbi, DATA_FLUSH))
seq_puts(seq, ",data_flush");

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2ab215110596..5b8e08aff0a6 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -36,6 +36,7 @@ enum {
FAULT_INFO_RATE, /* struct f2fs_fault_info */
FAULT_INFO_TYPE, /* struct f2fs_fault_info */
#endif
+ AGE_EXTENT_INFO, /* struct f2fs_age_extent_info */
RESERVED_BLOCKS, /* struct f2fs_sb_info */
CPRC_INFO, /* struct ckpt_req_control */
ATGC_INFO, /* struct atgc_management */
@@ -81,6 +82,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
struct_type == FAULT_INFO_TYPE)
return (unsigned char *)&F2FS_OPTION(sbi).fault_info;
#endif
+ else if (struct_type == AGE_EXTENT_INFO)
+ return (unsigned char *)&F2FS_OPTION(sbi).age_info;
#ifdef CONFIG_F2FS_STAT_FS
else if (struct_type == STAT_INFO)
return (unsigned char *)F2FS_STAT(sbi);
@@ -669,7 +672,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
}

if (!strcmp(a->attr.name, "hot_data_age_threshold")) {
- if (t == 0 || t >= sbi->warm_data_age_threshold)
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
+
+ if (t == 0 || t >= fai->warm_data_age_threshold)
return -EINVAL;
if (t == *ui)
return count;
@@ -678,7 +683,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
}

if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
- if (t == 0 || t <= sbi->hot_data_age_threshold)
+ struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
+
+ if (t == 0 || t <= fai->hot_data_age_threshold)
return -EINVAL;
if (t == *ui)
return count;
@@ -942,8 +949,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_bl
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);

/* For block age extent cache */
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
+F2FS_RW_ATTR(AGE_EXTENT_INFO, f2fs_age_extent_info, hot_data_age_threshold, hot_data_age_threshold);
+F2FS_RW_ATTR(AGE_EXTENT_INFO, f2fs_age_extent_info, warm_data_age_threshold,
+ warm_data_age_threshold);

#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
--
2.25.1


2022-12-07 10:24:14

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] f2fs: introduce hot_data_age_threshold and warm_data_age_threshold mount opt

Hi Yangtao,

I love your patch! Perhaps something to improve:

[auto build test WARNING on jaegeuk-f2fs/dev-test]
[cannot apply to jaegeuk-f2fs/dev linus/master v6.1-rc8 next-20221207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Yangtao-Li/f2fs-introduce-hot_data_age_threshold-and-warm_data_age_threshold-mount-opt/20221206-143754
base: https://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git dev-test
patch link: https://lore.kernel.org/r/20221206063616.68522-1-frank.li%40vivo.com
patch subject: [PATCH] f2fs: introduce hot_data_age_threshold and warm_data_age_threshold mount opt
reproduce:
# https://github.com/intel-lab-lkp/linux/commit/91f112832ffd9afa43d01f4acae672c7dfae45b6
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Yangtao-Li/f2fs-introduce-hot_data_age_threshold-and-warm_data_age_threshold-mount-opt/20221206-143754
git checkout 91f112832ffd9afa43d01f4acae672c7dfae45b6
make menuconfig
# enable CONFIG_COMPILE_TEST, CONFIG_WARN_MISSING_DOCUMENTS, CONFIG_WARN_ABI_ERRORS
make htmldocs

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

>> Documentation/filesystems/f2fs.rst:354: WARNING: Malformed table.

vim +354 Documentation/filesystems/f2fs.rst

106
107
108 ======================== ============================================================
109 background_gc=%s Turn on/off cleaning operations, namely garbage
110 collection, triggered in background when I/O subsystem is
111 idle. If background_gc=on, it will turn on the garbage
112 collection and if background_gc=off, garbage collection
113 will be turned off. If background_gc=sync, it will turn
114 on synchronous garbage collection running in background.
115 Default value for this option is on. So garbage
116 collection is on by default.
117 gc_merge When background_gc is on, this option can be enabled to
118 let background GC thread to handle foreground GC requests,
119 it can eliminate the sluggish issue caused by slow foreground
120 GC operation when GC is triggered from a process with limited
121 I/O and CPU resources.
122 nogc_merge Disable GC merge feature.
123 disable_roll_forward Disable the roll-forward recovery routine
124 norecovery Disable the roll-forward recovery routine, mounted read-
125 only (i.e., -o ro,disable_roll_forward)
126 discard/nodiscard Enable/disable real-time discard in f2fs, if discard is
127 enabled, f2fs will issue discard/TRIM commands when a
128 segment is cleaned.
129 no_heap Disable heap-style segment allocation which finds free
130 segments for data from the beginning of main area, while
131 for node from the end of main area.
132 nouser_xattr Disable Extended User Attributes. Note: xattr is enabled
133 by default if CONFIG_F2FS_FS_XATTR is selected.
134 noacl Disable POSIX Access Control List. Note: acl is enabled
135 by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
136 active_logs=%u Support configuring the number of active logs. In the
137 current design, f2fs supports only 2, 4, and 6 logs.
138 Default number is 6.
139 disable_ext_identify Disable the extension list configured by mkfs, so f2fs
140 is not aware of cold files such as media files.
141 inline_xattr Enable the inline xattrs feature.
142 noinline_xattr Disable the inline xattrs feature.
143 inline_xattr_size=%u Support configuring inline xattr size, it depends on
144 flexible inline xattr feature.
145 inline_data Enable the inline data feature: Newly created small (<~3.4k)
146 files can be written into inode block.
147 inline_dentry Enable the inline dir feature: data in newly created
148 directory entries can be written into inode block. The
149 space of inode block which is used to store inline
150 dentries is limited to ~3.4k.
151 noinline_dentry Disable the inline dentry feature.
152 flush_merge Merge concurrent cache_flush commands as much as possible
153 to eliminate redundant command issues. If the underlying
154 device handles the cache_flush command relatively slowly,
155 recommend to enable this option.
156 nobarrier This option can be used if underlying storage guarantees
157 its cached data should be written to the novolatile area.
158 If this option is set, no cache_flush commands are issued
159 but f2fs still guarantees the write ordering of all the
160 data writes.
161 barrier If this option is set, cache_flush commands are allowed to be
162 issued.
163 fastboot This option is used when a system wants to reduce mount
164 time as much as possible, even though normal performance
165 can be sacrificed.
166 extent_cache Enable an extent cache based on rb-tree, it can cache
167 as many as extent which map between contiguous logical
168 address and physical address per inode, resulting in
169 increasing the cache hit ratio. Set by default.
170 noextent_cache Disable an extent cache based on rb-tree explicitly, see
171 the above extent_cache mount option.
172 noinline_data Disable the inline data feature, inline data feature is
173 enabled by default.
174 data_flush Enable data flushing before checkpoint in order to
175 persist data of regular and symlink.
176 reserve_root=%d Support configuring reserved space which is used for
177 allocation from a privileged user with specified uid or
178 gid, unit: 4KB, the default limit is 0.2% of user blocks.
179 resuid=%d The user ID which may use the reserved blocks.
180 resgid=%d The group ID which may use the reserved blocks.
181 fault_injection=%d Enable fault injection in all supported types with
182 specified injection rate.
183 fault_type=%d Support configuring fault injection type, should be
184 enabled with fault_injection option, fault type value
185 is shown below, it supports single or combined type.
186
187 =================== ===========
188 Type_Name Type_Value
189 =================== ===========
190 FAULT_KMALLOC 0x000000001
191 FAULT_KVMALLOC 0x000000002
192 FAULT_PAGE_ALLOC 0x000000004
193 FAULT_PAGE_GET 0x000000008
194 FAULT_ALLOC_BIO 0x000000010 (obsolete)
195 FAULT_ALLOC_NID 0x000000020
196 FAULT_ORPHAN 0x000000040
197 FAULT_BLOCK 0x000000080
198 FAULT_DIR_DEPTH 0x000000100
199 FAULT_EVICT_INODE 0x000000200
200 FAULT_TRUNCATE 0x000000400
201 FAULT_READ_IO 0x000000800
202 FAULT_CHECKPOINT 0x000001000
203 FAULT_DISCARD 0x000002000
204 FAULT_WRITE_IO 0x000004000
205 FAULT_SLAB_ALLOC 0x000008000
206 FAULT_DQUOT_INIT 0x000010000
207 FAULT_LOCK_OP 0x000020000
208 FAULT_BLKADDR 0x000040000
209 =================== ===========
210 mode=%s Control block allocation mode which supports "adaptive"
211 and "lfs". In "lfs" mode, there should be no random
212 writes towards main area.
213 "fragment:segment" and "fragment:block" are newly added here.
214 These are developer options for experiments to simulate filesystem
215 fragmentation/after-GC situation itself. The developers use these
216 modes to understand filesystem fragmentation/after-GC condition well,
217 and eventually get some insights to handle them better.
218 In "fragment:segment", f2fs allocates a new segment in ramdom
219 position. With this, we can simulate the after-GC condition.
220 In "fragment:block", we can scatter block allocation with
221 "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
222 We added some randomness to both chunk and hole size to make
223 it close to realistic IO pattern. So, in this mode, f2fs will allocate
224 1..<max_fragment_chunk> blocks in a chunk and make a hole in the
225 length of 1..<max_fragment_hole> by turns. With this, the newly
226 allocated blocks will be scattered throughout the whole partition.
227 Note that "fragment:block" implicitly enables "fragment:segment"
228 option for more randomness.
229 Please, use these options for your experiments and we strongly
230 recommend to re-format the filesystem after using these options.
231 io_bits=%u Set the bit size of write IO requests. It should be set
232 with "mode=lfs".
233 usrquota Enable plain user disk quota accounting.
234 grpquota Enable plain group disk quota accounting.
235 prjquota Enable plain project quota accounting.
236 usrjquota=<file> Appoint specified file and type during mount, so that quota
237 grpjquota=<file> information can be properly updated during recovery flow,
238 prjjquota=<file> <quota file>: must be in root directory;
239 jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1].
240 offusrjquota Turn off user journalled quota.
241 offgrpjquota Turn off group journalled quota.
242 offprjjquota Turn off project journalled quota.
243 quota Enable plain user disk quota accounting.
244 noquota Disable all plain disk quota option.
245 alloc_mode=%s Adjust block allocation policy, which supports "reuse"
246 and "default".
247 fsync_mode=%s Control the policy of fsync. Currently supports "posix",
248 "strict", and "nobarrier". In "posix" mode, which is
249 default, fsync will follow POSIX semantics and does a
250 light operation to improve the filesystem performance.
251 In "strict" mode, fsync will be heavy and behaves in line
252 with xfs, ext4 and btrfs, where xfstest generic/342 will
253 pass, but the performance will regress. "nobarrier" is
254 based on "posix", but doesn't issue flush command for
255 non-atomic files likewise "nobarrier" mount option.
256 test_dummy_encryption
257 test_dummy_encryption=%s
258 Enable dummy encryption, which provides a fake fscrypt
259 context. The fake fscrypt context is used by xfstests.
260 The argument may be either "v1" or "v2", in order to
261 select the corresponding fscrypt policy version.
262 checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enable"
263 to reenable checkpointing. Is enabled by default. While
264 disabled, any unmounting or unexpected shutdowns will cause
265 the filesystem contents to appear as they did when the
266 filesystem was mounted with that option.
267 While mounting with checkpoint=disabled, the filesystem must
268 run garbage collection to ensure that all available space can
269 be used. If this takes too much time, the mount may return
270 EAGAIN. You may optionally add a value to indicate how much
271 of the disk you would be willing to temporarily give up to
272 avoid additional garbage collection. This can be given as a
273 number of blocks, or as a percent. For instance, mounting
274 with checkpoint=disable:100% would always succeed, but it may
275 hide up to all remaining free space. The actual space that
276 would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
277 This space is reclaimed once checkpoint=enable.
278 checkpoint_merge When checkpoint is enabled, this can be used to create a kernel
279 daemon and make it to merge concurrent checkpoint requests as
280 much as possible to eliminate redundant checkpoint issues. Plus,
281 we can eliminate the sluggish issue caused by slow checkpoint
282 operation when the checkpoint is done in a process context in
283 a cgroup having low i/o budget and cpu shares. To make this
284 do better, we set the default i/o priority of the kernel daemon
285 to "3", to give one higher priority than other kernel threads.
286 This is the same way to give a I/O priority to the jbd2
287 journaling thread of ext4 filesystem.
288 nocheckpoint_merge Disable checkpoint merge feature.
289 compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo",
290 "lz4", "zstd" and "lzo-rle" algorithm.
291 compress_algorithm=%s:%d Control compress algorithm and its compress level, now, only
292 "lz4" and "zstd" support compress level config.
293 algorithm level range
294 lz4 3 - 16
295 zstd 1 - 22
296 compress_log_size=%u Support configuring compress cluster size. The size will
297 be 4KB * (1 << %u). The default and minimum sizes are 16KB.
298 compress_extension=%s Support adding specified extension, so that f2fs can enable
299 compression on those corresponding files, e.g. if all files
300 with '.ext' has high compression rate, we can set the '.ext'
301 on compression extension list and enable compression on
302 these file by default rather than to enable it via ioctl.
303 For other files, we can still enable compression via ioctl.
304 Note that, there is one reserved special extension '*', it
305 can be set to enable compression for all files.
306 nocompress_extension=%s Support adding specified extension, so that f2fs can disable
307 compression on those corresponding files, just contrary to compression extension.
308 If you know exactly which files cannot be compressed, you can use this.
309 The same extension name can't appear in both compress and nocompress
310 extension at the same time.
311 If the compress extension specifies all files, the types specified by the
312 nocompress extension will be treated as special cases and will not be compressed.
313 Don't allow use '*' to specifie all file in nocompress extension.
314 After add nocompress_extension, the priority should be:
315 dir_flag < comp_extention,nocompress_extension < comp_file_flag,no_comp_file_flag.
316 See more in compression sections.
317
318 compress_chksum Support verifying chksum of raw data in compressed cluster.
319 compress_mode=%s Control file compression mode. This supports "fs" and "user"
320 modes. In "fs" mode (default), f2fs does automatic compression
321 on the compression enabled files. In "user" mode, f2fs disables
322 the automaic compression and gives the user discretion of
323 choosing the target file and the timing. The user can do manual
324 compression/decompression on the compression enabled files using
325 ioctls.
326 compress_cache Support to use address space of a filesystem managed inode to
327 cache compressed block, in order to improve cache hit ratio of
328 random read.
329 inlinecrypt When possible, encrypt/decrypt the contents of encrypted
330 files using the blk-crypto framework rather than
331 filesystem-layer encryption. This allows the use of
332 inline encryption hardware. The on-disk format is
333 unaffected. For more details, see
334 Documentation/block/inline-encryption.rst.
335 atgc Enable age-threshold garbage collection, it provides high
336 effectiveness and efficiency on background GC.
337 discard_unit=%s Control discard unit, the argument can be "block", "segment"
338 and "section", issued discard command's offset/size will be
339 aligned to the unit, by default, "discard_unit=block" is set,
340 so that small discard functionality is enabled.
341 For blkzoned device, "discard_unit=section" will be set by
342 default, it is helpful for large sized SMR or ZNS devices to
343 reduce memory cost by getting rid of fs metadata supports small
344 discard.
345 memory=%s Control memory mode. This supports "normal" and "low" modes.
346 "low" mode is introduced to support low memory devices.
347 Because of the nature of low memory devices, in this mode, f2fs
348 will try to save memory sometimes by sacrificing performance.
349 "normal" mode is the default mode and same as before.
350 age_extent_cache Enable an age extent cache based on rb-tree. It records
351 data block update frequency of the extent per inode, in
352 order to provide better temperature hints for data block
353 allocation.
> 354 hot_data_age_threshold=%u When age_extent_cache is on, it controls the age
355 threshold to indicate the data blocks as hot. By default it was
356 initialized as 262144 blocks(equals to 1GB).
357 warm_data_age_threshold=%u When age_extent_cache is on, it controls the age
358 threshold to indicate the data blocks as warm. By default it was
359 initialized as 2621440 blocks(equals to 10GB).
360 ======================== ============================================================
361

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (17.56 kB)
config (39.55 kB)
Download all attachments

2023-01-04 02:41:06

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH] f2fs: introduce hot_data_age_threshold and warm_data_age_threshold mount opt

On 12/06, Yangtao Li wrote:
> This patch supports parsing these two parameters from mount opt,
> so that we don't have to dynamically modify the parameters through
> the sysfs node after the system starts.

It seems sysfs would be enough?

>
> Signed-off-by: Yangtao Li <[email protected]>
> ---
> Documentation/filesystems/f2fs.rst | 6 +++++
> fs/f2fs/debug.c | 3 ++-
> fs/f2fs/extent_cache.c | 14 +++++++----
> fs/f2fs/f2fs.h | 14 +++++++----
> fs/f2fs/segment.c | 8 ++++---
> fs/f2fs/super.c | 38 +++++++++++++++++++++++++++++-
> fs/f2fs/sysfs.c | 16 +++++++++----
> 7 files changed, 81 insertions(+), 18 deletions(-)
>
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 220f3e0d3f55..12a04d7cd634 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,12 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> data block update frequency of the extent per inode, in
> order to provide better temperature hints for data block
> allocation.
> +hot_data_age_threshold=%u When age_extent_cache is on, it controls the age
> + threshold to indicate the data blocks as hot. By default it was
> + initialized as 262144 blocks(equals to 1GB).
> +warm_data_age_threshold=%u When age_extent_cache is on, it controls the age
> + threshold to indicate the data blocks as warm. By default it was
> + initialized as 2621440 blocks(equals to 10GB).
> ======================== ============================================================
>
> Debugfs Entries
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 8f1ef742551f..5bf9c1ed7a2f 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -62,6 +62,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
> #ifdef CONFIG_DEBUG_FS
> static void update_general_status(struct f2fs_sb_info *sbi)
> {
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> struct f2fs_stat_info *si = F2FS_STAT(sbi);
> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> int i;
> @@ -89,7 +90,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
> si->hit_total[EX_READ] += si->hit_largest;
>
> /* block age extent_cache only */
> - si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks);
> + si->allocated_data_blocks = atomic64_read(&fai->allocated_data_blocks);
>
> /* validation check of the segment numbers */
> si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
> diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
> index 2fc675c45606..601659714aa9 100644
> --- a/fs/f2fs/extent_cache.c
> +++ b/fs/f2fs/extent_cache.c
> @@ -883,9 +883,10 @@ static unsigned long long __calculate_block_age(unsigned long long new,
> static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> loff_t f_size = i_size_read(inode);
> unsigned long long cur_blocks =
> - atomic64_read(&sbi->allocated_data_blocks);
> + atomic64_read(&fai->allocated_data_blocks);
>
> /*
> * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
> @@ -1216,13 +1217,18 @@ static void __init_extent_tree_info(struct extent_tree_info *eti)
>
> void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
> {
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> +
> __init_extent_tree_info(&sbi->extent_tree[EX_READ]);
> __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
>
> /* initialize for block age extents */
> - atomic64_set(&sbi->allocated_data_blocks, 0);
> - sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
> - sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
> + atomic64_set(&fai->allocated_data_blocks, 0);
> +
> + if (!fai->hot_data_age_threshold)
> + fai->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
> + if (!fai->warm_data_age_threshold)
> + fai->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
> }
>
> int __init f2fs_create_extent_cache(void)
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index eb71edcf70de..32a0bf2977bc 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -64,6 +64,12 @@ enum {
> FAULT_MAX,
> };
>
> +struct f2fs_age_extent_info {
> + atomic64_t allocated_data_blocks; /* for block age extent_cache */
> + unsigned int hot_data_age_threshold; /* The threshold used for hot data separation */
> + unsigned int warm_data_age_threshold; /* The threshold used for warm data separation */
> +};
> +
> #ifdef CONFIG_F2FS_FAULT_INJECTION
> #define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
>
> @@ -148,6 +154,7 @@ struct f2fs_mount_info {
> kgid_t s_resgid; /* reserved blocks for gid */
> int active_logs; /* # of active logs */
> int inline_xattr_size; /* inline xattr size */
> + struct f2fs_age_extent_info age_info; /* For block age extent */
> #ifdef CONFIG_F2FS_FAULT_INJECTION
> struct f2fs_fault_info fault_info; /* For fault injection */
> #endif
> @@ -173,6 +180,8 @@ struct f2fs_mount_info {
> * unusable when disabling checkpoint
> */
>
> + /* For block age extent_cache */
> +
> /* For compression */
> unsigned char compress_algorithm; /* algorithm type */
> unsigned char compress_log_size; /* cluster log size */
> @@ -1674,11 +1683,6 @@ struct f2fs_sb_info {
>
> /* for extent tree cache */
> struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
> - atomic64_t allocated_data_blocks; /* for block age extent_cache */
> -
> - /* The threshold used for hot and warm data seperation*/
> - unsigned int hot_data_age_threshold;
> - unsigned int warm_data_age_threshold;
>
> /* basic filesystem units */
> unsigned int log_sectors_per_block; /* log2 sectors per block */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index dee712f7225f..c9b779fd7041 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3159,14 +3159,15 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
> static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> struct extent_info ei;
>
> if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
> if (!ei.age)
> return NO_CHECK_TYPE;
> - if (ei.age <= sbi->hot_data_age_threshold)
> + if (ei.age <= fai->hot_data_age_threshold)
> return CURSEG_HOT_DATA;
> - if (ei.age <= sbi->warm_data_age_threshold)
> + if (ei.age <= fai->warm_data_age_threshold)
> return CURSEG_WARM_DATA;
> return CURSEG_COLD_DATA;
> }
> @@ -3242,6 +3243,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> struct f2fs_summary *sum, int type,
> struct f2fs_io_info *fio)
> {
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> struct sit_info *sit_i = SIT_I(sbi);
> struct curseg_info *curseg = CURSEG_I(sbi, type);
> unsigned long long old_mtime;
> @@ -3316,7 +3318,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
>
> if (IS_DATASEG(type))
> - atomic64_inc(&sbi->allocated_data_blocks);
> + atomic64_inc(&fai->allocated_data_blocks);
>
> up_write(&sit_i->sentry_lock);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 5bdab376b852..feea2006b070 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,8 @@ enum {
> Opt_discard_unit,
> Opt_memory_mode,
> Opt_age_extent_cache,
> + Opt_hot_data_age_threshold,
> + Opt_warm_data_age_threshold,
> Opt_err,
> };
>
> @@ -243,6 +245,8 @@ static match_table_t f2fs_tokens = {
> {Opt_discard_unit, "discard_unit=%s"},
> {Opt_memory_mode, "memory=%s"},
> {Opt_age_extent_cache, "age_extent_cache"},
> + {Opt_hot_data_age_threshold, "hot_data_age_threshold=%u"},
> + {Opt_warm_data_age_threshold, "warm_data_age_threshold=%u"},
> {Opt_err, NULL},
> };
>
> @@ -658,6 +662,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
> static int parse_options(struct super_block *sb, char *options, bool is_remount)
> {
> struct f2fs_sb_info *sbi = F2FS_SB(sb);
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> substring_t args[MAX_OPT_ARGS];
> #ifdef CONFIG_F2FS_FS_COMPRESSION
> unsigned char (*ext)[F2FS_EXTENSION_LEN];
> @@ -1262,6 +1267,32 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> case Opt_age_extent_cache:
> set_opt(sbi, AGE_EXTENT_CACHE);
> break;
> + case Opt_hot_data_age_threshold:
> + if (!test_opt(sbi, AGE_EXTENT_CACHE)) {
> + f2fs_info(sbi, "age extent options not enabled");
> + break;
> + }
> + if (args->from && match_int(args, &arg))
> + return -EINVAL;
> + if (arg == 0 || arg > DEF_HOT_DATA_AGE_THRESHOLD) {
> + f2fs_err(sbi, "hot data age threshold is out of range");
> + return -EINVAL;
> + }
> + fai->hot_data_age_threshold = arg;
> + break;
> + case Opt_warm_data_age_threshold:
> + if (!test_opt(sbi, AGE_EXTENT_CACHE)) {
> + f2fs_info(sbi, "age extent options not enabled");
> + break;
> + }
> + if (args->from && match_int(args, &arg))
> + return -EINVAL;
> + if (arg == 0 || arg > DEF_WARM_DATA_AGE_THRESHOLD) {
> + f2fs_err(sbi, "warm data age threshold is out of range");
> + return -EINVAL;
> + }
> + fai->warm_data_age_threshold = arg;
> + break;
> default:
> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> p);
> @@ -1963,8 +1994,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> seq_puts(seq, ",read_extent_cache");
> else
> seq_puts(seq, ",no_read_extent_cache");
> - if (test_opt(sbi, AGE_EXTENT_CACHE))
> + if (test_opt(sbi, AGE_EXTENT_CACHE)) {
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> +
> seq_puts(seq, ",age_extent_cache");
> + seq_printf(seq, ",hot_data_age_threshold=%u", fai->hot_data_age_threshold);
> + seq_printf(seq, ",warm_data_age_threshold=%u", fai->warm_data_age_threshold);
> + }
> if (test_opt(sbi, DATA_FLUSH))
> seq_puts(seq, ",data_flush");
>
> diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> index 2ab215110596..5b8e08aff0a6 100644
> --- a/fs/f2fs/sysfs.c
> +++ b/fs/f2fs/sysfs.c
> @@ -36,6 +36,7 @@ enum {
> FAULT_INFO_RATE, /* struct f2fs_fault_info */
> FAULT_INFO_TYPE, /* struct f2fs_fault_info */
> #endif
> + AGE_EXTENT_INFO, /* struct f2fs_age_extent_info */
> RESERVED_BLOCKS, /* struct f2fs_sb_info */
> CPRC_INFO, /* struct ckpt_req_control */
> ATGC_INFO, /* struct atgc_management */
> @@ -81,6 +82,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
> struct_type == FAULT_INFO_TYPE)
> return (unsigned char *)&F2FS_OPTION(sbi).fault_info;
> #endif
> + else if (struct_type == AGE_EXTENT_INFO)
> + return (unsigned char *)&F2FS_OPTION(sbi).age_info;
> #ifdef CONFIG_F2FS_STAT_FS
> else if (struct_type == STAT_INFO)
> return (unsigned char *)F2FS_STAT(sbi);
> @@ -669,7 +672,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> }
>
> if (!strcmp(a->attr.name, "hot_data_age_threshold")) {
> - if (t == 0 || t >= sbi->warm_data_age_threshold)
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> +
> + if (t == 0 || t >= fai->warm_data_age_threshold)
> return -EINVAL;
> if (t == *ui)
> return count;
> @@ -678,7 +683,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> }
>
> if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
> - if (t == 0 || t <= sbi->hot_data_age_threshold)
> + struct f2fs_age_extent_info *fai = &F2FS_OPTION(sbi).age_info;
> +
> + if (t == 0 || t <= fai->hot_data_age_threshold)
> return -EINVAL;
> if (t == *ui)
> return count;
> @@ -942,8 +949,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_bl
> F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
>
> /* For block age extent cache */
> -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
> -F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
> +F2FS_RW_ATTR(AGE_EXTENT_INFO, f2fs_age_extent_info, hot_data_age_threshold, hot_data_age_threshold);
> +F2FS_RW_ATTR(AGE_EXTENT_INFO, f2fs_age_extent_info, warm_data_age_threshold,
> + warm_data_age_threshold);
>
> #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
> static struct attribute *f2fs_attrs[] = {
> --
> 2.25.1