2014-02-27 11:40:21

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/3] f2fs: remove costly bit operations for f2fs_find_entry

It turns out that a bit operation like find_next_bit is not always fast enough
for f2fs_find_entry.
Instead, it is pretty much simple and fast to traverse each dentries.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/dir.c | 32 +++++++++++++++++---------------
1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d5a2c9e..c3ea8f8 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -93,16 +93,20 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
f2fs_hash_t namehash, struct page **res_page)
{
struct f2fs_dir_entry *de;
- unsigned long bit_pos, end_pos, next_pos;
+ unsigned long bit_pos = 0;
struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
- int slots;
+ int max_len = 0;

- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, 0);
while (bit_pos < NR_DENTRY_IN_BLOCK) {
de = &dentry_blk->dentry[bit_pos];
- slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-
+ if (!test_bit_le(bit_pos, &dentry_blk->dentry_bitmap)) {
+ if (bit_pos == 0)
+ max_len = 1;
+ else if (!test_bit_le(bit_pos - 1, &dentry_blk->dentry_bitmap))
+ max_len++;
+ bit_pos++;
+ continue;
+ }
if (early_match_name(name, namelen, namehash, de)) {
if (!memcmp(dentry_blk->filename[bit_pos],
name, namelen)) {
@@ -110,20 +114,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
goto found;
}
}
- next_pos = bit_pos + slots;
- bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
- NR_DENTRY_IN_BLOCK, next_pos);
- if (bit_pos >= NR_DENTRY_IN_BLOCK)
- end_pos = NR_DENTRY_IN_BLOCK;
- else
- end_pos = bit_pos;
- if (*max_slots < end_pos - next_pos)
- *max_slots = end_pos - next_pos;
+ if (max_len > *max_slots) {
+ *max_slots = max_len;
+ max_len = 0;
+ }
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}

de = NULL;
kunmap(dentry_page);
found:
+ if (max_len > *max_slots)
+ *max_slots = max_len;
return de;
}

--
1.8.4.474.g128a96c


2014-02-27 11:40:38

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/3] f2fs: add an sysfs entry to control the directory level

This patch adds an sysfs entry to control dir_level used by the large directory.

The description of this entry is:

dir_level This parameter controls the directory level to
support large directory. If a directory has a
number of files, it can reduce the file lookup
latency by increasing this dir_level value.
Otherwise, it needs to decrease this value to
reduce the space overhead. The default value is 0.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Documentation/filesystems/f2fs.txt | 7 +++++++
fs/f2fs/f2fs.h | 3 +++
fs/f2fs/super.c | 7 +++++++
3 files changed, 17 insertions(+)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 8eb06b0..803784e 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -195,6 +195,13 @@ Files in /sys/fs/f2fs/<devname>
cleaning operations. The default value is 4096
which covers 8GB block address range.

+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
================================================================================
USAGE
================================================================================
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index a826916..6f88191 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -196,6 +196,8 @@ struct extent_info {
#define FADVISE_COLD_BIT 0x01
#define FADVISE_LOST_PINO_BIT 0x02

+#define DEF_DIR_LEVEL 0
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
@@ -447,6 +449,7 @@ struct f2fs_sb_info {
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
int active_logs; /* # of active logs */
+ int dir_level; /* directory level */

block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 475560e..1bd9153 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -184,6 +184,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);

#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -196,6 +197,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(max_victim_search),
+ ATTR_LIST(dir_level),
NULL,
};

@@ -359,6 +361,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
if (test_opt(F2FS_SB(sb), INLINE_XATTR))
set_inode_flag(fi, FI_INLINE_XATTR);

+ /* Will be used by directory only */
+ fi->i_dir_level = F2FS_SB(sb)->dir_level;
+
return &fi->vfs_inode;
}

@@ -785,6 +790,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)

for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
+
+ sbi->dir_level = DEF_DIR_LEVEL;
}

/*
--
1.8.4.474.g128a96c

2014-02-27 11:42:03

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/3] f2fs: introduce large directory support

This patch introduces an i_dir_level field to support large directory.

Previously, f2fs maintains multi-level hash tables to find a dentry quickly
from a bunch of chiild dentries in a directory, and the hash tables consist of
the following tree structure as below.

In Documentation/filesystems/f2fs.txt,

----------------------
A : bucket
B : block
N : MAX_DIR_HASH_DEPTH
----------------------

level #0 | A(2B)
|
level #1 | A(2B) - A(2B)
|
level #2 | A(2B) - A(2B) - A(2B) - A(2B)
. | . . . .
level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
. | . . . .
level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)

But, if we can guess that a directory will handle a number of child files,
we don't need to traverse the tree from level #0 to #N all the time.
Since the lower level tables contain relatively small number of dentries,
the miss ratio of the target dentry is likely to be high.

In order to avoid that, we can configure the hash tables sparsely from level #0
like this.

level #0 | A(2B) - A(2B) - A(2B) - A(2B)

level #1 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
. | . . . .
level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
. | . . . .
level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)

With this structure, we can skip the ineffective tree searches in lower level
hash tables.

This patch adds just a facility for this by introducing i_dir_level in
f2fs_inode.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
Documentation/filesystems/f2fs.txt | 6 ++++--
fs/f2fs/dir.c | 21 ++++++++++++---------
fs/f2fs/f2fs.h | 1 +
fs/f2fs/inode.c | 2 ++
include/linux/f2fs_fs.h | 2 +-
5 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index b8d2849..8eb06b0 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -444,9 +444,11 @@ The number of blocks and buckets are determined by,
# of blocks in level #n = |
`- 4, Otherwise

- ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
+ ,- 2^ (n + dir_level),
+ | if n < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
- `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise
+ `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
+ Otherwise

When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c3ea8f8..582fa00 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode)
>> PAGE_CACHE_SHIFT;
}

-static unsigned int dir_buckets(unsigned int level)
+static unsigned int dir_buckets(unsigned int level, int dir_level)
{
if (level < MAX_DIR_HASH_DEPTH / 2)
- return 1 << level;
+ return 1 << (level + dir_level);
else
- return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1);
+ return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
}

static unsigned int bucket_blocks(unsigned int level)
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}

-static unsigned long dir_block_index(unsigned int level, unsigned int idx)
+static unsigned long dir_block_index(unsigned int level,
+ int dir_level, unsigned int idx)
{
unsigned long i;
unsigned long bidx = 0;

for (i = 0; i < level; i++)
- bidx += dir_buckets(i) * bucket_blocks(i);
+ bidx += dir_buckets(i, dir_level) * bucket_blocks(i);
bidx += idx * bucket_blocks(level);
return bidx;
}
@@ -143,10 +144,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,

f2fs_bug_on(level > MAX_DIR_HASH_DEPTH);

- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);

- bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket);
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ le32_to_cpu(namehash) % nbucket);
end_block = bidx + nblock;

for (; bidx < end_block; bidx++) {
@@ -467,10 +469,11 @@ start:
if (level == current_depth)
++current_depth;

- nbucket = dir_buckets(level);
+ nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);

- bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket));
+ bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
+ (le32_to_cpu(dentry_hash) % nbucket));

for (block = bidx; block <= (bidx + nblock - 1); block++) {
dentry_page = get_new_data_page(dir, NULL, block, true);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4beedcc..a826916 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -200,6 +200,7 @@ struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
+ unsigned char i_dir_level; /* use for dentry level for large dir */
unsigned int i_current_depth; /* use only in directory structure */
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 08d69c9..d518e37 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode)
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
+ fi->i_dir_level = ri->i_dir_level;

get_extent_info(&fi->ext, ri->i_ext);
get_inline_info(fi, ri);
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
+ ri->i_dir_level = F2FS_I(inode)->i_dir_level;

__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index da74d87..df53e17 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -183,7 +183,7 @@ struct f2fs_inode {
__le32 i_pino; /* parent inode number */
__le32 i_namelen; /* file name length */
__u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */
- __u8 i_reserved2; /* for backward compatibility */
+ __u8 i_dir_level; /* dentry_level for large dir */

struct f2fs_extent i_ext; /* caching a largest extent */

--
1.8.4.474.g128a96c