2014-07-25 22:47:20

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 01/11] f2fs: add nobarrier mount option

This patch adds a mount option, nobarrier, in f2fs.
The assumption in here is that file system keeps the IO ordering, but
doesn't care about cache flushes inside the storages.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 5 ++++-
fs/f2fs/f2fs.h | 1 +
fs/f2fs/segment.c | 3 +++
fs/f2fs/super.c | 7 +++++++
4 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c77c667..482313d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
- io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
+ if (test_opt(sbi, NOBARRIER))
+ io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
+ else
+ io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
up_write(&io->io_rwsem);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8f507d4..e999eec 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,6 +41,7 @@
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
+#define F2FS_MOUNT_NOBARRIER 0x00000400

#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8a6e57d..9fce0f47 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -239,6 +239,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
struct flush_cmd cmd;

+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 34649aa..eec89a2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -52,6 +52,7 @@ enum {
Opt_inline_xattr,
Opt_inline_data,
Opt_flush_merge,
+ Opt_nobarrier,
Opt_err,
};

@@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
{Opt_inline_xattr, "inline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_flush_merge, "flush_merge"},
+ {Opt_nobarrier, "nobarrier"},
{Opt_err, NULL},
};

@@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_flush_merge:
set_opt(sbi, FLUSH_MERGE);
break;
+ case Opt_nobarrier:
+ set_opt(sbi, NOBARRIER);
+ break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
@@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_data");
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
seq_puts(seq, ",flush_merge");
+ if (!f2fs_readonly(sbi->sb) && test_opt(sbi, NOBARRIER))
+ seq_puts(seq, ",nobarrier");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);

return 0;
--
1.8.5.2 (Apple Git-48)


2014-07-25 22:47:29

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 02/11] f2fs: punch the core function for inode management

This patch punches out the core functions to manage the inode numbers.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 81 ++++++++++++++++++++++++++++------------------------
1 file changed, 44 insertions(+), 37 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0b4710c..3e3c2c3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -282,6 +282,47 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};

+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct list_head *head;
+ struct ino_entry *new, *e;
+
+ new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ new->ino = ino;
+
+ spin_lock(&sbi->orphan_inode_lock);
+ list_for_each_entry(e, &sbi->orphan_inode_list, list) {
+ if (e->ino == ino) {
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, new);
+ return;
+ }
+ if (e->ino > ino)
+ break;
+ }
+
+ /* add new entry into list which is sorted by inode number */
+ list_add_tail(&new->list, &e->list);
+ spin_unlock(&sbi->orphan_inode_lock);
+}
+
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct ino_entry *e;
+
+ spin_lock(&sbi->orphan_inode_lock);
+ list_for_each_entry(e, &sbi->orphan_inode_list, list) {
+ if (e->ino == ino) {
+ list_del(&e->list);
+ sbi->n_orphans--;
+ spin_unlock(&sbi->orphan_inode_lock);
+ kmem_cache_free(orphan_entry_slab, e);
+ return;
+ }
+ }
+ spin_unlock(&sbi->orphan_inode_lock);
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;
@@ -306,48 +347,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *new, *orphan;
-
- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, new);
- return;
- }
-
- if (orphan->ino > ino)
- break;
- }
-
/* add new orphan entry into list which is sorted by inode number */
- list_add_tail(&new->list, &orphan->list);
- spin_unlock(&sbi->orphan_inode_lock);
+ __add_ino_entry(sbi, ino);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- struct list_head *head;
- struct orphan_inode_entry *orphan;
-
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
- list_for_each_entry(orphan, head, list) {
- if (orphan->ino == ino) {
- list_del(&orphan->list);
- f2fs_bug_on(sbi->n_orphans == 0);
- sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, orphan);
- return;
- }
- }
- spin_unlock(&sbi->orphan_inode_lock);
+ /* remove orphan entry from orphan list */
+ __remove_ino_entry(sbi, ino);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:48:24

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 03/11] f2fs: add infra for ino management

This patch changes the naming of orphan-related data structures to use as
inode numbers managed globally.
Later, we can use this facility for managing any inode number lists.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 72 +++++++++++++++++++++++++++-------------------------
fs/f2fs/debug.c | 2 +-
fs/f2fs/f2fs.h | 19 +++++++++-----
fs/f2fs/super.c | 2 +-
4 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 3e3c2c3..f93d154 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -22,7 +22,7 @@
#include "segment.h"
#include <trace/events/f2fs.h>

-static struct kmem_cache *orphan_entry_slab;
+static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
@@ -282,19 +282,18 @@ const struct address_space_operations f2fs_meta_aops = {
.set_page_dirty = f2fs_set_meta_page_dirty,
};

-static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
- struct list_head *head;
struct ino_entry *new, *e;

- new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
+ new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
new->ino = ino;

- spin_lock(&sbi->orphan_inode_lock);
- list_for_each_entry(e, &sbi->orphan_inode_list, list) {
+ spin_lock(&sbi->ino_lock[type]);
+ list_for_each_entry(e, &sbi->ino_list[type], list) {
if (e->ino == ino) {
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, new);
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, new);
return;
}
if (e->ino > ino)
@@ -303,58 +302,58 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)

/* add new entry into list which is sorted by inode number */
list_add_tail(&new->list, &e->list);
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[type]);
}

-static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
+static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
struct ino_entry *e;

- spin_lock(&sbi->orphan_inode_lock);
- list_for_each_entry(e, &sbi->orphan_inode_list, list) {
+ spin_lock(&sbi->ino_lock[type]);
+ list_for_each_entry(e, &sbi->ino_list[type], list) {
if (e->ino == ino) {
list_del(&e->list);
sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
- kmem_cache_free(orphan_entry_slab, e);
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
return;
}
}
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[type]);
}

int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;

- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
if (unlikely(sbi->n_orphans >= sbi->max_orphans))
err = -ENOSPC;
else
sbi->n_orphans++;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);

return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
- spin_lock(&sbi->orphan_inode_lock);
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
f2fs_bug_on(sbi->n_orphans == 0);
sbi->n_orphans--;
- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
/* add new orphan entry into list which is sorted by inode number */
- __add_ino_entry(sbi, ino);
+ __add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
/* remove orphan entry from orphan list */
- __remove_ino_entry(sbi, ino);
+ __remove_ino_entry(sbi, ino, ORPHAN_INO);
}

static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -408,14 +407,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
(F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
struct page *page = NULL;
- struct orphan_inode_entry *orphan = NULL;
+ struct ino_entry *orphan = NULL;

for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);

index = 1;
- spin_lock(&sbi->orphan_inode_lock);
- head = &sbi->orphan_inode_list;
+ spin_lock(&sbi->ino_lock[ORPHAN_INO]);
+ head = &sbi->ino_list[ORPHAN_INO];

/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
@@ -455,7 +454,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
f2fs_put_page(page, 1);
}

- spin_unlock(&sbi->orphan_inode_lock);
+ spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -939,31 +938,36 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

-void init_orphan_info(struct f2fs_sb_info *sbi)
+void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
- spin_lock_init(&sbi->orphan_inode_lock);
- INIT_LIST_HEAD(&sbi->orphan_inode_list);
- sbi->n_orphans = 0;
+ int i;
+
+ for (i = 0; i < MAX_INO_ENTRY; i++) {
+ spin_lock_init(&sbi->ino_lock[i]);
+ INIT_LIST_HEAD(&sbi->ino_list[i]);
+ }
+
/*
* considering 512 blocks in a segment 8 blocks are needed for cp
* and log segment summaries. Remaining blocks are used to keep
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
+ sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
* F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
- orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
- sizeof(struct orphan_inode_entry));
- if (!orphan_entry_slab)
+ ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
+ sizeof(struct ino_entry));
+ if (!ino_entry_slab)
return -ENOMEM;
inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
sizeof(struct dir_inode_entry));
if (!inode_entry_slab) {
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
return -ENOMEM;
}
return 0;
@@ -971,6 +975,6 @@ int __init create_checkpoint_caches(void)

void destroy_checkpoint_caches(void)
{
- kmem_cache_destroy(orphan_entry_slab);
+ kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(inode_entry_slab);
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 3f99266..a441ba3 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -167,7 +167,7 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
+ si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
}

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e999eec..b6fa6ec 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -100,8 +100,13 @@ enum {
META_SSA
};

-/* for the list of orphan inodes */
-struct orphan_inode_entry {
+/* for the list of ino */
+enum {
+ ORPHAN_INO, /* for orphan ino list */
+ MAX_INO_ENTRY, /* max. list */
+};
+
+struct ino_entry {
struct list_head list; /* list head */
nid_t ino; /* inode number */
};
@@ -450,9 +455,11 @@ struct f2fs_sb_info {
bool por_doing; /* recovery is doing or not */
wait_queue_head_t cp_wait;

- /* for orphan inode management */
- struct list_head orphan_inode_list; /* orphan inode list */
- spinlock_t orphan_inode_lock; /* for orphan inode list */
+ /* for inode management */
+ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
+ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+
+ /* for orphan inode, use 0'th array */
unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */

@@ -1255,7 +1262,7 @@ void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, bool);
-void init_orphan_info(struct f2fs_sb_info *);
+void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index eec89a2..5a80755 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1003,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);

- init_orphan_info(sbi);
+ init_ino_entry_info(sbi);

/* setup f2fs internal modules */
err = build_segment_manager(sbi);
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:48:34

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 04/11] f2fs: use radix_tree for ino management

For better ino management, this patch replaces the data structure from list
to radix tree.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 48 ++++++++++++++++++++++++++----------------------
fs/f2fs/f2fs.h | 1 +
2 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f93d154..d35094a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -284,24 +284,26 @@ const struct address_space_operations f2fs_meta_aops = {

static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
- struct ino_entry *new, *e;
-
- new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
- new->ino = ino;
-
+ struct ino_entry *e;
+retry:
spin_lock(&sbi->ino_lock[type]);
- list_for_each_entry(e, &sbi->ino_list[type], list) {
- if (e->ino == ino) {
+
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (!e) {
+ e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
+ if (!e) {
spin_unlock(&sbi->ino_lock[type]);
- kmem_cache_free(ino_entry_slab, new);
- return;
+ goto retry;
}
- if (e->ino > ino)
- break;
- }
+ if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
+ spin_unlock(&sbi->ino_lock[type]);
+ goto retry;
+ }
+ memset(e, 0, sizeof(struct ino_entry));
+ e->ino = ino;

- /* add new entry into list which is sorted by inode number */
- list_add_tail(&new->list, &e->list);
+ list_add_tail(&e->list, &sbi->ino_list[type]);
+ }
spin_unlock(&sbi->ino_lock[type]);
}

@@ -310,14 +312,15 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
struct ino_entry *e;

spin_lock(&sbi->ino_lock[type]);
- list_for_each_entry(e, &sbi->ino_list[type], list) {
- if (e->ino == ino) {
- list_del(&e->list);
+ e = radix_tree_lookup(&sbi->ino_root[type], ino);
+ if (e) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[type], ino);
+ if (type == ORPHAN_INO)
sbi->n_orphans--;
- spin_unlock(&sbi->ino_lock[type]);
- kmem_cache_free(ino_entry_slab, e);
- return;
- }
+ spin_unlock(&sbi->ino_lock[type]);
+ kmem_cache_free(ino_entry_slab, e);
+ return;
}
spin_unlock(&sbi->ino_lock[type]);
}
@@ -346,7 +349,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
- /* add new orphan entry into list which is sorted by inode number */
+ /* add new orphan ino entry into list */
__add_ino_entry(sbi, ino, ORPHAN_INO);
}

@@ -943,6 +946,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
int i;

for (i = 0; i < MAX_INO_ENTRY; i++) {
+ INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
spin_lock_init(&sbi->ino_lock[i]);
INIT_LIST_HEAD(&sbi->ino_list[i]);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b6fa6ec..4454caa 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -456,6 +456,7 @@ struct f2fs_sb_info {
wait_queue_head_t cp_wait;

/* for inode management */
+ struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */

--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:17

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 05/11] f2fs: add info of appended or updated data writes

This patch introduces a inode number list in which represents inodes having
appended data writes or updated data writes after last checkpoint.
This will be used at fsync to determine whether the recovery information
should be written or not.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 39 +++++++++++++++++++++++++++++++++++++++
fs/f2fs/data.c | 2 ++
fs/f2fs/f2fs.h | 7 +++++++
fs/f2fs/inode.c | 4 ++++
4 files changed, 52 insertions(+)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index d35094a..42a16c1 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -325,6 +325,44 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&sbi->ino_lock[type]);
}

+void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* add new dirty ino entry into list */
+ __add_ino_entry(sbi, ino, type);
+}
+
+void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
+{
+ /* remove dirty ino entry from list */
+ __remove_ino_entry(sbi, ino, type);
+}
+
+/* mode should be APPEND_INO or UPDATE_INO */
+bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
+{
+ struct ino_entry *e;
+ spin_lock(&sbi->ino_lock[mode]);
+ e = radix_tree_lookup(&sbi->ino_root[mode], ino);
+ spin_unlock(&sbi->ino_lock[mode]);
+ return e ? true : false;
+}
+
+static void release_dirty_inode(struct f2fs_sb_info *sbi)
+{
+ struct ino_entry *e, *tmp;
+ int i;
+
+ for (i = APPEND_INO; i <= UPDATE_INO; i++) {
+ spin_lock(&sbi->ino_lock[i]);
+ list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+ list_del(&e->list);
+ radix_tree_delete(&sbi->ino_root[i], e->ino);
+ kmem_cache_free(ino_entry_slab, e);
+ }
+ spin_unlock(&sbi->ino_lock[i]);
+ }
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
int err = 0;
@@ -896,6 +934,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)

if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
clear_prefree_segments(sbi);
+ release_dirty_inode(sbi);
F2FS_RESET_SB_DIRT(sbi);
}
}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 482313d..ec3c886 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -789,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
!is_cold_data(page) &&
need_inplace_update(inode))) {
rewrite_data_page(page, old_blkaddr, fio);
+ set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
} else {
write_data_page(page, &dn, &new_blkaddr, fio);
update_extent_cache(new_blkaddr, &dn);
+ set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
}
out_writepage:
f2fs_put_dnode(&dn);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4454caa..ab36025 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -103,6 +103,8 @@ enum {
/* for the list of ino */
enum {
ORPHAN_INO, /* for orphan ino list */
+ APPEND_INO, /* for append ino list */
+ UPDATE_INO, /* for update ino list */
MAX_INO_ENTRY, /* max. list */
};

@@ -994,6 +996,8 @@ enum {
FI_NO_EXTENT, /* not to use the extent cache */
FI_INLINE_XATTR, /* used for inline xattr */
FI_INLINE_DATA, /* used for inline data*/
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
};

static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1252,6 +1256,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
+void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
+bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index cafba3c..0e69aa9 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -296,6 +296,10 @@ void f2fs_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
no_delete:
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
+ if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+ if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
+ add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
out_clear:
clear_inode(inode);
}
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:23

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 06/11] f2fs: skip unnecessary data writes during fsync

This patch intends to improve the fsync performance by skipping remaining the
recovery information, only when there is no data that we should recover.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/file.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 7c652b3..121689a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -133,6 +133,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
}

+ /*
+ * if there is no written data, don't waste time to write recovery info.
+ */
+ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
+ !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
+ if (is_inode_flag_set(fi, FI_UPDATE_WRITE) &&
+ exist_written_data(sbi, inode->i_ino, UPDATE_INO))
+ goto flush_out;
+ goto out;
+ }
+
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);

@@ -188,6 +199,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
if (ret)
goto out;
+
+ /* once recovery info is written, don't need to tack this */
+ remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
+flush_out:
+ remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
}
out:
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:28

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 07/11] f2fs: enable in-place-update for fdatasync

This patch enforces in-place-updates only when fdatasync is requested.
If we adopt this in-place-updates for the fdatasync, we can skip to write the
recovery information.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 7 +++++++
fs/f2fs/segment.h | 4 ++++
3 files changed, 12 insertions(+)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ab36025..8f8685e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -998,6 +998,7 @@ enum {
FI_INLINE_DATA, /* used for inline data*/
FI_APPEND_WRITE, /* inode has appended data */
FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used fo ipu for fdatasync */
};

static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 121689a..e339856 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return 0;

trace_f2fs_sync_file_enter(inode);
+
+ /* if fdatasync is triggered, let's do in-place-update */
+ if (datasync)
+ set_inode_flag(fi, FI_NEED_IPU);
+
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret) {
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
}
+ if (datasync)
+ clear_inode_flag(fi, FI_NEED_IPU);

/*
* if there is no written data, don't waste time to write recovery info.
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ee5c75e..55973f7 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
if (S_ISDIR(inode->i_mode))
return false;

+ /* this is only set during fdatasync */
+ if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
+ return true;
+
switch (SM_I(sbi)->ipu_policy) {
case F2FS_IPU_FORCE:
return true;
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:32

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 09/11] f2fs: test before set/clear bits

If the bit is already set, we don't need to reset it, and vice versa.
Because we don't need to make the caches dirty for that.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8f8685e..475f97c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1003,7 +1003,8 @@ enum {

static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- set_bit(flag, &fi->flags);
+ if (!test_bit(flag, &fi->flags))
+ set_bit(flag, &fi->flags);
}

static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)
@@ -1013,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag)

static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag)
{
- clear_bit(flag, &fi->flags);
+ if (test_bit(flag, &fi->flags))
+ clear_bit(flag, &fi->flags);
}

static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:36

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 10/11] f2fs: avoid checkpoint when error was occurred

No need to do checkpoint, whenever any errors were detected.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/recovery.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368..90d7e80 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -436,7 +436,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
{
struct list_head inode_list;
int err;
- bool need_writecp = false;

fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry));
@@ -454,8 +453,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
if (list_empty(&inode_list))
goto out;

- need_writecp = true;
-
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
f2fs_bug_on(!list_empty(&inode_list));
@@ -463,7 +460,7 @@ out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
sbi->por_doing = false;
- if (!err && need_writecp)
+ if (!err)
write_checkpoint(sbi, false);
return err;
}
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:49:40

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 11/11] f2fs: avoid retrying wrong recovery routine when error was occurred

This patch eliminates the propagation of recovery errors to the next mount.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 3 ++-
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/recovery.c | 20 +++++++++++++++++++-
fs/f2fs/segment.c | 5 +----
4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 36b0d47..765cc51 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -795,6 +795,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
nid_t last_nid = 0;
block_t start_blk;
struct page *cp_page;
@@ -808,7 +809,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
- discard_next_dnode(sbi);
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));

/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 475f97c..14b9f74 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1225,7 +1225,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
-void discard_next_dnode(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 90d7e80..8ef78f1 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -434,7 +434,9 @@ next:

int recover_fsync_data(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ block_t blkaddr;
int err;

fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -446,6 +448,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)

/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -459,8 +464,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
+
+ if (err) {
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ truncate_inode_pages_final(META_MAPPING(sbi));
+ }
+
sbi->por_doing = false;
- if (!err)
+ if (!err) {
write_checkpoint(sbi, false);
+ } else {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ }
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9fce0f47..e016b97 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -379,11 +379,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

-void discard_next_dnode(struct f2fs_sb_info *sbi)
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
if (f2fs_issue_discard(sbi, blkaddr, 1)) {
struct page *page = grab_meta_page(sbi, blkaddr);
/* zero-filled page */
--
1.8.5.2 (Apple Git-48)

2014-07-25 22:50:36

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 08/11] f2fs: fix wrong condition for unlikely

This patch fixes the wrongly used unlikely condition.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 42a16c1..36b0d47 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -932,7 +932,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

- if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
+ if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
clear_prefree_segments(sbi);
release_dirty_inode(sbi);
F2FS_RESET_SB_DIRT(sbi);
--
1.8.5.2 (Apple Git-48)

2014-07-29 00:42:46

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

Hi Jaegeuk,

On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> This patch enforces in-place-updates only when fdatasync is requested.
> If we adopt this in-place-updates for the fdatasync, we can skip to write the
> recovery information.

But, as you know, random write occurs when changing into in-place-updates.
It will degrade write performance. Is there any case in-place-updates is
better, except recovery or high utilization?

Thanks

>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/file.c | 7 +++++++
> fs/f2fs/segment.h | 4 ++++
> 3 files changed, 12 insertions(+)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ab36025..8f8685e 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -998,6 +998,7 @@ enum {
> FI_INLINE_DATA, /* used for inline data*/
> FI_APPEND_WRITE, /* inode has appended data */
> FI_UPDATE_WRITE, /* inode has in-place-update data */
> + FI_NEED_IPU, /* used fo ipu for fdatasync */
> };
>
> static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 121689a..e339856 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> return 0;
>
> trace_f2fs_sync_file_enter(inode);
> +
> + /* if fdatasync is triggered, let's do in-place-update */
> + if (datasync)
> + set_inode_flag(fi, FI_NEED_IPU);
> +
> ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> if (ret) {
> trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> return ret;
> }
> + if (datasync)
> + clear_inode_flag(fi, FI_NEED_IPU);
>
> /*
> * if there is no written data, don't waste time to write recovery info.
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index ee5c75e..55973f7 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> if (S_ISDIR(inode->i_mode))
> return false;
>
> + /* this is only set during fdatasync */
> + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> + return true;
> +
> switch (SM_I(sbi)->ipu_policy) {
> case F2FS_IPU_FORCE:
> return true;
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:29:13

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 01/11] f2fs: add nobarrier mount option

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 01/11] f2fs: add nobarrier mount option
>
> This patch adds a mount option, nobarrier, in f2fs.
> The assumption in here is that file system keeps the IO ordering, but
> doesn't care about cache flushes inside the storages.

Looks good to me.
But it's better to add some description in f2fs document.

>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/data.c | 5 ++++-
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/segment.c | 3 +++
> fs/f2fs/super.c | 7 +++++++
> 4 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index c77c667..482313d 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
> /* change META to META_FLUSH in the checkpoint procedure */
> if (type >= META_FLUSH) {
> io->fio.type = META_FLUSH;
> - io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
> + if (test_opt(sbi, NOBARRIER))
> + io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
> + else
> + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
> }
> __submit_merged_bio(io);
> up_write(&io->io_rwsem);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 8f507d4..e999eec 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -41,6 +41,7 @@
> #define F2FS_MOUNT_INLINE_XATTR 0x00000080
> #define F2FS_MOUNT_INLINE_DATA 0x00000100
> #define F2FS_MOUNT_FLUSH_MERGE 0x00000200
> +#define F2FS_MOUNT_NOBARRIER 0x00000400
>
> #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
> #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 8a6e57d..9fce0f47 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -239,6 +239,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
> struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
> struct flush_cmd cmd;
>
> + if (test_opt(sbi, NOBARRIER))
> + return 0;
> +
> if (!test_opt(sbi, FLUSH_MERGE))
> return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 34649aa..eec89a2 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -52,6 +52,7 @@ enum {
> Opt_inline_xattr,
> Opt_inline_data,
> Opt_flush_merge,
> + Opt_nobarrier,
> Opt_err,
> };
>
> @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
> {Opt_inline_xattr, "inline_xattr"},
> {Opt_inline_data, "inline_data"},
> {Opt_flush_merge, "flush_merge"},
> + {Opt_nobarrier, "nobarrier"},
> {Opt_err, NULL},
> };
>
> @@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
> case Opt_flush_merge:
> set_opt(sbi, FLUSH_MERGE);
> break;
> + case Opt_nobarrier:
> + set_opt(sbi, NOBARRIER);
> + break;
> default:
> f2fs_msg(sb, KERN_ERR,
> "Unrecognized mount option \"%s\" or missing value",
> @@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> seq_puts(seq, ",inline_data");
> if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
> seq_puts(seq, ",flush_merge");
> + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, NOBARRIER))
> + seq_puts(seq, ",nobarrier");
> seq_printf(seq, ",active_logs=%u", sbi->active_logs);
>
> return 0;
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:30:13

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 02/11] f2fs: punch the core function for inode management

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 02/11] f2fs: punch the core function for inode management
>
> This patch punches out the core functions to manage the inode numbers.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/checkpoint.c | 81 ++++++++++++++++++++++++++++------------------------
> 1 file changed, 44 insertions(+), 37 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 0b4710c..3e3c2c3 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -282,6 +282,47 @@ const struct address_space_operations f2fs_meta_aops = {
> .set_page_dirty = f2fs_set_meta_page_dirty,
> };
>
> +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
> +{
> + struct list_head *head;
> + struct ino_entry *new, *e;
> +
> + new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
> + new->ino = ino;
> +
> + spin_lock(&sbi->orphan_inode_lock);
> + list_for_each_entry(e, &sbi->orphan_inode_list, list) {
> + if (e->ino == ino) {
> + spin_unlock(&sbi->orphan_inode_lock);
> + kmem_cache_free(orphan_entry_slab, new);
> + return;
> + }
> + if (e->ino > ino)
> + break;
> + }
> +
> + /* add new entry into list which is sorted by inode number */
> + list_add_tail(&new->list, &e->list);
> + spin_unlock(&sbi->orphan_inode_lock);
> +}
> +
> +static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
> +{
> + struct ino_entry *e;
> +
> + spin_lock(&sbi->orphan_inode_lock);
> + list_for_each_entry(e, &sbi->orphan_inode_list, list) {
> + if (e->ino == ino) {
> + list_del(&e->list);
> + sbi->n_orphans--;
> + spin_unlock(&sbi->orphan_inode_lock);
> + kmem_cache_free(orphan_entry_slab, e);
> + return;
> + }
> + }
> + spin_unlock(&sbi->orphan_inode_lock);
> +}
> +
> int acquire_orphan_inode(struct f2fs_sb_info *sbi)
> {
> int err = 0;
> @@ -306,48 +347,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
>
> void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> {
> - struct list_head *head;
> - struct orphan_inode_entry *new, *orphan;
> -
> - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
> - new->ino = ino;
> -
> - spin_lock(&sbi->orphan_inode_lock);
> - head = &sbi->orphan_inode_list;
> - list_for_each_entry(orphan, head, list) {
> - if (orphan->ino == ino) {
> - spin_unlock(&sbi->orphan_inode_lock);
> - kmem_cache_free(orphan_entry_slab, new);
> - return;
> - }
> -
> - if (orphan->ino > ino)
> - break;
> - }
> -
> /* add new orphan entry into list which is sorted by inode number */
> - list_add_tail(&new->list, &orphan->list);
> - spin_unlock(&sbi->orphan_inode_lock);
> + __add_ino_entry(sbi, ino);
> }
>
> void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> {
> - struct list_head *head;
> - struct orphan_inode_entry *orphan;
> -
> - spin_lock(&sbi->orphan_inode_lock);
> - head = &sbi->orphan_inode_list;
> - list_for_each_entry(orphan, head, list) {
> - if (orphan->ino == ino) {
> - list_del(&orphan->list);
> - f2fs_bug_on(sbi->n_orphans == 0);
> - sbi->n_orphans--;
> - spin_unlock(&sbi->orphan_inode_lock);
> - kmem_cache_free(orphan_entry_slab, orphan);
> - return;
> - }
> - }
> - spin_unlock(&sbi->orphan_inode_lock);
> + /* remove orphan entry from orphan list */
> + __remove_ino_entry(sbi, ino);
> }
>
> static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:31:18

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 03/11] f2fs: add infra for ino management

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 03/11] f2fs: add infra for ino management
>
> This patch changes the naming of orphan-related data structures to use as
> inode numbers managed globally.
> Later, we can use this facility for managing any inode number lists.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/checkpoint.c | 72 +++++++++++++++++++++++++++-------------------------
> fs/f2fs/debug.c | 2 +-
> fs/f2fs/f2fs.h | 19 +++++++++-----
> fs/f2fs/super.c | 2 +-
> 4 files changed, 53 insertions(+), 42 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 3e3c2c3..f93d154 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -22,7 +22,7 @@
> #include "segment.h"
> #include <trace/events/f2fs.h>
>
> -static struct kmem_cache *orphan_entry_slab;
> +static struct kmem_cache *ino_entry_slab;
> static struct kmem_cache *inode_entry_slab;
>
> /*
> @@ -282,19 +282,18 @@ const struct address_space_operations f2fs_meta_aops = {
> .set_page_dirty = f2fs_set_meta_page_dirty,
> };
>
> -static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
> +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
> {
> - struct list_head *head;
> struct ino_entry *new, *e;
>
> - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC);
> + new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
> new->ino = ino;
>
> - spin_lock(&sbi->orphan_inode_lock);
> - list_for_each_entry(e, &sbi->orphan_inode_list, list) {
> + spin_lock(&sbi->ino_lock[type]);
> + list_for_each_entry(e, &sbi->ino_list[type], list) {
> if (e->ino == ino) {
> - spin_unlock(&sbi->orphan_inode_lock);
> - kmem_cache_free(orphan_entry_slab, new);
> + spin_unlock(&sbi->ino_lock[type]);
> + kmem_cache_free(ino_entry_slab, new);
> return;
> }
> if (e->ino > ino)
> @@ -303,58 +302,58 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
>
> /* add new entry into list which is sorted by inode number */
> list_add_tail(&new->list, &e->list);
> - spin_unlock(&sbi->orphan_inode_lock);
> + spin_unlock(&sbi->ino_lock[type]);
> }
>
> -static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino)
> +static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
> {
> struct ino_entry *e;
>
> - spin_lock(&sbi->orphan_inode_lock);
> - list_for_each_entry(e, &sbi->orphan_inode_list, list) {
> + spin_lock(&sbi->ino_lock[type]);
> + list_for_each_entry(e, &sbi->ino_list[type], list) {
> if (e->ino == ino) {
> list_del(&e->list);
> sbi->n_orphans--;
> - spin_unlock(&sbi->orphan_inode_lock);
> - kmem_cache_free(orphan_entry_slab, e);
> + spin_unlock(&sbi->ino_lock[type]);
> + kmem_cache_free(ino_entry_slab, e);
> return;
> }
> }
> - spin_unlock(&sbi->orphan_inode_lock);
> + spin_unlock(&sbi->ino_lock[type]);
> }
>
> int acquire_orphan_inode(struct f2fs_sb_info *sbi)
> {
> int err = 0;
>
> - spin_lock(&sbi->orphan_inode_lock);
> + spin_lock(&sbi->ino_lock[ORPHAN_INO]);
> if (unlikely(sbi->n_orphans >= sbi->max_orphans))
> err = -ENOSPC;
> else
> sbi->n_orphans++;
> - spin_unlock(&sbi->orphan_inode_lock);
> + spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
>
> return err;
> }
>
> void release_orphan_inode(struct f2fs_sb_info *sbi)
> {
> - spin_lock(&sbi->orphan_inode_lock);
> + spin_lock(&sbi->ino_lock[ORPHAN_INO]);
> f2fs_bug_on(sbi->n_orphans == 0);
> sbi->n_orphans--;
> - spin_unlock(&sbi->orphan_inode_lock);
> + spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
> }
>
> void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> {
> /* add new orphan entry into list which is sorted by inode number */
> - __add_ino_entry(sbi, ino);
> + __add_ino_entry(sbi, ino, ORPHAN_INO);
> }
>
> void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> {
> /* remove orphan entry from orphan list */
> - __remove_ino_entry(sbi, ino);
> + __remove_ino_entry(sbi, ino, ORPHAN_INO);
> }
>
> static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> @@ -408,14 +407,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t
> start_blk)
> unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
> (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
> struct page *page = NULL;
> - struct orphan_inode_entry *orphan = NULL;
> + struct ino_entry *orphan = NULL;
>
> for (index = 0; index < orphan_blocks; index++)
> grab_meta_page(sbi, start_blk + index);
>
> index = 1;
> - spin_lock(&sbi->orphan_inode_lock);
> - head = &sbi->orphan_inode_list;
> + spin_lock(&sbi->ino_lock[ORPHAN_INO]);
> + head = &sbi->ino_list[ORPHAN_INO];
>
> /* loop for each orphan inode entry and write them in Jornal block */
> list_for_each_entry(orphan, head, list) {
> @@ -455,7 +454,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
> f2fs_put_page(page, 1);
> }
>
> - spin_unlock(&sbi->orphan_inode_lock);
> + spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
> }
>
> static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
> @@ -939,31 +938,36 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
> trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
> }
>
> -void init_orphan_info(struct f2fs_sb_info *sbi)
> +void init_ino_entry_info(struct f2fs_sb_info *sbi)
> {
> - spin_lock_init(&sbi->orphan_inode_lock);
> - INIT_LIST_HEAD(&sbi->orphan_inode_list);
> - sbi->n_orphans = 0;
> + int i;
> +
> + for (i = 0; i < MAX_INO_ENTRY; i++) {
> + spin_lock_init(&sbi->ino_lock[i]);
> + INIT_LIST_HEAD(&sbi->ino_list[i]);
> + }
> +
> /*
> * considering 512 blocks in a segment 8 blocks are needed for cp
> * and log segment summaries. Remaining blocks are used to keep
> * orphan entries with the limitation one reserved segment
> * for cp pack we can have max 1020*504 orphan entries
> */
> + sbi->n_orphans = 0;
> sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
> * F2FS_ORPHANS_PER_BLOCK;
> }
>
> int __init create_checkpoint_caches(void)
> {
> - orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry",
> - sizeof(struct orphan_inode_entry));
> - if (!orphan_entry_slab)
> + ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
> + sizeof(struct ino_entry));
> + if (!ino_entry_slab)
> return -ENOMEM;
> inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
> sizeof(struct dir_inode_entry));
> if (!inode_entry_slab) {
> - kmem_cache_destroy(orphan_entry_slab);
> + kmem_cache_destroy(ino_entry_slab);
> return -ENOMEM;
> }
> return 0;
> @@ -971,6 +975,6 @@ int __init create_checkpoint_caches(void)
>
> void destroy_checkpoint_caches(void)
> {
> - kmem_cache_destroy(orphan_entry_slab);
> + kmem_cache_destroy(ino_entry_slab);
> kmem_cache_destroy(inode_entry_slab);
> }
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 3f99266..a441ba3 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -167,7 +167,7 @@ get_cache:
> si->cache_mem += npages << PAGE_CACHE_SHIFT;
> npages = META_MAPPING(sbi)->nrpages;
> si->cache_mem += npages << PAGE_CACHE_SHIFT;
> - si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry);
> + si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
> si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
> }
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index e999eec..b6fa6ec 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -100,8 +100,13 @@ enum {
> META_SSA
> };
>
> -/* for the list of orphan inodes */
> -struct orphan_inode_entry {
> +/* for the list of ino */
> +enum {
> + ORPHAN_INO, /* for orphan ino list */
> + MAX_INO_ENTRY, /* max. list */
> +};
> +
> +struct ino_entry {
> struct list_head list; /* list head */
> nid_t ino; /* inode number */
> };
> @@ -450,9 +455,11 @@ struct f2fs_sb_info {
> bool por_doing; /* recovery is doing or not */
> wait_queue_head_t cp_wait;
>
> - /* for orphan inode management */
> - struct list_head orphan_inode_list; /* orphan inode list */
> - spinlock_t orphan_inode_lock; /* for orphan inode list */
> + /* for inode management */
> + spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
> + struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
> +
> + /* for orphan inode, use 0'th array */
> unsigned int n_orphans; /* # of orphan inodes */
> unsigned int max_orphans; /* max orphan inodes */
>
> @@ -1255,7 +1262,7 @@ void add_dirty_dir_inode(struct inode *);
> void remove_dirty_dir_inode(struct inode *);
> void sync_dirty_dir_inodes(struct f2fs_sb_info *);
> void write_checkpoint(struct f2fs_sb_info *, bool);
> -void init_orphan_info(struct f2fs_sb_info *);
> +void init_ino_entry_info(struct f2fs_sb_info *);
> int __init create_checkpoint_caches(void);
> void destroy_checkpoint_caches(void);
>
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index eec89a2..5a80755 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1003,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> INIT_LIST_HEAD(&sbi->dir_inode_list);
> spin_lock_init(&sbi->dir_inode_lock);
>
> - init_orphan_info(sbi);
> + init_ino_entry_info(sbi);
>
> /* setup f2fs internal modules */
> err = build_segment_manager(sbi);
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:32:57

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 04/11] f2fs: use radix_tree for ino management

Hi,

One comment as following.

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 04/11] f2fs: use radix_tree for ino management
>
> For better ino management, this patch replaces the data structure from list
> to radix tree.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/checkpoint.c | 48 ++++++++++++++++++++++++++----------------------
> fs/f2fs/f2fs.h | 1 +
> 2 files changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index f93d154..d35094a 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -284,24 +284,26 @@ const struct address_space_operations f2fs_meta_aops = {
>
> static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
> {
> - struct ino_entry *new, *e;
> -
> - new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
> - new->ino = ino;
> -
> + struct ino_entry *e;
> +retry:
> spin_lock(&sbi->ino_lock[type]);
> - list_for_each_entry(e, &sbi->ino_list[type], list) {
> - if (e->ino == ino) {
> +
> + e = radix_tree_lookup(&sbi->ino_root[type], ino);
> + if (!e) {
> + e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
> + if (!e) {
> spin_unlock(&sbi->ino_lock[type]);
> - kmem_cache_free(ino_entry_slab, new);
> - return;
> + goto retry;
> }
> - if (e->ino > ino)
> - break;
> - }
> + if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
> + spin_unlock(&sbi->ino_lock[type]);

we should add kmem_cache_free(ino_entry_slab, e) here to avoid memory leak.

> + goto retry;
> + }
> + memset(e, 0, sizeof(struct ino_entry));
> + e->ino = ino;
>
> - /* add new entry into list which is sorted by inode number */
> - list_add_tail(&new->list, &e->list);
> + list_add_tail(&e->list, &sbi->ino_list[type]);
> + }
> spin_unlock(&sbi->ino_lock[type]);
> }
>
> @@ -310,14 +312,15 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int
> type)
> struct ino_entry *e;
>
> spin_lock(&sbi->ino_lock[type]);
> - list_for_each_entry(e, &sbi->ino_list[type], list) {
> - if (e->ino == ino) {
> - list_del(&e->list);
> + e = radix_tree_lookup(&sbi->ino_root[type], ino);
> + if (e) {
> + list_del(&e->list);
> + radix_tree_delete(&sbi->ino_root[type], ino);
> + if (type == ORPHAN_INO)
> sbi->n_orphans--;
> - spin_unlock(&sbi->ino_lock[type]);
> - kmem_cache_free(ino_entry_slab, e);
> - return;
> - }
> + spin_unlock(&sbi->ino_lock[type]);
> + kmem_cache_free(ino_entry_slab, e);
> + return;
> }
> spin_unlock(&sbi->ino_lock[type]);
> }
> @@ -346,7 +349,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
>
> void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> {
> - /* add new orphan entry into list which is sorted by inode number */
> + /* add new orphan ino entry into list */
> __add_ino_entry(sbi, ino, ORPHAN_INO);
> }
>
> @@ -943,6 +946,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
> int i;
>
> for (i = 0; i < MAX_INO_ENTRY; i++) {
> + INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
> spin_lock_init(&sbi->ino_lock[i]);
> INIT_LIST_HEAD(&sbi->ino_list[i]);
> }
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index b6fa6ec..4454caa 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -456,6 +456,7 @@ struct f2fs_sb_info {
> wait_queue_head_t cp_wait;
>
> /* for inode management */
> + struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
> spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
> struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
>
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:39:15

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 05/11] f2fs: add info of appended or updated data writes

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 05/11] f2fs: add info of appended or updated data writes
>
> This patch introduces a inode number list in which represents inodes having
> appended data writes or updated data writes after last checkpoint.
> This will be used at fsync to determine whether the recovery information
> should be written or not.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> ---
> fs/f2fs/checkpoint.c | 39 +++++++++++++++++++++++++++++++++++++++
> fs/f2fs/data.c | 2 ++
> fs/f2fs/f2fs.h | 7 +++++++
> fs/f2fs/inode.c | 4 ++++
> 4 files changed, 52 insertions(+)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index d35094a..42a16c1 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -325,6 +325,44 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int
> type)
> spin_unlock(&sbi->ino_lock[type]);
> }
>
> +void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
> +{
> + /* add new dirty ino entry into list */
> + __add_ino_entry(sbi, ino, type);
> +}
> +
> +void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
> +{
> + /* remove dirty ino entry from list */
> + __remove_ino_entry(sbi, ino, type);
> +}
> +
> +/* mode should be APPEND_INO or UPDATE_INO */
> +bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
> +{
> + struct ino_entry *e;
> + spin_lock(&sbi->ino_lock[mode]);
> + e = radix_tree_lookup(&sbi->ino_root[mode], ino);
> + spin_unlock(&sbi->ino_lock[mode]);
> + return e ? true : false;
> +}
> +
> +static void release_dirty_inode(struct f2fs_sb_info *sbi)
> +{
> + struct ino_entry *e, *tmp;
> + int i;
> +
> + for (i = APPEND_INO; i <= UPDATE_INO; i++) {
> + spin_lock(&sbi->ino_lock[i]);
> + list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
> + list_del(&e->list);
> + radix_tree_delete(&sbi->ino_root[i], e->ino);
> + kmem_cache_free(ino_entry_slab, e);
> + }
> + spin_unlock(&sbi->ino_lock[i]);
> + }
> +}
> +
> int acquire_orphan_inode(struct f2fs_sb_info *sbi)
> {
> int err = 0;
> @@ -896,6 +934,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
>
> if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
> clear_prefree_segments(sbi);
> + release_dirty_inode(sbi);
> F2FS_RESET_SB_DIRT(sbi);
> }
> }
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 482313d..ec3c886 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -789,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
> !is_cold_data(page) &&
> need_inplace_update(inode))) {
> rewrite_data_page(page, old_blkaddr, fio);
> + set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
> } else {
> write_data_page(page, &dn, &new_blkaddr, fio);
> update_extent_cache(new_blkaddr, &dn);
> + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
> }
> out_writepage:
> f2fs_put_dnode(&dn);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 4454caa..ab36025 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -103,6 +103,8 @@ enum {
> /* for the list of ino */
> enum {
> ORPHAN_INO, /* for orphan ino list */
> + APPEND_INO, /* for append ino list */
> + UPDATE_INO, /* for update ino list */
> MAX_INO_ENTRY, /* max. list */
> };
>
> @@ -994,6 +996,8 @@ enum {
> FI_NO_EXTENT, /* not to use the extent cache */
> FI_INLINE_XATTR, /* used for inline xattr */
> FI_INLINE_DATA, /* used for inline data*/
> + FI_APPEND_WRITE, /* inode has appended data */
> + FI_UPDATE_WRITE, /* inode has in-place-update data */
> };
>
> static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> @@ -1252,6 +1256,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
> struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
> int ra_meta_pages(struct f2fs_sb_info *, int, int, int);
> long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
> +void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
> +void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
> +bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
> int acquire_orphan_inode(struct f2fs_sb_info *);
> void release_orphan_inode(struct f2fs_sb_info *);
> void add_orphan_inode(struct f2fs_sb_info *, nid_t);
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index cafba3c..0e69aa9 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -296,6 +296,10 @@ void f2fs_evict_inode(struct inode *inode)
> sb_end_intwrite(inode->i_sb);
> no_delete:
> invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
> + if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
> + add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
> + if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
> + add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
> out_clear:
> clear_inode(inode);
> }
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:40:33

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync

Hi,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync
>
> This patch intends to improve the fsync performance by skipping remaining the
> recovery information, only when there is no data that we should recover.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/file.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 7c652b3..121689a 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -133,6 +133,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> datasync)
> return ret;
> }
>
> + /*
> + * if there is no written data, don't waste time to write recovery info.
> + */
> + if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
> + !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
> + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) &&
> + exist_written_data(sbi, inode->i_ino, UPDATE_INO))

Should we shift this to is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
exist_written_data(sbi, inode->i_ino, UPDATE_INO) ?

> + goto flush_out;
> + goto out;
> + }
> +
> /* guarantee free sections for fsync */
> f2fs_balance_fs(sbi);
>
> @@ -188,6 +199,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> datasync)
> ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
> if (ret)
> goto out;
> +
> + /* once recovery info is written, don't need to tack this */
> + remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
> +flush_out:
> + remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
> ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
> }
> out:
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 11:42:26

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 10/11] f2fs: avoid checkpoint when error was occurred

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 10/11] f2fs: avoid checkpoint when error was occurred
>
> No need to do checkpoint, whenever any errors were detected.
>

The code here is modified once at below commit for avoid unneeded cp.
You can see the reason through the description of the commit.

Commit 691c6fd2a2d6 ("remove unneeded write checkpoint in recover_fsync_data")

"Previously, recover_fsync_data still to write checkpoint when there is
nothing to recover with normal umount image.
It may reduce mount performance and flash memory lifetime, so let's remove
it."

Why we should revert the commit?

> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/recovery.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index a112368..90d7e80 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -436,7 +436,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
> {
> struct list_head inode_list;
> int err;
> - bool need_writecp = false;
>
> fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
> sizeof(struct fsync_inode_entry));
> @@ -454,8 +453,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
> if (list_empty(&inode_list))
> goto out;
>
> - need_writecp = true;
> -
> /* step #2: recover data */
> err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
> f2fs_bug_on(!list_empty(&inode_list));
> @@ -463,7 +460,7 @@ out:
> destroy_fsync_dnodes(&inode_list);
> kmem_cache_destroy(fsync_entry_slab);
> sbi->por_doing = false;
> - if (!err && need_writecp)
> + if (!err)
> write_checkpoint(sbi, false);
> return err;
> }
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 12:21:57

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

Hi Changman,

On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> Hi Jaegeuk,
>
> On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > This patch enforces in-place-updates only when fdatasync is requested.
> > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > recovery information.
>
> But, as you know, random write occurs when changing into in-place-updates.
> It will degrade write performance. Is there any case in-place-updates is
> better, except recovery or high utilization?

As I described, you can easily imagine, if users requested small amount of data
writes with fdatasync, we should do data writes + node writes.
But, if we can do in-place-update, we don't need to write node blocks.
Surely it triggers random writes, however, the amount of data is preety small
and the device handles them very fast by its inside cache, so that it can
enhance the performance.

Thanks,

>
> Thanks
>
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/f2fs.h | 1 +
> > fs/f2fs/file.c | 7 +++++++
> > fs/f2fs/segment.h | 4 ++++
> > 3 files changed, 12 insertions(+)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index ab36025..8f8685e 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -998,6 +998,7 @@ enum {
> > FI_INLINE_DATA, /* used for inline data*/
> > FI_APPEND_WRITE, /* inode has appended data */
> > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > };
> >
> > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 121689a..e339856 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> > return 0;
> >
> > trace_f2fs_sync_file_enter(inode);
> > +
> > + /* if fdatasync is triggered, let's do in-place-update */
> > + if (datasync)
> > + set_inode_flag(fi, FI_NEED_IPU);
> > +
> > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > if (ret) {
> > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > return ret;
> > }
> > + if (datasync)
> > + clear_inode_flag(fi, FI_NEED_IPU);
> >
> > /*
> > * if there is no written data, don't waste time to write recovery info.
> > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > index ee5c75e..55973f7 100644
> > --- a/fs/f2fs/segment.h
> > +++ b/fs/f2fs/segment.h
> > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > if (S_ISDIR(inode->i_mode))
> > return false;
> >
> > + /* this is only set during fdatasync */
> > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > + return true;
> > +
> > switch (SM_I(sbi)->ipu_policy) {
> > case F2FS_IPU_FORCE:
> > return true;
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 12:22:40

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 01/11] f2fs: add nobarrier mount option

Hi Chao,

On Tue, Jul 29, 2014 at 07:28:16PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Saturday, July 26, 2014 6:47 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 01/11] f2fs: add nobarrier mount option
> >
> > This patch adds a mount option, nobarrier, in f2fs.
> > The assumption in here is that file system keeps the IO ordering, but
> > doesn't care about cache flushes inside the storages.
>
> Looks good to me.
> But it's better to add some description in f2fs document.

Good catch. :)
I'll add that.
Thanks,

>
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
>
> Reviewed-by: Chao Yu <[email protected]>
>
> > ---
> > fs/f2fs/data.c | 5 ++++-
> > fs/f2fs/f2fs.h | 1 +
> > fs/f2fs/segment.c | 3 +++
> > fs/f2fs/super.c | 7 +++++++
> > 4 files changed, 15 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index c77c667..482313d 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
> > /* change META to META_FLUSH in the checkpoint procedure */
> > if (type >= META_FLUSH) {
> > io->fio.type = META_FLUSH;
> > - io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
> > + if (test_opt(sbi, NOBARRIER))
> > + io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
> > + else
> > + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
> > }
> > __submit_merged_bio(io);
> > up_write(&io->io_rwsem);
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 8f507d4..e999eec 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -41,6 +41,7 @@
> > #define F2FS_MOUNT_INLINE_XATTR 0x00000080
> > #define F2FS_MOUNT_INLINE_DATA 0x00000100
> > #define F2FS_MOUNT_FLUSH_MERGE 0x00000200
> > +#define F2FS_MOUNT_NOBARRIER 0x00000400
> >
> > #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
> > #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 8a6e57d..9fce0f47 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -239,6 +239,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
> > struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
> > struct flush_cmd cmd;
> >
> > + if (test_opt(sbi, NOBARRIER))
> > + return 0;
> > +
> > if (!test_opt(sbi, FLUSH_MERGE))
> > return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
> >
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 34649aa..eec89a2 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -52,6 +52,7 @@ enum {
> > Opt_inline_xattr,
> > Opt_inline_data,
> > Opt_flush_merge,
> > + Opt_nobarrier,
> > Opt_err,
> > };
> >
> > @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = {
> > {Opt_inline_xattr, "inline_xattr"},
> > {Opt_inline_data, "inline_data"},
> > {Opt_flush_merge, "flush_merge"},
> > + {Opt_nobarrier, "nobarrier"},
> > {Opt_err, NULL},
> > };
> >
> > @@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options)
> > case Opt_flush_merge:
> > set_opt(sbi, FLUSH_MERGE);
> > break;
> > + case Opt_nobarrier:
> > + set_opt(sbi, NOBARRIER);
> > + break;
> > default:
> > f2fs_msg(sb, KERN_ERR,
> > "Unrecognized mount option \"%s\" or missing value",
> > @@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > seq_puts(seq, ",inline_data");
> > if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
> > seq_puts(seq, ",flush_merge");
> > + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, NOBARRIER))
> > + seq_puts(seq, ",nobarrier");
> > seq_printf(seq, ",active_logs=%u", sbi->active_logs);
> >
> > return 0;
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 12:33:45

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 04/11] f2fs: use radix_tree for ino management

Got it.
Fixed that.
Thanks,

On Tue, Jul 29, 2014 at 07:32:08PM +0800, Chao Yu wrote:
> Hi,
>
> One comment as following.
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Saturday, July 26, 2014 6:47 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 04/11] f2fs: use radix_tree for ino management
> >
> > For better ino management, this patch replaces the data structure from list
> > to radix tree.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
>
> Reviewed-by: Chao Yu <[email protected]>
>
> > ---
> > fs/f2fs/checkpoint.c | 48 ++++++++++++++++++++++++++----------------------
> > fs/f2fs/f2fs.h | 1 +
> > 2 files changed, 27 insertions(+), 22 deletions(-)
> >
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index f93d154..d35094a 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -284,24 +284,26 @@ const struct address_space_operations f2fs_meta_aops = {
> >
> > static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
> > {
> > - struct ino_entry *new, *e;
> > -
> > - new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
> > - new->ino = ino;
> > -
> > + struct ino_entry *e;
> > +retry:
> > spin_lock(&sbi->ino_lock[type]);
> > - list_for_each_entry(e, &sbi->ino_list[type], list) {
> > - if (e->ino == ino) {
> > +
> > + e = radix_tree_lookup(&sbi->ino_root[type], ino);
> > + if (!e) {
> > + e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
> > + if (!e) {
> > spin_unlock(&sbi->ino_lock[type]);
> > - kmem_cache_free(ino_entry_slab, new);
> > - return;
> > + goto retry;
> > }
> > - if (e->ino > ino)
> > - break;
> > - }
> > + if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
> > + spin_unlock(&sbi->ino_lock[type]);
>
> we should add kmem_cache_free(ino_entry_slab, e) here to avoid memory leak.
>
> > + goto retry;
> > + }
> > + memset(e, 0, sizeof(struct ino_entry));
> > + e->ino = ino;
> >
> > - /* add new entry into list which is sorted by inode number */
> > - list_add_tail(&new->list, &e->list);
> > + list_add_tail(&e->list, &sbi->ino_list[type]);
> > + }
> > spin_unlock(&sbi->ino_lock[type]);
> > }
> >
> > @@ -310,14 +312,15 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int
> > type)
> > struct ino_entry *e;
> >
> > spin_lock(&sbi->ino_lock[type]);
> > - list_for_each_entry(e, &sbi->ino_list[type], list) {
> > - if (e->ino == ino) {
> > - list_del(&e->list);
> > + e = radix_tree_lookup(&sbi->ino_root[type], ino);
> > + if (e) {
> > + list_del(&e->list);
> > + radix_tree_delete(&sbi->ino_root[type], ino);
> > + if (type == ORPHAN_INO)
> > sbi->n_orphans--;
> > - spin_unlock(&sbi->ino_lock[type]);
> > - kmem_cache_free(ino_entry_slab, e);
> > - return;
> > - }
> > + spin_unlock(&sbi->ino_lock[type]);
> > + kmem_cache_free(ino_entry_slab, e);
> > + return;
> > }
> > spin_unlock(&sbi->ino_lock[type]);
> > }
> > @@ -346,7 +349,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
> >
> > void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
> > {
> > - /* add new orphan entry into list which is sorted by inode number */
> > + /* add new orphan ino entry into list */
> > __add_ino_entry(sbi, ino, ORPHAN_INO);
> > }
> >
> > @@ -943,6 +946,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
> > int i;
> >
> > for (i = 0; i < MAX_INO_ENTRY; i++) {
> > + INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
> > spin_lock_init(&sbi->ino_lock[i]);
> > INIT_LIST_HEAD(&sbi->ino_list[i]);
> > }
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index b6fa6ec..4454caa 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -456,6 +456,7 @@ struct f2fs_sb_info {
> > wait_queue_head_t cp_wait;
> >
> > /* for inode management */
> > + struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
> > spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
> > struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
> >
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 12:42:59

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync

On Tue, Jul 29, 2014 at 07:39:45PM +0800, Chao Yu wrote:
> Hi,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Saturday, July 26, 2014 6:47 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync
> >
> > This patch intends to improve the fsync performance by skipping remaining the
> > recovery information, only when there is no data that we should recover.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/file.c | 16 ++++++++++++++++
> > 1 file changed, 16 insertions(+)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 7c652b3..121689a 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -133,6 +133,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > datasync)
> > return ret;
> > }
> >
> > + /*
> > + * if there is no written data, don't waste time to write recovery info.
> > + */
> > + if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
> > + !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
> > + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) &&
> > + exist_written_data(sbi, inode->i_ino, UPDATE_INO))
>
> Should we shift this to is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
> exist_written_data(sbi, inode->i_ino, UPDATE_INO) ?

Hehe, I found that and was going to submit new patch. :)
Small changes are not a big deal, so I'll test and then push them into the tree.
The for-next tree can be rebased all the time, so if you have any suggestion,
let me know.

Thanks,

>
> > + goto flush_out;
> > + goto out;
> > + }
> > +
> > /* guarantee free sections for fsync */
> > f2fs_balance_fs(sbi);
> >
> > @@ -188,6 +199,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > datasync)
> > ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
> > if (ret)
> > goto out;
> > +
> > + /* once recovery info is written, don't need to tack this */
> > + remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
> > +flush_out:
> > + remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
> > ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
> > }
> > out:
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 13:00:07

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 10/11] f2fs: avoid checkpoint when error was occurred

Hi Chao,

On Tue, Jul 29, 2014 at 07:41:35PM +0800, Chao Yu wrote:
> Hi Jaegeuk,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Saturday, July 26, 2014 6:47 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 10/11] f2fs: avoid checkpoint when error was occurred
> >
> > No need to do checkpoint, whenever any errors were detected.
> >
>
> The code here is modified once at below commit for avoid unneeded cp.
> You can see the reason through the description of the commit.
>
> Commit 691c6fd2a2d6 ("remove unneeded write checkpoint in recover_fsync_data")
>
> "Previously, recover_fsync_data still to write checkpoint when there is
> nothing to recover with normal umount image.
> It may reduce mount performance and flash memory lifetime, so let's remove
> it."
>
> Why we should revert the commit?

Agreed.
Let's ignore this patch. :)
Thanks,

>
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/recovery.c | 5 +----
> > 1 file changed, 1 insertion(+), 4 deletions(-)
> >
> > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > index a112368..90d7e80 100644
> > --- a/fs/f2fs/recovery.c
> > +++ b/fs/f2fs/recovery.c
> > @@ -436,7 +436,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
> > {
> > struct list_head inode_list;
> > int err;
> > - bool need_writecp = false;
> >
> > fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
> > sizeof(struct fsync_inode_entry));
> > @@ -454,8 +453,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
> > if (list_empty(&inode_list))
> > goto out;
> >
> > - need_writecp = true;
> > -
> > /* step #2: recover data */
> > err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
> > f2fs_bug_on(!list_empty(&inode_list));
> > @@ -463,7 +460,7 @@ out:
> > destroy_fsync_dnodes(&inode_list);
> > kmem_cache_destroy(fsync_entry_slab);
> > sbi->por_doing = false;
> > - if (!err && need_writecp)
> > + if (!err)
> > write_checkpoint(sbi, false);
> > return err;
> > }
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-29 13:01:36

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v2 11/11] f2fs: avoid retrying wrong recovery routine when error was occurred

Change log from v1:
o adjust need_writecp condition, pointed by Chao

>From 387830bd09845dce83bf277bf8416c986c108eb7 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <[email protected]>
Date: Fri, 25 Jul 2014 15:47:25 -0700
Subject: [PATCH] f2fs: avoid retrying wrong recovery routine when error was
occurred

This patch eliminates the propagation of recovery errors to the next mount.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 3 ++-
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/recovery.c | 20 +++++++++++++++++++-
fs/f2fs/segment.c | 5 +----
4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 26b94bb..cea20b8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -796,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
nid_t last_nid = 0;
block_t start_blk;
struct page *cp_page;
@@ -809,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
- discard_next_dnode(sbi);
+ discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));

/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 475f97c..14b9f74 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1225,7 +1225,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *);
-void discard_next_dnode(struct f2fs_sb_info *);
+void discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *);
void allocate_new_segments(struct f2fs_sb_info *);
struct page *get_sum_page(struct f2fs_sb_info *, unsigned int);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index a112368..b2aa53b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -434,7 +434,9 @@ next:

int recover_fsync_data(struct f2fs_sb_info *sbi)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
+ block_t blkaddr;
int err;
bool need_writecp = false;

@@ -447,6 +449,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)

/* step #1: find fsynced inode numbers */
sbi->por_doing = true;
+
+ blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
@@ -462,8 +467,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
out:
destroy_fsync_dnodes(&inode_list);
kmem_cache_destroy(fsync_entry_slab);
+
+ if (err) {
+ truncate_inode_pages_final(NODE_MAPPING(sbi));
+ truncate_inode_pages_final(META_MAPPING(sbi));
+ }
+
sbi->por_doing = false;
- if (!err && need_writecp)
+ if (err) {
+ discard_next_dnode(sbi, blkaddr);
+
+ /* Flush all the NAT/SIT pages */
+ while (get_pages(sbi, F2FS_DIRTY_META))
+ sync_meta_pages(sbi, META, LONG_MAX);
+ } else if (need_writecp) {
write_checkpoint(sbi, false);
+ }
return err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9fce0f47..e016b97 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -379,11 +379,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

-void discard_next_dnode(struct f2fs_sb_info *sbi)
+void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
- block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
if (f2fs_issue_discard(sbi, blkaddr, 1)) {
struct page *page = grab_meta_page(sbi, blkaddr);
/* zero-filled page */
--
1.8.5.2 (Apple Git-48)

2014-07-29 23:56:28

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> Hi Changman,
>
> On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > Hi Jaegeuk,
> >
> > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > This patch enforces in-place-updates only when fdatasync is requested.
> > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > recovery information.
> >
> > But, as you know, random write occurs when changing into in-place-updates.
> > It will degrade write performance. Is there any case in-place-updates is
> > better, except recovery or high utilization?
>
> As I described, you can easily imagine, if users requested small amount of data
> writes with fdatasync, we should do data writes + node writes.
> But, if we can do in-place-update, we don't need to write node blocks.
> Surely it triggers random writes, however, the amount of data is preety small
> and the device handles them very fast by its inside cache, so that it can
> enhance the performance.
>
> Thanks,

Partially agree. Sometimes, I see that SSR shows lower performance than
IPU. One of the reasons might be node writes.
Anyway, if so, we should know total dirty pages for fdatasync and it's very
tunable according to a random write performance of device.

Thanks,

>
> >
> > Thanks
> >
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > ---
> > > fs/f2fs/f2fs.h | 1 +
> > > fs/f2fs/file.c | 7 +++++++
> > > fs/f2fs/segment.h | 4 ++++
> > > 3 files changed, 12 insertions(+)
> > >
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index ab36025..8f8685e 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -998,6 +998,7 @@ enum {
> > > FI_INLINE_DATA, /* used for inline data*/
> > > FI_APPEND_WRITE, /* inode has appended data */
> > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > };
> > >
> > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index 121689a..e339856 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> > > return 0;
> > >
> > > trace_f2fs_sync_file_enter(inode);
> > > +
> > > + /* if fdatasync is triggered, let's do in-place-update */
> > > + if (datasync)
> > > + set_inode_flag(fi, FI_NEED_IPU);
> > > +
> > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > if (ret) {
> > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > return ret;
> > > }
> > > + if (datasync)
> > > + clear_inode_flag(fi, FI_NEED_IPU);
> > >
> > > /*
> > > * if there is no written data, don't waste time to write recovery info.
> > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > index ee5c75e..55973f7 100644
> > > --- a/fs/f2fs/segment.h
> > > +++ b/fs/f2fs/segment.h
> > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > if (S_ISDIR(inode->i_mode))
> > > return false;
> > >
> > > + /* this is only set during fdatasync */
> > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > + return true;
> > > +
> > > switch (SM_I(sbi)->ipu_policy) {
> > > case F2FS_IPU_FORCE:
> > > return true;
> > > --
> > > 1.8.5.2 (Apple Git-48)
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > Want fast and easy access to all the code in your enterprise? Index and
> > > search up to 200,000 lines of code with a free copy of Black Duck
> > > Code Sight - the same software that powers the world's largest code
> > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > http://p.sf.net/sfu/bds
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 01:10:46

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > Hi Changman,
> >
> > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > Hi Jaegeuk,
> > >
> > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > recovery information.
> > >
> > > But, as you know, random write occurs when changing into in-place-updates.
> > > It will degrade write performance. Is there any case in-place-updates is
> > > better, except recovery or high utilization?
> >
> > As I described, you can easily imagine, if users requested small amount of data
> > writes with fdatasync, we should do data writes + node writes.
> > But, if we can do in-place-update, we don't need to write node blocks.
> > Surely it triggers random writes, however, the amount of data is preety small
> > and the device handles them very fast by its inside cache, so that it can
> > enhance the performance.
> >
> > Thanks,
>
> Partially agree. Sometimes, I see that SSR shows lower performance than
> IPU. One of the reasons might be node writes.

What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
under the very strict cases.

> Anyway, if so, we should know total dirty pages for fdatasync and it's very
> tunable according to a random write performance of device.

Agreed. We can do that either by comparing the number of dirty pages,
additional data/node writes, and cost of checkpoint at the same time.
And there is another thing is that we need to consider the number of
waiting time for end_io.
I'll look into this at some time.

Thanks,

>
> Thanks,
>
> >
> > >
> > > Thanks
> > >
> > > >
> > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > ---
> > > > fs/f2fs/f2fs.h | 1 +
> > > > fs/f2fs/file.c | 7 +++++++
> > > > fs/f2fs/segment.h | 4 ++++
> > > > 3 files changed, 12 insertions(+)
> > > >
> > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > index ab36025..8f8685e 100644
> > > > --- a/fs/f2fs/f2fs.h
> > > > +++ b/fs/f2fs/f2fs.h
> > > > @@ -998,6 +998,7 @@ enum {
> > > > FI_INLINE_DATA, /* used for inline data*/
> > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > };
> > > >
> > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > index 121689a..e339856 100644
> > > > --- a/fs/f2fs/file.c
> > > > +++ b/fs/f2fs/file.c
> > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> > > > return 0;
> > > >
> > > > trace_f2fs_sync_file_enter(inode);
> > > > +
> > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > + if (datasync)
> > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > +
> > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > if (ret) {
> > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > return ret;
> > > > }
> > > > + if (datasync)
> > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > >
> > > > /*
> > > > * if there is no written data, don't waste time to write recovery info.
> > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > index ee5c75e..55973f7 100644
> > > > --- a/fs/f2fs/segment.h
> > > > +++ b/fs/f2fs/segment.h
> > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > if (S_ISDIR(inode->i_mode))
> > > > return false;
> > > >
> > > > + /* this is only set during fdatasync */
> > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > + return true;
> > > > +
> > > > switch (SM_I(sbi)->ipu_policy) {
> > > > case F2FS_IPU_FORCE:
> > > > return true;
> > > > --
> > > > 1.8.5.2 (Apple Git-48)
> > > >
> > > >
> > > > ------------------------------------------------------------------------------
> > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > Code Sight - the same software that powers the world's largest code
> > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > http://p.sf.net/sfu/bds
> > > > _______________________________________________
> > > > Linux-f2fs-devel mailing list
> > > > [email protected]
> > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 01:45:41

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely

Hi Jaegeuk,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Saturday, July 26, 2014 6:47 AM
> To: [email protected]; [email protected];
> [email protected]
> Cc: Jaegeuk Kim
> Subject: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely
>
> This patch fixes the wrongly used unlikely condition.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/checkpoint.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 42a16c1..36b0d47 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -932,7 +932,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
> /* Here, we only have one bio having CP pack */
> sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
>
> - if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
> + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {

Maybe use likely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) or

if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)))
return;

is more appropriate. How do you think?

> clear_prefree_segments(sbi);
> release_dirty_inode(sbi);
> F2FS_RESET_SB_DIRT(sbi);
> --
> 1.8.5.2 (Apple Git-48)
>
>
> ------------------------------------------------------------------------------
> Want fast and easy access to all the code in your enterprise? Index and
> search up to 200,000 lines of code with a free copy of Black Duck
> Code Sight - the same software that powers the world's largest code
> search on Ohloh, the Black Duck Open Hub! Try it now.
> http://p.sf.net/sfu/bds
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 01:58:09

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

On Tue, Jul 29, 2014 at 06:08:21PM -0700, Jaegeuk Kim wrote:
> On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> > On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > > Hi Changman,
> > >
> > > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > > Hi Jaegeuk,
> > > >
> > > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > > recovery information.
> > > >
> > > > But, as you know, random write occurs when changing into in-place-updates.
> > > > It will degrade write performance. Is there any case in-place-updates is
> > > > better, except recovery or high utilization?
> > >
> > > As I described, you can easily imagine, if users requested small amount of data
> > > writes with fdatasync, we should do data writes + node writes.
> > > But, if we can do in-place-update, we don't need to write node blocks.
> > > Surely it triggers random writes, however, the amount of data is preety small
> > > and the device handles them very fast by its inside cache, so that it can
> > > enhance the performance.
> > >
> > > Thanks,
> >
> > Partially agree. Sometimes, I see that SSR shows lower performance than
> > IPU. One of the reasons might be node writes.
>
> What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
> under the very strict cases.
>

Okay, I understood your intention.
This discussion seems to be far from this thread a litte bit.
Background I told as above is that I got better number from IPU when I
tested fio under fragmentation by varmail and dd; and utilization about 93%.
The result of perf shows f2fs spends the most cpu time searching victim
in SSR mode. And f2fs had to write node data additionaly.
I think this condition could be one of the strict case as you told.

Thanks,

> > Anyway, if so, we should know total dirty pages for fdatasync and it's very
> > tunable according to a random write performance of device.
>
> Agreed. We can do that either by comparing the number of dirty pages,
> additional data/node writes, and cost of checkpoint at the same time.
> And there is another thing is that we need to consider the number of
> waiting time for end_io.
> I'll look into this at some time.
>
> Thanks,
>
> >
> > Thanks,
> >
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > > ---
> > > > > fs/f2fs/f2fs.h | 1 +
> > > > > fs/f2fs/file.c | 7 +++++++
> > > > > fs/f2fs/segment.h | 4 ++++
> > > > > 3 files changed, 12 insertions(+)
> > > > >
> > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > index ab36025..8f8685e 100644
> > > > > --- a/fs/f2fs/f2fs.h
> > > > > +++ b/fs/f2fs/f2fs.h
> > > > > @@ -998,6 +998,7 @@ enum {
> > > > > FI_INLINE_DATA, /* used for inline data*/
> > > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > > };
> > > > >
> > > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > index 121689a..e339856 100644
> > > > > --- a/fs/f2fs/file.c
> > > > > +++ b/fs/f2fs/file.c
> > > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > return 0;
> > > > >
> > > > > trace_f2fs_sync_file_enter(inode);
> > > > > +
> > > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > > + if (datasync)
> > > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > > +
> > > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > > if (ret) {
> > > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > > return ret;
> > > > > }
> > > > > + if (datasync)
> > > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > > >
> > > > > /*
> > > > > * if there is no written data, don't waste time to write recovery info.
> > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > index ee5c75e..55973f7 100644
> > > > > --- a/fs/f2fs/segment.h
> > > > > +++ b/fs/f2fs/segment.h
> > > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > > if (S_ISDIR(inode->i_mode))
> > > > > return false;
> > > > >
> > > > > + /* this is only set during fdatasync */
> > > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > > + return true;
> > > > > +
> > > > > switch (SM_I(sbi)->ipu_policy) {
> > > > > case F2FS_IPU_FORCE:
> > > > > return true;
> > > > > --
> > > > > 1.8.5.2 (Apple Git-48)
> > > > >
> > > > >
> > > > > ------------------------------------------------------------------------------
> > > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > > Code Sight - the same software that powers the world's largest code
> > > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > > http://p.sf.net/sfu/bds
> > > > > _______________________________________________
> > > > > Linux-f2fs-devel mailing list
> > > > > [email protected]
> > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 02:46:49

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

Hi Jaegeuk Changman,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Wednesday, July 30, 2014 9:08 AM
> To: Changman Lee
> Cc: [email protected]; [email protected];
> [email protected]
> Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync
>
> On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> > On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > > Hi Changman,
> > >
> > > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > > Hi Jaegeuk,
> > > >
> > > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > > recovery information.
> > > >
> > > > But, as you know, random write occurs when changing into in-place-updates.
> > > > It will degrade write performance. Is there any case in-place-updates is
> > > > better, except recovery or high utilization?
> > >
> > > As I described, you can easily imagine, if users requested small amount of data
> > > writes with fdatasync, we should do data writes + node writes.
> > > But, if we can do in-place-update, we don't need to write node blocks.
> > > Surely it triggers random writes, however, the amount of data is preety small
> > > and the device handles them very fast by its inside cache, so that it can
> > > enhance the performance.
> > >
> > > Thanks,
> >
> > Partially agree. Sometimes, I see that SSR shows lower performance than
> > IPU. One of the reasons might be node writes.
>
> What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
> under the very strict cases.
>
> > Anyway, if so, we should know total dirty pages for fdatasync and it's very
> > tunable according to a random write performance of device.
>
> Agreed. We can do that either by comparing the number of dirty pages,
> additional data/node writes, and cost of checkpoint at the same time.
> And there is another thing is that we need to consider the number of
> waiting time for end_io.
> I'll look into this at some time.

Agreed, but please additionally consider that the number we got is not highly
accurate because without protection of ->i_mutex writer could generate more
dirty pages.

>
> Thanks,
>
> >
> > Thanks,
> >
> > >
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > > ---
> > > > > fs/f2fs/f2fs.h | 1 +
> > > > > fs/f2fs/file.c | 7 +++++++
> > > > > fs/f2fs/segment.h | 4 ++++
> > > > > 3 files changed, 12 insertions(+)
> > > > >
> > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > index ab36025..8f8685e 100644
> > > > > --- a/fs/f2fs/f2fs.h
> > > > > +++ b/fs/f2fs/f2fs.h
> > > > > @@ -998,6 +998,7 @@ enum {
> > > > > FI_INLINE_DATA, /* used for inline data*/
> > > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > > };
> > > > >
> > > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > index 121689a..e339856 100644
> > > > > --- a/fs/f2fs/file.c
> > > > > +++ b/fs/f2fs/file.c
> > > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end,
> int datasync)
> > > > > return 0;
> > > > >
> > > > > trace_f2fs_sync_file_enter(inode);
> > > > > +
> > > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > > + if (datasync)
> > > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > > +
> > > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > > if (ret) {
> > > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > > return ret;
> > > > > }
> > > > > + if (datasync)
> > > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > > >
> > > > > /*
> > > > > * if there is no written data, don't waste time to write recovery info.
> > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > index ee5c75e..55973f7 100644
> > > > > --- a/fs/f2fs/segment.h
> > > > > +++ b/fs/f2fs/segment.h
> > > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > > if (S_ISDIR(inode->i_mode))
> > > > > return false;
> > > > >
> > > > > + /* this is only set during fdatasync */
> > > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > > + return true;
> > > > > +
> > > > > switch (SM_I(sbi)->ipu_policy) {
> > > > > case F2FS_IPU_FORCE:
> > > > > return true;
> > > > > --
> > > > > 1.8.5.2 (Apple Git-48)
> > > > >
> > > > >
> > > > > ------------------------------------------------------------------------------
> > > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > > Code Sight - the same software that powers the world's largest code
> > > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > > http://p.sf.net/sfu/bds
> > > > > _______________________________________________
> > > > > Linux-f2fs-devel mailing list
> > > > > [email protected]
> > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
> ------------------------------------------------------------------------------
> Infragistics Professional
> Build stunning WinForms apps today!
> Reboot your WinForms applications with our WinForms controls.
> Build a bridge from your legacy apps to the future.
> http://pubads.g.doubleclick.net/gampad/clk?id=153845071&iu=/4140/ostg.clktrk
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 03:11:13

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

On Wed, Jul 30, 2014 at 10:56:31AM +0900, Changman Lee wrote:
> On Tue, Jul 29, 2014 at 06:08:21PM -0700, Jaegeuk Kim wrote:
> > On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> > > On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > > > Hi Changman,
> > > >
> > > > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > > > Hi Jaegeuk,
> > > > >
> > > > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > > > recovery information.
> > > > >
> > > > > But, as you know, random write occurs when changing into in-place-updates.
> > > > > It will degrade write performance. Is there any case in-place-updates is
> > > > > better, except recovery or high utilization?
> > > >
> > > > As I described, you can easily imagine, if users requested small amount of data
> > > > writes with fdatasync, we should do data writes + node writes.
> > > > But, if we can do in-place-update, we don't need to write node blocks.
> > > > Surely it triggers random writes, however, the amount of data is preety small
> > > > and the device handles them very fast by its inside cache, so that it can
> > > > enhance the performance.
> > > >
> > > > Thanks,
> > >
> > > Partially agree. Sometimes, I see that SSR shows lower performance than
> > > IPU. One of the reasons might be node writes.
> >
> > What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
> > under the very strict cases.
> >
>
> Okay, I understood your intention.
> This discussion seems to be far from this thread a litte bit.
> Background I told as above is that I got better number from IPU when I
> tested fio under fragmentation by varmail and dd; and utilization about 93%.
> The result of perf shows f2fs spends the most cpu time searching victim
> in SSR mode. And f2fs had to write node data additionaly.

How about trying to reduce the cpu time at that moment?
And, as you know, f2fs already has such kind of triggering facility with sysfs.

> I think this condition could be one of the strict case as you told.
>
> Thanks,
>
> > > Anyway, if so, we should know total dirty pages for fdatasync and it's very
> > > tunable according to a random write performance of device.
> >
> > Agreed. We can do that either by comparing the number of dirty pages,
> > additional data/node writes, and cost of checkpoint at the same time.
> > And there is another thing is that we need to consider the number of
> > waiting time for end_io.
> > I'll look into this at some time.
> >
> > Thanks,
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > > > ---
> > > > > > fs/f2fs/f2fs.h | 1 +
> > > > > > fs/f2fs/file.c | 7 +++++++
> > > > > > fs/f2fs/segment.h | 4 ++++
> > > > > > 3 files changed, 12 insertions(+)
> > > > > >
> > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > index ab36025..8f8685e 100644
> > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > @@ -998,6 +998,7 @@ enum {
> > > > > > FI_INLINE_DATA, /* used for inline data*/
> > > > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > > > };
> > > > > >
> > > > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > > index 121689a..e339856 100644
> > > > > > --- a/fs/f2fs/file.c
> > > > > > +++ b/fs/f2fs/file.c
> > > > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
> > > > > > return 0;
> > > > > >
> > > > > > trace_f2fs_sync_file_enter(inode);
> > > > > > +
> > > > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > > > + if (datasync)
> > > > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > > > +
> > > > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > > > if (ret) {
> > > > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > > > return ret;
> > > > > > }
> > > > > > + if (datasync)
> > > > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > > > >
> > > > > > /*
> > > > > > * if there is no written data, don't waste time to write recovery info.
> > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > index ee5c75e..55973f7 100644
> > > > > > --- a/fs/f2fs/segment.h
> > > > > > +++ b/fs/f2fs/segment.h
> > > > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > > > if (S_ISDIR(inode->i_mode))
> > > > > > return false;
> > > > > >
> > > > > > + /* this is only set during fdatasync */
> > > > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > > > + return true;
> > > > > > +
> > > > > > switch (SM_I(sbi)->ipu_policy) {
> > > > > > case F2FS_IPU_FORCE:
> > > > > > return true;
> > > > > > --
> > > > > > 1.8.5.2 (Apple Git-48)
> > > > > >
> > > > > >
> > > > > > ------------------------------------------------------------------------------
> > > > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > > > Code Sight - the same software that powers the world's largest code
> > > > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > > > http://p.sf.net/sfu/bds
> > > > > > _______________________________________________
> > > > > > Linux-f2fs-devel mailing list
> > > > > > [email protected]
> > > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 03:12:54

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

On Wed, Jul 30, 2014 at 10:45:38AM +0800, Chao Yu wrote:
> Hi Jaegeuk Changman,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Wednesday, July 30, 2014 9:08 AM
> > To: Changman Lee
> > Cc: [email protected]; [email protected];
> > [email protected]
> > Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync
> >
> > On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> > > On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > > > Hi Changman,
> > > >
> > > > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > > > Hi Jaegeuk,
> > > > >
> > > > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > > > recovery information.
> > > > >
> > > > > But, as you know, random write occurs when changing into in-place-updates.
> > > > > It will degrade write performance. Is there any case in-place-updates is
> > > > > better, except recovery or high utilization?
> > > >
> > > > As I described, you can easily imagine, if users requested small amount of data
> > > > writes with fdatasync, we should do data writes + node writes.
> > > > But, if we can do in-place-update, we don't need to write node blocks.
> > > > Surely it triggers random writes, however, the amount of data is preety small
> > > > and the device handles them very fast by its inside cache, so that it can
> > > > enhance the performance.
> > > >
> > > > Thanks,
> > >
> > > Partially agree. Sometimes, I see that SSR shows lower performance than
> > > IPU. One of the reasons might be node writes.
> >
> > What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
> > under the very strict cases.
> >
> > > Anyway, if so, we should know total dirty pages for fdatasync and it's very
> > > tunable according to a random write performance of device.
> >
> > Agreed. We can do that either by comparing the number of dirty pages,
> > additional data/node writes, and cost of checkpoint at the same time.
> > And there is another thing is that we need to consider the number of
> > waiting time for end_io.
> > I'll look into this at some time.
>
> Agreed, but please additionally consider that the number we got is not highly
> accurate because without protection of ->i_mutex writer could generate more
> dirty pages.

Right, but IMO, it doesn't need to get correct numbers.
Just for reference is enough.
Sort of trade-offs. :)

Thanks,

>
> >
> > Thanks,
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > > > ---
> > > > > > fs/f2fs/f2fs.h | 1 +
> > > > > > fs/f2fs/file.c | 7 +++++++
> > > > > > fs/f2fs/segment.h | 4 ++++
> > > > > > 3 files changed, 12 insertions(+)
> > > > > >
> > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > index ab36025..8f8685e 100644
> > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > @@ -998,6 +998,7 @@ enum {
> > > > > > FI_INLINE_DATA, /* used for inline data*/
> > > > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > > > };
> > > > > >
> > > > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > > index 121689a..e339856 100644
> > > > > > --- a/fs/f2fs/file.c
> > > > > > +++ b/fs/f2fs/file.c
> > > > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end,
> > int datasync)
> > > > > > return 0;
> > > > > >
> > > > > > trace_f2fs_sync_file_enter(inode);
> > > > > > +
> > > > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > > > + if (datasync)
> > > > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > > > +
> > > > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > > > if (ret) {
> > > > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > > > return ret;
> > > > > > }
> > > > > > + if (datasync)
> > > > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > > > >
> > > > > > /*
> > > > > > * if there is no written data, don't waste time to write recovery info.
> > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > index ee5c75e..55973f7 100644
> > > > > > --- a/fs/f2fs/segment.h
> > > > > > +++ b/fs/f2fs/segment.h
> > > > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > > > if (S_ISDIR(inode->i_mode))
> > > > > > return false;
> > > > > >
> > > > > > + /* this is only set during fdatasync */
> > > > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > > > + return true;
> > > > > > +
> > > > > > switch (SM_I(sbi)->ipu_policy) {
> > > > > > case F2FS_IPU_FORCE:
> > > > > > return true;
> > > > > > --
> > > > > > 1.8.5.2 (Apple Git-48)
> > > > > >
> > > > > >
> > > > > > ------------------------------------------------------------------------------
> > > > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > > > Code Sight - the same software that powers the world's largest code
> > > > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > > > http://p.sf.net/sfu/bds
> > > > > > _______________________________________________
> > > > > > Linux-f2fs-devel mailing list
> > > > > > [email protected]
> > > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> > ------------------------------------------------------------------------------
> > Infragistics Professional
> > Build stunning WinForms apps today!
> > Reboot your WinForms applications with our WinForms controls.
> > Build a bridge from your legacy apps to the future.
> > http://pubads.g.doubleclick.net/gampad/clk?id=153845071&iu=/4140/ostg.clktrk
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 03:18:09

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely

On Wed, Jul 30, 2014 at 09:44:43AM +0800, Chao Yu wrote:
> Hi Jaegeuk,
>
> > -----Original Message-----
> > From: Jaegeuk Kim [mailto:[email protected]]
> > Sent: Saturday, July 26, 2014 6:47 AM
> > To: [email protected]; [email protected];
> > [email protected]
> > Cc: Jaegeuk Kim
> > Subject: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely
> >
> > This patch fixes the wrongly used unlikely condition.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/checkpoint.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > index 42a16c1..36b0d47 100644
> > --- a/fs/f2fs/checkpoint.c
> > +++ b/fs/f2fs/checkpoint.c
> > @@ -932,7 +932,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
> > /* Here, we only have one bio having CP pack */
> > sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
> >
> > - if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
> > + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
>
> Maybe use likely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) or
>
> if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)))
> return;
>
> is more appropriate. How do you think?

Currently I'd like to put this without any likely or unlikely.
Best thing is to measure some performance how this would make effect on.
Until then, it'd be better to do without it, since apparently this should
not be unlikely.

How about you?
Can we compare both of them explicitly?

Thanks,

>
> > clear_prefree_segments(sbi);
> > release_dirty_inode(sbi);
> > F2FS_RESET_SB_DIRT(sbi);
> > --
> > 1.8.5.2 (Apple Git-48)
> >
> >
> > ------------------------------------------------------------------------------
> > Want fast and easy access to all the code in your enterprise? Index and
> > search up to 200,000 lines of code with a free copy of Black Duck
> > Code Sight - the same software that powers the world's largest code
> > search on Ohloh, the Black Duck Open Hub! Try it now.
> > http://p.sf.net/sfu/bds
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 11:59:09

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync

Hi,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Tuesday, July 29, 2014 8:43 PM
> To: Chao Yu
> Cc: [email protected]; [email protected];
> [email protected]
> Subject: Re: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync
>
> On Tue, Jul 29, 2014 at 07:39:45PM +0800, Chao Yu wrote:
> > Hi,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:[email protected]]
> > > Sent: Saturday, July 26, 2014 6:47 AM
> > > To: [email protected]; [email protected];
> > > [email protected]
> > > Cc: Jaegeuk Kim
> > > Subject: [f2fs-dev] [PATCH 06/11] f2fs: skip unnecessary data writes during fsync
> > >
> > > This patch intends to improve the fsync performance by skipping remaining the
> > > recovery information, only when there is no data that we should recover.
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> > > ---
> > > fs/f2fs/file.c | 16 ++++++++++++++++
> > > 1 file changed, 16 insertions(+)
> > >
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index 7c652b3..121689a 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -133,6 +133,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > > datasync)
> > > return ret;
> > > }
> > >
> > > + /*
> > > + * if there is no written data, don't waste time to write recovery info.
> > > + */
> > > + if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
> > > + !exist_written_data(sbi, inode->i_ino, APPEND_INO)) {
> > > + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) &&
> > > + exist_written_data(sbi, inode->i_ino, UPDATE_INO))
> >
> > Should we shift this to is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
> > exist_written_data(sbi, inode->i_ino, UPDATE_INO) ?
>
> Hehe, I found that and was going to submit new patch. :)
> Small changes are not a big deal, so I'll test and then push them into the tree.
> The for-next tree can be rebased all the time, so if you have any suggestion,
> let me know.

It's OK, I will. :)

Thanks

>
> Thanks,
>
> >
> > > + goto flush_out;
> > > + goto out;
> > > + }
> > > +
> > > /* guarantee free sections for fsync */
> > > f2fs_balance_fs(sbi);
> > >
> > > @@ -188,6 +199,11 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int
> > > datasync)
> > > ret = wait_on_node_pages_writeback(sbi, inode->i_ino);
> > > if (ret)
> > > goto out;
> > > +
> > > + /* once recovery info is written, don't need to tack this */
> > > + remove_dirty_inode(sbi, inode->i_ino, APPEND_INO);
> > > +flush_out:
> > > + remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
> > > ret = f2fs_issue_flush(F2FS_SB(inode->i_sb));
> > > }
> > > out:
> > > --
> > > 1.8.5.2 (Apple Git-48)
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > Want fast and easy access to all the code in your enterprise? Index and
> > > search up to 200,000 lines of code with a free copy of Black Duck
> > > Code Sight - the same software that powers the world's largest code
> > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > http://p.sf.net/sfu/bds
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 12:49:42

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync

Hi,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Wednesday, July 30, 2014 11:13 AM
> To: Chao Yu
> Cc: 'Changman Lee'; [email protected]; [email protected];
> [email protected]
> Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync
>
> On Wed, Jul 30, 2014 at 10:45:38AM +0800, Chao Yu wrote:
> > Hi Jaegeuk Changman,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:[email protected]]
> > > Sent: Wednesday, July 30, 2014 9:08 AM
> > > To: Changman Lee
> > > Cc: [email protected]; [email protected];
> > > [email protected]
> > > Subject: Re: [f2fs-dev] [PATCH 07/11] f2fs: enable in-place-update for fdatasync
> > >
> > > On Wed, Jul 30, 2014 at 08:54:55AM +0900, Changman Lee wrote:
> > > > On Tue, Jul 29, 2014 at 05:22:15AM -0700, Jaegeuk Kim wrote:
> > > > > Hi Changman,
> > > > >
> > > > > On Tue, Jul 29, 2014 at 09:41:11AM +0900, Changman Lee wrote:
> > > > > > Hi Jaegeuk,
> > > > > >
> > > > > > On Fri, Jul 25, 2014 at 03:47:21PM -0700, Jaegeuk Kim wrote:
> > > > > > > This patch enforces in-place-updates only when fdatasync is requested.
> > > > > > > If we adopt this in-place-updates for the fdatasync, we can skip to write the
> > > > > > > recovery information.
> > > > > >
> > > > > > But, as you know, random write occurs when changing into in-place-updates.
> > > > > > It will degrade write performance. Is there any case in-place-updates is
> > > > > > better, except recovery or high utilization?
> > > > >
> > > > > As I described, you can easily imagine, if users requested small amount of data
> > > > > writes with fdatasync, we should do data writes + node writes.
> > > > > But, if we can do in-place-update, we don't need to write node blocks.
> > > > > Surely it triggers random writes, however, the amount of data is preety small
> > > > > and the device handles them very fast by its inside cache, so that it can
> > > > > enhance the performance.
> > > > >
> > > > > Thanks,
> > > >
> > > > Partially agree. Sometimes, I see that SSR shows lower performance than
> > > > IPU. One of the reasons might be node writes.
> > >
> > > What did you mean? That's why I consider IPU eagarly instead of SSR and LFS
> > > under the very strict cases.
> > >
> > > > Anyway, if so, we should know total dirty pages for fdatasync and it's very
> > > > tunable according to a random write performance of device.
> > >
> > > Agreed. We can do that either by comparing the number of dirty pages,
> > > additional data/node writes, and cost of checkpoint at the same time.
> > > And there is another thing is that we need to consider the number of
> > > waiting time for end_io.
> > > I'll look into this at some time.
> >
> > Agreed, but please additionally consider that the number we got is not highly
> > accurate because without protection of ->i_mutex writer could generate more
> > dirty pages.
>
> Right, but IMO, it doesn't need to get correct numbers.
> Just for reference is enough.
> Sort of trade-offs. :)

Hmmm...maybe it's not necessary to worry about this condition as our trade-offs
algorithm is good enough.
Anyway, let's make the fixed patch based on the test data. :)

Reviewed-by: Chao Yu <[email protected]>

Thanks,

>
> Thanks,
>
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > Thanks,
> > > >
> > > > >
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > > > > ---
> > > > > > > fs/f2fs/f2fs.h | 1 +
> > > > > > > fs/f2fs/file.c | 7 +++++++
> > > > > > > fs/f2fs/segment.h | 4 ++++
> > > > > > > 3 files changed, 12 insertions(+)
> > > > > > >
> > > > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > > > index ab36025..8f8685e 100644
> > > > > > > --- a/fs/f2fs/f2fs.h
> > > > > > > +++ b/fs/f2fs/f2fs.h
> > > > > > > @@ -998,6 +998,7 @@ enum {
> > > > > > > FI_INLINE_DATA, /* used for inline data*/
> > > > > > > FI_APPEND_WRITE, /* inode has appended data */
> > > > > > > FI_UPDATE_WRITE, /* inode has in-place-update data */
> > > > > > > + FI_NEED_IPU, /* used fo ipu for fdatasync */
> > > > > > > };
> > > > > > >
> > > > > > > static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
> > > > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > > > index 121689a..e339856 100644
> > > > > > > --- a/fs/f2fs/file.c
> > > > > > > +++ b/fs/f2fs/file.c
> > > > > > > @@ -127,11 +127,18 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t
> end,
> > > int datasync)
> > > > > > > return 0;
> > > > > > >
> > > > > > > trace_f2fs_sync_file_enter(inode);
> > > > > > > +
> > > > > > > + /* if fdatasync is triggered, let's do in-place-update */
> > > > > > > + if (datasync)
> > > > > > > + set_inode_flag(fi, FI_NEED_IPU);
> > > > > > > +
> > > > > > > ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
> > > > > > > if (ret) {
> > > > > > > trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
> > > > > > > return ret;
> > > > > > > }
> > > > > > > + if (datasync)
> > > > > > > + clear_inode_flag(fi, FI_NEED_IPU);
> > > > > > >
> > > > > > > /*
> > > > > > > * if there is no written data, don't waste time to write recovery info.
> > > > > > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> > > > > > > index ee5c75e..55973f7 100644
> > > > > > > --- a/fs/f2fs/segment.h
> > > > > > > +++ b/fs/f2fs/segment.h
> > > > > > > @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode)
> > > > > > > if (S_ISDIR(inode->i_mode))
> > > > > > > return false;
> > > > > > >
> > > > > > > + /* this is only set during fdatasync */
> > > > > > > + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU))
> > > > > > > + return true;
> > > > > > > +
> > > > > > > switch (SM_I(sbi)->ipu_policy) {
> > > > > > > case F2FS_IPU_FORCE:
> > > > > > > return true;
> > > > > > > --
> > > > > > > 1.8.5.2 (Apple Git-48)
> > > > > > >
> > > > > > >
> > > > > > > ------------------------------------------------------------------------------
> > > > > > > Want fast and easy access to all the code in your enterprise? Index and
> > > > > > > search up to 200,000 lines of code with a free copy of Black Duck
> > > > > > > Code Sight - the same software that powers the world's largest code
> > > > > > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > > > > > http://p.sf.net/sfu/bds
> > > > > > > _______________________________________________
> > > > > > > Linux-f2fs-devel mailing list
> > > > > > > [email protected]
> > > > > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> > >
> > > ------------------------------------------------------------------------------
> > > Infragistics Professional
> > > Build stunning WinForms apps today!
> > > Reboot your WinForms applications with our WinForms controls.
> > > Build a bridge from your legacy apps to the future.
> > > http://pubads.g.doubleclick.net/gampad/clk?id=153845071&iu=/4140/ostg.clktrk
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-07-30 12:59:24

by Chao Yu

[permalink] [raw]
Subject: RE: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely

Hi,

> -----Original Message-----
> From: Jaegeuk Kim [mailto:[email protected]]
> Sent: Wednesday, July 30, 2014 11:18 AM
> To: Chao Yu
> Cc: [email protected]; [email protected];
> [email protected]
> Subject: Re: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely
>
> On Wed, Jul 30, 2014 at 09:44:43AM +0800, Chao Yu wrote:
> > Hi Jaegeuk,
> >
> > > -----Original Message-----
> > > From: Jaegeuk Kim [mailto:[email protected]]
> > > Sent: Saturday, July 26, 2014 6:47 AM
> > > To: [email protected]; [email protected];
> > > [email protected]
> > > Cc: Jaegeuk Kim
> > > Subject: [f2fs-dev] [PATCH 08/11] f2fs: fix wrong condition for unlikely
> > >
> > > This patch fixes the wrongly used unlikely condition.
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>

Reviewed-by: Chao Yu <[email protected]>

> > > ---
> > > fs/f2fs/checkpoint.c | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > index 42a16c1..36b0d47 100644
> > > --- a/fs/f2fs/checkpoint.c
> > > +++ b/fs/f2fs/checkpoint.c
> > > @@ -932,7 +932,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
> > > /* Here, we only have one bio having CP pack */
> > > sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
> > >
> > > - if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
> > > + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) {
> >
> > Maybe use likely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) or
> >
> > if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)))
> > return;
> >
> > is more appropriate. How do you think?
>
> Currently I'd like to put this without any likely or unlikely.
> Best thing is to measure some performance how this would make effect on.

I think we could get little improvement of performance if we use likely or unlikely
here. So if you'd like leave it without pre-judgment, it will be ok.
Let's just leave it as it was. :)

Thanks,

> Until then, it'd be better to do without it, since apparently this should
> not be unlikely.
>
> How about you?
> Can we compare both of them explicitly?
>
> Thanks,
>
> >
> > > clear_prefree_segments(sbi);
> > > release_dirty_inode(sbi);
> > > F2FS_RESET_SB_DIRT(sbi);
> > > --
> > > 1.8.5.2 (Apple Git-48)
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > Want fast and easy access to all the code in your enterprise? Index and
> > > search up to 200,000 lines of code with a free copy of Black Duck
> > > Code Sight - the same software that powers the world's largest code
> > > search on Ohloh, the Black Duck Open Hub! Try it now.
> > > http://p.sf.net/sfu/bds
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel