2013-04-25 11:02:43

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/3] f2fs: check nid == 0 in add_free_nid

It is more obvious that add_free_nid checks whether the free nid is zero or not.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/node.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a0aa044..c8f48d4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1245,6 +1245,10 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)

if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
return 0;
+
+ /* 0 nid should not be used */
+ if (nid == 0)
+ return 0;
retry:
i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
if (!i) {
@@ -1286,10 +1290,6 @@ static int scan_nat_page(struct f2fs_nm_info *nm_i,
int fcnt = 0;
int i;

- /* 0 nid should not be used */
- if (start_nid == 0)
- ++start_nid;
-
i = start_nid % NAT_ENTRY_PER_BLOCK;

for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
--
1.8.1.3.566.gaa39828


2013-04-25 11:02:46

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/3] f2fs: enhnace alloc_nid and build_free_nids flows

In order to avoid build_free_nid lock contention, let's change the order of
function calls as follows.

At first, check whether there is enough free nids.
- If available, just get a free nid with spin_lock without any overhead.
- Otherwise, conduct build_free_nids.
: scan nat pages, journal nat entries, and nat cache entries.

We should consider carefullly not to serve free nids intermediately made by
build_free_nids.
We can get stable free nids only after build_free_nids is done.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/node.c | 82 ++++++++++++++++++++++++++--------------------------------
2 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6283c8d..20aab02 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -190,7 +190,6 @@ static inline void set_raw_extent(struct extent_info *ext,
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
- nid_t init_scan_nid; /* the first nid to be scanned */
nid_t next_scan_nid; /* the next nid to be scanned */

/* NAT cache management */
@@ -360,6 +359,7 @@ struct f2fs_sb_info {
struct mutex writepages; /* mutex for writepages() */
unsigned char next_lock_num; /* round-robin global locks */
int por_doing; /* recovery is doing or not */
+ int on_build_free_nids; /* build_free_nids is doing */

/* for orphan inode management */
struct list_head orphan_inode_list; /* orphan inode list */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index c8f48d4..aede910 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1309,14 +1309,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_summary_block *sum = curseg->sum_blk;
- nid_t nid = 0;
- bool is_cycled = false;
- int fcnt = 0;
- int i;
+ int fcnt = 0, i = 0;
+ nid_t nid = nm_i->next_scan_nid;

- nid = nm_i->next_scan_nid;
- nm_i->init_scan_nid = nid;
+ /* Enough entries */
+ if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+ return;

+ /* readahead nat pages to be scanned */
ra_nat_pages(sbi, nid);

while (1) {
@@ -1326,19 +1326,15 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);

nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
-
- if (nid >= nm_i->max_nid) {
+ if (nid >= nm_i->max_nid)
nid = 0;
- is_cycled = true;
- }
- if (fcnt > MAX_FREE_NIDS)
- break;
- if (is_cycled && nm_i->init_scan_nid <= nid)
+
+ if (i++ == FREE_NID_PAGES)
break;
}

- /* go to the next nat page in order to reuse free nids first */
- nm_i->next_scan_nid = nm_i->init_scan_nid + NAT_ENTRY_PER_BLOCK;
+ /* go to the next free nat pages to find free nids abundantly */
+ nm_i->next_scan_nid = nid;

/* find free nids from current sum_pages */
mutex_lock(&curseg->curseg_mutex);
@@ -1375,41 +1371,36 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *i = NULL;
struct list_head *this;
retry:
- mutex_lock(&nm_i->build_lock);
- if (!nm_i->fcnt) {
- /* scan NAT in order to build free nid list */
- build_free_nids(sbi);
- if (!nm_i->fcnt) {
- mutex_unlock(&nm_i->build_lock);
- return false;
- }
- }
- mutex_unlock(&nm_i->build_lock);
+ if (sbi->total_valid_node_count + 1 >= nm_i->max_nid)
+ return false;

- /*
- * We check fcnt again since previous check is racy as
- * we didn't hold free_nid_list_lock. So other thread
- * could consume all of free nids.
- */
spin_lock(&nm_i->free_nid_list_lock);
- if (!nm_i->fcnt) {
- spin_unlock(&nm_i->free_nid_list_lock);
- goto retry;
- }

- BUG_ON(list_empty(&nm_i->free_nid_list));
- list_for_each(this, &nm_i->free_nid_list) {
- i = list_entry(this, struct free_nid, list);
- if (i->state == NID_NEW)
- break;
- }
+ /* We should not use stale free nids created by build_free_nids */
+ if (nm_i->fcnt && !sbi->on_build_free_nids) {
+ BUG_ON(list_empty(&nm_i->free_nid_list));
+ list_for_each(this, &nm_i->free_nid_list) {
+ i = list_entry(this, struct free_nid, list);
+ if (i->state == NID_NEW)
+ break;
+ }

- BUG_ON(i->state != NID_NEW);
- *nid = i->nid;
- i->state = NID_ALLOC;
- nm_i->fcnt--;
+ BUG_ON(i->state != NID_NEW);
+ *nid = i->nid;
+ i->state = NID_ALLOC;
+ nm_i->fcnt--;
+ spin_unlock(&nm_i->free_nid_list_lock);
+ return true;
+ }
spin_unlock(&nm_i->free_nid_list_lock);
- return true;
+
+ /* Let's scan nat pages and its caches to get free nids */
+ mutex_lock(&nm_i->build_lock);
+ sbi->on_build_free_nids = 1;
+ build_free_nids(sbi);
+ sbi->on_build_free_nids = 0;
+ mutex_unlock(&nm_i->build_lock);
+ goto retry;
}

/*
@@ -1696,7 +1687,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
spin_lock_init(&nm_i->free_nid_list_lock);
rwlock_init(&nm_i->nat_tree_lock);

- nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
--
1.8.1.3.566.gaa39828

2013-04-25 11:03:14

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/3] f2fs: add a tracepoint on f2fs_new_inode

This can help when debugging the free nid allocation flows.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/namei.c | 3 ++-
include/trace/events/f2fs.h | 7 +++++++
2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index c57fd18..4aa26e5 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -63,7 +63,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_free = true;
goto out;
}
-
+ trace_f2fs_new_inode(inode, 0);
mark_inode_dirty(inode);
return inode;

@@ -71,6 +71,7 @@ out:
clear_nlink(inode);
unlock_new_inode(inode);
fail:
+ trace_f2fs_new_inode(inode, err);
iput(inode);
if (nid_free)
alloc_nid_failed(sbi, ino);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index b2b2f72..52ae548 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -197,6 +197,13 @@ DEFINE_EVENT(f2fs__inode, f2fs_evict_inode,
TP_ARGS(inode)
);

+DEFINE_EVENT(f2fs__inode_exit, f2fs_new_inode,
+
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret)
+);
+
TRACE_EVENT(f2fs_unlink_enter,

TP_PROTO(struct inode *dir, struct dentry *dentry),
--
1.8.1.3.566.gaa39828

2013-04-25 23:57:41

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 1/3] f2fs: check nid == 0 in add_free_nid

2013/4/25, Jaegeuk Kim <[email protected]>:
> It is more obvious that add_free_nid checks whether the free nid is zero or
> not.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
Looks reasonable to me.
Reviewed-by: Namjae Jeon <[email protected]>

Thanks~.

2013-04-26 01:44:06

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 3/3] f2fs: enhnace alloc_nid and build_free_nids flows

2013/4/25, Jaegeuk Kim <[email protected]>:
> In order to avoid build_free_nid lock contention, let's change the order of
> function calls as follows.
>
> At first, check whether there is enough free nids.
> - If available, just get a free nid with spin_lock without any overhead.
> - Otherwise, conduct build_free_nids.
> : scan nat pages, journal nat entries, and nat cache entries.
>
> We should consider carefullly not to serve free nids intermediately made by
> build_free_nids.
> We can get stable free nids only after build_free_nids is done.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
I can't find any issues in this patch.
Reviewed-by: Namjae Jeon <[email protected]>

Thanks.
> ---

2013-04-26 04:42:52

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 2/3] f2fs: add a tracepoint on f2fs_new_inode

2013/4/25, Jaegeuk Kim <[email protected]>:
> This can help when debugging the free nid allocation flows.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
Yes, Agreed also.
Reviewed-by: Namjae Jeon <[email protected]>

Thanks.