Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752797AbbF3KnD (ORCPT ); Tue, 30 Jun 2015 06:43:03 -0400 Received: from mailout1.samsung.com ([203.254.224.24]:56391 "EHLO mailout1.samsung.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752229AbbF3Kmy (ORCPT ); Tue, 30 Jun 2015 06:42:54 -0400 X-AuditID: cbfee61a-f79516d000006302-ba-559272ac0440 From: Chao Yu To: Jaegeuk Kim , Changman Lee Cc: linux-f2fs-devel@lists.sourceforge.net, linux-kernel@vger.kernel.org Subject: [PATCH 1/2] f2fs: refactor shrink flow for extent cache Date: Tue, 30 Jun 2015 18:42:09 +0800 Message-id: <00ea01d0b321$854293d0$8fc7bb70$@samsung.com> MIME-version: 1.0 Content-type: text/plain; charset=us-ascii Content-transfer-encoding: 7bit X-Mailer: Microsoft Outlook 14.0 Thread-index: AdCzFSw8p+8ASgiCTJunKs9mW/62Dw== Content-language: zh-cn X-Brightmail-Tracker: H4sIAAAAAAAAA+NgFrrMLMWRmVeSWpSXmKPExsVy+t9jQd01RZNCDXa1aFtc29fIZPFk/Sxm i0uL3C0u75rD5sDisWlVJ5vH7gWfmTz6tqxi9Pi8SS6AJYrLJiU1J7MstUjfLoEr49bWdpaC kxYVT+4cZGlg/KfbxcjJISFgInF17iUWCFtM4sK99WxdjFwcQgLTGSVaZ9yGcl4xSiz8fIQJ pIpNQEViecd/MFtEwEti0v4TYN3MAh4SjR3fWUFsYQEHiQsne9hBbBYBVYmXp48xgti8ApYS PRM6WSBsQYkfk+9B9WpJrN95nAnClpfYvOYtM8RFChI7zr5mhNilJ3Ht0zU2iBpxiY1HbrFM YBSYhWTULCSjZiEZNQtJywJGllWMoqkFyQXFSem5hnrFibnFpXnpesn5uZsYwUH9TGoH48oG i0OMAhyMSjy8EjaTQoVYE8uKK3MPMUpwMCuJ8NZJAoV4UxIrq1KL8uOLSnNSiw8xSnOwKInz nsz3CRUSSE8sSc1OTS1ILYLJMnFwSjUwli2OPL1iRvKnny+iKwVfh8cJbXunG/Ri3eI1P95V JQo/l3ZNsl2yUebSvG3pM5f7dQYvV7hwx16tV/HQoqcJ11WerOT43OdofU7R5fr3t6dV/TaY cc/eWfY6cPfl1yxK25jnbOHor50lvH6md8qeCpXJh28eFrmq+aOpv+SoxbOW6p080gma1kos xRmJhlrMRcWJAHuRsbtmAgAA Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7250 Lines: 241 For now, in extent cache, we have a global lru list which links all extent node in the cache, and the list is protected by a global spinlock. If we want to shrink extent cache, we will: 1. delete all target extent node from global lru list under spinlock; 2. traverse all per-inode extent tree in global radix tree; 2.a. traverse all extent node in per-inode extent tree, try to free extent node if it is not in global lru list already. This method is inefficient when there is huge number of inode extent tree in global extent tree. In this patch we introduce a new method for extent cache shrinking: When we attach a new extent node, we record extent tree pointer in extent node. In shrink flow, we can try to find and lock extent tree of inode directly by this backward pointer, and then detach the extent node from extent tree. This can help to shrink extent cache more efficiently. Signed-off-by: Chao Yu --- fs/f2fs/data.c | 113 ++++++++++++++++++++++++++++++++++----------------------- fs/f2fs/f2fs.h | 6 +++ 2 files changed, 73 insertions(+), 46 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e90522a..e96916a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -279,6 +279,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, en->ei = *ei; INIT_LIST_HEAD(&en->list); + en->et = et; rb_link_node(&en->rb_node, parent, p); rb_insert_color(&en->rb_node, &et->root); et->count++; @@ -435,7 +436,7 @@ update_out: } static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, bool free_all) + struct extent_tree *et) { struct rb_node *node, *next; struct extent_node *en; @@ -446,23 +447,42 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); - if (free_all) { - spin_lock(&sbi->extent_lock); - if (!list_empty(&en->list)) - list_del_init(&en->list); - spin_unlock(&sbi->extent_lock); - } + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) + list_del_init(&en->list); + spin_unlock(&sbi->extent_lock); - if (free_all || list_empty(&en->list)) { - __detach_extent_node(sbi, et, en); - kmem_cache_free(extent_node_slab, en); - } + __detach_extent_node(sbi, et, en); + kmem_cache_free(extent_node_slab, en); node = next; } return count - et->count; } +static bool __try_free_extent_tree(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct extent_tree *et; + + if (!down_write_trylock(&sbi->extent_tree_lock)) + return false; + + et = radix_tree_lookup(&sbi->extent_tree_root, ino); + if (!et) + goto out; + + if (__can_free_extent_tree(et)) { + radix_tree_delete(&sbi->extent_tree_root, ino); + kmem_cache_free(extent_tree_slab, et); + sbi->total_ext_tree--; + up_write(&sbi->extent_tree_lock); + return true; + } +out: + up_write(&sbi->extent_tree_lock); + return false; +} + static void __drop_largest_extent(struct inode *inode, pgoff_t fofs) { struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; @@ -633,7 +653,7 @@ update_extent: kmem_cache_free(extent_node_slab, den); if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) - __free_extent_tree(sbi, et, true); + __free_extent_tree(sbi, et); write_unlock(&et->lock); @@ -642,48 +662,49 @@ update_extent: unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) { - struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; - struct extent_node *en, *tmp; - unsigned long ino = F2FS_ROOT_INO(sbi); - struct radix_tree_root *root = &sbi->extent_tree_root; - unsigned int found; + struct extent_tree *et; + struct extent_node *en; unsigned int node_cnt = 0, tree_cnt = 0; if (!test_opt(sbi, EXTENT_CACHE)) return 0; spin_lock(&sbi->extent_lock); - list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { - if (!nr_shrink--) - break; - list_del_init(&en->list); - } - spin_unlock(&sbi->extent_lock); + for (; nr_shrink > 0; nr_shrink--) { + nid_t ino; + bool can_free; - if (!down_write_trylock(&sbi->extent_tree_lock)) - goto out; + if (list_empty(&sbi->extent_list)) + break; + en = list_first_entry(&sbi->extent_list, struct extent_node, + list); + et = en->et; - while ((found = radix_tree_gang_lookup(root, - (void **)treevec, ino, EXT_TREE_VEC_SIZE))) { - unsigned i; - - ino = treevec[found - 1]->ino + 1; - for (i = 0; i < found; i++) { - struct extent_tree *et = treevec[i]; - - write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et, false); - write_unlock(&et->lock); - if (!atomic_read(&et->refcount) && !et->count) { - radix_tree_delete(root, et->ino); - kmem_cache_free(extent_tree_slab, et); - sbi->total_ext_tree--; - tree_cnt++; - } + if (!write_trylock(&et->lock)) { + /* refresh this extent node's position in extent list */ + list_move_tail(&en->list, &sbi->extent_list); + continue; } + + list_del(&en->list); + spin_unlock(&sbi->extent_lock); + + __detach_extent_node(sbi, et, en); + kmem_cache_free(extent_node_slab, en); + + ino = et->ino; + can_free = __can_free_extent_tree(et); + write_unlock(&et->lock); + + node_cnt++; + + if (can_free && __try_free_extent_tree(sbi, ino)) + tree_cnt++; + + spin_lock(&sbi->extent_lock); } - up_write(&sbi->extent_tree_lock); -out: + spin_unlock(&sbi->extent_lock); + trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); return node_cnt + tree_cnt; @@ -699,7 +720,7 @@ void f2fs_destroy_extent_node(struct inode *inode) return; write_lock(&et->lock); - node_cnt = __free_extent_tree(sbi, et, true); + node_cnt = __free_extent_tree(sbi, et); write_unlock(&et->lock); } @@ -723,7 +744,7 @@ void f2fs_destroy_extent_tree(struct inode *inode) /* delete extent tree entry in radix tree */ down_write(&sbi->extent_tree_lock); atomic_dec(&et->refcount); - f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); + f2fs_bug_on(sbi, !__can_free_extent_tree(et)); radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); kmem_cache_free(extent_tree_slab, et); sbi->total_ext_tree--; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2baca08..ee4c04a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -343,6 +343,7 @@ struct extent_node { struct rb_node rb_node; /* rb node located in rb-tree */ struct list_head list; /* node in global extent list of sbi */ struct extent_info ei; /* extent info */ + struct extent_tree *et; /* backref extent tree for shrinker */ }; struct extent_tree { @@ -485,6 +486,11 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } +static inline bool __can_free_extent_tree(struct extent_tree *et) +{ + return (!atomic_read(&et->refcount) && !et->count); +} + struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ -- 2.4.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/