2014-11-09 07:36:14

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/5] f2fs: disable roll-forward when active_logs = 2

The roll-forward mechanism should be activated only when the number of active
logs is not 2.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/file.c | 2 ++
fs/f2fs/segment.c | 4 ++--
2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 46311e7..54722a0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (test_opt(sbi, FASTBOOT))
need_cp = true;
+ else if (sbi->active_logs == 2)
+ need_cp = true;

return need_cp;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 2fb3d7f..16721b5d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
else
return CURSEG_COLD_DATA;
} else {
- if (IS_DNODE(page) && !is_cold_node(page))
- return CURSEG_HOT_NODE;
+ if (IS_DNODE(page) && is_cold_node(page))
+ return CURSEG_WARM_NODE;
else
return CURSEG_COLD_NODE;
}
--
2.1.1


2014-11-09 07:36:17

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/5] f2fs: introduce the number of inode entries

This patch adds a counter to monitor the number of ino entries.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/checkpoint.c | 27 +++++++++++++++------------
fs/f2fs/debug.c | 4 +++-
fs/f2fs/f2fs.h | 2 +-
3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd6a357..bcd686e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -318,6 +318,8 @@ retry:
e->ino = ino;

list_add_tail(&e->list, &sbi->ino_list[type]);
+ if (type != ORPHAN_INO)
+ sbi->ino_num[type]++;
}
spin_unlock(&sbi->ino_lock[type]);
}
@@ -331,8 +333,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
if (e) {
list_del(&e->list);
radix_tree_delete(&sbi->ino_root[type], ino);
- if (type == ORPHAN_INO)
- sbi->n_orphans--;
+ sbi->ino_num[type]--;
spin_unlock(&sbi->ino_lock[type]);
kmem_cache_free(ino_entry_slab, e);
return;
@@ -373,6 +374,7 @@ void release_dirty_inode(struct f2fs_sb_info *sbi)
list_del(&e->list);
radix_tree_delete(&sbi->ino_root[i], e->ino);
kmem_cache_free(ino_entry_slab, e);
+ sbi->ino_num[i]--;
}
spin_unlock(&sbi->ino_lock[i]);
}
@@ -383,10 +385,10 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
int err = 0;

spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- if (unlikely(sbi->n_orphans >= sbi->max_orphans))
+ if (unlikely(sbi->ino_num[ORPHAN_INO] >= sbi->max_orphans))
err = -ENOSPC;
else
- sbi->n_orphans++;
+ sbi->ino_num[ORPHAN_INO]++;
spin_unlock(&sbi->ino_lock[ORPHAN_INO]);

return err;
@@ -395,8 +397,8 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
spin_lock(&sbi->ino_lock[ORPHAN_INO]);
- f2fs_bug_on(sbi, sbi->n_orphans == 0);
- sbi->n_orphans--;
+ f2fs_bug_on(sbi, sbi->ino_num[ORPHAN_INO] == 0);
+ sbi->ino_num[ORPHAN_INO]--;
spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

@@ -460,11 +462,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0;
unsigned short index;
- unsigned short orphan_blocks =
- (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ unsigned short orphan_blocks;
struct page *page = NULL;
struct ino_entry *orphan = NULL;

+ orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]);
+
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);

@@ -892,7 +895,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

- orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
+ orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
orphan_blocks);

@@ -908,7 +911,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
orphan_blocks);
}

- if (sbi->n_orphans)
+ if (sbi->ino_num[ORPHAN_INO])
set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
@@ -943,7 +946,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_put_page(cp_page, 1);
}

- if (sbi->n_orphans) {
+ if (sbi->ino_num[ORPHAN_INO]) {
write_orphan_inodes(sbi, start_blk);
start_blk += orphan_blocks;
}
@@ -1045,6 +1048,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
spin_lock_init(&sbi->ino_lock[i]);
INIT_LIST_HEAD(&sbi->ino_list[i]);
+ sbi->ino_num[i] = 0;
}

/*
@@ -1053,7 +1057,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
* orphan entries with the limitation one reserved segment
* for cp pack we can have max 1020*504 orphan entries
*/
- sbi->n_orphans = 0;
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 86e6e92..74a0d78 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -119,6 +119,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
+ int i;

if (si->base_mem)
goto get_cache;
@@ -168,8 +169,9 @@ get_cache:
si->cache_mem += npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->cache_mem += npages << PAGE_CACHE_SHIFT;
- si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
+ for (i = 0; i <= UPDATE_INO; i++)
+ si->cache_mem += sbi->ino_num[i] * sizeof(struct ino_entry);
}

static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index d45f3f4..994b87e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -532,9 +532,9 @@ struct f2fs_sb_info {
struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */
spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */
struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */
+ unsigned long ino_num[MAX_INO_ENTRY]; /* number of entries */

/* for orphan inode, use 0'th array */
- unsigned int n_orphans; /* # of orphan inodes */
unsigned int max_orphans; /* max orphan inodes */

/* for directory inode management */
--
2.1.1

2014-11-09 07:36:20

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 5/5] f2fs: do not skip any writes under memory pressure

Under memory pressure, let's avoid skipping data writes.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/segment.h | 3 +++
1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 6723ccc..7f327c0 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -711,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return 0;
+
if (type == DATA)
return sbi->blocks_per_seg;
else if (type == NODE)
--
2.1.1

2014-11-09 07:36:39

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 4/5] f2fs: write node pages if checkpoint is not doing

Node pages need to be written out if a checkpoint is not in progress, in order
to avoid memory pressure.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/node.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 4ea2c47..6f514fb 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1314,10 +1314,12 @@ static int f2fs_write_node_page(struct page *page,
return 0;
}

- if (wbc->for_reclaim)
- goto redirty_out;
-
- down_read(&sbi->node_write);
+ if (wbc->for_reclaim) {
+ if (!down_read_trylock(&sbi->node_write))
+ goto redirty_out;
+ } else {
+ down_read(&sbi->node_write);
+ }
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
--
2.1.1

2014-11-09 07:37:01

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/5] f2fs: control the memory footprint used by ino entries

This patch adds control over the memory footprint used by ino entries.
This is done on a best-effort basis, not strictly.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/node.c | 28 ++++++++++++++++++++++------
fs/f2fs/node.h | 3 ++-
fs/f2fs/segment.c | 3 ++-
3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 44b8afe..4ea2c47 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct sysinfo val;
+ unsigned long avail_ram;
unsigned long mem_size = 0;
bool res = false;

si_meminfo(&val);
- /* give 25%, 25%, 50% memory for each components respectively */
+
+ /* only uses low memory */
+ avail_ram = val.totalram - val.totalhigh;
+
+ /* give 25%, 25%, 50%, 50% memory for each components respectively */
if (type == FREE_NIDS) {
- mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
- mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+ mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
+ PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->dirty_exceeded)
return false;
mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
- res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INO_ENTRIES) {
+ int i;
+
+ if (sbi->sb->s_bdi->dirty_exceeded)
+ return false;
+ for (i = 0; i <= UPDATE_INO; i++)
+ mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry))
+ >> PAGE_CACHE_SHIFT;
+ res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
}
return res;
}
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index acb71e5..d10b644 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
- DIRTY_DENTS /* indicates dirty dentry pages */
+ DIRTY_DENTS, /* indicates dirty dentry pages */
+ INO_ENTRIES, /* indicates inode entries */
};

struct nat_entry_set {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 16721b5d..e094675 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -276,7 +276,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
- excess_prefree_segs(sbi))
+ excess_prefree_segs(sbi) ||
+ available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true);
}

--
2.1.1

2014-11-10 03:29:56

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 3/5] f2fs: control the memory footprint used by ino entries

On Sat, Nov 08, 2014 at 11:36:07PM -0800, Jaegeuk Kim wrote:
> This patch adds to control the memory footprint used by ino entries.
> This will conduct best effort, not strictly.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/node.c | 28 ++++++++++++++++++++++------
> fs/f2fs/node.h | 3 ++-
> fs/f2fs/segment.c | 3 ++-
> 3 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 44b8afe..4ea2c47 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
> {
> struct f2fs_nm_info *nm_i = NM_I(sbi);
> struct sysinfo val;
> + unsigned long avail_ram;
> unsigned long mem_size = 0;
> bool res = false;
>
> si_meminfo(&val);
> - /* give 25%, 25%, 50% memory for each components respectively */
> +
> + /* only uses low memory */
> + avail_ram = val.totalram - val.totalhigh;
> +
> + /* give 25%, 25%, 50%, 50% memory for each components respectively */

Hi Jaegeuk,

The memory usage of nm_i should be 100% but it's 125%.
Mistake or intended?

> if (type == FREE_NIDS) {
> - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
> - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
> + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
> + PAGE_CACHE_SHIFT;
> + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> } else if (type == NAT_ENTRIES) {
> - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
> - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
> + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
> + PAGE_CACHE_SHIFT;
> + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> } else if (type == DIRTY_DENTS) {
> if (sbi->sb->s_bdi->dirty_exceeded)
> return false;
> mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
> - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
> + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> + } else if (type == INO_ENTRIES) {
> + int i;
> +
> + if (sbi->sb->s_bdi->dirty_exceeded)
> + return false;
> + for (i = 0; i <= UPDATE_INO; i++)
> + mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry))
> + >> PAGE_CACHE_SHIFT;
> + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> }
> return res;
> }
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index acb71e5..d10b644 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
> enum mem_type {
> FREE_NIDS, /* indicates the free nid list */
> NAT_ENTRIES, /* indicates the cached nat entry */
> - DIRTY_DENTS /* indicates dirty dentry pages */
> + DIRTY_DENTS, /* indicates dirty dentry pages */
> + INO_ENTRIES, /* indicates inode entries */
> };
>
> struct nat_entry_set {
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 16721b5d..e094675 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -276,7 +276,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> {
> /* check the # of cached NAT entries and prefree segments */
> if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
> - excess_prefree_segs(sbi))
> + excess_prefree_segs(sbi) ||
> + available_free_memory(sbi, INO_ENTRIES))
> f2fs_sync_fs(sbi->sb, true);
> }
>
> --
> 2.1.1
>
>
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-10 05:20:14

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 3/5] f2fs: control the memory footprint used by ino entries

On Mon, Nov 10, 2014 at 12:28:34PM +0900, Changman Lee wrote:
> On Sat, Nov 08, 2014 at 11:36:07PM -0800, Jaegeuk Kim wrote:
> > This patch adds to control the memory footprint used by ino entries.
> > This will conduct best effort, not strictly.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/node.c | 28 ++++++++++++++++++++++------
> > fs/f2fs/node.h | 3 ++-
> > fs/f2fs/segment.c | 3 ++-
> > 3 files changed, 26 insertions(+), 8 deletions(-)
> >
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 44b8afe..4ea2c47 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
> > {
> > struct f2fs_nm_info *nm_i = NM_I(sbi);
> > struct sysinfo val;
> > + unsigned long avail_ram;
> > unsigned long mem_size = 0;
> > bool res = false;
> >
> > si_meminfo(&val);
> > - /* give 25%, 25%, 50% memory for each components respectively */
> > +
> > + /* only uses low memory */
> > + avail_ram = val.totalram - val.totalhigh;
> > +
> > + /* give 25%, 25%, 50%, 50% memory for each components respectively */
>
> Hi Jaegeuk,
>
> The memory usage of nm_i should be 100% but it's 125%.
> Mistake or intended?

I contemplated whether this 100% was an exact number that we expected.
The answer was NO, since this number was just an estimate.
There were no strict constraints limiting the memory footprint; even the
previous code simply used 25%, 25%, and 50%.

So, here, I'd like to add an additional threshold for INO_ENTRIES on the basis
of the given threshold.
In addition, I don't want to add any complex equations just to satisfy 100%.
That would be meaningless.

Thanks,

>
> > if (type == FREE_NIDS) {
> > - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
> > - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
> > + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
> > + PAGE_CACHE_SHIFT;
> > + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> > } else if (type == NAT_ENTRIES) {
> > - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
> > - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
> > + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
> > + PAGE_CACHE_SHIFT;
> > + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> > } else if (type == DIRTY_DENTS) {
> > if (sbi->sb->s_bdi->dirty_exceeded)
> > return false;
> > mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
> > - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
> > + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> > + } else if (type == INO_ENTRIES) {
> > + int i;
> > +
> > + if (sbi->sb->s_bdi->dirty_exceeded)
> > + return false;
> > + for (i = 0; i <= UPDATE_INO; i++)
> > + mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry))
> > + >> PAGE_CACHE_SHIFT;
> > + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> > }
> > return res;
> > }
> > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > index acb71e5..d10b644 100644
> > --- a/fs/f2fs/node.h
> > +++ b/fs/f2fs/node.h
> > @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
> > enum mem_type {
> > FREE_NIDS, /* indicates the free nid list */
> > NAT_ENTRIES, /* indicates the cached nat entry */
> > - DIRTY_DENTS /* indicates dirty dentry pages */
> > + DIRTY_DENTS, /* indicates dirty dentry pages */
> > + INO_ENTRIES, /* indicates inode entries */
> > };
> >
> > struct nat_entry_set {
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 16721b5d..e094675 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -276,7 +276,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
> > {
> > /* check the # of cached NAT entries and prefree segments */
> > if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
> > - excess_prefree_segs(sbi))
> > + excess_prefree_segs(sbi) ||
> > + available_free_memory(sbi, INO_ENTRIES))
> > f2fs_sync_fs(sbi->sb, true);
> > }
> >
> > --
> > 2.1.1
> >
> >
> > ------------------------------------------------------------------------------
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-10 07:39:18

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 4/5] f2fs: write node pages if checkpoint is not doing

On Sat, Nov 08, 2014 at 11:36:08PM -0800, Jaegeuk Kim wrote:
> It needs to write node pages if checkpoint is not doing in order to avoid
> memory pressure.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/node.c | 10 ++++++----
> 1 file changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 4ea2c47..6f514fb 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1314,10 +1314,12 @@ static int f2fs_write_node_page(struct page *page,
> return 0;
> }
>
> - if (wbc->for_reclaim)
> - goto redirty_out;
> -
> - down_read(&sbi->node_write);
> + if (wbc->for_reclaim) {
> + if (!down_read_trylock(&sbi->node_write))
> + goto redirty_out;

Previously, we skipped write_page in the reclaim path, but from now on, we
will write out node pages to reclaim memory at any time except during a
checkpoint.
We should keep in mind that this may break bio merging.
Got it.

Reviewed-by: Changman Lee <[email protected]>

> + } else {
> + down_read(&sbi->node_write);
> + }
> set_page_writeback(page);
> write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
> set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
> --
> 2.1.1
>
>
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-10 09:55:56

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/5] f2fs: disable roll-forward when active_logs = 2

On Sat, Nov 08, 2014 at 11:36:05PM -0800, Jaegeuk Kim wrote:
> The roll-forward mechanism should be activated when the number of active
> logs is not 2.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/file.c | 2 ++
> fs/f2fs/segment.c | 4 ++--
> 2 files changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 46311e7..54722a0 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
> need_cp = true;
> else if (test_opt(sbi, FASTBOOT))
> need_cp = true;
> + else if (sbi->active_logs == 2)
> + need_cp = true;
>
> return need_cp;
> }
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 2fb3d7f..16721b5d 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
> else
> return CURSEG_COLD_DATA;
> } else {
> - if (IS_DNODE(page) && !is_cold_node(page))
> - return CURSEG_HOT_NODE;
> + if (IS_DNODE(page) && is_cold_node(page))
> + return CURSEG_WARM_NODE;

Hi Jaegeuk,

We should take hot/cold separation into account as well.
In the case of a dir inode, it will be mixed with COLD_NODE.
If it's a trade-off, let's kindly note it in the comments.

Regards,
Changman

> else
> return CURSEG_COLD_NODE;
> }
> --
> 2.1.1
>
>
> ------------------------------------------------------------------------------
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-10 15:08:03

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/5] f2fs: disable roll-forward when active_logs = 2

Hi Changman,

On Mon, Nov 10, 2014 at 06:54:37PM +0900, Changman Lee wrote:
> On Sat, Nov 08, 2014 at 11:36:05PM -0800, Jaegeuk Kim wrote:
> > The roll-forward mechanism should be activated when the number of active
> > logs is not 2.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/file.c | 2 ++
> > fs/f2fs/segment.c | 4 ++--
> > 2 files changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 46311e7..54722a0 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
> > need_cp = true;
> > else if (test_opt(sbi, FASTBOOT))
> > need_cp = true;
> > + else if (sbi->active_logs == 2)
> > + need_cp = true;
> >
> > return need_cp;
> > }
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 2fb3d7f..16721b5d 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
> > else
> > return CURSEG_COLD_DATA;
> > } else {
> > - if (IS_DNODE(page) && !is_cold_node(page))
> > - return CURSEG_HOT_NODE;
> > + if (IS_DNODE(page) && is_cold_node(page))
> > + return CURSEG_WARM_NODE;
>
> Hi Jaegeuk,
>
> We should take hot/cold seperation into account as well.
> In case of dir inode, it will be mixed with COLD_NODE.
> If it's trade-off, let's notice it kindly as comments.

NAK.
This patch tries to fix a bug, which is not a trade-off.
We should write files' direct node blocks in CURSEG_WARM_NODE for recovery.

Thanks,

>
> Regards,
> Changman
>
> > else
> > return CURSEG_COLD_NODE;
> > }
> > --
> > 2.1.1
> >
> >
> > ------------------------------------------------------------------------------
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-11 22:44:31

by Changman Lee

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/5] f2fs: disable roll-forward when active_logs = 2

On Mon, Nov 10, 2014 at 07:07:59AM -0800, Jaegeuk Kim wrote:
> Hi Changman,
>
> On Mon, Nov 10, 2014 at 06:54:37PM +0900, Changman Lee wrote:
> > On Sat, Nov 08, 2014 at 11:36:05PM -0800, Jaegeuk Kim wrote:
> > > The roll-forward mechanism should be activated when the number of active
> > > logs is not 2.
> > >
> > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > ---
> > > fs/f2fs/file.c | 2 ++
> > > fs/f2fs/segment.c | 4 ++--
> > > 2 files changed, 4 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index 46311e7..54722a0 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
> > > need_cp = true;
> > > else if (test_opt(sbi, FASTBOOT))
> > > need_cp = true;
> > > + else if (sbi->active_logs == 2)
> > > + need_cp = true;
> > >
> > > return need_cp;
> > > }
> > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > index 2fb3d7f..16721b5d 100644
> > > --- a/fs/f2fs/segment.c
> > > +++ b/fs/f2fs/segment.c
> > > @@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
> > > else
> > > return CURSEG_COLD_DATA;
> > > } else {
> > > - if (IS_DNODE(page) && !is_cold_node(page))
> > > - return CURSEG_HOT_NODE;
> > > + if (IS_DNODE(page) && is_cold_node(page))
> > > + return CURSEG_WARM_NODE;
> >
> > Hi Jaegeuk,
> >
> > We should take hot/cold seperation into account as well.
> > In case of dir inode, it will be mixed with COLD_NODE.
> > If it's trade-off, let's notice it kindly as comments.
>
> NAK.
> This patch tries to fix a bug, which is not a trade-off.
> We should write files' direct node blocks in CURSEG_WARM_NODE for recovery.
>
> Thanks,

Okay, the word 'trade-off' was wrong. We must be able to do recovery.
However, we break the hot/cold separation rule that we want. So I thought we
should note its negative effect.
Anyway, how about putting WARM and HOT together instead of HOT and COLD?
We can still tell whether they are direct nodes with an fsync_mark at
recovery time, even though HOT/WARM are mixed.
Let me know if I have misunderstood something.

Thanks,

>
> >
> > Regards,
> > Changman
> >
> > > else
> > > return CURSEG_COLD_NODE;
> > > }
> > > --
> > > 2.1.1
> > >
> > >
> > > ------------------------------------------------------------------------------
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

2014-11-14 01:21:33

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH 1/5] f2fs: disable roll-forward when active_logs = 2

On Wed, Nov 12, 2014 at 07:43:12AM +0900, Changman Lee wrote:
> On Mon, Nov 10, 2014 at 07:07:59AM -0800, Jaegeuk Kim wrote:
> > Hi Changman,
> >
> > On Mon, Nov 10, 2014 at 06:54:37PM +0900, Changman Lee wrote:
> > > On Sat, Nov 08, 2014 at 11:36:05PM -0800, Jaegeuk Kim wrote:
> > > > The roll-forward mechanism should be activated when the number of active
> > > > logs is not 2.
> > > >
> > > > Signed-off-by: Jaegeuk Kim <[email protected]>
> > > > ---
> > > > fs/f2fs/file.c | 2 ++
> > > > fs/f2fs/segment.c | 4 ++--
> > > > 2 files changed, 4 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > index 46311e7..54722a0 100644
> > > > --- a/fs/f2fs/file.c
> > > > +++ b/fs/f2fs/file.c
> > > > @@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode)
> > > > need_cp = true;
> > > > else if (test_opt(sbi, FASTBOOT))
> > > > need_cp = true;
> > > > + else if (sbi->active_logs == 2)
> > > > + need_cp = true;
> > > >
> > > > return need_cp;
> > > > }
> > > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > > > index 2fb3d7f..16721b5d 100644
> > > > --- a/fs/f2fs/segment.c
> > > > +++ b/fs/f2fs/segment.c
> > > > @@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
> > > > else
> > > > return CURSEG_COLD_DATA;
> > > > } else {
> > > > - if (IS_DNODE(page) && !is_cold_node(page))
> > > > - return CURSEG_HOT_NODE;
> > > > + if (IS_DNODE(page) && is_cold_node(page))
> > > > + return CURSEG_WARM_NODE;
> > >
> > > Hi Jaegeuk,
> > >
> > > We should take hot/cold seperation into account as well.
> > > In case of dir inode, it will be mixed with COLD_NODE.
> > > If it's trade-off, let's notice it kindly as comments.
> >
> > NAK.
> > This patch tries to fix a bug, which is not a trade-off.
> > We should write files' direct node blocks in CURSEG_WARM_NODE for recovery.
> >
> > Thanks,
>
> Okay, a word of 'trade-off' is wrong. We must be able to do recovery.
> However, we break a rule of hot/cold separation we want. So I thought we
> should notice its negative effect.
> Anyway, how about putting WARM and HOT together instead HOT and COLD?
> We can distinguish enough if they are direct node and have fsync_mark at
> recovery time although HOT/WARM are mixed.

We know that it's hard to pin down the negative effect of either approach,
since it depends on the workload.

At the very least, however, we need to avoid mixing dirs and files, since that
increases the roll-forward recovery time enormously.
IMO, this is reason enough not to do that.

Thanks,

> Let me know if there is my misundertanding.
>
> Thanks,
>
> >
> > >
> > > Regards,
> > > Changman
> > >
> > > > else
> > > > return CURSEG_COLD_NODE;
> > > > }
> > > > --
> > > > 2.1.1
> > > >
> > > >
> > > > ------------------------------------------------------------------------------
> > > > _______________________________________________
> > > > Linux-f2fs-devel mailing list
> > > > [email protected]
> > > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel