2013-03-24 23:41:35

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 1/4] f2fs: fix the recovery flow to handle errors correctly

We should handle errors during the recovery flow correctly.
For example, if we get -ENOMEM, we should report a mount failure instead of
conducting the remained mount procedure.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------
fs/f2fs/super.c | 9 +++++++--
3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5bb87e0..109e12d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1027,7 +1027,7 @@ void destroy_gc_caches(void);
/*
* recovery.c
*/
-void recover_fsync_data(struct f2fs_sb_info *);
+int recover_fsync_data(struct f2fs_sb_info *);
bool space_for_roll_forward(struct f2fs_sb_info *);

/*
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2d86eb2..61bdaa7 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)

lock_page(page);

- if (cp_ver != cpver_of_node(page)) {
- err = -EINVAL;
+ if (cp_ver != cpver_of_node(page))
goto unlock_out;
- }

if (!is_fsync_dnode(page))
goto next;
@@ -134,10 +132,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
FI_INC_LINK);
} else {
if (IS_INODE(page) && is_dent_dnode(page)) {
- if (recover_inode_page(sbi, page)) {
- err = -ENOMEM;
+ err = recover_inode_page(sbi, page);
+ if (err)
goto unlock_out;
- }
}

/* add this fsync inode to the list */
@@ -237,13 +234,14 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
iput(inode);
}

-static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
+static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page, block_t blkaddr)
{
unsigned int start, end;
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
+ int err = 0;

start = start_bidx_of_node(ofs_of_node(page));
if (IS_INODE(page))
@@ -252,8 +250,9 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
end = start + ADDRS_PER_BLOCK;

set_new_dnode(&dn, inode, NULL, NULL, 0);
- if (get_dnode_of_data(&dn, start, ALLOC_NODE))
- return;
+ err = get_dnode_of_data(&dn, start, ALLOC_NODE);
+ if (err)
+ return err;

wait_on_page_writeback(dn.node_page);

@@ -298,14 +297,16 @@ static void do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,

recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr);
f2fs_put_dnode(&dn);
+ return 0;
}

-static void recover_data(struct f2fs_sb_info *sbi,
+static int recover_data(struct f2fs_sb_info *sbi,
struct list_head *head, int type)
{
unsigned long long cp_ver = le64_to_cpu(sbi->ckpt->checkpoint_ver);
struct curseg_info *curseg;
struct page *page;
+ int err = 0;
block_t blkaddr;

/* get node pages in the current segment */
@@ -315,13 +316,15 @@ static void recover_data(struct f2fs_sb_info *sbi,
/* read node page */
page = alloc_page(GFP_NOFS | __GFP_ZERO);
if (IS_ERR(page))
- return;
+ return -ENOMEM;
+
lock_page(page);

while (1) {
struct fsync_inode_entry *entry;

- if (f2fs_readpage(sbi, page, blkaddr, READ_SYNC))
+ err = f2fs_readpage(sbi, page, blkaddr, READ_SYNC);
+ if (err)
goto out;

lock_page(page);
@@ -333,7 +336,9 @@ static void recover_data(struct f2fs_sb_info *sbi,
if (!entry)
goto next;

- do_recover_data(sbi, entry->inode, page, blkaddr);
+ err = do_recover_data(sbi, entry->inode, page, blkaddr);
+ if (err)
+ goto out;

if (entry->blkaddr == blkaddr) {
iput(entry->inode);
@@ -349,22 +354,26 @@ unlock_out:
out:
__free_pages(page, 0);

- allocate_new_segments(sbi);
+ if (!err)
+ allocate_new_segments(sbi);
+ return err;
}

-void recover_fsync_data(struct f2fs_sb_info *sbi)
+int recover_fsync_data(struct f2fs_sb_info *sbi)
{
struct list_head inode_list;
+ int err;

fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry), NULL);
if (unlikely(!fsync_entry_slab))
- return;
+ return -ENOMEM;

INIT_LIST_HEAD(&inode_list);

/* step #1: find fsynced inode numbers */
- if (find_fsync_dnodes(sbi, &inode_list))
+ err = find_fsync_dnodes(sbi, &inode_list);
+ if (err)
goto out;

if (list_empty(&inode_list))
@@ -372,11 +381,12 @@ void recover_fsync_data(struct f2fs_sb_info *sbi)

/* step #2: recover data */
sbi->por_doing = 1;
- recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
+ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
sbi->por_doing = 0;
BUG_ON(!list_empty(&inode_list));
out:
destroy_fsync_dnodes(sbi, &inode_list);
kmem_cache_destroy(fsync_entry_slab);
write_checkpoint(sbi, false);
+ return err;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c9ef88d..252890e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -642,8 +642,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
}

/* recover fsynced data */
- if (!test_opt(sbi, DISABLE_ROLL_FORWARD))
- recover_fsync_data(sbi);
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ err = recover_fsync_data(sbi);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR, "Failed to recover fsync data");
+ goto free_root_inode;
+ }
+ }

/* After POR, we can run background GC thread */
err = start_gc_thread(sbi);
--
1.8.1.3.566.gaa39828


2013-03-24 23:41:10

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

This patch removes data_version check flow during the fsync call.
The original purpose for the use of data_version was to avoid writng inode
pages redundantly by the fsync calls repeatedly.
However, when user can modify file meta and then call fsync, we should not
skip fsync procedure.
So, let's remove this condition check and hope that user triggers in right
manner.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 3 ---
fs/f2fs/f2fs.h | 1 -
fs/f2fs/file.c | 10 ----------
fs/f2fs/inode.c | 1 -
4 files changed, 15 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ea8be6f..47a2d7c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -435,7 +435,6 @@ static int f2fs_read_data_pages(struct file *file,
int do_write_data_page(struct page *page)
{
struct inode *inode = page->mapping->host;
- struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
block_t old_blk_addr, new_blk_addr;
struct dnode_of_data dn;
int err = 0;
@@ -465,8 +464,6 @@ int do_write_data_page(struct page *page)
write_data_page(inode, page, &dn,
old_blk_addr, &new_blk_addr);
update_extent_cache(new_blk_addr, &dn);
- F2FS_I(inode)->data_version =
- le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
}
out_writepage:
f2fs_put_dnode(&dn);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 109e12d..380e2b3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -159,7 +159,6 @@ struct f2fs_inode_info {

/* Use below internally in f2fs*/
unsigned long flags; /* use to pass per-file flags */
- unsigned long long data_version;/* latest version of data for fsync */
atomic_t dirty_dents; /* # of dirty dentry pages */
f2fs_hash_t chash; /* hash value of given file name */
unsigned int clevel; /* maximum level of given file name */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ff018a4..d65fcad 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -124,7 +124,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
- unsigned long long cur_version;
int ret = 0;
bool need_cp = false;
struct writeback_control wbc = {
@@ -148,15 +147,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out;

- mutex_lock(&sbi->cp_mutex);
- cur_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver);
- mutex_unlock(&sbi->cp_mutex);
-
- if (F2FS_I(inode)->data_version != cur_version &&
- !(inode->i_state & I_DIRTY))
- goto out;
- F2FS_I(inode)->data_version--;
-
if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index e0e8308..f798ddf 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -80,7 +80,6 @@ static int do_read_inode(struct inode *inode)
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
fi->flags = 0;
- fi->data_version = le64_to_cpu(F2FS_CKPT(sbi)->checkpoint_ver) - 1;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
get_extent_info(&fi->ext, ri->i_ext);
--
1.8.1.3.566.gaa39828

2013-03-24 23:41:37

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 3/4] f2fs: remain nat cache entries for further free nid allocation

In the checkpoint flow, the f2fs investigates the total nat cache entries.
Previously, if an entry has NULL_ADDR, f2fs drops the entry and adds the
obsolete nid to the free nid list.
However, this free nid will be reused sooner, resulting in its nat entry miss.
In order to avoid this, we don't need to drop the nat cache entry at this moment.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/node.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f7b03ba..0177f94 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1621,11 +1621,11 @@ flush_now:
nid_in_journal(sum, offset) = cpu_to_le32(nid);
}

- if (nat_get_blkaddr(ne) == NULL_ADDR) {
+ if (nat_get_blkaddr(ne) == NULL_ADDR &&
+ !add_free_nid(NM_I(sbi), nid)) {
write_lock(&nm_i->nat_tree_lock);
__del_from_nat_cache(nm_i, ne);
write_unlock(&nm_i->nat_tree_lock);
- add_free_nid(NM_I(sbi), nid);
} else {
write_lock(&nm_i->nat_tree_lock);
__clear_nat_cache_dirty(nm_i, ne);
--
1.8.1.3.566.gaa39828

2013-03-24 23:41:39

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH 4/4] f2fs: fix to give correct parent inode number for roll forward

When we recover fsync'ed data after power-off-recovery, we should guarantee
that any parent inode number should be correct for each direct inode blocks.

So, let's make the following rules.

- The fsync should do checkpoint to all the inodes that were experienced hard
links.

- So, the only normal files can be recovered by roll-forward.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/file.c | 22 ++--------------------
fs/f2fs/namei.c | 14 ++++++++++----
fs/f2fs/node.h | 15 +++++++++++++++
4 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 380e2b3..77e2eb0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -148,6 +148,7 @@ struct extent_info {
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/
#define FADVISE_COLD_BIT 0x01
+#define FADVISE_CP_BIT 0x02

struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
@@ -825,7 +826,6 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
- FI_NEED_CP, /* need to do checkpoint during fsync */
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d65fcad..e031f57 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -103,23 +103,6 @@ static const struct vm_operations_struct f2fs_file_vm_ops = {
.remap_pages = generic_file_remap_pages,
};

-static int need_to_sync_dir(struct f2fs_sb_info *sbi, struct inode *inode)
-{
- struct dentry *dentry;
- nid_t pino;
-
- inode = igrab(inode);
- dentry = d_find_any_alias(inode);
- if (!dentry) {
- iput(inode);
- return 0;
- }
- pino = dentry->d_parent->d_inode->i_ino;
- dput(dentry);
- iput(inode);
- return !is_checkpointed_node(sbi, pino);
-}
-
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
@@ -149,17 +132,16 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
need_cp = true;
- else if (is_inode_flag_set(F2FS_I(inode), FI_NEED_CP))
+ else if (is_cp_file(inode))
need_cp = true;
else if (!space_for_roll_forward(sbi))
need_cp = true;
- else if (need_to_sync_dir(sbi, inode))
+ else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;

if (need_cp) {
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
- clear_inode_flag(F2FS_I(inode), FI_NEED_CP);
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d4a171b..7c6e219 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -15,6 +15,7 @@
#include <linux/ctype.h>

#include "f2fs.h"
+#include "node.h"
#include "xattr.h"
#include "acl.h"

@@ -99,7 +100,7 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
/*
* Set multimedia files as cold files for hot/cold data separation
*/
-static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
+static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
{
int i;
@@ -108,7 +109,7 @@ static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode *inode,
int count = le32_to_cpu(sbi->raw_super->extension_count);
for (i = 0; i < count; i++) {
if (!is_multimedia_file(name, extlist[i])) {
- F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
+ set_cold_file(inode);
break;
}
}
@@ -130,7 +131,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
return PTR_ERR(inode);

if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
- set_cold_file(sbi, inode, dentry->d_name.name);
+ set_cold_files(sbi, inode, dentry->d_name.name);

inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
@@ -173,6 +174,12 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
goto out;

+ /*
+ * This file should be checkpointed during fsync.
+ * We lost i_pino from now on.
+ */
+ set_cp_file(inode);
+
d_instantiate(dentry, inode);
return 0;
out:
@@ -425,7 +432,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}

old_inode->i_ctime = CURRENT_TIME;
- set_inode_flag(F2FS_I(old_inode), FI_NEED_CP);
mark_inode_dirty(old_inode);

f2fs_delete_entry(old_entry, old_page, NULL);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index afdb130..d009cdf 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -277,6 +277,21 @@ static inline int is_cold_file(struct inode *inode)
return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT;
}

+static inline void set_cold_file(struct inode *inode)
+{
+ F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
+}
+
+static inline int is_cp_file(struct inode *inode)
+{
+ return F2FS_I(inode)->i_advise & FADVISE_CP_BIT;
+}
+
+static inline void set_cp_file(struct inode *inode)
+{
+ F2FS_I(inode)->i_advise |= FADVISE_CP_BIT;
+}
+
static inline int is_cold_data(struct page *page)
{
return PageChecked(page);
--
1.8.1.3.566.gaa39828

2013-03-25 06:30:19

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 1/4] f2fs: fix the recovery flow to handle errors correctly

2013/3/25, Jaegeuk Kim <[email protected]>:
> We should handle errors during the recovery flow correctly.
> For example, if we get -ENOMEM, we should report a mount failure instead of
> conducting the remained mount procedure.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/f2fs.h | 2 +-
> fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------
> fs/f2fs/super.c | 9 +++++++--
> 3 files changed, 36 insertions(+), 21 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 5bb87e0..109e12d 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1027,7 +1027,7 @@ void destroy_gc_caches(void);
> /*
> * recovery.c
> */
> -void recover_fsync_data(struct f2fs_sb_info *);
> +int recover_fsync_data(struct f2fs_sb_info *);
> bool space_for_roll_forward(struct f2fs_sb_info *);
>
> /*
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 2d86eb2..61bdaa7 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi,
> struct list_head *head)
>
> lock_page(page);
>
Hi Jaegeuk.
I have a question.
> - if (cp_ver != cpver_of_node(page)) {
> - err = -EINVAL;
> + if (cp_ver != cpver_of_node(page))
> goto unlock_out;
> - }
err = 0 is initialized to zero in the start of function
Why have you remove err = -EINVAL; code when mismatching cp_ver ?

Thanks.
>

2013-03-25 07:50:39

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 1/4] f2fs: fix the recovery flow to handle errors correctly

2013-03-25 (월), 15:30 +0900, Namjae Jeon:
> 2013/3/25, Jaegeuk Kim <[email protected]>:
> > We should handle errors during the recovery flow correctly.
> > For example, if we get -ENOMEM, we should report a mount failure instead of
> > conducting the remained mount procedure.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> > ---
> > fs/f2fs/f2fs.h | 2 +-
> > fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------
> > fs/f2fs/super.c | 9 +++++++--
> > 3 files changed, 36 insertions(+), 21 deletions(-)
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5bb87e0..109e12d 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -1027,7 +1027,7 @@ void destroy_gc_caches(void);
> > /*
> > * recovery.c
> > */
> > -void recover_fsync_data(struct f2fs_sb_info *);
> > +int recover_fsync_data(struct f2fs_sb_info *);
> > bool space_for_roll_forward(struct f2fs_sb_info *);
> >
> > /*
> > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > index 2d86eb2..61bdaa7 100644
> > --- a/fs/f2fs/recovery.c
> > +++ b/fs/f2fs/recovery.c
> > @@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi,
> > struct list_head *head)
> >
> > lock_page(page);
> >
> Hi Jaegeuk.
> I have a question.
> > - if (cp_ver != cpver_of_node(page)) {
> > - err = -EINVAL;
> > + if (cp_ver != cpver_of_node(page))
> > goto unlock_out;
> > - }
> err = 0 is initialized to zero in the start of function
> Why have you remove err = -EINVAL; code when mismatching cp_ver ?

This ending condition is used to find the latest node pages that we have
to recover, not to detect an error to exit the recovery routine.
For example, the error conditions include -ENOMEM or -EIO, something
like such the obvious errors.
Thanks,

>
> Thanks.
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Jaegeuk Kim
Samsung


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2013-03-26 00:44:27

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 1/4] f2fs: fix the recovery flow to handle errors correctly

2013/3/25, Jaegeuk Kim <[email protected]>:
> 2013-03-25 (월), 15:30 +0900, Namjae Jeon:
>> 2013/3/25, Jaegeuk Kim <[email protected]>:
>> > We should handle errors during the recovery flow correctly.
>> > For example, if we get -ENOMEM, we should report a mount failure instead
>> > of
>> > conducting the remained mount procedure.
>> >
>> > Signed-off-by: Jaegeuk Kim <[email protected]>
>> > ---
>> > fs/f2fs/f2fs.h | 2 +-
>> > fs/f2fs/recovery.c | 46 ++++++++++++++++++++++++++++------------------
>> > fs/f2fs/super.c | 9 +++++++--
>> > 3 files changed, 36 insertions(+), 21 deletions(-)
>> >
>> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> > index 5bb87e0..109e12d 100644
>> > --- a/fs/f2fs/f2fs.h
>> > +++ b/fs/f2fs/f2fs.h
>> > @@ -1027,7 +1027,7 @@ void destroy_gc_caches(void);
>> > /*
>> > * recovery.c
>> > */
>> > -void recover_fsync_data(struct f2fs_sb_info *);
>> > +int recover_fsync_data(struct f2fs_sb_info *);
>> > bool space_for_roll_forward(struct f2fs_sb_info *);
>> >
>> > /*
>> > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
>> > index 2d86eb2..61bdaa7 100644
>> > --- a/fs/f2fs/recovery.c
>> > +++ b/fs/f2fs/recovery.c
>> > @@ -118,10 +118,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info
>> > *sbi,
>> > struct list_head *head)
>> >
>> > lock_page(page);
>> >
>> Hi Jaegeuk.
>> I have a question.
>> > - if (cp_ver != cpver_of_node(page)) {
>> > - err = -EINVAL;
>> > + if (cp_ver != cpver_of_node(page))
>> > goto unlock_out;
>> > - }
>> err = 0 is initialized to zero in the start of function
>> Why have you remove err = -EINVAL; code when mismatching cp_ver ?
>
> This ending condition is used to find the latest node pages that we have
> to recover, not to detect an error to exit the recovery routine.
> For example, the error conditions include -ENOMEM or -EIO, something
> like such the obvious errors.
> Thanks,
Yes, Right. It does make sense.
Thanks for explanation :)

You can add:
Reviewed-by: Namjae Jeon <[email protected]>
>
>>
>> Thanks.
>> >
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> --
> Jaegeuk Kim
> Samsung
>

2013-03-26 00:48:20

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013/3/25, Jaegeuk Kim <[email protected]>:
> This patch removes data_version check flow during the fsync call.
> The original purpose for the use of data_version was to avoid writng inode
> pages redundantly by the fsync calls repeatedly.
Hi Jaegeuk.
> However, when user can modify file meta and then call fsync, we should not
> skip fsync procedure.
I have a question.
Which case does user can directly modify meta ? Recovery tool ?

Thanks.

> So, let's remove this condition check and hope that user triggers in right
> manner.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>

2013-03-26 00:49:31

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 3/4] f2fs: remain nat cache entries for further free nid allocation

2013/3/25, Jaegeuk Kim <[email protected]>:
> In the checkpoint flow, the f2fs investigates the total nat cache entries.
> Previously, if an entry has NULL_ADDR, f2fs drops the entry and adds the
> obsolete nid to the free nid list.
> However, this free nid will be reused sooner, resulting in its nat entry
> miss.
> In order to avoid this, we don't need to drop the nat cache entry at this
> moment.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
Looks good to me.
Reviewed-by: Namjae Jeon <[email protected]>

Thanks~

2013-03-27 00:19:23

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-26 (화), 09:48 +0900, Namjae Jeon:
> 2013/3/25, Jaegeuk Kim <[email protected]>:
> > This patch removes data_version check flow during the fsync call.
> > The original purpose for the use of data_version was to avoid writng inode
> > pages redundantly by the fsync calls repeatedly.
> Hi Jaegeuk.
> > However, when user can modify file meta and then call fsync, we should not
> > skip fsync procedure.
> I have a question.
> Which case does user can directly modify meta ? Recovery tool ?

The meta means the inode information like atime, mtime, size, and so on,
which can be modified by setattr() or something other vfs apis.
Thanks,

>
> Thanks.
>
> > So, let's remove this condition check and hope that user triggers in right
> > manner.
> >
> > Signed-off-by: Jaegeuk Kim <[email protected]>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Jaegeuk Kim
Samsung


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2013-03-27 00:57:48

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013/3/27, Jaegeuk Kim <[email protected]>:
> 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
>> 2013/3/25, Jaegeuk Kim <[email protected]>:
>> > This patch removes data_version check flow during the fsync call.
>> > The original purpose for the use of data_version was to avoid writng
>> > inode
>> > pages redundantly by the fsync calls repeatedly.
>> Hi Jaegeuk.
>> > However, when user can modify file meta and then call fsync, we should
>> > not
>> > skip fsync procedure.
>> I have a question.
>> Which case does user can directly modify meta ? Recovery tool ?
>
> The meta means the inode information like atime, mtime, size, and so on,
> which can be modified by setattr() or something other vfs apis.
> Thanks,
I understood. Thanks for your explanation :)
One more,,
When inode state is !(inode->i_state & I_DIRTY)), We don't need to skip ?

Thanks.
>
>>
>> Thanks.
>>
>> > So, let's remove this condition check and hope that user triggers in
>> > right
>> > manner.
>> >
>> > Signed-off-by: Jaegeuk Kim <[email protected]>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>
> --
> Jaegeuk Kim
> Samsung
>

2013-03-27 01:19:37

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013-03-27 (수), 09:57 +0900, Namjae Jeon:
> 2013/3/27, Jaegeuk Kim <[email protected]>:
> > 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
> >> 2013/3/25, Jaegeuk Kim <[email protected]>:
> >> > This patch removes data_version check flow during the fsync call.
> >> > The original purpose for the use of data_version was to avoid writng
> >> > inode
> >> > pages redundantly by the fsync calls repeatedly.
> >> Hi Jaegeuk.
> >> > However, when user can modify file meta and then call fsync, we should
> >> > not
> >> > skip fsync procedure.
> >> I have a question.
> >> Which case does user can directly modify meta ? Recovery tool ?
> >
> > The meta means the inode information like atime, mtime, size, and so on,
> > which can be modified by setattr() or something other vfs apis.
> > Thanks,
> I understood. Thanks for your explanation :)
> One more,,
> When inode state is !(inode->i_state & I_DIRTY)), We don't need to skip ?

Even though fsync writes no data and the inode is clean, we should mark
the inode to recover after power-off-recovery.
Any data and its inode can be written to the disk clearly before fsync
was called.
Thanks,

>
> Thanks.
> >
> >>
> >> Thanks.
> >>
> >> > So, let's remove this condition check and hope that user triggers in
> >> > right
> >> > manner.
> >> >
> >> > Signed-off-by: Jaegeuk Kim <[email protected]>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
> >> in
> >> the body of a message to [email protected]
> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >> Please read the FAQ at http://www.tux.org/lkml/
> >
> > --
> > Jaegeuk Kim
> > Samsung
> >
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Jaegeuk Kim
Samsung


Attachments:
signature.asc (836.00 B)
This is a digitally signed message part

2013-03-27 01:28:58

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 2/4] f2fs: do not skip writing file meta during fsync

2013/3/27, Jaegeuk Kim <[email protected]>:
> 2013-03-27 (수), 09:57 +0900, Namjae Jeon:
>> 2013/3/27, Jaegeuk Kim <[email protected]>:
>> > 2013-03-26 (화), 09:48 +0900, Namjae Jeon:
>> >> 2013/3/25, Jaegeuk Kim <[email protected]>:
>> >> > This patch removes data_version check flow during the fsync call.
>> >> > The original purpose for the use of data_version was to avoid writng
>> >> > inode
>> >> > pages redundantly by the fsync calls repeatedly.
>> >> Hi Jaegeuk.
>> >> > However, when user can modify file meta and then call fsync, we
>> >> > should
>> >> > not
>> >> > skip fsync procedure.
>> >> I have a question.
>> >> Which case does user can directly modify meta ? Recovery tool ?
>> >
>> > The meta means the inode information like atime, mtime, size, and so
>> > on,
>> > which can be modified by setattr() or something other vfs apis.
>> > Thanks,
>> I understood. Thanks for your explanation :)
>> One more,,
>> When inode state is !(inode->i_state & I_DIRTY)), We don't need to skip
>> ?
>
> Even though fsync writes no data and the inode is clean, we should mark
> the inode to recover after power-off-recovery.
> Any data and its inode can be written to the disk clearly before fsync
> was called.
Okay, Clear.
Thanks Jaegeuk!

> Thanks,
>
>>
>> Thanks.
>> >
>> >>
>> >> Thanks.
>> >>
>> >> > So, let's remove this condition check and hope that user triggers in
>> >> > right
>> >> > manner.
>> >> >
>> >> > Signed-off-by: Jaegeuk Kim <[email protected]>
>> >> --
>> >> To unsubscribe from this list: send the line "unsubscribe
>> >> linux-kernel"
>> >> in
>> >> the body of a message to [email protected]
>> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> >> Please read the FAQ at http://www.tux.org/lkml/
>> >
>> > --
>> > Jaegeuk Kim
>> > Samsung
>> >
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel"
>> in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>
> --
> Jaegeuk Kim
> Samsung
>

2013-03-27 01:41:49

by Namjae Jeon

[permalink] [raw]
Subject: Re: [PATCH 4/4] f2fs: fix to give correct parent inode number for roll forward

> */
> -static inline void set_cold_file(struct f2fs_sb_info *sbi, struct inode
> *inode,
> +static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode
> *inode,
> const unsigned char *name)
> {
> int i;
> @@ -108,7 +109,7 @@ static inline void set_cold_file(struct f2fs_sb_info
> *sbi, struct inode *inode,
> int count = le32_to_cpu(sbi->raw_super->extension_count);
> for (i = 0; i < count; i++) {
> if (!is_multimedia_file(name, extlist[i])) {
> - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT;
> + set_cold_file(inode);
> break;
> }
> }
It is just my private opinion.
How about use this name "set_cold_file_from_list instead of set_cold_files ?

Thanks.