2023-04-08 12:43:24

by Chao Yu

[permalink] [raw]
Subject: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

This patch supports errors=remount-ro|continue|panic mount option,
by default it uses "remount-ro" mode.

Signed-off-by: Chao Yu <[email protected]>
Signed-off-by: Yangtao Li <[email protected]>
---
v8:
- record stop reason synchronously if it's from ioctl shutdown interface
- fix typo in doc and commit message.
- use f2fs_hw_is_readonly() instead of bdev_read_only()
Documentation/filesystems/f2fs.rst | 5 ++
fs/f2fs/checkpoint.c | 7 +-
fs/f2fs/f2fs.h | 20 ++++-
fs/f2fs/file.c | 5 --
fs/f2fs/gc.c | 2 +-
fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
6 files changed, 149 insertions(+), 24 deletions(-)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 2055e72871fe..ba4c02fad647 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
+errors=%s Specify f2fs behavior on critical errors. This supports modes:
+ "panic", "continue" and "remount-ro", respectively, trigger
+ panic immediately, continue without doing anything, and remount
+ the partition in read-only mode. By default it uses "remount-ro"
+ mode.
======================== ============================================================

Debugfs Entries
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 73ae4e85e70d..e6b266bb9ce0 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
unsigned char reason)
{
f2fs_build_fault_attr(sbi, 0, 0);
- set_ckpt_flags(sbi, CP_ERROR_FLAG);
- if (!end_io) {
+ if (!end_io)
f2fs_flush_merged_writes(sbi);
-
- f2fs_handle_stop(sbi, reason);
- }
+ f2fs_handle_critical_error(sbi, reason, end_io);
}

/*
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ac4942f8e83e..7d9c5a65f974 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -162,6 +162,7 @@ struct f2fs_mount_info {
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
int memory_mode; /* memory mode */
+ int errors; /* errors parameter */
int discard_unit; /*
* discard command's offset/size should
* be aligned to this unit: block,
@@ -1370,6 +1371,12 @@ enum {
MEMORY_MODE_LOW, /* memory mode for low memry devices */
};

+enum errors_option {
+ MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
+ MOUNT_ERRORS_CONTINUE, /* continue on errors */
+ MOUNT_ERRORS_PANIC, /* panic on errors */
+};
+
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
@@ -1786,8 +1793,14 @@ struct f2fs_sb_info {

struct workqueue_struct *post_read_wq; /* post read workqueue */

- unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
- spinlock_t error_lock; /* protect errors array */
+ /*
+ * If we are in irq context, let's update error information into
+ * on-disk superblock in the work.
+ */
+ struct work_struct s_error_work;
+ unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
+ unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
+ spinlock_t error_lock; /* protect errors/stop_reason array */
bool error_dirty; /* errors of sb is dirty */

struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
@@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+ bool irq_context);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 14e9a20e68df..10743e864d57 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
ret = 0;
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
trace_f2fs_shutdown(sbi, in, ret);
}
return ret;
@@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev);
break;
case F2FS_GOING_DOWN_METASYNC:
@@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NEED_FSCK:
set_sbi_flag(sbi, SBI_NEED_FSCK);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f1d0dd9c5a6c..6dc2bc710ba8 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
if (gc_th->gc_wake)
gc_th->gc_wake = false;

- if (try_to_freeze()) {
+ if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
stat_other_skip_bggc_count(sbi);
continue;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 23574894e170..1573bf123197 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -164,6 +164,7 @@ enum {
Opt_discard_unit,
Opt_memory_mode,
Opt_age_extent_cache,
+ Opt_errors,
Opt_err,
};

@@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
{Opt_discard_unit, "discard_unit=%s"},
{Opt_memory_mode, "memory=%s"},
{Opt_age_extent_cache, "age_extent_cache"},
+ {Opt_errors, "errors=%s"},
{Opt_err, NULL},
};

@@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_age_extent_cache:
set_opt(sbi, AGE_EXTENT_CACHE);
break;
+ case Opt_errors:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "remount-ro")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_READONLY;
+ } else if (!strcmp(name, "continue")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_CONTINUE;
+ } else if (!strcmp(name, "panic")) {
+ F2FS_OPTION(sbi).errors =
+ MOUNT_ERRORS_PANIC;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_node_manager(sbi);
f2fs_destroy_segment_manager(sbi);

+ /* flush s_error_work before sbi destroy */
+ flush_work(&sbi->s_error_work);
+
f2fs_destroy_post_read_wq(sbi);

kvfree(sbi->ckpt);
@@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
seq_printf(seq, ",memory=%s", "low");

+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ seq_printf(seq, ",errors=%s", "remount-ro");
+ else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
+ seq_printf(seq, ",errors=%s", "continue");
+ else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
+ seq_printf(seq, ",errors=%s", "panic");
+
return 0;
}

@@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
}
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
+ F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;

sbi->sb->s_flags &= ~SB_INLINECRYPT;

@@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto restore_opts;

+ /* flush outstanding errors before changing fs state */
+ flush_work(&sbi->s_error_work);
+
/*
* Previous and new state of filesystem is RO,
* so skip checking GC and FLUSH_MERGE conditions.
@@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}

-void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
+static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->error_lock, flags);
+ if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
+ sbi->stop_reason[reason]++;
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
+}
+
+static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+ unsigned long flags;
int err;

f2fs_down_write(&sbi->sb_lock);

- if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
- raw_super->s_stop_reason[reason]++;
+ spin_lock_irqsave(&sbi->error_lock, flags);
+ memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);

err = f2fs_commit_super(sbi, false);
- if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
- reason, err);
+
f2fs_up_write(&sbi->sb_lock);
+ if (err)
+ f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
}

void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
{
- spin_lock(&sbi->error_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->error_lock, flags);
if (!test_bit(flag, (unsigned long *)sbi->errors)) {
set_bit(flag, (unsigned long *)sbi->errors);
sbi->error_dirty = true;
}
- spin_unlock(&sbi->error_lock);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);
}

static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
{
+ unsigned long flags;
bool need_update = false;

- spin_lock(&sbi->error_lock);
+ spin_lock_irqsave(&sbi->error_lock, flags);
if (sbi->error_dirty) {
memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
MAX_F2FS_ERRORS);
sbi->error_dirty = false;
need_update = true;
}
- spin_unlock(&sbi->error_lock);
+ spin_unlock_irqrestore(&sbi->error_lock, flags);

return need_update;
}
@@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
f2fs_up_write(&sbi->sb_lock);
}

+static bool system_going_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
+void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
+ bool irq_context)
+{
+ struct super_block *sb = sbi->sb;
+ bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
+ bool continue_fs = !shutdown &&
+ F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
+
+ set_ckpt_flags(sbi, CP_ERROR_FLAG);
+
+ if (!f2fs_hw_is_readonly(sbi)) {
+ save_stop_reason(sbi, reason);
+
+ if (irq_context && !shutdown)
+ schedule_work(&sbi->s_error_work);
+ else
+ f2fs_record_stop_reason(sbi);
+ }
+
+ /*
+ * We force ERRORS_RO behavior when system is rebooting. Otherwise we
+ * could panic during 'reboot -f' as the underlying device got already
+ * disabled.
+ */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
+ !shutdown && !system_going_down() &&
+ !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+ panic("F2FS-fs (device %s): panic forced after error\n",
+ sb->s_id);
+
+ if (shutdown)
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
+
+ /* continue filesystem operators if errors=continue */
+ if (continue_fs || f2fs_readonly(sb))
+ return;
+
+ f2fs_warn(sbi, "Remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_flags will be visible before
+ * ->s_flags update
+ */
+ smp_wmb();
+ sb->s_flags |= SB_RDONLY;
+}
+
+static void f2fs_record_error_work(struct work_struct *work)
+{
+ struct f2fs_sb_info *sbi = container_of(work,
+ struct f2fs_sb_info, s_error_work);
+
+ f2fs_record_stop_reason(sbi);
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;

+ INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+ memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);

/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sbi))
@@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_destroy_segment_manager(sbi);
stop_ckpt_thread:
f2fs_stop_ckpt_thread(sbi);
+ /* flush s_error_work before sbi destroy */
+ flush_work(&sbi->s_error_work);
f2fs_destroy_post_read_wq(sbi);
free_devices:
destroy_device_list(sbi);
--
2.36.1


2023-04-19 03:46:12

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

Ping,

Jaegeuk, do you still suffer any issue w/ this version?

Thanks,

On 2023/4/8 20:31, Chao Yu wrote:
> This patch supports errors=remount-ro|continue|panic mount option,
> by default it uses "remount-ro" mode.
>
> Signed-off-by: Chao Yu <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> ---
> v8:
> - record stop reason synchronously if it's from ioctl shutdown interface
> - fix typo in doc and commit message.
> - use f2fs_hw_is_readonly() instead of bdev_read_only()
> Documentation/filesystems/f2fs.rst | 5 ++
> fs/f2fs/checkpoint.c | 7 +-
> fs/f2fs/f2fs.h | 20 ++++-
> fs/f2fs/file.c | 5 --
> fs/f2fs/gc.c | 2 +-
> fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
> 6 files changed, 149 insertions(+), 24 deletions(-)
>
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..ba4c02fad647 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> data block update frequency of the extent per inode, in
> order to provide better temperature hints for data block
> allocation.
> +errors=%s Specify f2fs behavior on critical errors. This supports modes:
> + "panic", "continue" and "remount-ro", respectively, trigger
> + panic immediately, continue without doing anything, and remount
> + the partition in read-only mode. By default it uses "remount-ro"
> + mode.
> ======================== ============================================================
>
> Debugfs Entries
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 73ae4e85e70d..e6b266bb9ce0 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> unsigned char reason)
> {
> f2fs_build_fault_attr(sbi, 0, 0);
> - set_ckpt_flags(sbi, CP_ERROR_FLAG);
> - if (!end_io) {
> + if (!end_io)
> f2fs_flush_merged_writes(sbi);
> -
> - f2fs_handle_stop(sbi, reason);
> - }
> + f2fs_handle_critical_error(sbi, reason, end_io);
> }
>
> /*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ac4942f8e83e..7d9c5a65f974 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> int fs_mode; /* fs mode: LFS or ADAPTIVE */
> int bggc_mode; /* bggc mode: off, on or sync */
> int memory_mode; /* memory mode */
> + int errors; /* errors parameter */
> int discard_unit; /*
> * discard command's offset/size should
> * be aligned to this unit: block,
> @@ -1370,6 +1371,12 @@ enum {
> MEMORY_MODE_LOW, /* memory mode for low memry devices */
> };
>
> +enum errors_option {
> + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
> + MOUNT_ERRORS_CONTINUE, /* continue on errors */
> + MOUNT_ERRORS_PANIC, /* panic on errors */
> +};
> +
> static inline int f2fs_test_bit(unsigned int nr, char *addr);
> static inline void f2fs_set_bit(unsigned int nr, char *addr);
> static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>
> struct workqueue_struct *post_read_wq; /* post read workqueue */
>
> - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> - spinlock_t error_lock; /* protect errors array */
> + /*
> + * If we are in irq context, let's update error information into
> + * on-disk superblock in the work.
> + */
> + struct work_struct s_error_work;
> + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
> + spinlock_t error_lock; /* protect errors/stop_reason array */
> bool error_dirty; /* errors of sb is dirty */
>
> struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> int f2fs_quota_sync(struct super_block *sb, int type);
> loff_t max_file_blocks(struct inode *inode);
> void f2fs_quota_off_umount(struct super_block *sb);
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> + bool irq_context);
> void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> int f2fs_sync_fs(struct super_block *sb, int sync);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 14e9a20e68df..10743e864d57 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> ret = 0;
> f2fs_stop_checkpoint(sbi, false,
> STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> trace_f2fs_shutdown(sbi, in, ret);
> }
> return ret;
> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> if (ret)
> goto out;
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> thaw_bdev(sb->s_bdev);
> break;
> case F2FS_GOING_DOWN_METASYNC:
> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> if (ret)
> goto out;
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_NOSYNC:
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_METAFLUSH:
> f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_NEED_FSCK:
> set_sbi_flag(sbi, SBI_NEED_FSCK);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index f1d0dd9c5a6c..6dc2bc710ba8 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> if (gc_th->gc_wake)
> gc_th->gc_wake = false;
>
> - if (try_to_freeze()) {
> + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> stat_other_skip_bggc_count(sbi);
> continue;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 23574894e170..1573bf123197 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,7 @@ enum {
> Opt_discard_unit,
> Opt_memory_mode,
> Opt_age_extent_cache,
> + Opt_errors,
> Opt_err,
> };
>
> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> {Opt_discard_unit, "discard_unit=%s"},
> {Opt_memory_mode, "memory=%s"},
> {Opt_age_extent_cache, "age_extent_cache"},
> + {Opt_errors, "errors=%s"},
> {Opt_err, NULL},
> };
>
> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> case Opt_age_extent_cache:
> set_opt(sbi, AGE_EXTENT_CACHE);
> break;
> + case Opt_errors:
> + name = match_strdup(&args[0]);
> + if (!name)
> + return -ENOMEM;
> + if (!strcmp(name, "remount-ro")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_READONLY;
> + } else if (!strcmp(name, "continue")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_CONTINUE;
> + } else if (!strcmp(name, "panic")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_PANIC;
> + } else {
> + kfree(name);
> + return -EINVAL;
> + }
> + kfree(name);
> + break;
> default:
> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> p);
> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> f2fs_destroy_node_manager(sbi);
> f2fs_destroy_segment_manager(sbi);
>
> + /* flush s_error_work before sbi destroy */
> + flush_work(&sbi->s_error_work);
> +
> f2fs_destroy_post_read_wq(sbi);
>
> kvfree(sbi->ckpt);
> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> seq_printf(seq, ",memory=%s", "low");
>
> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> + seq_printf(seq, ",errors=%s", "remount-ro");
> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> + seq_printf(seq, ",errors=%s", "continue");
> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> + seq_printf(seq, ",errors=%s", "panic");
> +
> return 0;
> }
>
> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> }
> F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>
> sbi->sb->s_flags &= ~SB_INLINECRYPT;
>
> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> if (err)
> goto restore_opts;
>
> + /* flush outstanding errors before changing fs state */
> + flush_work(&sbi->s_error_work);
> +
> /*
> * Previous and new state of filesystem is RO,
> * so skip checking GC and FLUSH_MERGE conditions.
> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> return err;
> }
>
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sbi->error_lock, flags);
> + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> + sbi->stop_reason[reason]++;
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
> +}
> +
> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> {
> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> + unsigned long flags;
> int err;
>
> f2fs_down_write(&sbi->sb_lock);
>
> - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> - raw_super->s_stop_reason[reason]++;
> + spin_lock_irqsave(&sbi->error_lock, flags);
> + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>
> err = f2fs_commit_super(sbi, false);
> - if (err)
> - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> - reason, err);
> +
> f2fs_up_write(&sbi->sb_lock);
> + if (err)
> + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> }
>
> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> {
> - spin_lock(&sbi->error_lock);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sbi->error_lock, flags);
> if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> set_bit(flag, (unsigned long *)sbi->errors);
> sbi->error_dirty = true;
> }
> - spin_unlock(&sbi->error_lock);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
> }
>
> static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> {
> + unsigned long flags;
> bool need_update = false;
>
> - spin_lock(&sbi->error_lock);
> + spin_lock_irqsave(&sbi->error_lock, flags);
> if (sbi->error_dirty) {
> memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> MAX_F2FS_ERRORS);
> sbi->error_dirty = false;
> need_update = true;
> }
> - spin_unlock(&sbi->error_lock);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>
> return need_update;
> }
> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> f2fs_up_write(&sbi->sb_lock);
> }
>
> +static bool system_going_down(void)
> +{
> + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> + || system_state == SYSTEM_RESTART;
> +}
> +
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> + bool irq_context)
> +{
> + struct super_block *sb = sbi->sb;
> + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> + bool continue_fs = !shutdown &&
> + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> +
> + set_ckpt_flags(sbi, CP_ERROR_FLAG);
> +
> + if (!f2fs_hw_is_readonly(sbi)) {
> + save_stop_reason(sbi, reason);
> +
> + if (irq_context && !shutdown)
> + schedule_work(&sbi->s_error_work);
> + else
> + f2fs_record_stop_reason(sbi);
> + }
> +
> + /*
> + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> + * could panic during 'reboot -f' as the underlying device got already
> + * disabled.
> + */
> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> + !shutdown && !system_going_down() &&
> + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> + panic("F2FS-fs (device %s): panic forced after error\n",
> + sb->s_id);
> +
> + if (shutdown)
> + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> +
> + /* continue filesystem operators if errors=continue */
> + if (continue_fs || f2fs_readonly(sb))
> + return;
> +
> + f2fs_warn(sbi, "Remounting filesystem read-only");
> + /*
> + * Make sure updated value of ->s_mount_flags will be visible before
> + * ->s_flags update
> + */
> + smp_wmb();
> + sb->s_flags |= SB_RDONLY;
> +}
> +
> +static void f2fs_record_error_work(struct work_struct *work)
> +{
> + struct f2fs_sb_info *sbi = container_of(work,
> + struct f2fs_sb_info, s_error_work);
> +
> + f2fs_record_stop_reason(sbi);
> +}
> +
> static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> {
> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> sb->s_fs_info = sbi;
> sbi->raw_super = raw_super;
>
> + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>
> /* precompute checksum seed for metadata */
> if (f2fs_sb_has_inode_chksum(sbi))
> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> f2fs_destroy_segment_manager(sbi);
> stop_ckpt_thread:
> f2fs_stop_ckpt_thread(sbi);
> + /* flush s_error_work before sbi destroy */
> + flush_work(&sbi->s_error_work);
> f2fs_destroy_post_read_wq(sbi);
> free_devices:
> destroy_device_list(sbi);

2023-04-19 20:25:45

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 04/08, Chao Yu wrote:
> This patch supports errors=remount-ro|continue|panic mount option,
> by default it uses "remount-ro" mode.
>
> Signed-off-by: Chao Yu <[email protected]>
> Signed-off-by: Yangtao Li <[email protected]>
> ---
> v8:
> - record stop reason synchronously if it's from ioctl shutdown interface
> - fix typo in doc and commit message.
> - use f2fs_hw_is_readonly() instead of bdev_read_only()
> Documentation/filesystems/f2fs.rst | 5 ++
> fs/f2fs/checkpoint.c | 7 +-
> fs/f2fs/f2fs.h | 20 ++++-
> fs/f2fs/file.c | 5 --
> fs/f2fs/gc.c | 2 +-
> fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
> 6 files changed, 149 insertions(+), 24 deletions(-)
>
> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> index 2055e72871fe..ba4c02fad647 100644
> --- a/Documentation/filesystems/f2fs.rst
> +++ b/Documentation/filesystems/f2fs.rst
> @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> data block update frequency of the extent per inode, in
> order to provide better temperature hints for data block
> allocation.
> +errors=%s Specify f2fs behavior on critical errors. This supports modes:
> + "panic", "continue" and "remount-ro", respectively, trigger
> + panic immediately, continue without doing anything, and remount
> + the partition in read-only mode. By default it uses "remount-ro"
> + mode.
> ======================== ============================================================
>
> Debugfs Entries
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 73ae4e85e70d..e6b266bb9ce0 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> unsigned char reason)
> {
> f2fs_build_fault_attr(sbi, 0, 0);
> - set_ckpt_flags(sbi, CP_ERROR_FLAG);
> - if (!end_io) {
> + if (!end_io)
> f2fs_flush_merged_writes(sbi);
> -
> - f2fs_handle_stop(sbi, reason);
> - }
> + f2fs_handle_critical_error(sbi, reason, end_io);
> }
>
> /*
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ac4942f8e83e..7d9c5a65f974 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> int fs_mode; /* fs mode: LFS or ADAPTIVE */
> int bggc_mode; /* bggc mode: off, on or sync */
> int memory_mode; /* memory mode */
> + int errors; /* errors parameter */
> int discard_unit; /*
> * discard command's offset/size should
> * be aligned to this unit: block,
> @@ -1370,6 +1371,12 @@ enum {
> MEMORY_MODE_LOW, /* memory mode for low memry devices */
> };
>
> +enum errors_option {
> + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
> + MOUNT_ERRORS_CONTINUE, /* continue on errors */
> + MOUNT_ERRORS_PANIC, /* panic on errors */
> +};
> +
> static inline int f2fs_test_bit(unsigned int nr, char *addr);
> static inline void f2fs_set_bit(unsigned int nr, char *addr);
> static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>
> struct workqueue_struct *post_read_wq; /* post read workqueue */
>
> - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> - spinlock_t error_lock; /* protect errors array */
> + /*
> + * If we are in irq context, let's update error information into
> + * on-disk superblock in the work.
> + */
> + struct work_struct s_error_work;
> + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
> + spinlock_t error_lock; /* protect errors/stop_reason array */
> bool error_dirty; /* errors of sb is dirty */
>
> struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> int f2fs_quota_sync(struct super_block *sb, int type);
> loff_t max_file_blocks(struct inode *inode);
> void f2fs_quota_off_umount(struct super_block *sb);
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> + bool irq_context);
> void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> int f2fs_sync_fs(struct super_block *sb, int sync);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 14e9a20e68df..10743e864d57 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> ret = 0;
> f2fs_stop_checkpoint(sbi, false,
> STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> trace_f2fs_shutdown(sbi, in, ret);
> }
> return ret;
> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> if (ret)
> goto out;
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> thaw_bdev(sb->s_bdev);
> break;
> case F2FS_GOING_DOWN_METASYNC:
> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> if (ret)
> goto out;
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_NOSYNC:
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_METAFLUSH:
> f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> break;
> case F2FS_GOING_DOWN_NEED_FSCK:
> set_sbi_flag(sbi, SBI_NEED_FSCK);
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index f1d0dd9c5a6c..6dc2bc710ba8 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> if (gc_th->gc_wake)
> gc_th->gc_wake = false;
>
> - if (try_to_freeze()) {
> + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> stat_other_skip_bggc_count(sbi);
> continue;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 23574894e170..1573bf123197 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -164,6 +164,7 @@ enum {
> Opt_discard_unit,
> Opt_memory_mode,
> Opt_age_extent_cache,
> + Opt_errors,
> Opt_err,
> };
>
> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> {Opt_discard_unit, "discard_unit=%s"},
> {Opt_memory_mode, "memory=%s"},
> {Opt_age_extent_cache, "age_extent_cache"},
> + {Opt_errors, "errors=%s"},
> {Opt_err, NULL},
> };
>
> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> case Opt_age_extent_cache:
> set_opt(sbi, AGE_EXTENT_CACHE);
> break;
> + case Opt_errors:
> + name = match_strdup(&args[0]);
> + if (!name)
> + return -ENOMEM;
> + if (!strcmp(name, "remount-ro")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_READONLY;
> + } else if (!strcmp(name, "continue")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_CONTINUE;
> + } else if (!strcmp(name, "panic")) {
> + F2FS_OPTION(sbi).errors =
> + MOUNT_ERRORS_PANIC;
> + } else {
> + kfree(name);
> + return -EINVAL;
> + }
> + kfree(name);
> + break;
> default:
> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> p);
> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> f2fs_destroy_node_manager(sbi);
> f2fs_destroy_segment_manager(sbi);
>
> + /* flush s_error_work before sbi destroy */
> + flush_work(&sbi->s_error_work);
> +
> f2fs_destroy_post_read_wq(sbi);
>
> kvfree(sbi->ckpt);
> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> seq_printf(seq, ",memory=%s", "low");
>
> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> + seq_printf(seq, ",errors=%s", "remount-ro");
> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> + seq_printf(seq, ",errors=%s", "continue");
> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> + seq_printf(seq, ",errors=%s", "panic");
> +
> return 0;
> }
>
> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> }
> F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;

Was the default MOUNT_ERRORS_CONTINUE?

>
> sbi->sb->s_flags &= ~SB_INLINECRYPT;
>
> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> if (err)
> goto restore_opts;
>
> + /* flush outstanding errors before changing fs state */
> + flush_work(&sbi->s_error_work);
> +
> /*
> * Previous and new state of filesystem is RO,
> * so skip checking GC and FLUSH_MERGE conditions.
> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> return err;
> }
>
> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sbi->error_lock, flags);
> + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> + sbi->stop_reason[reason]++;
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
> +}
> +
> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> {
> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> + unsigned long flags;
> int err;
>
> f2fs_down_write(&sbi->sb_lock);
>
> - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> - raw_super->s_stop_reason[reason]++;
> + spin_lock_irqsave(&sbi->error_lock, flags);
> + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>
> err = f2fs_commit_super(sbi, false);
> - if (err)
> - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> - reason, err);
> +
> f2fs_up_write(&sbi->sb_lock);
> + if (err)
> + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> }
>
> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> {
> - spin_lock(&sbi->error_lock);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&sbi->error_lock, flags);
> if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> set_bit(flag, (unsigned long *)sbi->errors);
> sbi->error_dirty = true;
> }
> - spin_unlock(&sbi->error_lock);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
> }
>
> static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> {
> + unsigned long flags;
> bool need_update = false;
>
> - spin_lock(&sbi->error_lock);
> + spin_lock_irqsave(&sbi->error_lock, flags);
> if (sbi->error_dirty) {
> memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> MAX_F2FS_ERRORS);
> sbi->error_dirty = false;
> need_update = true;
> }
> - spin_unlock(&sbi->error_lock);
> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>
> return need_update;
> }
> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> f2fs_up_write(&sbi->sb_lock);
> }
>
> +static bool system_going_down(void)
> +{
> + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> + || system_state == SYSTEM_RESTART;
> +}
> +
> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> + bool irq_context)
> +{
> + struct super_block *sb = sbi->sb;
> + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> + bool continue_fs = !shutdown &&
> + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> +
> + set_ckpt_flags(sbi, CP_ERROR_FLAG);
> +
> + if (!f2fs_hw_is_readonly(sbi)) {
> + save_stop_reason(sbi, reason);
> +
> + if (irq_context && !shutdown)
> + schedule_work(&sbi->s_error_work);
> + else
> + f2fs_record_stop_reason(sbi);
> + }
> +
> + /*
> + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> + * could panic during 'reboot -f' as the underlying device got already
> + * disabled.
> + */
> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> + !shutdown && !system_going_down() &&
> + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> + panic("F2FS-fs (device %s): panic forced after error\n",
> + sb->s_id);
> +
> + if (shutdown)
> + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> +
> + /* continue filesystem operators if errors=continue */
> + if (continue_fs || f2fs_readonly(sb))
> + return;
> +
> + f2fs_warn(sbi, "Remounting filesystem read-only");
> + /*
> + * Make sure updated value of ->s_mount_flags will be visible before
> + * ->s_flags update
> + */
> + smp_wmb();
> + sb->s_flags |= SB_RDONLY;
> +}
> +
> +static void f2fs_record_error_work(struct work_struct *work)
> +{
> + struct f2fs_sb_info *sbi = container_of(work,
> + struct f2fs_sb_info, s_error_work);
> +
> + f2fs_record_stop_reason(sbi);
> +}
> +
> static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> {
> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> sb->s_fs_info = sbi;
> sbi->raw_super = raw_super;
>
> + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>
> /* precompute checksum seed for metadata */
> if (f2fs_sb_has_inode_chksum(sbi))
> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> f2fs_destroy_segment_manager(sbi);
> stop_ckpt_thread:
> f2fs_stop_ckpt_thread(sbi);
> + /* flush s_error_work before sbi destroy */
> + flush_work(&sbi->s_error_work);
> f2fs_destroy_post_read_wq(sbi);
> free_devices:
> destroy_device_list(sbi);
> --
> 2.36.1

2023-04-20 08:51:25

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 2023/4/20 4:19, Jaegeuk Kim wrote:
> On 04/08, Chao Yu wrote:
>> This patch supports errors=remount-ro|continue|panic mount option,
>> by default it uses "remount-ro" mode.
>>
>> Signed-off-by: Chao Yu <[email protected]>
>> Signed-off-by: Yangtao Li <[email protected]>
>> ---
>> v8:
>> - record stop reason synchronously if it's from ioctl shutdown interface
>> - fix typo in doc and commit message.
>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>> Documentation/filesystems/f2fs.rst | 5 ++
>> fs/f2fs/checkpoint.c | 7 +-
>> fs/f2fs/f2fs.h | 20 ++++-
>> fs/f2fs/file.c | 5 --
>> fs/f2fs/gc.c | 2 +-
>> fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
>> 6 files changed, 149 insertions(+), 24 deletions(-)
>>
>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>> index 2055e72871fe..ba4c02fad647 100644
>> --- a/Documentation/filesystems/f2fs.rst
>> +++ b/Documentation/filesystems/f2fs.rst
>> @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
>> data block update frequency of the extent per inode, in
>> order to provide better temperature hints for data block
>> allocation.
>> +errors=%s Specify f2fs behavior on critical errors. This supports modes:
>> + "panic", "continue" and "remount-ro", respectively, trigger
>> + panic immediately, continue without doing anything, and remount
>> + the partition in read-only mode. By default it uses "remount-ro"
>> + mode.
>> ======================== ============================================================
>>
>> Debugfs Entries
>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>> index 73ae4e85e70d..e6b266bb9ce0 100644
>> --- a/fs/f2fs/checkpoint.c
>> +++ b/fs/f2fs/checkpoint.c
>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>> unsigned char reason)
>> {
>> f2fs_build_fault_attr(sbi, 0, 0);
>> - set_ckpt_flags(sbi, CP_ERROR_FLAG);
>> - if (!end_io) {
>> + if (!end_io)
>> f2fs_flush_merged_writes(sbi);
>> -
>> - f2fs_handle_stop(sbi, reason);
>> - }
>> + f2fs_handle_critical_error(sbi, reason, end_io);
>> }
>>
>> /*
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index ac4942f8e83e..7d9c5a65f974 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>> int fs_mode; /* fs mode: LFS or ADAPTIVE */
>> int bggc_mode; /* bggc mode: off, on or sync */
>> int memory_mode; /* memory mode */
>> + int errors; /* errors parameter */
>> int discard_unit; /*
>> * discard command's offset/size should
>> * be aligned to this unit: block,
>> @@ -1370,6 +1371,12 @@ enum {
>> MEMORY_MODE_LOW, /* memory mode for low memry devices */
>> };
>>
>> +enum errors_option {
>> + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
>> + MOUNT_ERRORS_CONTINUE, /* continue on errors */
>> + MOUNT_ERRORS_PANIC, /* panic on errors */
>> +};
>> +
>> static inline int f2fs_test_bit(unsigned int nr, char *addr);
>> static inline void f2fs_set_bit(unsigned int nr, char *addr);
>> static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>
>> struct workqueue_struct *post_read_wq; /* post read workqueue */
>>
>> - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>> - spinlock_t error_lock; /* protect errors array */
>> + /*
>> + * If we are in irq context, let's update error information into
>> + * on-disk superblock in the work.
>> + */
>> + struct work_struct s_error_work;
>> + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>> + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
>> + spinlock_t error_lock; /* protect errors/stop_reason array */
>> bool error_dirty; /* errors of sb is dirty */
>>
>> struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>> int f2fs_quota_sync(struct super_block *sb, int type);
>> loff_t max_file_blocks(struct inode *inode);
>> void f2fs_quota_off_umount(struct super_block *sb);
>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>> + bool irq_context);
>> void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>> int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>> int f2fs_sync_fs(struct super_block *sb, int sync);
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 14e9a20e68df..10743e864d57 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>> ret = 0;
>> f2fs_stop_checkpoint(sbi, false,
>> STOP_CP_REASON_SHUTDOWN);
>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> trace_f2fs_shutdown(sbi, in, ret);
>> }
>> return ret;
>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>> if (ret)
>> goto out;
>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> thaw_bdev(sb->s_bdev);
>> break;
>> case F2FS_GOING_DOWN_METASYNC:
>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>> if (ret)
>> goto out;
>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> break;
>> case F2FS_GOING_DOWN_NOSYNC:
>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> break;
>> case F2FS_GOING_DOWN_METAFLUSH:
>> f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> break;
>> case F2FS_GOING_DOWN_NEED_FSCK:
>> set_sbi_flag(sbi, SBI_NEED_FSCK);
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>> if (gc_th->gc_wake)
>> gc_th->gc_wake = false;
>>
>> - if (try_to_freeze()) {
>> + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>> stat_other_skip_bggc_count(sbi);
>> continue;
>> }
>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>> index 23574894e170..1573bf123197 100644
>> --- a/fs/f2fs/super.c
>> +++ b/fs/f2fs/super.c
>> @@ -164,6 +164,7 @@ enum {
>> Opt_discard_unit,
>> Opt_memory_mode,
>> Opt_age_extent_cache,
>> + Opt_errors,
>> Opt_err,
>> };
>>
>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>> {Opt_discard_unit, "discard_unit=%s"},
>> {Opt_memory_mode, "memory=%s"},
>> {Opt_age_extent_cache, "age_extent_cache"},
>> + {Opt_errors, "errors=%s"},
>> {Opt_err, NULL},
>> };
>>
>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>> case Opt_age_extent_cache:
>> set_opt(sbi, AGE_EXTENT_CACHE);
>> break;
>> + case Opt_errors:
>> + name = match_strdup(&args[0]);
>> + if (!name)
>> + return -ENOMEM;
>> + if (!strcmp(name, "remount-ro")) {
>> + F2FS_OPTION(sbi).errors =
>> + MOUNT_ERRORS_READONLY;
>> + } else if (!strcmp(name, "continue")) {
>> + F2FS_OPTION(sbi).errors =
>> + MOUNT_ERRORS_CONTINUE;
>> + } else if (!strcmp(name, "panic")) {
>> + F2FS_OPTION(sbi).errors =
>> + MOUNT_ERRORS_PANIC;
>> + } else {
>> + kfree(name);
>> + return -EINVAL;
>> + }
>> + kfree(name);
>> + break;
>> default:
>> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>> p);
>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>> f2fs_destroy_node_manager(sbi);
>> f2fs_destroy_segment_manager(sbi);
>>
>> + /* flush s_error_work before sbi destroy */
>> + flush_work(&sbi->s_error_work);
>> +
>> f2fs_destroy_post_read_wq(sbi);
>>
>> kvfree(sbi->ckpt);
>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>> else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>> seq_printf(seq, ",memory=%s", "low");
>>
>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>> + seq_printf(seq, ",errors=%s", "remount-ro");
>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>> + seq_printf(seq, ",errors=%s", "continue");
>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>> + seq_printf(seq, ",errors=%s", "panic");
>> +
>> return 0;
>> }
>>
>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>> }
>> F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>> F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>> + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>
> Was the default MOUNT_ERRORS_CONTINUE?

Okay, let me revise in v9.

Please note that main difference between continue mode and readonly mode once
cp_error is set:

continue mode readonly mode
access ops: Normal Normal
modify ops: -EIO -EROFS
mount option rw ro

Thanks,

>
>>
>> sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>
>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>> if (err)
>> goto restore_opts;
>>
>> + /* flush outstanding errors before changing fs state */
>> + flush_work(&sbi->s_error_work);
>> +
>> /*
>> * Previous and new state of filesystem is RO,
>> * so skip checking GC and FLUSH_MERGE conditions.
>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>> return err;
>> }
>>
>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>> +{
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&sbi->error_lock, flags);
>> + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>> + sbi->stop_reason[reason]++;
>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>> +}
>> +
>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>> {
>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>> + unsigned long flags;
>> int err;
>>
>> f2fs_down_write(&sbi->sb_lock);
>>
>> - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>> - raw_super->s_stop_reason[reason]++;
>> + spin_lock_irqsave(&sbi->error_lock, flags);
>> + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>
>> err = f2fs_commit_super(sbi, false);
>> - if (err)
>> - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>> - reason, err);
>> +
>> f2fs_up_write(&sbi->sb_lock);
>> + if (err)
>> + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>> }
>>
>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>> {
>> - spin_lock(&sbi->error_lock);
>> + unsigned long flags;
>> +
>> + spin_lock_irqsave(&sbi->error_lock, flags);
>> if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>> set_bit(flag, (unsigned long *)sbi->errors);
>> sbi->error_dirty = true;
>> }
>> - spin_unlock(&sbi->error_lock);
>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>> }
>>
>> static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>> {
>> + unsigned long flags;
>> bool need_update = false;
>>
>> - spin_lock(&sbi->error_lock);
>> + spin_lock_irqsave(&sbi->error_lock, flags);
>> if (sbi->error_dirty) {
>> memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>> MAX_F2FS_ERRORS);
>> sbi->error_dirty = false;
>> need_update = true;
>> }
>> - spin_unlock(&sbi->error_lock);
>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>
>> return need_update;
>> }
>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>> f2fs_up_write(&sbi->sb_lock);
>> }
>>
>> +static bool system_going_down(void)
>> +{
>> + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>> + || system_state == SYSTEM_RESTART;
>> +}
>> +
>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>> + bool irq_context)
>> +{
>> + struct super_block *sb = sbi->sb;
>> + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>> + bool continue_fs = !shutdown &&
>> + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>> +
>> + set_ckpt_flags(sbi, CP_ERROR_FLAG);
>> +
>> + if (!f2fs_hw_is_readonly(sbi)) {
>> + save_stop_reason(sbi, reason);
>> +
>> + if (irq_context && !shutdown)
>> + schedule_work(&sbi->s_error_work);
>> + else
>> + f2fs_record_stop_reason(sbi);
>> + }
>> +
>> + /*
>> + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>> + * could panic during 'reboot -f' as the underlying device got already
>> + * disabled.
>> + */
>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>> + !shutdown && !system_going_down() &&
>> + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>> + panic("F2FS-fs (device %s): panic forced after error\n",
>> + sb->s_id);
>> +
>> + if (shutdown)
>> + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>> +
>> + /* continue filesystem operators if errors=continue */
>> + if (continue_fs || f2fs_readonly(sb))
>> + return;
>> +
>> + f2fs_warn(sbi, "Remounting filesystem read-only");
>> + /*
>> + * Make sure updated value of ->s_mount_flags will be visible before
>> + * ->s_flags update
>> + */
>> + smp_wmb();
>> + sb->s_flags |= SB_RDONLY;
>> +}
>> +
>> +static void f2fs_record_error_work(struct work_struct *work)
>> +{
>> + struct f2fs_sb_info *sbi = container_of(work,
>> + struct f2fs_sb_info, s_error_work);
>> +
>> + f2fs_record_stop_reason(sbi);
>> +}
>> +
>> static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>> {
>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>> sb->s_fs_info = sbi;
>> sbi->raw_super = raw_super;
>>
>> + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>> memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>> + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>
>> /* precompute checksum seed for metadata */
>> if (f2fs_sb_has_inode_chksum(sbi))
>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>> f2fs_destroy_segment_manager(sbi);
>> stop_ckpt_thread:
>> f2fs_stop_ckpt_thread(sbi);
>> + /* flush s_error_work before sbi destroy */
>> + flush_work(&sbi->s_error_work);
>> f2fs_destroy_post_read_wq(sbi);
>> free_devices:
>> destroy_device_list(sbi);
>> --
>> 2.36.1

2023-04-20 16:36:23

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 04/20, Chao Yu wrote:
> On 2023/4/20 4:19, Jaegeuk Kim wrote:
> > On 04/08, Chao Yu wrote:
> > > This patch supports errors=remount-ro|continue|panic mount option,
> > > by default it uses "remount-ro" mode.
> > >
> > > Signed-off-by: Chao Yu <[email protected]>
> > > Signed-off-by: Yangtao Li <[email protected]>
> > > ---
> > > v8:
> > > - record stop reason synchronously if it's from ioctl shutdown interface
> > > - fix typo in doc and commit message.
> > > - use f2fs_hw_is_readonly() instead of bdev_read_only()
> > > Documentation/filesystems/f2fs.rst | 5 ++
> > > fs/f2fs/checkpoint.c | 7 +-
> > > fs/f2fs/f2fs.h | 20 ++++-
> > > fs/f2fs/file.c | 5 --
> > > fs/f2fs/gc.c | 2 +-
> > > fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
> > > 6 files changed, 149 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> > > index 2055e72871fe..ba4c02fad647 100644
> > > --- a/Documentation/filesystems/f2fs.rst
> > > +++ b/Documentation/filesystems/f2fs.rst
> > > @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> > > data block update frequency of the extent per inode, in
> > > order to provide better temperature hints for data block
> > > allocation.
> > > +errors=%s Specify f2fs behavior on critical errors. This supports modes:
> > > + "panic", "continue" and "remount-ro", respectively, trigger
> > > + panic immediately, continue without doing anything, and remount
> > > + the partition in read-only mode. By default it uses "remount-ro"
> > > + mode.
> > > ======================== ============================================================
> > > Debugfs Entries
> > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > index 73ae4e85e70d..e6b266bb9ce0 100644
> > > --- a/fs/f2fs/checkpoint.c
> > > +++ b/fs/f2fs/checkpoint.c
> > > @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> > > unsigned char reason)
> > > {
> > > f2fs_build_fault_attr(sbi, 0, 0);
> > > - set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > - if (!end_io) {
> > > + if (!end_io)
> > > f2fs_flush_merged_writes(sbi);
> > > -
> > > - f2fs_handle_stop(sbi, reason);
> > > - }
> > > + f2fs_handle_critical_error(sbi, reason, end_io);
> > > }
> > > /*
> > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > index ac4942f8e83e..7d9c5a65f974 100644
> > > --- a/fs/f2fs/f2fs.h
> > > +++ b/fs/f2fs/f2fs.h
> > > @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> > > int fs_mode; /* fs mode: LFS or ADAPTIVE */
> > > int bggc_mode; /* bggc mode: off, on or sync */
> > > int memory_mode; /* memory mode */
> > > + int errors; /* errors parameter */
> > > int discard_unit; /*
> > > * discard command's offset/size should
> > > * be aligned to this unit: block,
> > > @@ -1370,6 +1371,12 @@ enum {
> > > MEMORY_MODE_LOW, /* memory mode for low memry devices */
> > > };
> > > +enum errors_option {
> > > + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
> > > + MOUNT_ERRORS_CONTINUE, /* continue on errors */
> > > + MOUNT_ERRORS_PANIC, /* panic on errors */
> > > +};
> > > +
> > > static inline int f2fs_test_bit(unsigned int nr, char *addr);
> > > static inline void f2fs_set_bit(unsigned int nr, char *addr);
> > > static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> > > @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
> > > struct workqueue_struct *post_read_wq; /* post read workqueue */
> > > - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> > > - spinlock_t error_lock; /* protect errors array */
> > > + /*
> > > + * If we are in irq context, let's update error information into
> > > + * on-disk superblock in the work.
> > > + */
> > > + struct work_struct s_error_work;
> > > + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> > > + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
> > > + spinlock_t error_lock; /* protect errors/stop_reason array */
> > > bool error_dirty; /* errors of sb is dirty */
> > > struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
> > > @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> > > int f2fs_quota_sync(struct super_block *sb, int type);
> > > loff_t max_file_blocks(struct inode *inode);
> > > void f2fs_quota_off_umount(struct super_block *sb);
> > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> > > void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > + bool irq_context);
> > > void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> > > int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> > > int f2fs_sync_fs(struct super_block *sb, int sync);
> > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > index 14e9a20e68df..10743e864d57 100644
> > > --- a/fs/f2fs/file.c
> > > +++ b/fs/f2fs/file.c
> > > @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > ret = 0;
> > > f2fs_stop_checkpoint(sbi, false,
> > > STOP_CP_REASON_SHUTDOWN);
> > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > trace_f2fs_shutdown(sbi, in, ret);
> > > }
> > > return ret;
> > > @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > if (ret)
> > > goto out;
> > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > thaw_bdev(sb->s_bdev);
> > > break;
> > > case F2FS_GOING_DOWN_METASYNC:
> > > @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > if (ret)
> > > goto out;
> > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > break;
> > > case F2FS_GOING_DOWN_NOSYNC:
> > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > break;
> > > case F2FS_GOING_DOWN_METAFLUSH:
> > > f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > break;
> > > case F2FS_GOING_DOWN_NEED_FSCK:
> > > set_sbi_flag(sbi, SBI_NEED_FSCK);
> > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > index f1d0dd9c5a6c..6dc2bc710ba8 100644
> > > --- a/fs/f2fs/gc.c
> > > +++ b/fs/f2fs/gc.c
> > > @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> > > if (gc_th->gc_wake)
> > > gc_th->gc_wake = false;
> > > - if (try_to_freeze()) {
> > > + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> > > stat_other_skip_bggc_count(sbi);
> > > continue;
> > > }
> > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > index 23574894e170..1573bf123197 100644
> > > --- a/fs/f2fs/super.c
> > > +++ b/fs/f2fs/super.c
> > > @@ -164,6 +164,7 @@ enum {
> > > Opt_discard_unit,
> > > Opt_memory_mode,
> > > Opt_age_extent_cache,
> > > + Opt_errors,
> > > Opt_err,
> > > };
> > > @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> > > {Opt_discard_unit, "discard_unit=%s"},
> > > {Opt_memory_mode, "memory=%s"},
> > > {Opt_age_extent_cache, "age_extent_cache"},
> > > + {Opt_errors, "errors=%s"},
> > > {Opt_err, NULL},
> > > };
> > > @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > case Opt_age_extent_cache:
> > > set_opt(sbi, AGE_EXTENT_CACHE);
> > > break;
> > > + case Opt_errors:
> > > + name = match_strdup(&args[0]);
> > > + if (!name)
> > > + return -ENOMEM;
> > > + if (!strcmp(name, "remount-ro")) {
> > > + F2FS_OPTION(sbi).errors =
> > > + MOUNT_ERRORS_READONLY;
> > > + } else if (!strcmp(name, "continue")) {
> > > + F2FS_OPTION(sbi).errors =
> > > + MOUNT_ERRORS_CONTINUE;
> > > + } else if (!strcmp(name, "panic")) {
> > > + F2FS_OPTION(sbi).errors =
> > > + MOUNT_ERRORS_PANIC;
> > > + } else {
> > > + kfree(name);
> > > + return -EINVAL;
> > > + }
> > > + kfree(name);
> > > + break;
> > > default:
> > > f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> > > p);
> > > @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> > > f2fs_destroy_node_manager(sbi);
> > > f2fs_destroy_segment_manager(sbi);
> > > + /* flush s_error_work before sbi destroy */
> > > + flush_work(&sbi->s_error_work);
> > > +
> > > f2fs_destroy_post_read_wq(sbi);
> > > kvfree(sbi->ckpt);
> > > @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > > else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> > > seq_printf(seq, ",memory=%s", "low");
> > > + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> > > + seq_printf(seq, ",errors=%s", "remount-ro");
> > > + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> > > + seq_printf(seq, ",errors=%s", "continue");
> > > + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> > > + seq_printf(seq, ",errors=%s", "panic");
> > > +
> > > return 0;
> > > }
> > > @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> > > }
> > > F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> > > F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> > > + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
> >
> > Was the default MOUNT_ERRORS_CONTINUE?
>
> Okay, let me revise in v9.
>
> Please note that main difference between continue mode and readonly mode once
> cp_error is set:
>
> continue mode readonly mode
> access ops: Normal Normal
> modify ops: -EIO -EROFS
> mount option rw ro

Could you please add the difference of all the modes in patch description?

I think it'd be good to add:
1. access ops
2. syscall errors
3. mount option
4. pending data writes
5. pending node writes
6. pending meta writes


>
> Thanks,
>
> >
> > > sbi->sb->s_flags &= ~SB_INLINECRYPT;
> > > @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> > > if (err)
> > > goto restore_opts;
> > > + /* flush outstanding errors before changing fs state */
> > > + flush_work(&sbi->s_error_work);
> > > +
> > > /*
> > > * Previous and new state of filesystem is RO,
> > > * so skip checking GC and FLUSH_MERGE conditions.
> > > @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> > > return err;
> > > }
> > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> > > +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> > > +{
> > > + unsigned long flags;
> > > +
> > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > + sbi->stop_reason[reason]++;
> > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > +}
> > > +
> > > +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> > > {
> > > struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > + unsigned long flags;
> > > int err;
> > > f2fs_down_write(&sbi->sb_lock);
> > > - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > - raw_super->s_stop_reason[reason]++;
> > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > err = f2fs_commit_super(sbi, false);
> > > - if (err)
> > > - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> > > - reason, err);
> > > +
> > > f2fs_up_write(&sbi->sb_lock);
> > > + if (err)
> > > + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> > > }
> > > void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> > > {
> > > - spin_lock(&sbi->error_lock);
> > > + unsigned long flags;
> > > +
> > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> > > set_bit(flag, (unsigned long *)sbi->errors);
> > > sbi->error_dirty = true;
> > > }
> > > - spin_unlock(&sbi->error_lock);
> > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > }
> > > static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> > > {
> > > + unsigned long flags;
> > > bool need_update = false;
> > > - spin_lock(&sbi->error_lock);
> > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > if (sbi->error_dirty) {
> > > memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> > > MAX_F2FS_ERRORS);
> > > sbi->error_dirty = false;
> > > need_update = true;
> > > }
> > > - spin_unlock(&sbi->error_lock);
> > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > return need_update;
> > > }
> > > @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> > > f2fs_up_write(&sbi->sb_lock);
> > > }
> > > +static bool system_going_down(void)
> > > +{
> > > + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> > > + || system_state == SYSTEM_RESTART;
> > > +}
> > > +
> > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > + bool irq_context)
> > > +{
> > > + struct super_block *sb = sbi->sb;
> > > + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> > > + bool continue_fs = !shutdown &&
> > > + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> > > +
> > > + set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > +
> > > + if (!f2fs_hw_is_readonly(sbi)) {
> > > + save_stop_reason(sbi, reason);
> > > +
> > > + if (irq_context && !shutdown)
> > > + schedule_work(&sbi->s_error_work);
> > > + else
> > > + f2fs_record_stop_reason(sbi);
> > > + }
> > > +
> > > + /*
> > > + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> > > + * could panic during 'reboot -f' as the underlying device got already
> > > + * disabled.
> > > + */
> > > + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> > > + !shutdown && !system_going_down() &&
> > > + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> > > + panic("F2FS-fs (device %s): panic forced after error\n",
> > > + sb->s_id);
> > > +
> > > + if (shutdown)
> > > + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > +
> > > + /* continue filesystem operators if errors=continue */
> > > + if (continue_fs || f2fs_readonly(sb))
> > > + return;
> > > +
> > > + f2fs_warn(sbi, "Remounting filesystem read-only");
> > > + /*
> > > + * Make sure updated value of ->s_mount_flags will be visible before
> > > + * ->s_flags update
> > > + */
> > > + smp_wmb();
> > > + sb->s_flags |= SB_RDONLY;
> > > +}
> > > +
> > > +static void f2fs_record_error_work(struct work_struct *work)
> > > +{
> > > + struct f2fs_sb_info *sbi = container_of(work,
> > > + struct f2fs_sb_info, s_error_work);
> > > +
> > > + f2fs_record_stop_reason(sbi);
> > > +}
> > > +
> > > static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> > > {
> > > struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > sb->s_fs_info = sbi;
> > > sbi->raw_super = raw_super;
> > > + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> > > memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> > > + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
> > > /* precompute checksum seed for metadata */
> > > if (f2fs_sb_has_inode_chksum(sbi))
> > > @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > f2fs_destroy_segment_manager(sbi);
> > > stop_ckpt_thread:
> > > f2fs_stop_ckpt_thread(sbi);
> > > + /* flush s_error_work before sbi destroy */
> > > + flush_work(&sbi->s_error_work);
> > > f2fs_destroy_post_read_wq(sbi);
> > > free_devices:
> > > destroy_device_list(sbi);
> > > --
> > > 2.36.1

2023-04-21 02:02:15

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 2023/4/21 0:34, Jaegeuk Kim wrote:
> On 04/20, Chao Yu wrote:
>> On 2023/4/20 4:19, Jaegeuk Kim wrote:
>>> On 04/08, Chao Yu wrote:
>>>> This patch supports errors=remount-ro|continue|panic mount option,
>>>> by default it uses "remount-ro" mode.
>>>>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> Signed-off-by: Yangtao Li <[email protected]>
>>>> ---
>>>> v8:
>>>> - record stop reason synchronously if it's from ioctl shutdown interface
>>>> - fix typo in doc and commit message.
>>>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>>> Documentation/filesystems/f2fs.rst | 5 ++
>>>> fs/f2fs/checkpoint.c | 7 +-
>>>> fs/f2fs/f2fs.h | 20 ++++-
>>>> fs/f2fs/file.c | 5 --
>>>> fs/f2fs/gc.c | 2 +-
>>>> fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
>>>> 6 files changed, 149 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>>> index 2055e72871fe..ba4c02fad647 100644
>>>> --- a/Documentation/filesystems/f2fs.rst
>>>> +++ b/Documentation/filesystems/f2fs.rst
>>>> @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
>>>> data block update frequency of the extent per inode, in
>>>> order to provide better temperature hints for data block
>>>> allocation.
>>>> +errors=%s Specify f2fs behavior on critical errors. This supports modes:
>>>> + "panic", "continue" and "remount-ro", respectively, trigger
>>>> + panic immediately, continue without doing anything, and remount
>>>> + the partition in read-only mode. By default it uses "remount-ro"
>>>> + mode.
>>>> ======================== ============================================================
>>>> Debugfs Entries
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 73ae4e85e70d..e6b266bb9ce0 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>>> unsigned char reason)
>>>> {
>>>> f2fs_build_fault_attr(sbi, 0, 0);
>>>> - set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> - if (!end_io) {
>>>> + if (!end_io)
>>>> f2fs_flush_merged_writes(sbi);
>>>> -
>>>> - f2fs_handle_stop(sbi, reason);
>>>> - }
>>>> + f2fs_handle_critical_error(sbi, reason, end_io);
>>>> }
>>>> /*
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index ac4942f8e83e..7d9c5a65f974 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>>> int fs_mode; /* fs mode: LFS or ADAPTIVE */
>>>> int bggc_mode; /* bggc mode: off, on or sync */
>>>> int memory_mode; /* memory mode */
>>>> + int errors; /* errors parameter */
>>>> int discard_unit; /*
>>>> * discard command's offset/size should
>>>> * be aligned to this unit: block,
>>>> @@ -1370,6 +1371,12 @@ enum {
>>>> MEMORY_MODE_LOW, /* memory mode for low memry devices */
>>>> };
>>>> +enum errors_option {
>>>> + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
>>>> + MOUNT_ERRORS_CONTINUE, /* continue on errors */
>>>> + MOUNT_ERRORS_PANIC, /* panic on errors */
>>>> +};
>>>> +
>>>> static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>>> static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>>> static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>>>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>>> struct workqueue_struct *post_read_wq; /* post read workqueue */
>>>> - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>>>> - spinlock_t error_lock; /* protect errors array */
>>>> + /*
>>>> + * If we are in irq context, let's update error information into
>>>> + * on-disk superblock in the work.
>>>> + */
>>>> + struct work_struct s_error_work;
>>>> + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>>>> + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
>>>> + spinlock_t error_lock; /* protect errors/stop_reason array */
>>>> bool error_dirty; /* errors of sb is dirty */
>>>> struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
>>>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>> int f2fs_quota_sync(struct super_block *sb, int type);
>>>> loff_t max_file_blocks(struct inode *inode);
>>>> void f2fs_quota_off_umount(struct super_block *sb);
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> + bool irq_context);
>>>> void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>>> int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>> int f2fs_sync_fs(struct super_block *sb, int sync);
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 14e9a20e68df..10743e864d57 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>> ret = 0;
>>>> f2fs_stop_checkpoint(sbi, false,
>>>> STOP_CP_REASON_SHUTDOWN);
>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> trace_f2fs_shutdown(sbi, in, ret);
>>>> }
>>>> return ret;
>>>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>> if (ret)
>>>> goto out;
>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> thaw_bdev(sb->s_bdev);
>>>> break;
>>>> case F2FS_GOING_DOWN_METASYNC:
>>>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>> if (ret)
>>>> goto out;
>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> break;
>>>> case F2FS_GOING_DOWN_NOSYNC:
>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> break;
>>>> case F2FS_GOING_DOWN_METAFLUSH:
>>>> f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> break;
>>>> case F2FS_GOING_DOWN_NEED_FSCK:
>>>> set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>>> if (gc_th->gc_wake)
>>>> gc_th->gc_wake = false;
>>>> - if (try_to_freeze()) {
>>>> + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>>> stat_other_skip_bggc_count(sbi);
>>>> continue;
>>>> }
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 23574894e170..1573bf123197 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -164,6 +164,7 @@ enum {
>>>> Opt_discard_unit,
>>>> Opt_memory_mode,
>>>> Opt_age_extent_cache,
>>>> + Opt_errors,
>>>> Opt_err,
>>>> };
>>>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>>> {Opt_discard_unit, "discard_unit=%s"},
>>>> {Opt_memory_mode, "memory=%s"},
>>>> {Opt_age_extent_cache, "age_extent_cache"},
>>>> + {Opt_errors, "errors=%s"},
>>>> {Opt_err, NULL},
>>>> };
>>>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>> case Opt_age_extent_cache:
>>>> set_opt(sbi, AGE_EXTENT_CACHE);
>>>> break;
>>>> + case Opt_errors:
>>>> + name = match_strdup(&args[0]);
>>>> + if (!name)
>>>> + return -ENOMEM;
>>>> + if (!strcmp(name, "remount-ro")) {
>>>> + F2FS_OPTION(sbi).errors =
>>>> + MOUNT_ERRORS_READONLY;
>>>> + } else if (!strcmp(name, "continue")) {
>>>> + F2FS_OPTION(sbi).errors =
>>>> + MOUNT_ERRORS_CONTINUE;
>>>> + } else if (!strcmp(name, "panic")) {
>>>> + F2FS_OPTION(sbi).errors =
>>>> + MOUNT_ERRORS_PANIC;
>>>> + } else {
>>>> + kfree(name);
>>>> + return -EINVAL;
>>>> + }
>>>> + kfree(name);
>>>> + break;
>>>> default:
>>>> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>>> p);
>>>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>> f2fs_destroy_node_manager(sbi);
>>>> f2fs_destroy_segment_manager(sbi);
>>>> + /* flush s_error_work before sbi destroy */
>>>> + flush_work(&sbi->s_error_work);
>>>> +
>>>> f2fs_destroy_post_read_wq(sbi);
>>>> kvfree(sbi->ckpt);
>>>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>> else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>>> seq_printf(seq, ",memory=%s", "low");
>>>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>>>> + seq_printf(seq, ",errors=%s", "remount-ro");
>>>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>>>> + seq_printf(seq, ",errors=%s", "continue");
>>>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>>>> + seq_printf(seq, ",errors=%s", "panic");
>>>> +
>>>> return 0;
>>>> }
>>>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>>> }
>>>> F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>>> F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>>>> + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>>>
>>> Was the default MOUNT_ERRORS_CONTINUE?
>>
>> Okay, let me revise in v9.
>>
>> Please note that main difference between continue mode and readonly mode once
>> cp_error is set:
>>
>> continue mode readonly mode
>> access ops: Normal Normal
>> modify ops: -EIO -EROFS
>> mount option rw ro
>
> Could you please add the difference of all the modes in patch description?
>
> I think it'd be good to add:
> 1. access ops
> 2. syscall errors
> 3. mount option
> 4. pending data writes
> 5. pending node writes
> 6. pending meta writes

Sure, I'd like to add below description into f2fs.rst, any comments?

continue remount-ro panic
access ops normal noraml N/A
syscall errors -EIO -EROFS N/A
mount option rw ro N/A
pending dir write keep keep N/A
pending nondir write drop drop N/A
pending node write drop drop N/A
pending meta write keep keep N/A

Thanks,

>
>
>>
>> Thanks,
>>
>>>
>>>> sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>> if (err)
>>>> goto restore_opts;
>>>> + /* flush outstanding errors before changing fs state */
>>>> + flush_work(&sbi->s_error_work);
>>>> +
>>>> /*
>>>> * Previous and new state of filesystem is RO,
>>>> * so skip checking GC and FLUSH_MERGE conditions.
>>>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>>>> return err;
>>>> }
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +{
>>>> + unsigned long flags;
>>>> +
>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>> + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> + sbi->stop_reason[reason]++;
>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> +}
>>>> +
>>>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>>>> {
>>>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> + unsigned long flags;
>>>> int err;
>>>> f2fs_down_write(&sbi->sb_lock);
>>>> - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> - raw_super->s_stop_reason[reason]++;
>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>> + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> err = f2fs_commit_super(sbi, false);
>>>> - if (err)
>>>> - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>>>> - reason, err);
>>>> +
>>>> f2fs_up_write(&sbi->sb_lock);
>>>> + if (err)
>>>> + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>>>> }
>>>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>>>> {
>>>> - spin_lock(&sbi->error_lock);
>>>> + unsigned long flags;
>>>> +
>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>> if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>>>> set_bit(flag, (unsigned long *)sbi->errors);
>>>> sbi->error_dirty = true;
>>>> }
>>>> - spin_unlock(&sbi->error_lock);
>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> }
>>>> static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>>>> {
>>>> + unsigned long flags;
>>>> bool need_update = false;
>>>> - spin_lock(&sbi->error_lock);
>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>> if (sbi->error_dirty) {
>>>> memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>>>> MAX_F2FS_ERRORS);
>>>> sbi->error_dirty = false;
>>>> need_update = true;
>>>> }
>>>> - spin_unlock(&sbi->error_lock);
>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> return need_update;
>>>> }
>>>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>>>> f2fs_up_write(&sbi->sb_lock);
>>>> }
>>>> +static bool system_going_down(void)
>>>> +{
>>>> + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>>>> + || system_state == SYSTEM_RESTART;
>>>> +}
>>>> +
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> + bool irq_context)
>>>> +{
>>>> + struct super_block *sb = sbi->sb;
>>>> + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>>>> + bool continue_fs = !shutdown &&
>>>> + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>>>> +
>>>> + set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> +
>>>> + if (!f2fs_hw_is_readonly(sbi)) {
>>>> + save_stop_reason(sbi, reason);
>>>> +
>>>> + if (irq_context && !shutdown)
>>>> + schedule_work(&sbi->s_error_work);
>>>> + else
>>>> + f2fs_record_stop_reason(sbi);
>>>> + }
>>>> +
>>>> + /*
>>>> + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>>>> + * could panic during 'reboot -f' as the underlying device got already
>>>> + * disabled.
>>>> + */
>>>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>>>> + !shutdown && !system_going_down() &&
>>>> + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>>>> + panic("F2FS-fs (device %s): panic forced after error\n",
>>>> + sb->s_id);
>>>> +
>>>> + if (shutdown)
>>>> + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> +
>>>> + /* continue filesystem operators if errors=continue */
>>>> + if (continue_fs || f2fs_readonly(sb))
>>>> + return;
>>>> +
>>>> + f2fs_warn(sbi, "Remounting filesystem read-only");
>>>> + /*
>>>> + * Make sure updated value of ->s_mount_flags will be visible before
>>>> + * ->s_flags update
>>>> + */
>>>> + smp_wmb();
>>>> + sb->s_flags |= SB_RDONLY;
>>>> +}
>>>> +
>>>> +static void f2fs_record_error_work(struct work_struct *work)
>>>> +{
>>>> + struct f2fs_sb_info *sbi = container_of(work,
>>>> + struct f2fs_sb_info, s_error_work);
>>>> +
>>>> + f2fs_record_stop_reason(sbi);
>>>> +}
>>>> +
>>>> static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>>>> {
>>>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>> sb->s_fs_info = sbi;
>>>> sbi->raw_super = raw_super;
>>>> + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>>>> memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>>>> + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>>> /* precompute checksum seed for metadata */
>>>> if (f2fs_sb_has_inode_chksum(sbi))
>>>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>> f2fs_destroy_segment_manager(sbi);
>>>> stop_ckpt_thread:
>>>> f2fs_stop_ckpt_thread(sbi);
>>>> + /* flush s_error_work before sbi destroy */
>>>> + flush_work(&sbi->s_error_work);
>>>> f2fs_destroy_post_read_wq(sbi);
>>>> free_devices:
>>>> destroy_device_list(sbi);
>>>> --
>>>> 2.36.1

2023-04-21 19:48:11

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 04/21, Chao Yu wrote:
> On 2023/4/21 0:34, Jaegeuk Kim wrote:
> > On 04/20, Chao Yu wrote:
> > > On 2023/4/20 4:19, Jaegeuk Kim wrote:
> > > > On 04/08, Chao Yu wrote:
> > > > > This patch supports errors=remount-ro|continue|panic mount option,
> > > > > by default it uses "remount-ro" mode.
> > > > >
> > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > Signed-off-by: Yangtao Li <[email protected]>
> > > > > ---
> > > > > v8:
> > > > > - record stop reason synchronously if it's from ioctl shutdown interface
> > > > > - fix typo in doc and commit message.
> > > > > - use f2fs_hw_is_readonly() instead of bdev_read_only()
> > > > > Documentation/filesystems/f2fs.rst | 5 ++
> > > > > fs/f2fs/checkpoint.c | 7 +-
> > > > > fs/f2fs/f2fs.h | 20 ++++-
> > > > > fs/f2fs/file.c | 5 --
> > > > > fs/f2fs/gc.c | 2 +-
> > > > > fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
> > > > > 6 files changed, 149 insertions(+), 24 deletions(-)
> > > > >
> > > > > diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
> > > > > index 2055e72871fe..ba4c02fad647 100644
> > > > > --- a/Documentation/filesystems/f2fs.rst
> > > > > +++ b/Documentation/filesystems/f2fs.rst
> > > > > @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
> > > > > data block update frequency of the extent per inode, in
> > > > > order to provide better temperature hints for data block
> > > > > allocation.
> > > > > +errors=%s Specify f2fs behavior on critical errors. This supports modes:
> > > > > + "panic", "continue" and "remount-ro", respectively, trigger
> > > > > + panic immediately, continue without doing anything, and remount
> > > > > + the partition in read-only mode. By default it uses "remount-ro"
> > > > > + mode.
> > > > > ======================== ============================================================
> > > > > Debugfs Entries
> > > > > diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> > > > > index 73ae4e85e70d..e6b266bb9ce0 100644
> > > > > --- a/fs/f2fs/checkpoint.c
> > > > > +++ b/fs/f2fs/checkpoint.c
> > > > > @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
> > > > > unsigned char reason)
> > > > > {
> > > > > f2fs_build_fault_attr(sbi, 0, 0);
> > > > > - set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > > > - if (!end_io) {
> > > > > + if (!end_io)
> > > > > f2fs_flush_merged_writes(sbi);
> > > > > -
> > > > > - f2fs_handle_stop(sbi, reason);
> > > > > - }
> > > > > + f2fs_handle_critical_error(sbi, reason, end_io);
> > > > > }
> > > > > /*
> > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > > > > index ac4942f8e83e..7d9c5a65f974 100644
> > > > > --- a/fs/f2fs/f2fs.h
> > > > > +++ b/fs/f2fs/f2fs.h
> > > > > @@ -162,6 +162,7 @@ struct f2fs_mount_info {
> > > > > int fs_mode; /* fs mode: LFS or ADAPTIVE */
> > > > > int bggc_mode; /* bggc mode: off, on or sync */
> > > > > int memory_mode; /* memory mode */
> > > > > + int errors; /* errors parameter */
> > > > > int discard_unit; /*
> > > > > * discard command's offset/size should
> > > > > * be aligned to this unit: block,
> > > > > @@ -1370,6 +1371,12 @@ enum {
> > > > > MEMORY_MODE_LOW, /* memory mode for low memry devices */
> > > > > };
> > > > > +enum errors_option {
> > > > > + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
> > > > > + MOUNT_ERRORS_CONTINUE, /* continue on errors */
> > > > > + MOUNT_ERRORS_PANIC, /* panic on errors */
> > > > > +};
> > > > > +
> > > > > static inline int f2fs_test_bit(unsigned int nr, char *addr);
> > > > > static inline void f2fs_set_bit(unsigned int nr, char *addr);
> > > > > static inline void f2fs_clear_bit(unsigned int nr, char *addr);
> > > > > @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
> > > > > struct workqueue_struct *post_read_wq; /* post read workqueue */
> > > > > - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> > > > > - spinlock_t error_lock; /* protect errors array */
> > > > > + /*
> > > > > + * If we are in irq context, let's update error information into
> > > > > + * on-disk superblock in the work.
> > > > > + */
> > > > > + struct work_struct s_error_work;
> > > > > + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
> > > > > + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
> > > > > + spinlock_t error_lock; /* protect errors/stop_reason array */
> > > > > bool error_dirty; /* errors of sb is dirty */
> > > > > struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
> > > > > @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
> > > > > int f2fs_quota_sync(struct super_block *sb, int type);
> > > > > loff_t max_file_blocks(struct inode *inode);
> > > > > void f2fs_quota_off_umount(struct super_block *sb);
> > > > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
> > > > > void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
> > > > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > > > + bool irq_context);
> > > > > void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
> > > > > int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
> > > > > int f2fs_sync_fs(struct super_block *sb, int sync);
> > > > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > > > > index 14e9a20e68df..10743e864d57 100644
> > > > > --- a/fs/f2fs/file.c
> > > > > +++ b/fs/f2fs/file.c
> > > > > @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > > ret = 0;
> > > > > f2fs_stop_checkpoint(sbi, false,
> > > > > STOP_CP_REASON_SHUTDOWN);
> > > > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > trace_f2fs_shutdown(sbi, in, ret);
> > > > > }
> > > > > return ret;
> > > > > @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > > if (ret)
> > > > > goto out;
> > > > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > thaw_bdev(sb->s_bdev);
> > > > > break;
> > > > > case F2FS_GOING_DOWN_METASYNC:
> > > > > @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
> > > > > if (ret)
> > > > > goto out;
> > > > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > break;
> > > > > case F2FS_GOING_DOWN_NOSYNC:
> > > > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > break;
> > > > > case F2FS_GOING_DOWN_METAFLUSH:
> > > > > f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
> > > > > f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
> > > > > - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > break;
> > > > > case F2FS_GOING_DOWN_NEED_FSCK:
> > > > > set_sbi_flag(sbi, SBI_NEED_FSCK);
> > > > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > > > > index f1d0dd9c5a6c..6dc2bc710ba8 100644
> > > > > --- a/fs/f2fs/gc.c
> > > > > +++ b/fs/f2fs/gc.c
> > > > > @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
> > > > > if (gc_th->gc_wake)
> > > > > gc_th->gc_wake = false;
> > > > > - if (try_to_freeze()) {
> > > > > + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
> > > > > stat_other_skip_bggc_count(sbi);
> > > > > continue;
> > > > > }
> > > > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > > > > index 23574894e170..1573bf123197 100644
> > > > > --- a/fs/f2fs/super.c
> > > > > +++ b/fs/f2fs/super.c
> > > > > @@ -164,6 +164,7 @@ enum {
> > > > > Opt_discard_unit,
> > > > > Opt_memory_mode,
> > > > > Opt_age_extent_cache,
> > > > > + Opt_errors,
> > > > > Opt_err,
> > > > > };
> > > > > @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
> > > > > {Opt_discard_unit, "discard_unit=%s"},
> > > > > {Opt_memory_mode, "memory=%s"},
> > > > > {Opt_age_extent_cache, "age_extent_cache"},
> > > > > + {Opt_errors, "errors=%s"},
> > > > > {Opt_err, NULL},
> > > > > };
> > > > > @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> > > > > case Opt_age_extent_cache:
> > > > > set_opt(sbi, AGE_EXTENT_CACHE);
> > > > > break;
> > > > > + case Opt_errors:
> > > > > + name = match_strdup(&args[0]);
> > > > > + if (!name)
> > > > > + return -ENOMEM;
> > > > > + if (!strcmp(name, "remount-ro")) {
> > > > > + F2FS_OPTION(sbi).errors =
> > > > > + MOUNT_ERRORS_READONLY;
> > > > > + } else if (!strcmp(name, "continue")) {
> > > > > + F2FS_OPTION(sbi).errors =
> > > > > + MOUNT_ERRORS_CONTINUE;
> > > > > + } else if (!strcmp(name, "panic")) {
> > > > > + F2FS_OPTION(sbi).errors =
> > > > > + MOUNT_ERRORS_PANIC;
> > > > > + } else {
> > > > > + kfree(name);
> > > > > + return -EINVAL;
> > > > > + }
> > > > > + kfree(name);
> > > > > + break;
> > > > > default:
> > > > > f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> > > > > p);
> > > > > @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
> > > > > f2fs_destroy_node_manager(sbi);
> > > > > f2fs_destroy_segment_manager(sbi);
> > > > > + /* flush s_error_work before sbi destroy */
> > > > > + flush_work(&sbi->s_error_work);
> > > > > +
> > > > > f2fs_destroy_post_read_wq(sbi);
> > > > > kvfree(sbi->ckpt);
> > > > > @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> > > > > else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
> > > > > seq_printf(seq, ",memory=%s", "low");
> > > > > + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
> > > > > + seq_printf(seq, ",errors=%s", "remount-ro");
> > > > > + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
> > > > > + seq_printf(seq, ",errors=%s", "continue");
> > > > > + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
> > > > > + seq_printf(seq, ",errors=%s", "panic");
> > > > > +
> > > > > return 0;
> > > > > }
> > > > > @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
> > > > > }
> > > > > F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
> > > > > F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
> > > > > + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
> > > >
> > > > Was the default MOUNT_ERRORS_CONTINUE?
> > >
> > > Okay, let me revise in v9.
> > >
> > > Please note that main difference between continue mode and readonly mode once
> > > cp_error is set:
> > >
> > > continue mode readonly mode
> > > access ops: Normal Normal
> > > modify ops: -EIO -EROFS
> > > mount option rw ro
> >
> > Could you please add the difference of all the modes in patch description?
> >
> > I think it'd be good to add:
> > 1. access ops
> > 2. syscall errors
> > 3. mount option
> > 4. pending data writes
> > 5. pending node writes
> > 6. pending meta writes
>
> Sure, I'd like to add below description into f2fs.rst, any comments?
>
> continue remount-ro panic
> access ops normal noraml N/A
> syscall errors -EIO -EROFS N/A
> mount option rw ro N/A

So the only difference is the above two, as in any cases, we won't flush pending
pages at all? What about keeping nondir/node pages in the remount-ro case
differently?

> pending dir write keep keep N/A
> pending nondir write drop drop N/A
> pending node write drop drop N/A
> pending meta write keep keep N/A
>
> Thanks,
>
> >
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > > sbi->sb->s_flags &= ~SB_INLINECRYPT;
> > > > > @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> > > > > if (err)
> > > > > goto restore_opts;
> > > > > + /* flush outstanding errors before changing fs state */
> > > > > + flush_work(&sbi->s_error_work);
> > > > > +
> > > > > /*
> > > > > * Previous and new state of filesystem is RO,
> > > > > * so skip checking GC and FLUSH_MERGE conditions.
> > > > > @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
> > > > > return err;
> > > > > }
> > > > > -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
> > > > > +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
> > > > > +{
> > > > > + unsigned long flags;
> > > > > +
> > > > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > > > + sbi->stop_reason[reason]++;
> > > > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > > +}
> > > > > +
> > > > > +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
> > > > > {
> > > > > struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > > > + unsigned long flags;
> > > > > int err;
> > > > > f2fs_down_write(&sbi->sb_lock);
> > > > > - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
> > > > > - raw_super->s_stop_reason[reason]++;
> > > > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
> > > > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > > err = f2fs_commit_super(sbi, false);
> > > > > - if (err)
> > > > > - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
> > > > > - reason, err);
> > > > > +
> > > > > f2fs_up_write(&sbi->sb_lock);
> > > > > + if (err)
> > > > > + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
> > > > > }
> > > > > void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
> > > > > {
> > > > > - spin_lock(&sbi->error_lock);
> > > > > + unsigned long flags;
> > > > > +
> > > > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > if (!test_bit(flag, (unsigned long *)sbi->errors)) {
> > > > > set_bit(flag, (unsigned long *)sbi->errors);
> > > > > sbi->error_dirty = true;
> > > > > }
> > > > > - spin_unlock(&sbi->error_lock);
> > > > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > > }
> > > > > static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
> > > > > {
> > > > > + unsigned long flags;
> > > > > bool need_update = false;
> > > > > - spin_lock(&sbi->error_lock);
> > > > > + spin_lock_irqsave(&sbi->error_lock, flags);
> > > > > if (sbi->error_dirty) {
> > > > > memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
> > > > > MAX_F2FS_ERRORS);
> > > > > sbi->error_dirty = false;
> > > > > need_update = true;
> > > > > }
> > > > > - spin_unlock(&sbi->error_lock);
> > > > > + spin_unlock_irqrestore(&sbi->error_lock, flags);
> > > > > return need_update;
> > > > > }
> > > > > @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
> > > > > f2fs_up_write(&sbi->sb_lock);
> > > > > }
> > > > > +static bool system_going_down(void)
> > > > > +{
> > > > > + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
> > > > > + || system_state == SYSTEM_RESTART;
> > > > > +}
> > > > > +
> > > > > +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
> > > > > + bool irq_context)
> > > > > +{
> > > > > + struct super_block *sb = sbi->sb;
> > > > > + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
> > > > > + bool continue_fs = !shutdown &&
> > > > > + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
> > > > > +
> > > > > + set_ckpt_flags(sbi, CP_ERROR_FLAG);
> > > > > +
> > > > > + if (!f2fs_hw_is_readonly(sbi)) {
> > > > > + save_stop_reason(sbi, reason);
> > > > > +
> > > > > + if (irq_context && !shutdown)
> > > > > + schedule_work(&sbi->s_error_work);
> > > > > + else
> > > > > + f2fs_record_stop_reason(sbi);
> > > > > + }
> > > > > +
> > > > > + /*
> > > > > + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
> > > > > + * could panic during 'reboot -f' as the underlying device got already
> > > > > + * disabled.
> > > > > + */
> > > > > + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
> > > > > + !shutdown && !system_going_down() &&
> > > > > + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
> > > > > + panic("F2FS-fs (device %s): panic forced after error\n",
> > > > > + sb->s_id);
> > > > > +
> > > > > + if (shutdown)
> > > > > + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
> > > > > +
> > > > > + /* continue filesystem operators if errors=continue */
> > > > > + if (continue_fs || f2fs_readonly(sb))
> > > > > + return;
> > > > > +
> > > > > + f2fs_warn(sbi, "Remounting filesystem read-only");
> > > > > + /*
> > > > > + * Make sure updated value of ->s_mount_flags will be visible before
> > > > > + * ->s_flags update
> > > > > + */
> > > > > + smp_wmb();
> > > > > + sb->s_flags |= SB_RDONLY;
> > > > > +}
> > > > > +
> > > > > +static void f2fs_record_error_work(struct work_struct *work)
> > > > > +{
> > > > > + struct f2fs_sb_info *sbi = container_of(work,
> > > > > + struct f2fs_sb_info, s_error_work);
> > > > > +
> > > > > + f2fs_record_stop_reason(sbi);
> > > > > +}
> > > > > +
> > > > > static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
> > > > > {
> > > > > struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
> > > > > @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > > > sb->s_fs_info = sbi;
> > > > > sbi->raw_super = raw_super;
> > > > > + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
> > > > > memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
> > > > > + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
> > > > > /* precompute checksum seed for metadata */
> > > > > if (f2fs_sb_has_inode_chksum(sbi))
> > > > > @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
> > > > > f2fs_destroy_segment_manager(sbi);
> > > > > stop_ckpt_thread:
> > > > > f2fs_stop_ckpt_thread(sbi);
> > > > > + /* flush s_error_work before sbi destroy */
> > > > > + flush_work(&sbi->s_error_work);
> > > > > f2fs_destroy_post_read_wq(sbi);
> > > > > free_devices:
> > > > > destroy_device_list(sbi);
> > > > > --
> > > > > 2.36.1

2023-04-22 12:20:46

by Chao Yu

[permalink] [raw]
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic mountoption

On 2023/4/22 3:31, Jaegeuk Kim wrote:
> On 04/21, Chao Yu wrote:
>> On 2023/4/21 0:34, Jaegeuk Kim wrote:
>>> On 04/20, Chao Yu wrote:
>>>> On 2023/4/20 4:19, Jaegeuk Kim wrote:
>>>>> On 04/08, Chao Yu wrote:
>>>>>> This patch supports errors=remount-ro|continue|panic mount option,
>>>>>> by default it uses "remount-ro" mode.
>>>>>>
>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>> Signed-off-by: Yangtao Li <[email protected]>
>>>>>> ---
>>>>>> v8:
>>>>>> - record stop reason synchronously if it's from ioctl shutdown interface
>>>>>> - fix typo in doc and commit message.
>>>>>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>>>>> Documentation/filesystems/f2fs.rst | 5 ++
>>>>>> fs/f2fs/checkpoint.c | 7 +-
>>>>>> fs/f2fs/f2fs.h | 20 ++++-
>>>>>> fs/f2fs/file.c | 5 --
>>>>>> fs/f2fs/gc.c | 2 +-
>>>>>> fs/f2fs/super.c | 134 ++++++++++++++++++++++++++---
>>>>>> 6 files changed, 149 insertions(+), 24 deletions(-)
>>>>>>
>>>>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>>>>> index 2055e72871fe..ba4c02fad647 100644
>>>>>> --- a/Documentation/filesystems/f2fs.rst
>>>>>> +++ b/Documentation/filesystems/f2fs.rst
>>>>>> @@ -351,6 +351,11 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
>>>>>> data block update frequency of the extent per inode, in
>>>>>> order to provide better temperature hints for data block
>>>>>> allocation.
>>>>>> +errors=%s Specify f2fs behavior on critical errors. This supports modes:
>>>>>> + "panic", "continue" and "remount-ro", respectively, trigger
>>>>>> + panic immediately, continue without doing anything, and remount
>>>>>> + the partition in read-only mode. By default it uses "remount-ro"
>>>>>> + mode.
>>>>>> ======================== ============================================================
>>>>>> Debugfs Entries
>>>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>>>> index 73ae4e85e70d..e6b266bb9ce0 100644
>>>>>> --- a/fs/f2fs/checkpoint.c
>>>>>> +++ b/fs/f2fs/checkpoint.c
>>>>>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>>>>> unsigned char reason)
>>>>>> {
>>>>>> f2fs_build_fault_attr(sbi, 0, 0);
>>>>>> - set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>>>> - if (!end_io) {
>>>>>> + if (!end_io)
>>>>>> f2fs_flush_merged_writes(sbi);
>>>>>> -
>>>>>> - f2fs_handle_stop(sbi, reason);
>>>>>> - }
>>>>>> + f2fs_handle_critical_error(sbi, reason, end_io);
>>>>>> }
>>>>>> /*
>>>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>>>> index ac4942f8e83e..7d9c5a65f974 100644
>>>>>> --- a/fs/f2fs/f2fs.h
>>>>>> +++ b/fs/f2fs/f2fs.h
>>>>>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>>>>> int fs_mode; /* fs mode: LFS or ADAPTIVE */
>>>>>> int bggc_mode; /* bggc mode: off, on or sync */
>>>>>> int memory_mode; /* memory mode */
>>>>>> + int errors; /* errors parameter */
>>>>>> int discard_unit; /*
>>>>>> * discard command's offset/size should
>>>>>> * be aligned to this unit: block,
>>>>>> @@ -1370,6 +1371,12 @@ enum {
>>>>>> MEMORY_MODE_LOW, /* memory mode for low memry devices */
>>>>>> };
>>>>>> +enum errors_option {
>>>>>> + MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
>>>>>> + MOUNT_ERRORS_CONTINUE, /* continue on errors */
>>>>>> + MOUNT_ERRORS_PANIC, /* panic on errors */
>>>>>> +};
>>>>>> +
>>>>>> static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>>>>> static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>>>>> static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>>>>>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>>>>> struct workqueue_struct *post_read_wq; /* post read workqueue */
>>>>>> - unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>>>>>> - spinlock_t error_lock; /* protect errors array */
>>>>>> + /*
>>>>>> + * If we are in irq context, let's update error information into
>>>>>> + * on-disk superblock in the work.
>>>>>> + */
>>>>>> + struct work_struct s_error_work;
>>>>>> + unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
>>>>>> + unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
>>>>>> + spinlock_t error_lock; /* protect errors/stop_reason array */
>>>>>> bool error_dirty; /* errors of sb is dirty */
>>>>>> struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
>>>>>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>>> int f2fs_quota_sync(struct super_block *sb, int type);
>>>>>> loff_t max_file_blocks(struct inode *inode);
>>>>>> void f2fs_quota_off_umount(struct super_block *sb);
>>>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>>>>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>>>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>>>> + bool irq_context);
>>>>>> void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>>>>> int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>>> int f2fs_sync_fs(struct super_block *sb, int sync);
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 14e9a20e68df..10743e864d57 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>> ret = 0;
>>>>>> f2fs_stop_checkpoint(sbi, false,
>>>>>> STOP_CP_REASON_SHUTDOWN);
>>>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> trace_f2fs_shutdown(sbi, in, ret);
>>>>>> }
>>>>>> return ret;
>>>>>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>> if (ret)
>>>>>> goto out;
>>>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> thaw_bdev(sb->s_bdev);
>>>>>> break;
>>>>>> case F2FS_GOING_DOWN_METASYNC:
>>>>>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>>> if (ret)
>>>>>> goto out;
>>>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> break;
>>>>>> case F2FS_GOING_DOWN_NOSYNC:
>>>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> break;
>>>>>> case F2FS_GOING_DOWN_METAFLUSH:
>>>>>> f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>>>>> f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>>>> - set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> break;
>>>>>> case F2FS_GOING_DOWN_NEED_FSCK:
>>>>>> set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>>>>> if (gc_th->gc_wake)
>>>>>> gc_th->gc_wake = false;
>>>>>> - if (try_to_freeze()) {
>>>>>> + if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>>>>> stat_other_skip_bggc_count(sbi);
>>>>>> continue;
>>>>>> }
>>>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>>>> index 23574894e170..1573bf123197 100644
>>>>>> --- a/fs/f2fs/super.c
>>>>>> +++ b/fs/f2fs/super.c
>>>>>> @@ -164,6 +164,7 @@ enum {
>>>>>> Opt_discard_unit,
>>>>>> Opt_memory_mode,
>>>>>> Opt_age_extent_cache,
>>>>>> + Opt_errors,
>>>>>> Opt_err,
>>>>>> };
>>>>>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>>>>> {Opt_discard_unit, "discard_unit=%s"},
>>>>>> {Opt_memory_mode, "memory=%s"},
>>>>>> {Opt_age_extent_cache, "age_extent_cache"},
>>>>>> + {Opt_errors, "errors=%s"},
>>>>>> {Opt_err, NULL},
>>>>>> };
>>>>>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>>> case Opt_age_extent_cache:
>>>>>> set_opt(sbi, AGE_EXTENT_CACHE);
>>>>>> break;
>>>>>> + case Opt_errors:
>>>>>> + name = match_strdup(&args[0]);
>>>>>> + if (!name)
>>>>>> + return -ENOMEM;
>>>>>> + if (!strcmp(name, "remount-ro")) {
>>>>>> + F2FS_OPTION(sbi).errors =
>>>>>> + MOUNT_ERRORS_READONLY;
>>>>>> + } else if (!strcmp(name, "continue")) {
>>>>>> + F2FS_OPTION(sbi).errors =
>>>>>> + MOUNT_ERRORS_CONTINUE;
>>>>>> + } else if (!strcmp(name, "panic")) {
>>>>>> + F2FS_OPTION(sbi).errors =
>>>>>> + MOUNT_ERRORS_PANIC;
>>>>>> + } else {
>>>>>> + kfree(name);
>>>>>> + return -EINVAL;
>>>>>> + }
>>>>>> + kfree(name);
>>>>>> + break;
>>>>>> default:
>>>>>> f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>>>>> p);
>>>>>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>>>> f2fs_destroy_node_manager(sbi);
>>>>>> f2fs_destroy_segment_manager(sbi);
>>>>>> + /* flush s_error_work before sbi destroy */
>>>>>> + flush_work(&sbi->s_error_work);
>>>>>> +
>>>>>> f2fs_destroy_post_read_wq(sbi);
>>>>>> kvfree(sbi->ckpt);
>>>>>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>>> else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>>>>> seq_printf(seq, ",memory=%s", "low");
>>>>>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>>>>>> + seq_printf(seq, ",errors=%s", "remount-ro");
>>>>>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>>>>>> + seq_printf(seq, ",errors=%s", "continue");
>>>>>> + else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>>>>>> + seq_printf(seq, ",errors=%s", "panic");
>>>>>> +
>>>>>> return 0;
>>>>>> }
>>>>>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>>>>> }
>>>>>> F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>>>>> F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>>>>>> + F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>>>>>
>>>>> Was the default MOUNT_ERRORS_CONTINUE?
>>>>
>>>> Okay, let me revise in v9.
>>>>
>>>> Please note that main difference between continue mode and readonly mode once
>>>> cp_error is set:
>>>>
>>>> continue mode readonly mode
>>>> access ops: Normal Normal
>>>> modify ops: -EIO -EROFS
>>>> mount option rw ro
>>>
>>> Could you please add the difference of all the modes in patch description?
>>>
>>> I think it'd be good to add:
>>> 1. access ops
>>> 2. syscall errors
>>> 3. mount option
>>> 4. pending data writes
>>> 5. pending node writes
>>> 6. pending meta writes
>>
>> Sure, I'd like to add below description into f2fs.rst, any comments?
>>
>> continue remount-ro panic
>> access ops normal noraml N/A
>> syscall errors -EIO -EROFS N/A
>> mount option rw ro N/A
>
> So the only difference is the above two, as in any cases, we won't flush pending
> pages at all? What about keeping nondir/node pages in the remount-ro case
> differently?

I revised v10 w/ below diff.

---
Documentation/filesystems/f2fs.rst | 8 ++++++++
fs/f2fs/data.c | 4 ++++
fs/f2fs/node.c | 3 +++
3 files changed, 15 insertions(+)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index ce0963ba255b..79670e182250 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -356,6 +356,14 @@ errors=%s Specify f2fs behavior on critical errors. This supports modes:
panic immediately, continue without doing anything, and remount
the partition in read-only mode. By default it uses "continue"
mode.
+ continue remount-ro panic
+ access ops normal noraml N/A
+ syscall errors -EIO -EROFS N/A
+ mount option rw ro N/A
+ pending dir write keep keep N/A
+ pending non-dir write drop keep N/A
+ pending node write drop keep N/A
+ pending meta write keep keep N/A
======================== ============================================================

Debugfs Entries
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7165b1202f53..f26eac327d6e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2807,6 +2807,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (S_ISDIR(inode->i_mode) &&
!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
goto redirty_out;
+
+ /* keep data pages in remount-ro mode */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ goto redirty_out;
goto out;
}

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index bd1dad523796..834c6f099c95 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1596,6 +1596,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
trace_f2fs_writepage(page, NODE);

if (unlikely(f2fs_cp_error(sbi))) {
+ /* keep node pages in remount-ro mode */
+ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
+ goto redirty_out;
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
--
2.36.1

>
>> pending dir write keep keep N/A
>> pending nondir write drop drop N/A
>> pending node write drop drop N/A
>> pending meta write keep keep N/A
>>
>> Thanks,
>>
>>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>> sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>>>>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>>> if (err)
>>>>>> goto restore_opts;
>>>>>> + /* flush outstanding errors before changing fs state */
>>>>>> + flush_work(&sbi->s_error_work);
>>>>>> +
>>>>>> /*
>>>>>> * Previous and new state of filesystem is RO,
>>>>>> * so skip checking GC and FLUSH_MERGE conditions.
>>>>>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>>>>>> return err;
>>>>>> }
>>>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>>>>>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>>>>>> +{
>>>>>> + unsigned long flags;
>>>>>> +
>>>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>>>> + if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>>>> + sbi->stop_reason[reason]++;
>>>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>>> +}
>>>>>> +
>>>>>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>>>>>> {
>>>>>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>>>> + unsigned long flags;
>>>>>> int err;
>>>>>> f2fs_down_write(&sbi->sb_lock);
>>>>>> - if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>>>> - raw_super->s_stop_reason[reason]++;
>>>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>>>> + memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>>>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>>> err = f2fs_commit_super(sbi, false);
>>>>>> - if (err)
>>>>>> - f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>>>>>> - reason, err);
>>>>>> +
>>>>>> f2fs_up_write(&sbi->sb_lock);
>>>>>> + if (err)
>>>>>> + f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>>>>>> }
>>>>>> void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>>>>>> {
>>>>>> - spin_lock(&sbi->error_lock);
>>>>>> + unsigned long flags;
>>>>>> +
>>>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>>>> if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>>>>>> set_bit(flag, (unsigned long *)sbi->errors);
>>>>>> sbi->error_dirty = true;
>>>>>> }
>>>>>> - spin_unlock(&sbi->error_lock);
>>>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>>> }
>>>>>> static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>>>>>> {
>>>>>> + unsigned long flags;
>>>>>> bool need_update = false;
>>>>>> - spin_lock(&sbi->error_lock);
>>>>>> + spin_lock_irqsave(&sbi->error_lock, flags);
>>>>>> if (sbi->error_dirty) {
>>>>>> memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>>>>>> MAX_F2FS_ERRORS);
>>>>>> sbi->error_dirty = false;
>>>>>> need_update = true;
>>>>>> }
>>>>>> - spin_unlock(&sbi->error_lock);
>>>>>> + spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>>> return need_update;
>>>>>> }
>>>>>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>>>>>> f2fs_up_write(&sbi->sb_lock);
>>>>>> }
>>>>>> +static bool system_going_down(void)
>>>>>> +{
>>>>>> + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>>>>>> + || system_state == SYSTEM_RESTART;
>>>>>> +}
>>>>>> +
>>>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>>>> + bool irq_context)
>>>>>> +{
>>>>>> + struct super_block *sb = sbi->sb;
>>>>>> + bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>>>>>> + bool continue_fs = !shutdown &&
>>>>>> + F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>>>>>> +
>>>>>> + set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>>>> +
>>>>>> + if (!f2fs_hw_is_readonly(sbi)) {
>>>>>> + save_stop_reason(sbi, reason);
>>>>>> +
>>>>>> + if (irq_context && !shutdown)
>>>>>> + schedule_work(&sbi->s_error_work);
>>>>>> + else
>>>>>> + f2fs_record_stop_reason(sbi);
>>>>>> + }
>>>>>> +
>>>>>> + /*
>>>>>> + * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>>>>>> + * could panic during 'reboot -f' as the underlying device got already
>>>>>> + * disabled.
>>>>>> + */
>>>>>> + if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>>>>>> + !shutdown && !system_going_down() &&
>>>>>> + !is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>>>>>> + panic("F2FS-fs (device %s): panic forced after error\n",
>>>>>> + sb->s_id);
>>>>>> +
>>>>>> + if (shutdown)
>>>>>> + set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>>> +
>>>>>> + /* continue filesystem operators if errors=continue */
>>>>>> + if (continue_fs || f2fs_readonly(sb))
>>>>>> + return;
>>>>>> +
>>>>>> + f2fs_warn(sbi, "Remounting filesystem read-only");
>>>>>> + /*
>>>>>> + * Make sure updated value of ->s_mount_flags will be visible before
>>>>>> + * ->s_flags update
>>>>>> + */
>>>>>> + smp_wmb();
>>>>>> + sb->s_flags |= SB_RDONLY;
>>>>>> +}
>>>>>> +
>>>>>> +static void f2fs_record_error_work(struct work_struct *work)
>>>>>> +{
>>>>>> + struct f2fs_sb_info *sbi = container_of(work,
>>>>>> + struct f2fs_sb_info, s_error_work);
>>>>>> +
>>>>>> + f2fs_record_stop_reason(sbi);
>>>>>> +}
>>>>>> +
>>>>>> static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>>>>>> {
>>>>>> struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>>>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>> sb->s_fs_info = sbi;
>>>>>> sbi->raw_super = raw_super;
>>>>>> + INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>>>>>> memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>>>>>> + memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>>>>> /* precompute checksum seed for metadata */
>>>>>> if (f2fs_sb_has_inode_chksum(sbi))
>>>>>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>>> f2fs_destroy_segment_manager(sbi);
>>>>>> stop_ckpt_thread:
>>>>>> f2fs_stop_ckpt_thread(sbi);
>>>>>> + /* flush s_error_work before sbi destroy */
>>>>>> + flush_work(&sbi->s_error_work);
>>>>>> f2fs_destroy_post_read_wq(sbi);
>>>>>> free_devices:
>>>>>> destroy_device_list(sbi);
>>>>>> --
>>>>>> 2.36.1