2023-05-31 11:52:37

by Zhang Yi

[permalink] [raw]
Subject: [PATCH 0/5] jbd2: fix several checkpoint inconsistent issues

From: Zhang Yi <[email protected]>

Hello,

The first three patches are from [1] and are not changed, appending
another two (it depends on the first three) to fix another three race
issues in the checkpoint procedure which could also lead to inconsistent
results.

[1] https://lore.kernel.org/linux-ext4/[email protected]/

Thanks,
Yi.

Zhang Yi (4):
jbd2: recheck chechpointing non-dirty buffer
jbd2: remove t_checkpoint_io_list
jbd2: remove released parameter in journal_shrink_one_cp_list()
jbd2: fix a race when checking checkpoint buffer busy

Zhihao Cheng (1):
jbd2: Fix wrongly judgement for buffer head removing while doing
checkpoint

fs/jbd2/checkpoint.c | 186 +++++++++++-------------------------------
fs/jbd2/commit.c | 3 +-
fs/jbd2/transaction.c | 4 +-
include/linux/jbd2.h | 9 +-
4 files changed, 55 insertions(+), 147 deletions(-)

--
2.31.1



2023-05-31 11:52:40

by Zhang Yi

[permalink] [raw]
Subject: [PATCH 2/5] jbd2: remove t_checkpoint_io_list

From: Zhang Yi <[email protected]>

Since t_checkpoint_io_list was stop using in jbd2_log_do_checkpoint()
now, it's time to remove the whole t_checkpoint_io_list logic.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/jbd2/checkpoint.c | 42 ++----------------------------------------
fs/jbd2/commit.c | 3 +--
include/linux/jbd2.h | 6 ------
3 files changed, 3 insertions(+), 48 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 25e3c20eb19f..55d6efdbea64 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -27,7 +27,7 @@
*
* Called with j_list_lock held.
*/
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;

@@ -40,23 +40,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
}
}

-/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
- if (transaction->t_checkpoint_io_list == jh) {
- transaction->t_checkpoint_io_list = jh->b_cpnext;
- if (transaction->t_checkpoint_io_list == jh)
- transaction->t_checkpoint_io_list = NULL;
- }
-}
-
/*
* Check a checkpoint buffer could be release or not.
*
@@ -503,15 +486,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
break;
- if (released)
- continue;
-
- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
- nr_to_scan, &released);
- if (*nr_to_scan == 0)
- break;
- if (need_resched() || spin_needbreak(&journal->j_list_lock))
- break;
} while (transaction != last_transaction);

if (transaction != last_transaction) {
@@ -566,17 +540,6 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
*/
if (need_resched())
return;
- if (ret)
- continue;
- /*
- * It is essential that we are as careful as in the case of
- * t_checkpoint_list with removing the buffer from the list as
- * we can possibly see not yet submitted buffers on io_list
- */
- ret = journal_clean_one_cp_list(transaction->
- t_checkpoint_io_list, destroy);
- if (need_resched())
- return;
/*
* Stop scanning if we couldn't free the transaction. This
* avoids pointless scanning of transactions which still
@@ -661,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
jbd2_journal_put_journal_head(jh);

/* Is this transaction empty? */
- if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+ if (transaction->t_checkpoint_list)
return 0;

/*
@@ -753,7 +716,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_forget == NULL);
J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
- J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b33155dd7001..1073259902a6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1141,8 +1141,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
- if (commit_transaction->t_checkpoint_list == NULL &&
- commit_transaction->t_checkpoint_io_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f619bae1dcc5..91a2cf4bc575 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -622,12 +622,6 @@ struct transaction_s
*/
struct journal_head *t_checkpoint_list;

- /*
- * Doubly-linked circular list of all buffers submitted for IO while
- * checkpointing. [j_list_lock]
- */
- struct journal_head *t_checkpoint_io_list;
-
/*
* Doubly-linked circular list of metadata buffers being
* shadowed by log IO. The IO buffers on the iobuf list and
--
2.31.1


2023-05-31 11:53:00

by Zhang Yi

[permalink] [raw]
Subject: [PATCH 5/5] jbd2: fix a race when checking checkpoint buffer busy

From: Zhang Yi <[email protected]>

Before removing checkpoint buffer from the t_checkpoint_list, we have to
check both BH_Dirty and BH_Lock bits together to distinguish buffers
have not been or were being written back. But __cp_buffer_busy() checks
them separately, it first check lock state and then check dirty, the
window between these two checks could be raced by writing back
procedure, which locks buffer and clears buffer dirty before I/O
completes. So it cannot guarantee checkpointing buffers been written
back to disk if some error happens later. Finally, it may clean
checkpoint transactions and lead to inconsistent filesystem.
jbd2_journal_forget() and __journal_try_to_free_buffer() also have the
same problem, so fix them by introduce a new helper to check the busy
state atomically.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217490
Cc: [email protected]
Signed-off-by: Zhang Yi <[email protected]>
---
fs/jbd2/checkpoint.c | 8 ++++----
fs/jbd2/transaction.c | 4 ++--
include/linux/jbd2.h | 3 +++
3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 620f3d345f3d..2dde5fd1f0dd 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -45,11 +45,11 @@ static inline void __buffer_unlink(struct journal_head *jh)
*
* Requires j_list_lock
*/
-static inline bool __cp_buffer_busy(struct journal_head *jh)
+static inline bool cp_buffer_busy(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);

- return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
+ return (jh->b_transaction || __cp_buffer_busy(bh));
}

/*
@@ -369,7 +369,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
jh = next_jh;
next_jh = jh->b_cpnext;

- if (!destroy && __cp_buffer_busy(jh))
+ if (!destroy && cp_buffer_busy(jh))
return 0;

if (__jbd2_journal_remove_checkpoint(jh))
@@ -413,7 +413,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
next_jh = jh->b_cpnext;

(*nr_to_scan)--;
- if (__cp_buffer_busy(jh))
+ if (cp_buffer_busy(jh))
continue;

nr_freed++;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 18611241f451..04863787c93e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1784,7 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
* Otherwise, if the buffer has been written to disk,
* it is safe to remove the checkpoint and drop it.
*/
- if (!buffer_dirty(bh)) {
+ if (!__cp_buffer_busy(bh)) {
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
goto drop;
@@ -2112,7 +2112,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)

jh = bh2jh(bh);

- if (buffer_locked(bh) || buffer_dirty(bh))
+ if (__cp_buffer_busy(bh))
goto out;

if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 91a2cf4bc575..b17d1efab787 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1440,6 +1440,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
extern void jbd2_journal_commit_transaction(journal_t *);

/* Checkpoint list management */
+#define __cp_buffer_busy(bh) \
+ ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
+
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
--
2.31.1


2023-05-31 11:53:14

by Zhang Yi

[permalink] [raw]
Subject: [PATCH 3/5] jbd2: remove released parameter in journal_shrink_one_cp_list()

From: Zhang Yi <[email protected]>

After t_checkpoint_io_list is gone, the 'released' parameter in
journal_shrink_one_cp_list() becomes useless, just remove it.

Signed-off-by: Zhang Yi <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
---
fs/jbd2/checkpoint.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 55d6efdbea64..3f52560912a9 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -391,15 +391,13 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
* journal_shrink_one_cp_list
*
* Find 'nr_to_scan' written-back checkpoint buffers in the given list
- * and try to release them. If the whole transaction is released, set
- * the 'released' parameter. Return the number of released checkpointed
+ * and try to release them. Return the number of released checkpointed
* buffers.
*
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- unsigned long *nr_to_scan,
- bool *released)
+ unsigned long *nr_to_scan)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
@@ -420,10 +418,8 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,

nr_freed++;
ret = __jbd2_journal_remove_checkpoint(jh);
- if (ret) {
- *released = true;
+ if (ret)
break;
- }

if (need_resched())
break;
@@ -445,7 +441,6 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
unsigned long *nr_to_scan)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- bool released;
tid_t first_tid = 0, last_tid = 0, next_tid = 0;
tid_t tid = 0;
unsigned long nr_freed = 0;
@@ -478,10 +473,9 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
tid = transaction->t_tid;
- released = false;

nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- nr_to_scan, &released);
+ nr_to_scan);
if (*nr_to_scan == 0)
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
--
2.31.1