2010-02-06 04:54:06

by Ding Dinghua

[permalink] [raw]
Subject: [PATCH] Jbd2: delay discarding buffers in journal_unmap_buffer

Delay discarding buffers in journal_unmap_buffer until
we know that "add to orphan" operation has definitely been
committed, otherwise the log space of committing transation
may be freed and reused before truncate get committed, updates
may get lost if crash happens.

Signed-off-by: dingdinghua <[email protected]>
---
fs/jbd2/commit.c | 10 +++++-----
fs/jbd2/transaction.c | 33 +++++++++++++++++++++------------
2 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1bc74b6..aabff6a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -930,12 +930,12 @@ restart_loop:
/* A buffer which has been freed while still being
* journaled by a previous transaction may end up still
* being dirty here, but we want to avoid writing back
- * that buffer in the future now that the last use has
- * been committed. That's not only a performance gain,
- * it also stops aliasing problems if the buffer is left
- * behind for writeback and gets reallocated for another
+ * that buffer in the future after the "add to orphan"
+ * operation been committed, That's not only a performance
+ * gain, it also stops aliasing problems if the buffer is
+ * left behind for writeback and gets reallocated for another
* use in a different page. */
- if (buffer_freed(bh)) {
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a051270..6e2971d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1783,16 +1783,20 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
} else if (transaction == journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "on committing transaction");
/*
- * If it is committing, we simply cannot touch it. We
- * can remove it's next_transaction pointer from the
- * running transaction if that is set, but nothing
- * else. */
+ * We don't know the transaction which "add to orphan" handle
+ * belongs to, there are three cases:
+ * (1) it's a transaction which has been committed
+ * (2) it's the committing transaction
+ * (3) it's the running transaction
+ * We can't clear the BH_jbddirty bit and discard the buffer
+ * before "add to orphan" has been committed, just
+ * be conservative, if running transaction exists and buffer
+ * has been dirtied(may be by previous transaction), wait until
+ * running transaction complete commit and do the discard work.
+ */
set_buffer_freed(bh);
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction ==
- journal->j_running_transaction);
- jh->b_next_transaction = NULL;
- }
+ if (journal->j_running_transaction && buffer_jbddirty(bh))
+ jh->b_next_transaction = journal->j_running_transaction;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1969,7 +1973,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
*/
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
- int was_dirty;
+ int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);

J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -1991,8 +1995,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
- __jbd2_journal_file_buffer(jh, jh->b_transaction,
- jh->b_modified ? BJ_Metadata : BJ_Reserved);
+ if (buffer_freed(bh))
+ jlist = BJ_Forget;
+ else if (jh->b_modified)
+ jlist = BJ_Metadata;
+ else
+ jlist = BJ_Reserved;
+ __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

if (was_dirty)
--
1.5.5.6



2010-02-08 16:03:28

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] Jbd2: delay discarding buffers in journal_unmap_buffer

On Sat 06-02-10 12:54:23, dingdinghua wrote:
> Delay discarding buffers in journal_unmap_buffer until
> we know that "add to orphan" operation has definitely been
> committed, otherwise the log space of committing transation
> may be freed and reused before truncate get committed, updates
> may get lost if crash happens.
>
> Signed-off-by: dingdinghua <[email protected]>
Thanks for the patch. Just two remarks to the comments below...

> ---
> fs/jbd2/commit.c | 10 +++++-----
> fs/jbd2/transaction.c | 33 +++++++++++++++++++++------------
> 2 files changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 1bc74b6..aabff6a 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -930,12 +930,12 @@ restart_loop:
> /* A buffer which has been freed while still being
> * journaled by a previous transaction may end up still
> * being dirty here, but we want to avoid writing back
> - * that buffer in the future now that the last use has
> - * been committed. That's not only a performance gain,
> - * it also stops aliasing problems if the buffer is left
> - * behind for writeback and gets reallocated for another
> + * that buffer in the future after the "add to orphan"
> + * operation been committed, That's not only a performance
> + * gain, it also stops aliasing problems if the buffer is
Please use TAB for indentation of the above line.

> + * left behind for writeback and gets reallocated for another
> * use in a different page. */
> - if (buffer_freed(bh)) {
> + if (buffer_freed(bh) && !jh->b_next_transaction) {
> clear_buffer_freed(bh);
> clear_buffer_jbddirty(bh);
> }
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index a051270..6e2971d 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1783,16 +1783,20 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
> } else if (transaction == journal->j_committing_transaction) {
> JBUFFER_TRACE(jh, "on committing transaction");
> /*
> - * If it is committing, we simply cannot touch it. We
> - * can remove it's next_transaction pointer from the
> - * running transaction if that is set, but nothing
> - * else. */
> + * We don't know the transaction which "add to orphan" handle
> + * belongs to, there are three cases:
> + * (1) it's a transaction which has been committed
> + * (2) it's the committing transaction
> + * (3) it's the running transaction
> + * We can't clear the BH_jbddirty bit and discard the buffer
> + * before "add to orphan" has been committed, just
> + * be conservative, if running transaction exists and buffer
> + * has been dirtied(may be by previous transaction), wait until
> + * running transaction complete commit and do the discard work.
> + */
I was thinking about how to better explain the problem in the comment
and here's what I'd do:
I'd add the comment below before line with transaction = jh->b_transaction
in journal_unmap_buffer:
We cannot remove the buffer from checkpoint lists until the transaction
adding inode to orphan list (let's call it T) is committed. Otherwise if
the transaction changing the buffer would be cleaned from the journal
before T is committed, a crash will cause that the correct contents of the
buffer will be lost. On the other hand we have to clear the buffer dirty
bit at latest at the moment when the transaction marking the buffer as
freed in the filesystem structures is committed because from that moment on
the buffer can be reallocated and used by a different page. Since the
block hasn't been freed yet but the inode has already been added to orphan
list, it is safe for us to add the buffer to BJ_Forget list of the newest
transaction.

And then I'd change the comment in the transaction ==
journal->j_committing_transaction case to:
The buffer is committing, we simply cannot touch it. So we just set
j_next_transaction to the running transaction (if there is one) and
mark buffer as freed so that commit code knows it should clear dirty
bits when it is done with the buffer.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2010-02-16 01:45:27

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] Jbd2: delay discarding buffers in journal_unmap_buffer

On Mon, Feb 08, 2010 at 05:03:33PM +0100, Jan Kara wrote:
> On Sat 06-02-10 12:54:23, dingdinghua wrote:
> > Delay discarding buffers in journal_unmap_buffer until
> > we know that "add to orphan" operation has definitely been
> > committed, otherwise the log space of committing transation
> > may be freed and reused before truncate get committed, updates
> > may get lost if crash happens.
> >
> > Signed-off-by: dingdinghua <[email protected]>
> Thanks for the patch. Just two remarks to the comments below...

Here's the version I've applied to the ext4 patch queue, which
includes Jan's suggested changes to the comments.

- Ted

commit 20ca286ad19b76483a79b44c5172feeaad02065b
Author: dingdinghua <[email protected]>
Date: Mon Feb 15 16:35:42 2010 -0500

jbd2: delay discarding buffers in journal_unmap_buffer

Delay discarding buffers in journal_unmap_buffer until
we know that "add to orphan" operation has definitely been
committed, otherwise the log space of committing transation
may be freed and reused before truncate get committed, updates
may get lost if crash happens.

Signed-off-by: dingdinghua <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 1bc74b6..3ee211e 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -930,12 +930,12 @@ restart_loop:
/* A buffer which has been freed while still being
* journaled by a previous transaction may end up still
* being dirty here, but we want to avoid writing back
- * that buffer in the future now that the last use has
- * been committed. That's not only a performance gain,
- * it also stops aliasing problems if the buffer is left
- * behind for writeback and gets reallocated for another
+ * that buffer in the future after the "add to orphan"
+ * operation been committed, That's not only a performance
+ * gain, it also stops aliasing problems if the buffer is
+ * left behind for writeback and gets reallocated for another
* use in a different page. */
- if (buffer_freed(bh)) {
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a051270..bfc70f5 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
if (!jh)
goto zap_buffer_no_jh;

+ /*
+ * We cannot remove the buffer from checkpoint lists until the
+ * transaction adding inode to orphan list (let's call it T)
+ * is committed. Otherwise if the transaction changing the
+ * buffer would be cleaned from the journal before T is
+ * committed, a crash will cause that the correct contents of
+ * the buffer will be lost. On the other hand we have to
+ * clear the buffer dirty bit at latest at the moment when the
+ * transaction marking the buffer as freed in the filesystem
+ * structures is committed because from that moment on the
+ * buffer can be reallocated and used by a different page.
+ * Since the block hasn't been freed yet but the inode has
+ * already been added to orphan list, it is safe for us to add
+ * the buffer to BJ_Forget list of the newest transaction.
+ */
transaction = jh->b_transaction;
if (transaction == NULL) {
/* First case: not on any transaction. If it
@@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
} else if (transaction == journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "on committing transaction");
/*
- * If it is committing, we simply cannot touch it. We
- * can remove it's next_transaction pointer from the
- * running transaction if that is set, but nothing
- * else. */
+ * The buffer is committing, we simply cannot touch
+ * it. So we just set j_next_transaction to the
+ * running transaction (if there is one) and mark
+ * buffer as freed so that commit code knows it should
+ * clear dirty bits when it is done with the buffer.
+ */
set_buffer_freed(bh);
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction ==
- journal->j_running_transaction);
- jh->b_next_transaction = NULL;
- }
+ if (journal->j_running_transaction && buffer_jbddirty(bh))
+ jh->b_next_transaction = journal->j_running_transaction;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
*/
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
- int was_dirty;
+ int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);

J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
- __jbd2_journal_file_buffer(jh, jh->b_transaction,
- jh->b_modified ? BJ_Metadata : BJ_Reserved);
+ if (buffer_freed(bh))
+ jlist = BJ_Forget;
+ else if (jh->b_modified)
+ jlist = BJ_Metadata;
+ else
+ jlist = BJ_Reserved;
+ __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

if (was_dirty)

2010-02-16 19:32:57

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] Jbd2: delay discarding buffers in journal_unmap_buffer

On Mon 15-02-10 20:45:21, [email protected] wrote:
> On Mon, Feb 08, 2010 at 05:03:33PM +0100, Jan Kara wrote:
> > On Sat 06-02-10 12:54:23, dingdinghua wrote:
> > > Delay discarding buffers in journal_unmap_buffer until
> > > we know that "add to orphan" operation has definitely been
> > > committed, otherwise the log space of committing transation
> > > may be freed and reused before truncate get committed, updates
> > > may get lost if crash happens.
> > >
> > > Signed-off-by: dingdinghua <[email protected]>
> > Thanks for the patch. Just two remarks to the comments below...
>
> Here's the version I've applied to the ext4 patch queue, which
> includes Jan's suggested changes to the comments.
Thanks for summing it up. The patch looks good.
Acked-by: Jan Kara <[email protected]>

I'm going to backport the patch to JBD now.

Honza


> commit 20ca286ad19b76483a79b44c5172feeaad02065b
> Author: dingdinghua <[email protected]>
> Date: Mon Feb 15 16:35:42 2010 -0500
>
> jbd2: delay discarding buffers in journal_unmap_buffer
>
> Delay discarding buffers in journal_unmap_buffer until
> we know that "add to orphan" operation has definitely been
> committed, otherwise the log space of committing transation
> may be freed and reused before truncate get committed, updates
> may get lost if crash happens.
>
> Signed-off-by: dingdinghua <[email protected]>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
>
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 1bc74b6..3ee211e 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -930,12 +930,12 @@ restart_loop:
> /* A buffer which has been freed while still being
> * journaled by a previous transaction may end up still
> * being dirty here, but we want to avoid writing back
> - * that buffer in the future now that the last use has
> - * been committed. That's not only a performance gain,
> - * it also stops aliasing problems if the buffer is left
> - * behind for writeback and gets reallocated for another
> + * that buffer in the future after the "add to orphan"
> + * operation been committed, That's not only a performance
> + * gain, it also stops aliasing problems if the buffer is
> + * left behind for writeback and gets reallocated for another
> * use in a different page. */
> - if (buffer_freed(bh)) {
> + if (buffer_freed(bh) && !jh->b_next_transaction) {
> clear_buffer_freed(bh);
> clear_buffer_jbddirty(bh);
> }
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index a051270..bfc70f5 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
> if (!jh)
> goto zap_buffer_no_jh;
>
> + /*
> + * We cannot remove the buffer from checkpoint lists until the
> + * transaction adding inode to orphan list (let's call it T)
> + * is committed. Otherwise if the transaction changing the
> + * buffer would be cleaned from the journal before T is
> + * committed, a crash will cause that the correct contents of
> + * the buffer will be lost. On the other hand we have to
> + * clear the buffer dirty bit at latest at the moment when the
> + * transaction marking the buffer as freed in the filesystem
> + * structures is committed because from that moment on the
> + * buffer can be reallocated and used by a different page.
> + * Since the block hasn't been freed yet but the inode has
> + * already been added to orphan list, it is safe for us to add
> + * the buffer to BJ_Forget list of the newest transaction.
> + */
> transaction = jh->b_transaction;
> if (transaction == NULL) {
> /* First case: not on any transaction. If it
> @@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
> } else if (transaction == journal->j_committing_transaction) {
> JBUFFER_TRACE(jh, "on committing transaction");
> /*
> - * If it is committing, we simply cannot touch it. We
> - * can remove it's next_transaction pointer from the
> - * running transaction if that is set, but nothing
> - * else. */
> + * The buffer is committing, we simply cannot touch
> + * it. So we just set j_next_transaction to the
> + * running transaction (if there is one) and mark
> + * buffer as freed so that commit code knows it should
> + * clear dirty bits when it is done with the buffer.
> + */
> set_buffer_freed(bh);
> - if (jh->b_next_transaction) {
> - J_ASSERT(jh->b_next_transaction ==
> - journal->j_running_transaction);
> - jh->b_next_transaction = NULL;
> - }
> + if (journal->j_running_transaction && buffer_jbddirty(bh))
> + jh->b_next_transaction = journal->j_running_transaction;
> jbd2_journal_put_journal_head(jh);
> spin_unlock(&journal->j_list_lock);
> jbd_unlock_bh_state(bh);
> @@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
> */
> void __jbd2_journal_refile_buffer(struct journal_head *jh)
> {
> - int was_dirty;
> + int was_dirty, jlist;
> struct buffer_head *bh = jh2bh(jh);
>
> J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
> @@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
> __jbd2_journal_temp_unlink_buffer(jh);
> jh->b_transaction = jh->b_next_transaction;
> jh->b_next_transaction = NULL;
> - __jbd2_journal_file_buffer(jh, jh->b_transaction,
> - jh->b_modified ? BJ_Metadata : BJ_Reserved);
> + if (buffer_freed(bh))
> + jlist = BJ_Forget;
> + else if (jh->b_modified)
> + jlist = BJ_Metadata;
> + else
> + jlist = BJ_Reserved;
> + __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
> J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
>
> if (was_dirty)
--
Jan Kara <[email protected]>
SUSE Labs, CR

2010-02-16 19:39:50

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] Jbd2: delay discarding buffers in journal_unmap_buffer

On Tue 16-02-10 20:33:05, Jan Kara wrote:
> On Mon 15-02-10 20:45:21, [email protected] wrote:
> > On Mon, Feb 08, 2010 at 05:03:33PM +0100, Jan Kara wrote:
> > > On Sat 06-02-10 12:54:23, dingdinghua wrote:
> > > > Delay discarding buffers in journal_unmap_buffer until
> > > > we know that "add to orphan" operation has definitely been
> > > > committed, otherwise the log space of committing transation
> > > > may be freed and reused before truncate get committed, updates
> > > > may get lost if crash happens.
> > > >
> > > > Signed-off-by: dingdinghua <[email protected]>
> > > Thanks for the patch. Just two remarks to the comments below...
> >
> > Here's the version I've applied to the ext4 patch queue, which
> > includes Jan's suggested changes to the comments.
> Thanks for summing it up. The patch looks good.
> Acked-by: Jan Kara <[email protected]>
>
> I'm going to backport the patch to JBD now.
OK, the backport is below. I've added it to my tree.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR
---

>From 63b4e55561560adf8a9c735f9feffbbf7d3978e2 Mon Sep 17 00:00:00 2001
From: Jan Kara <[email protected]>
Date: Tue, 16 Feb 2010 20:37:12 +0100
Subject: [PATCH] jbd: Delay discarding buffers in journal_unmap_buffer

Delay discarding buffers in journal_unmap_buffer until
we know that "add to orphan" operation has definitely been
committed, otherwise the log space of committing transation
may be freed and reused before truncate get committed, updates
may get lost if crash happens.

This patch is a backport of JBD2 fix by dingdinghua <[email protected]>.

Signed-off-by: Jan Kara <[email protected]>
---
fs/jbd/commit.c | 10 +++++-----
fs/jbd/transaction.c | 43 +++++++++++++++++++++++++++++++------------
2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 4bd8825..2c90e3e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -862,12 +862,12 @@ restart_loop:
/* A buffer which has been freed while still being
* journaled by a previous transaction may end up still
* being dirty here, but we want to avoid writing back
- * that buffer in the future now that the last use has
- * been committed. That's not only a performance gain,
- * it also stops aliasing problems if the buffer is left
- * behind for writeback and gets reallocated for another
+ * that buffer in the future after the "add to orphan"
+ * operation been committed, That's not only a performance
+ * gain, it also stops aliasing problems if the buffer is
+ * left behind for writeback and gets reallocated for another
* use in a different page. */
- if (buffer_freed(bh)) {
+ if (buffer_freed(bh) && !jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
}
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 006f9ad..99e9fea 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1864,6 +1864,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
if (!jh)
goto zap_buffer_no_jh;

+ /*
+ * We cannot remove the buffer from checkpoint lists until the
+ * transaction adding inode to orphan list (let's call it T)
+ * is committed. Otherwise if the transaction changing the
+ * buffer would be cleaned from the journal before T is
+ * committed, a crash will cause that the correct contents of
+ * the buffer will be lost. On the other hand we have to
+ * clear the buffer dirty bit at latest at the moment when the
+ * transaction marking the buffer as freed in the filesystem
+ * structures is committed because from that moment on the
+ * buffer can be reallocated and used by a different page.
+ * Since the block hasn't been freed yet but the inode has
+ * already been added to orphan list, it is safe for us to add
+ * the buffer to BJ_Forget list of the newest transaction.
+ */
transaction = jh->b_transaction;
if (transaction == NULL) {
/* First case: not on any transaction. If it
@@ -1929,16 +1944,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
goto zap_buffer;
}
/*
- * If it is committing, we simply cannot touch it. We
- * can remove it's next_transaction pointer from the
- * running transaction if that is set, but nothing
- * else. */
+ * The buffer is committing, we simply cannot touch
+ * it. So we just set j_next_transaction to the
+ * running transaction (if there is one) and mark
+ * buffer as freed so that commit code knows it should
+ * clear dirty bits when it is done with the buffer.
+ */
set_buffer_freed(bh);
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction ==
- journal->j_running_transaction);
- jh->b_next_transaction = NULL;
- }
+ if (journal->j_running_transaction && buffer_jbddirty(bh))
+ jh->b_next_transaction = journal->j_running_transaction;
journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -2120,7 +2134,7 @@ void journal_file_buffer(struct journal_head *jh,
*/
void __journal_refile_buffer(struct journal_head *jh)
{
- int was_dirty;
+ int was_dirty, jlist;
struct buffer_head *bh = jh2bh(jh);

J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -2142,8 +2156,13 @@ void __journal_refile_buffer(struct journal_head *jh)
__journal_temp_unlink_buffer(jh);
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
- __journal_file_buffer(jh, jh->b_transaction,
- jh->b_modified ? BJ_Metadata : BJ_Reserved);
+ if (buffer_freed(bh))
+ jlist = BJ_Forget;
+ else if (jh->b_modified)
+ jlist = BJ_Metadata;
+ else
+ jlist = BJ_Reserved;
+ __journal_file_buffer(jh, jh->b_transaction, jlist);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

if (was_dirty)
--
1.6.4.2