2014-09-02 17:18:34

by Jan Kara

[permalink] [raw]
Subject: [PATCH] jbd2: Optimize jbd2_log_do_checkpoint() a bit

When we discover a written-out buffer in a transaction's checkpoint list,
we don't have to recheck the validity of the transaction. Either this is
the last buffer in the transaction - and then we are done - or it isn't,
and then we can just take another buffer from the checkpoint list without
dropping j_list_lock.

Signed-off-by: Jan Kara <[email protected]>
---
fs/jbd2/checkpoint.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 993a187527f3..3722e2e53638 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -343,12 +343,15 @@ restart:
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
result = -EIO;
- get_bh(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
- __jbd2_journal_remove_checkpoint(jh);
- spin_unlock(&journal->j_list_lock);
- __brelse(bh);
- goto retry;
+ if (__jbd2_journal_remove_checkpoint(jh)) {
+ /*
+ * This was the last buffer attached to the
+ * transaction. We are done.
+ */
+ goto out;
+ }
+ continue;
}
/*
* Important: we are about to write the buffer, and
--
1.8.1.4



2014-09-02 22:44:00

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] jbd2: Optimize jbd2_log_do_checkpoint() a bit

On Tue, Sep 02, 2014 at 07:18:30PM +0200, Jan Kara wrote:
> When we discover written out buffer in transaction checkpoint list we
> don't have to recheck validity of a transaction. Either this is the last
> buffer in a transaction - and then we are done - or this isn't and then
> we can just take another buffer from the checkpoint list without
> dropping j_list_lock.
>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/jbd2/checkpoint.c | 13 ++++++++-----
> 1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 993a187527f3..3722e2e53638 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -343,12 +343,15 @@ restart:
> if (!buffer_dirty(bh)) {
> if (unlikely(buffer_write_io_error(bh)) && !result)
> result = -EIO;
> - get_bh(bh);
> BUFFER_TRACE(bh, "remove from checkpoint");
> - __jbd2_journal_remove_checkpoint(jh);
> - spin_unlock(&journal->j_list_lock);
> - __brelse(bh);

Currently, all of the places which call
__jbd2_journal_remove_checkpoint(jh) are doing so with an elevated
b_count. For example, see __try_to_free_cp_buf().

After doing a lot of desk checking, I can't see any reason for holding
the elevated b_count, so I think it should be safe to remove it, but then
we can simplify the other uses __try_to_free_cp_buf(). For example,
in the loop that I folded from __wait_cp_io, we could drop the done
variable and change:

done = __jbd2_journal_remove_checkpoint(jh);
__brelse(bh);

to this:

__brelse(bh);
if (__jbd2_journal_remove_checkpoint(jh))
break;

How much testing have you done of this optimization? I'm tempted to
try nuking all of the elevated b_counts around the call to
__jbd2_journal_remove_checkpoint(), and then doing a test to see if
anything blows up.

Cheers,

- Ted

2014-09-02 22:47:00

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 1/2] jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()

The __jbd2_journal_remove_checkpoint() doesn't require an elevated
b_count; indeed, until the jh structure gets released by the call to
jbd2_journal_put_journal_head(), the bh's b_count is elevated by
virtue of the existence of the jh structure.

Suggested-by: Jan Kara <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/jbd2/checkpoint.c | 12 ++++--------
1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 22fcd50..cb6e17c 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -100,11 +100,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
* Get our reference so that bh cannot be freed before
* we unlock it
*/
- get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
- BUFFER_TRACE(bh, "release");
- __brelse(bh);
}
return ret;
}
@@ -216,7 +213,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
struct buffer_head *bh;
transaction_t *transaction;
tid_t this_tid;
- int result, batch_count = 0, done = 0;
+ int result, batch_count = 0;

jbd_debug(1, "Start checkpoint\n");

@@ -291,11 +288,9 @@ restart:
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
result = -EIO;
- get_bh(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
- __brelse(bh);
goto retry;
}
/*
@@ -338,7 +333,7 @@ restart2:
transaction->t_tid != this_tid)
goto out;

- while (!done && transaction->t_checkpoint_io_list) {
+ while (transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
get_bh(bh);
@@ -359,8 +354,9 @@ restart2:
* know that it has been written out and so we can
* drop it from the list
*/
- done = __jbd2_journal_remove_checkpoint(jh);
__brelse(bh);
+ if (__jbd2_journal_remove_checkpoint(jh))
+ break;
}
out:
spin_unlock(&journal->j_list_lock);
--
2.1.0


2014-09-02 22:47:03

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 2/2] jbd2: optimize jbd2_log_do_checkpoint() a bit

From: Jan Kara <[email protected]>

When we discover a written-out buffer in a transaction's checkpoint list,
we don't have to recheck the validity of the transaction. Either this is
the last buffer in the transaction - and then we are done - or it isn't,
and then we can just take another buffer from the checkpoint list
without dropping j_list_lock.

Signed-off-by: Jan Kara <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/jbd2/checkpoint.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index cb6e17c..7713f94 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -289,9 +289,10 @@ restart:
if (unlikely(buffer_write_io_error(bh)) && !result)
result = -EIO;
BUFFER_TRACE(bh, "remove from checkpoint");
- __jbd2_journal_remove_checkpoint(jh);
- spin_unlock(&journal->j_list_lock);
- goto retry;
+ if (__jbd2_journal_remove_checkpoint(jh))
+ /* The transaction was released; we're done */
+ goto out;
+ continue;
}
/*
* Important: we are about to write the buffer, and
--
2.1.0


2014-09-03 07:54:39

by Yuanhan Liu

[permalink] [raw]
Subject: Re: [PATCH 1/2] jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()

<resend due to malformed email>

On Wed, Sep 3, 2014 at 6:46 AM, Theodore Ts'o <[email protected]> wrote:
> The __jbd2_journal_remove_checkpoint() doesn't require an elevated
> b_count; indeed, until the jh structure gets released by the call to
> jbd2_journal_put_journal_head(), the bh's b_count is elevated by
> virtue of the existence of the jh structure.
>
> Suggested-by: Jan Kara <[email protected]>
> Signed-off-by: Theodore Ts'o <[email protected]>
> ---
> fs/jbd2/checkpoint.c | 12 ++++--------
> 1 file changed, 4 insertions(+), 8 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 22fcd50..cb6e17c 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -100,11 +100,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
> * Get our reference so that bh cannot be freed before
> * we unlock it
> */

I guess you need to drop those comments as well.

--yliu
> - get_bh(bh);
> JBUFFER_TRACE(jh, "remove from checkpoint list");
> ret = __jbd2_journal_remove_checkpoint(jh) + 1;
> - BUFFER_TRACE(bh, "release");
> - __brelse(bh);
> }
> return ret;

2014-09-03 16:03:31

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH] jbd2: Optimize jbd2_log_do_checkpoint() a bit

On Tue 02-09-14 17:59:30, Ted Tso wrote:
> On Tue, Sep 02, 2014 at 07:18:30PM +0200, Jan Kara wrote:
> > When we discover written out buffer in transaction checkpoint list we
> > don't have to recheck validity of a transaction. Either this is the last
> > buffer in a transaction - and then we are done - or this isn't and then
> > we can just take another buffer from the checkpoint list without
> > dropping j_list_lock.
> >
> > Signed-off-by: Jan Kara <[email protected]>
> > ---
> > fs/jbd2/checkpoint.c | 13 ++++++++-----
> > 1 file changed, 8 insertions(+), 5 deletions(-)
> >
> > diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> > index 993a187527f3..3722e2e53638 100644
> > --- a/fs/jbd2/checkpoint.c
> > +++ b/fs/jbd2/checkpoint.c
> > @@ -343,12 +343,15 @@ restart:
> > if (!buffer_dirty(bh)) {
> > if (unlikely(buffer_write_io_error(bh)) && !result)
> > result = -EIO;
> > - get_bh(bh);
> > BUFFER_TRACE(bh, "remove from checkpoint");
> > - __jbd2_journal_remove_checkpoint(jh);
> > - spin_unlock(&journal->j_list_lock);
> > - __brelse(bh);
>
> Currently, all of the places which call
> __jbd2_journal_remove_checkpoint(jh) are doing so with an elevated
> b_count. For example, see __try_to_free_cp_buf().
I did a bit of archeology and commit
932bb305ba2a01cd62809644d569f004e77a4355 removed the need to hold buffer
reference when calling __jbd2_journal_remove_checkpoint(). So it should be
safe to remove that reference handling also from __try_to_free_cp_buf().

> After doing a lot of desk checking, I can't see any reason for holding
> the elevated b_count, so I think it should be safe to remove it, but then
> we can simplify the other uses __try_to_free_cp_buf(). For example,
> in the loop that I folded from __wait_cp_io, we could drop the done
> variable and change:
>
> done = __jbd2_journal_remove_checkpoint(jh);
> __brelse(bh);
>
> to this:
>
> __brelse(bh);
> if (__jbd2_journal_remove_checkpoint(jh))
> break;
Well, we don't even need to grab bh reference unless we find the buffer
is locked and are going to wait for it. And yes, we can get rid of that
'done' variable.

> How much testing have you done of this optimization? I'm tempted to
> try nuking all of the elevated b_counts around the call to
> __jbd2_journal_remove_checkpoint(), and then doing a test to see if
> anything blows up.
Honestly, I didn't test much but I'm pretty confident we are safe to
remove those bh refs ;)

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2014-09-03 16:08:39

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/2] jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()

On Tue 02-09-14 18:46:39, Ted Tso wrote:
> The __jbd2_journal_remove_checkpoint() doesn't require an elevated
> b_count; indeed, until the jh structure gets released by the call to
> jbd2_journal_put_journal_head(), the bh's b_count is elevated by
> virtue of the existence of the jh structure.
>
> Suggested-by: Jan Kara <[email protected]>
> Signed-off-by: Theodore Ts'o <[email protected]>
Looks good so you can add:
Reviewed-by: Jan Kara <[email protected]>

Just we can do a bit more as I mentioned in my other email:

> @@ -359,8 +354,9 @@ restart2:
> * know that it has been written out and so we can
> * drop it from the list
> */
> - done = __jbd2_journal_remove_checkpoint(jh);
> __brelse(bh);
Here we don't need to grab a bh reference unless we are going to call
wait_on_buffer(), which moves get_bh / __brelse out of the fast path.

> + if (__jbd2_journal_remove_checkpoint(jh))
> + break;
> }
> out:
> spin_unlock(&journal->j_list_lock);

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2014-09-03 17:30:05

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/2] jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()

On Wed, Sep 03, 2014 at 03:48:56PM +0800, Yuanhan Liu wrote:
> > * Get our reference so that bh cannot be freed before
> > * we unlock it
> > */
>
> I guess you need remove those comments as well.

Good catch, thanks.

- Ted

2014-09-03 18:38:41

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH -v2] jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()

The __jbd2_journal_remove_checkpoint() doesn't require an elevated
b_count; indeed, until the jh structure gets released by the call to
jbd2_journal_put_journal_head(), the bh's b_count is elevated by
virtue of the existence of the jh structure.

Suggested-by: Jan Kara <[email protected]>
Reviewed-by: Jan Kara <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/jbd2/checkpoint.c | 19 +++++--------------
1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 18c7a8d..90d6091 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)

if (jh->b_transaction == NULL && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
- /*
- * Get our reference so that bh cannot be freed before
- * we unlock it
- */
- get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
- BUFFER_TRACE(bh, "release");
- __brelse(bh);
}
return ret;
}
@@ -216,7 +209,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
struct buffer_head *bh;
transaction_t *transaction;
tid_t this_tid;
- int result, batch_count = 0, done = 0;
+ int result, batch_count = 0;

jbd_debug(1, "Start checkpoint\n");

@@ -291,11 +284,9 @@ restart:
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
result = -EIO;
- get_bh(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
- __brelse(bh);
goto retry;
}
/*
@@ -338,12 +329,12 @@ restart2:
transaction->t_tid != this_tid)
goto out;

- while (!done && transaction->t_checkpoint_io_list) {
+ while (transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
- get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
+ get_bh(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
@@ -359,8 +350,8 @@ restart2:
* know that it has been written out and so we can
* drop it from the list
*/
- done = __jbd2_journal_remove_checkpoint(jh);
- __brelse(bh);
+ if (__jbd2_journal_remove_checkpoint(jh))
+ break;
}
out:
spin_unlock(&journal->j_list_lock);
--
2.1.0