2011-08-14 02:53:42

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] jbd2: instrument jh on wrong transaction BUG_ON's

These assertions have been reported as being tripped. Instrument them
so we get more information about what might be going on.

Cc: [email protected]
Signed-off-by: "Theodore Ts'o" <[email protected]>
---

Curt, you might want to see if you can replicate that jbd2 crash with
this patch applied. Hopefully we'll get more information about what is
going on.

fs/jbd2/transaction.c | 42 +++++++++++++++++++++++++++++++++++++-----
1 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d71094..693fe63 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1093,8 +1093,19 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
JBUFFER_TRACE(jh, "fastpath");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_running_transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_running_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_running_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction->t_tid,
+ journal->j_running_transaction,
+ journal->j_running_transaction->t_tid);
+ BUG_ON(1);
+ }
goto out_unlock_bh;
}

@@ -1108,9 +1119,30 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_committing_transaction);
- J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_committing_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_committing_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction->t_tid,
+ journal->j_committing_transaction,
+ journal->j_committing_transaction->t_tid);
+ BUG_ON(1);
+ }
+ if (unlikely(jh->b_next_transaction != transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_next_transaction (%llu, %p, %u) != "
+ "transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_next_transaction,
+ jh->b_next_transaction->t_tid,
+ transaction, transaction->t_tid);
+ BUG_ON(1);
+ }
/* And this case is illegal: we can't reuse another
* transaction's data buffer, ever. */
goto out_unlock_bh;
--
1.7.4.1.22.gec8e1.dirty



2011-08-14 20:01:06

by Curt Wohlgemuth

[permalink] [raw]
Subject: Re: [PATCH] jbd2: instrument jh on wrong transaction BUG_ON's

Hi Ted:

Unfortunately, some deref in your printk didn't go over too well:

[ 970.871166] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000008
[ 970.872139] IP: [<ffffffff812a158e>] jbd2_journal_dirty_metadata+0x18e/0x220
[ 970.872139] PGD 857b8d067 PUD 852f26067 PMD 0
[ 970.872139] Oops: 0000 [#1] SMP
[ 970.872139] CPU 14
...
[ 970.872139] Call Trace:
[ 970.872139] [<ffffffff812a9d26>] ? jbd2_journal_add_journal_head+0x86/0x290
[ 970.872139] [<ffffffff8125c87c>] ? __ext4_journalled_writepage+0xac/0x390
[ 970.872139] [<ffffffff812848b3>] __ext4_handle_dirty_metadata+0x83/0x120
[ 970.872139] [<ffffffff81284c1e>] ? __ext4_journal_get_write_access+0x3e/0x80
[ 970.872139] [<ffffffff8125cac8>] __ext4_journalled_writepage+0x2f8/0x390
[ 970.872139] [<ffffffff8125e264>] ext4_writepage+0x234/0x2f0
[ 970.872139] [<ffffffff81155ad7>] __writepage+0x17/0x40
...

where the IP corresponds to:

(gdb) list *0xffffffff812a158e
0xffffffff812a158e is in jbd2_journal_dirty_metadata
(fs/jbd2/transaction.c:1124).
1119 */
1120 if (jh->b_transaction != transaction) {
1121 JBUFFER_TRACE(jh, "already on other transaction");
1122 if (unlikely(jh->b_transaction !=
1123 journal->j_committing_transaction)) {
1124 printk(KERN_EMERG "JBD: %s: "
1125 "jh->b_transaction (%llu, %p, %u) != "
1126
"journal->j_committing_transaction (%p, %u)",
1127 journal->j_devname,
1128 (unsigned long long) bh->b_blocknr,

It seems that jh->b_transaction is NULL, and deferencing it to print
out jh->b_transaction->t_tid causes the NULL pointer deref.

Curt

On Sat, Aug 13, 2011 at 7:53 PM, Theodore Ts'o <[email protected]> wrote:
> These assertions have been reported as being tripped. ?Instrument them
> so we get more information about what might be going on.
>
> Cc: [email protected]
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
>
> Curt, you might want to see if you can replicate that jbd2 crash with
> this patch applied. ?Hopefully we'll get more information about what is
> going on.
>
> ?fs/jbd2/transaction.c | ? 42 +++++++++++++++++++++++++++++++++++++-----
> ?1 files changed, 37 insertions(+), 5 deletions(-)
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 2d71094..693fe63 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1093,8 +1093,19 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> ? ? ? ? */
> ? ? ? ?if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
> ? ? ? ? ? ? ? ?JBUFFER_TRACE(jh, "fastpath");
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_transaction ==
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? journal->j_running_transaction);
> + ? ? ? ? ? ? ? if (unlikely(jh->b_transaction !=
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"journal->j_running_transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction->t_tid,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction->t_tid);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> ? ? ? ? ? ? ? ?goto out_unlock_bh;
> ? ? ? ?}
>
> @@ -1108,9 +1119,30 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> ? ? ? ? */
> ? ? ? ?if (jh->b_transaction != transaction) {
> ? ? ? ? ? ? ? ?JBUFFER_TRACE(jh, "already on other transaction");
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_transaction ==
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? journal->j_committing_transaction);
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
> + ? ? ? ? ? ? ? if (unlikely(jh->b_transaction !=
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"journal->j_committing_transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction->t_tid,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction->t_tid);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? if (unlikely(jh->b_next_transaction != transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_next_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_next_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_next_transaction->t_tid,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?transaction, transaction->t_tid);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> ? ? ? ? ? ? ? ?/* And this case is illegal: we can't reuse another
> ? ? ? ? ? ? ? ? * transaction's data buffer, ever. */
> ? ? ? ? ? ? ? ?goto out_unlock_bh;
> --
> 1.7.4.1.22.gec8e1.dirty
>
>

2011-08-15 02:06:57

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] jbd2: instrument jh on wrong transaction BUG_ON's

On Sun, Aug 14, 2011 at 01:01:01PM -0700, Curt Wohlgemuth wrote:
>
> It seems that jh->b_transaction is NULL, and deferencing it to print
> out jh->b_transaction->t_tid causes the NULL pointer deref.

Hmm, if jh->b_transaction is null, then
jbd2_journal_get_write_access() must not have been called on the
journal_head. But the relevant code from ext4_journalled_writepage() is:

ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
do_journal_get_write_access);

err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
write_end_fn);

(write_end_fn is the function which calls which ultimately calls
ext4_handle_dirty_metadata which then calls
jbd2_journal_dirty_metadata(), which is where you're seeing the BUG_ON).

I suspect what is going on is do_get_write_access() is returning an
error, which means we never set jh->b_transaction. Hence when we call
jbd2_journal_data_metadata(), we trigger the BUG.

OK, what could cause do_get_write_access() to return an error? Two
conditions: if the handle is aborted (due to a previous error), in
which case it returns -EROFS, or if it can't get the memory needed
make a copy of the buffer, in which case there should have been a
"do_get_write_access: OOM for frozen buffer" error message earlier in
the log. (No, it's not prefixed with JBD2 --- it probably should be.)

Any chance you're seeing any indication of either possibility in the
messages before the BUG message?

- Ted

2011-08-15 02:12:02

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH -v2] jbd2: instrument jh on wrong transaction BUG_ON's

These assertions have been reported as being tripped. Instrument them
so we get more information about what might be going on.

Cc: [email protected]
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/jbd2/transaction.c | 46 +++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d71094..a242f6f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1093,8 +1093,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
JBUFFER_TRACE(jh, "fastpath");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_running_transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_running_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_running_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction ? jh->b_transaction->t_tid : 0,
+ journal->j_running_transaction,
+ journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0);
+ BUG_ON(1);
+ }
goto out_unlock_bh;
}

@@ -1108,9 +1120,33 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_committing_transaction);
- J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_committing_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_committing_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction ? jh->b_transaction->t_tid : 0,
+ journal->j_committing_transaction,
+ journal->j_committing_transaction ?
+ journal->j_committing_transaction->t_tid : 0);
+ BUG_ON(1);
+ }
+ if (unlikely(jh->b_next_transaction != transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_next_transaction (%llu, %p, %u) != "
+ "transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_next_transaction,
+ jh->b_next_transaction ?
+ jh->b_next_transaction->t_tid : 0,
+ transaction,
+ transaction ? transaction->t_tid : 0);
+ BUG_ON(1);
+ }
/* And this case is illegal: we can't reuse another
* transaction's data buffer, ever. */
goto out_unlock_bh;
--
1.7.4.1.22.gec8e1.dirty


2011-08-15 16:18:18

by Curt Wohlgemuth

[permalink] [raw]
Subject: Re: [PATCH -v2] jbd2: instrument jh on wrong transaction BUG_ON's

Hi Ted:

With your latest patch, here's the console output. And no, there are
no interesting messages prior to the crash.

[ 727.933568] general protection fault: 0000 [#1] SMP
[ 727.934001] CPU 2
[ 727.934001] Modules linked in: loop tg3 msr cpuid ipv6 genrtc
[ 727.934001]
[ 727.934001] Pid: 9238, comm: flush-8:16 Not tainted 3.1.0-smp-ext4_test
[ 727.934001] RIP: 0010:[<ffffffff812a159f>] [<ffffffff812a159f>]
jbd2_journal_dirty_metadata+0x19f/0x240
...
[ 727.934001] Process flush-8:16 (pid: 9238, threadinfo
ffff88076c8ec000, task ffff880852529800)
[ 727.934001] Stack:
[ 727.934001] ffff88076c8ed840 ffffffff812a9d46 ffffffff8125c87c
ffff8808577de540
[ 727.934001] ffff880641c32850 0000000000000000 ffff880641c32850
ffff880663985958
[ 727.934001] 000000000000033d ffffffff81640bea ffff88076c8ed8a0
ffffffff812848b3
[ 727.934001] Call Trace:
[ 727.934001] [<ffffffff812a9d46>] ? jbd2_journal_add_journal_head+0x86/0x290
[ 727.934001] [<ffffffff8125c87c>] ? __ext4_journalled_writepage+0xac/0x390
[ 727.934001] [<ffffffff812848b3>] __ext4_handle_dirty_metadata+0x83/0x120
[ 727.934001] [<ffffffff81284c1e>] ? __ext4_journal_get_write_access+0x3e/0x80
[ 727.934001] [<ffffffff8125cac8>] __ext4_journalled_writepage+0x2f8/0x390
[ 727.934001] [<ffffffff8125e264>] ext4_writepage+0x234/0x2f0
[ 727.934001] [<ffffffff81155ad7>] __writepage+0x17/0x40
[ 727.934001] [<ffffffff811574ae>] write_cache_pages+0x29e/0x6d0
[ 727.934001] [<ffffffff813ec4d7>] ? scsi_request_fn+0xd7/0x580
[ 727.934001] [<ffffffff81155ac0>] ? set_page_dirty+0x70/0x70
[ 727.934001] [<ffffffff81157931>] generic_writepages+0x51/0x80
...

where 0xffffffff812a159f is in the printk() , line 1126; it looks like
journal->j_committing_transaction is not NULL, but isn't valid either:

0xffffffff812a1597 <+407>: xor %r8d,%r8d ; put 0 into %r8d
0xffffffff812a159a <+410>: test %rcx,%rcx ; %rcx is
journal->j_committing_transaction, I think
0xffffffff812a159d <+413>: je 0xffffffff812a15a3
<jbd2_journal_dirty_metadata+419> ; if 0, jump over next instruction
0xffffffff812a159f <+415>: mov 0x8(%rcx),%r8d ; put ->t_tid into %r8d

I reformatted/mounted both the main and scratch devices before running
the test, and

export EXT_MOUNT_OPTIONS="-o block_validity,data=journal"
export MKFS_OPTIONS="-t ext4 -O has_journal"

Curt

On Sun, Aug 14, 2011 at 7:11 PM, Theodore Ts'o <[email protected]> wrote:
> These assertions have been reported as being tripped. ?Instrument them
> so we get more information about what might be going on.
>
> Cc: [email protected]
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> ---
> ?fs/jbd2/transaction.c | ? 46 +++++++++++++++++++++++++++++++++++++++++-----
> ?1 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 2d71094..a242f6f 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1093,8 +1093,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> ? ? ? ? */
> ? ? ? ?if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
> ? ? ? ? ? ? ? ?JBUFFER_TRACE(jh, "fastpath");
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_transaction ==
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? journal->j_running_transaction);
> + ? ? ? ? ? ? ? if (unlikely(jh->b_transaction !=
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"journal->j_running_transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction ? jh->b_transaction->t_tid : 0,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction ?
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_running_transaction->t_tid : 0);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> ? ? ? ? ? ? ? ?goto out_unlock_bh;
> ? ? ? ?}
>
> @@ -1108,9 +1120,33 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
> ? ? ? ? */
> ? ? ? ?if (jh->b_transaction != transaction) {
> ? ? ? ? ? ? ? ?JBUFFER_TRACE(jh, "already on other transaction");
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_transaction ==
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? journal->j_committing_transaction);
> - ? ? ? ? ? ? ? J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
> + ? ? ? ? ? ? ? if (unlikely(jh->b_transaction !=
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"journal->j_committing_transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_transaction ? jh->b_transaction->t_tid : 0,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction ?
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_committing_transaction->t_tid : 0);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? if (unlikely(jh->b_next_transaction != transaction)) {
> + ? ? ? ? ? ? ? ? ? ? ? printk(KERN_EMERG "JBD: %s: "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"jh->b_next_transaction (%llu, %p, %u) != "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?"transaction (%p, %u)",
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?journal->j_devname,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?(unsigned long long) bh->b_blocknr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_next_transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_next_transaction ?
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?jh->b_next_transaction->t_tid : 0,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?transaction,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?transaction ? transaction->t_tid : 0);
> + ? ? ? ? ? ? ? ? ? ? ? BUG_ON(1);
> + ? ? ? ? ? ? ? }
> ? ? ? ? ? ? ? ?/* And this case is illegal: we can't reuse another
> ? ? ? ? ? ? ? ? * transaction's data buffer, ever. */
> ? ? ? ? ? ? ? ?goto out_unlock_bh;
> --
> 1.7.4.1.22.gec8e1.dirty
>
>

2011-08-26 02:35:18

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH -v3] jbd2: add debugging information to jbd2_journal_dirty_metadata()

Add debugging information in case jbd2_journal_dirty_metadata() is
called with a buffer_head which didn't have
jbd2_journal_get_write_access() called on it, or if the journal_head
has the wrong transaction in it. In addition, return an error code.
This won't change anything for ocfs2, which will BUG_ON() the non-zero
exit code.

For ext4, the caller of this function is ext4_handle_dirty_metadata(),
and on seeing a non-zero return code, will call
ext4_journal_abort_handle(), which will print the function and line
number of the calling function that screwed up and abort the journal.
This will allow us to recover instead of bug halting, which is better
from a robustness and reliability point of view.

Cc: [email protected]
Signed-off-by: "Theodore Ts'o" <[email protected]>
---

This is a revised version of a patch originally called "jbd2: instrument
jh on wrong transaction BUG_ON's". I now believe that what is going on
is that the bug is caused by jbd2_journal_dirty_metadata() being called
on a buffer head that did not have jbd2_journal_get_write_access()
called on it first. In that case the BH_JBD flag will not be set in
b_state, and b_private is undefined, but usually zero. That would
explain some of the crashes that we have seen. This patch makes
jbd2_journal_dirty_metadata() more robust, in that it will detect any
potential programming errors of this type, and in such a way that the it
will be easy to determine which function was at fault.

fs/jbd2/transaction.c | 59 ++++++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 2d71094..8c640f0 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1049,6 +1049,10 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
* mark dirty metadata which needs to be journaled as part of the current
* transaction.
*
+ * The buffer must have previously had jbd2_journal_get_write_access()
+ * called so that it has a valid journal_head attached to the buffer
+ * head.
+ *
* The buffer is placed on the transaction's metadata list and is marked
* as belonging to the transaction.
*
@@ -1065,11 +1069,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
struct journal_head *jh = bh2jh(bh);
+ int ret = 0;

jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
if (is_handle_aborted(handle))
goto out;
+ if (!buffer_jbd(bh)) {
+ ret = -EUCLEAN;
+ goto out;
+ }

jbd_lock_bh_state(bh);

@@ -1093,8 +1102,20 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
JBUFFER_TRACE(jh, "fastpath");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_running_transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_running_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_running_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction ? jh->b_transaction->t_tid : 0,
+ journal->j_running_transaction,
+ journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0);
+ ret = -EINVAL;
+ }
goto out_unlock_bh;
}

@@ -1108,9 +1129,33 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
- J_ASSERT_JH(jh, jh->b_transaction ==
- journal->j_committing_transaction);
- J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
+ if (unlikely(jh->b_transaction !=
+ journal->j_committing_transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_transaction (%llu, %p, %u) != "
+ "journal->j_committing_transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_transaction,
+ jh->b_transaction ? jh->b_transaction->t_tid : 0,
+ journal->j_committing_transaction,
+ journal->j_committing_transaction ?
+ journal->j_committing_transaction->t_tid : 0);
+ ret = -EINVAL;
+ }
+ if (unlikely(jh->b_next_transaction != transaction)) {
+ printk(KERN_EMERG "JBD: %s: "
+ "jh->b_next_transaction (%llu, %p, %u) != "
+ "transaction (%p, %u)",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr,
+ jh->b_next_transaction,
+ jh->b_next_transaction ?
+ jh->b_next_transaction->t_tid : 0,
+ transaction,
+ transaction ? transaction->t_tid : 0);
+ ret = -EINVAL;
+ }
/* And this case is illegal: we can't reuse another
* transaction's data buffer, ever. */
goto out_unlock_bh;
@@ -1127,7 +1172,9 @@ out_unlock_bh:
jbd_unlock_bh_state(bh);
out:
JBUFFER_TRACE(jh, "exit");
- return 0;
+ if (ret)
+ __WARN(); /* All errors are bugs, so dump the stack */
+ return ret;
}

/*
--
1.7.4.1.22.gec8e1.dirty