LinuxLists.cc - [PATCH 1/2] ext4: Prevent panic for destroyed devices

2016-02-16 10:36:22

Subject: [PATCH 1/2] ext4: Prevent panic for destroyed devices

Some devices(nbd) can becomes unoperatable via kill_bdev
so its pagecache will be invalidated and all buffers becomes unmapped.
In that situation we will trigger BUGON on submit_bh.

#Testcase
mkdir -p a/mnt
cd a
truncate -s 1G img
mkfs.ext4 -F img
qemu-nbd -c /dev/nbd0 img
mount /dev/nbd0 /mnt
cp -r /bin/ /mnt&
# Disconnect nbd while cp is active
qemu-nbd -d /dev/nbd0
sync

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/ext4/super.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3ed01ec..617d8b4a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4320,7 +4320,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;

- if (!sbh || block_device_ejected(sb))
+ if (!sbh || !buffer_mapped(sbh) || block_device_ejected(sb))
return error;
if (buffer_write_io_error(sbh)) {
/*
@@ -4368,8 +4368,26 @@ static int ext4_commit_super(struct super_block *sb, int sync)
ext4_superblock_csum_set(sb);
mark_buffer_dirty(sbh);
if (sync) {
- error = __sync_dirty_buffer(sbh,
- test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
+ lock_buffer(sbh);
+ /* superblock bh may be invalidated due to drive failure */
+ if (!buffer_mapped(sbh)) {
+ unlock_buffer(sbh);
+ ext4_msg(sb, KERN_ERR, "Can not write superblock "
+ "because disk cache was invalidated");
+ return -EIO;
+ }
+ if (test_clear_buffer_dirty(sbh)) {
+ get_bh(sbh);
+ sbh->b_end_io = end_buffer_write_sync;
+ error = submit_bh(test_opt(sb, BARRIER) ?
+ WRITE_FUA : WRITE_SYNC, sbh);
+ wait_on_buffer(sbh);
+ if (!error && !buffer_uptodate(sbh))
+ error = -EIO;
+ } else {
+ unlock_buffer(sbh);
+ }
+
if (error)
return error;

--
1.8.3.1

2016-02-16 10:36:26

by Dmitry Monakhov

[permalink] [raw]

Subject: [PATCH 2/2] jbd2: Prevent panic for destroyed devices

Some devices(nbd) can becomes unoperatable via kill_bdev
so its pagecache will be invalidated and all buffers becomes unmapped.
In that situation we will trigger BUGON on submit_bh.

#Testcase
mkdir -p a/mnt
cd a
truncate -s 1G img
mkfs.ext4 -F img
qemu-nbd -c /dev/nbd0 img
mount /dev/nbd0 /mnt
cp -r /bin/ /mnt&
# Disconnect nbd while cp is active
qemu-nbd -d /dev/nbd0

Signed-off-by: Dmitry Monakhov <[email protected]>
---
fs/jbd2/journal.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 81e6226..f2760c8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1333,6 +1333,18 @@ static int jbd2_write_superblock(journal_t *journal, int write_op)
if (!(journal->j_flags & JBD2_BARRIER))
write_op &= ~(REQ_FUA | REQ_FLUSH);
lock_buffer(bh);
+ /*
+ * Some disk drives may invalidate its page cache on failure
+ * so sbh becomes unmapped.
+ */
+ if (!buffer_mapped(bh)) {
+ /* Disk dissapeared under us, there is nothing we can
+ do but complain */
+ printk(KERN_ERR "JBD2: journal superblock was invalidated "
+ "for %s.\n", journal->j_devname);
+ return -EIO;
+ }
+
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
--
1.8.3.1

2016-02-18 09:36:04

by Dmitry Monakhov

[permalink] [raw]

Subject: Re: [PATCH 1/2] ext4: Prevent panic for destroyed devices

Dmitry Monakhov <[email protected]> writes:

> Some devices(nbd) can becomes unoperatable via kill_bdev
> so its pagecache will be invalidated and all buffers becomes unmapped.
> In that situation we will trigger BUGON on submit_bh.
>
> #Testcase
> mkdir -p a/mnt
> cd a
> truncate -s 1G img
> mkfs.ext4 -F img
> qemu-nbd -c /dev/nbd0 img
> mount /dev/nbd0 /mnt
> cp -r /bin/ /mnt&
> # Disconnect nbd while cp is active
> qemu-nbd -d /dev/nbd0
> sync

Hm...
Actually there are many places where we submit BHs w/o checks
nojournal code is full of such places.
It looks like we need some sort of AOP_TRUNCATE_PAGE error code
for generic buffer submission path

diff --git a/fs/buffer.c b/fs/buffer.c
index e1632ab..1cf94ec 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3127,6 +3127,10 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw)

WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
+ if(unlikely(!buffer_mapped(bh))) {
+ unlock_buffer(bh);
+ return AOP_TRUNCATE_PAGE; /* or EIO */
+ }
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;

>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/ext4/super.c | 24 +++++++++++++++++++++---
> 1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 3ed01ec..617d8b4a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4320,7 +4320,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
> int error = 0;
>
> - if (!sbh || block_device_ejected(sb))
> + if (!sbh || !buffer_mapped(sbh) || block_device_ejected(sb))
> return error;
> if (buffer_write_io_error(sbh)) {
> /*
> @@ -4368,8 +4368,26 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> ext4_superblock_csum_set(sb);
> mark_buffer_dirty(sbh);
> if (sync) {
> - error = __sync_dirty_buffer(sbh,
> - test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
> + lock_buffer(sbh);
> + /* superblock bh may be invalidated due to drive failure */
> + if (!buffer_mapped(sbh)) {
> + unlock_buffer(sbh);
> + ext4_msg(sb, KERN_ERR, "Can not write superblock "
> + "because disk cache was invalidated");
> + return -EIO;
> + }
> + if (test_clear_buffer_dirty(sbh)) {
> + get_bh(sbh);
> + sbh->b_end_io = end_buffer_write_sync;
> + error = submit_bh(test_opt(sb, BARRIER) ?
> + WRITE_FUA : WRITE_SYNC, sbh);
> + wait_on_buffer(sbh);
> + if (!error && !buffer_uptodate(sbh))
> + error = -EIO;
> + } else {
> + unlock_buffer(sbh);
> + }
> +
> if (error)
> return error;
>
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

Attachments:

signature.asc (472.00 B)

2016-02-19 05:13:37

by Theodore Ts'o

[permalink] [raw]

Subject: Re: [PATCH 1/2] ext4: Prevent panic for destroyed devices

On Thu, Feb 18, 2016 at 12:35:34PM +0300, Dmitry Monakhov wrote:
> Hm...
> Actually there are many places where we submit BHs w/o checks
> nojournal code is full of such places.
> It looks like we need some sort of AOP_TRUNCATE_PAGE error code
> for generic buffer submission path

Might it be enough to simply return have submit_bh check for the
unmapped buffer and return EIO? I'm not sure we need a magic error
code....

That being said, I've always wished that there was a struct super
operation, s_block_device_gone() that would allow information about
the status of the block device to be communicated up the storage
stack. Another useful thing that might be useful is an indication
that for whatever reason, the storage device has turned read-only on
us (so we might as well remounte the file system read-only instead of
letting userspace continue to dirty pages that have no hope of getting
written out).

- Ted

2016-03-13 22:26:54

by Theodore Ts'o

[permalink] [raw]

Subject: Re: [PATCH 1/2] ext4: Prevent panic for destroyed devices

On Tue, Feb 16, 2016 at 02:36:04PM +0400, Dmitry Monakhov wrote:
> Some devices(nbd) can becomes unoperatable via kill_bdev
> so its pagecache will be invalidated and all buffers becomes unmapped.
> In that situation we will trigger BUGON on submit_bh.
>
> #Testcase
> mkdir -p a/mnt
> cd a
> truncate -s 1G img
> mkfs.ext4 -F img
> qemu-nbd -c /dev/nbd0 img
> mount /dev/nbd0 /mnt
> cp -r /bin/ /mnt&
> # Disconnect nbd while cp is active
> qemu-nbd -d /dev/nbd0
> sync
>
> Signed-off-by: Dmitry Monakhov <[email protected]>
> ---
> fs/ext4/super.c | 24 +++++++++++++++++++++---
> 1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 3ed01ec..617d8b4a 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -4320,7 +4320,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
> int error = 0;
>
> - if (!sbh || block_device_ejected(sb))
> + if (!sbh || !buffer_mapped(sbh) || block_device_ejected(sb))
> return error;
> if (buffer_write_io_error(sbh)) {
> /*

Hmm... error is zero here, so probably need change "return error" to "return EIO".

> @@ -4368,8 +4368,26 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> ext4_superblock_csum_set(sb);
> mark_buffer_dirty(sbh);
> if (sync) {
> - error = __sync_dirty_buffer(sbh,
> - test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
> + lock_buffer(sbh);
> + /* superblock bh may be invalidated due to drive failure */
> + if (!buffer_mapped(sbh)) {
> + unlock_buffer(sbh);
> + ext4_msg(sb, KERN_ERR, "Can not write superblock "
> + "because disk cache was invalidated");
> + return -EIO;
> + }
> + if (test_clear_buffer_dirty(sbh)) {
> + get_bh(sbh);
> + sbh->b_end_io = end_buffer_write_sync;
> + error = submit_bh(test_opt(sb, BARRIER) ?
> + WRITE_FUA : WRITE_SYNC, sbh);
> + wait_on_buffer(sbh);
> + if (!error && !buffer_uptodate(sbh))
> + error = -EIO;
> + } else {
> + unlock_buffer(sbh);
> + }
> +

Instead of replicating all of __sync_dirty_buffer() here, we should
just add a check to __sync_dirty_buffer() and have it return EIO if
bh->bdev is NULL. (Which I think is a better check than checking
buffer_mapped).

- Ted

2016-03-13 22:28:08

by Theodore Ts'o

[permalink] [raw]

Subject: Re: [PATCH 2/2] jbd2: Prevent panic for destroyed devices

On Tue, Feb 16, 2016 at 02:36:05PM +0400, Dmitry Monakhov wrote:
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 81e6226..f2760c8 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1333,6 +1333,18 @@ static int jbd2_write_superblock(journal_t *journal, int write_op)
> if (!(journal->j_flags & JBD2_BARRIER))
> write_op &= ~(REQ_FUA | REQ_FLUSH);
> lock_buffer(bh);
> + /*
> + * Some disk drives may invalidate its page cache on failure
> + * so sbh becomes unmapped.
> + */
> + if (!buffer_mapped(bh)) {

I think checking bh->bdev == NULL is a better check for this
condition.

> + /* Disk dissapeared under us, there is nothing we can

Spelling nit: "disappeared"

> + do but complain */
> + printk(KERN_ERR "JBD2: journal superblock was invalidated "
> + "for %s.\n", journal->j_devname);
> + return -EIO;
> + }
> +

This is missing a an "unlock_buffer(bh)" before we return.

> if (buffer_write_io_error(bh)) {
> /*
> * Oh, dear. A previous attempt to write the journal

- Ted