When ext3_write_begin fails after allocating some blocks or
generic_perform_write fails to copy data to write, we truncate blocks already
instantiated beyond i_size. Although these blocks were never inside i_size, we
have to truncate pagecache of these blocks so that corresponding buffers get
unmapped. Otherwise subsequent __block_prepare_write (called because we are
retrying the write) will find the buffers mapped, not call ->get_block, and
thus the page will be backed by already freed blocks leading to filesystem and
data corruption.
CC: [email protected]
Reported-by: James Y Knight <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext3/inode.c | 18 ++++++++++++++----
1 files changed, 14 insertions(+), 4 deletions(-)
I will take care of merging this patch. I'm just sending it for completeness...
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 354ed3b..f9d6937 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
return ext3_journal_get_write_access(handle, bh);
}
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext3_truncate_failed_write(struct inode *inode)
+{
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1209,7 +1219,7 @@ write_begin_failed:
unlock_page(page);
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
@@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
@@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
page_cache_release(page);
if (pos + len > inode->i_size)
- ext3_truncate(inode);
+ ext3_truncate_failed_write(inode);
return ret ? ret : copied;
}
--
1.6.4.2
When ext4_write_begin fails after allocating some blocks or
generic_perform_write fails to copy data to write, we truncate blocks already
instantiated beyond i_size. Although these blocks were never inside i_size, we
have to truncate pagecache of these blocks so that corresponding buffers get
unmapped. Otherwise subsequent __block_prepare_write (called because we are
retrying the write) will find the buffers mapped, not call ->get_block, and
thus the page will be backed by already freed blocks leading to filesystem and
data corruption.
CC: [email protected]
CC: [email protected]
Signed-off-by: Jan Kara <[email protected]>
---
fs/ext4/inode.c | 20 +++++++++++++++-----
1 files changed, 15 insertions(+), 5 deletions(-)
Ted, will you please merge this patch? Thanks.
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2c8caa5..18b9416 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1534,6 +1534,16 @@ static int do_journal_get_write_access(handle_t *handle,
return ext4_journal_get_write_access(handle, bh);
}
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static void ext4_truncate_failed_write(struct inode *inode)
+{
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ext4_truncate(inode);
+}
+
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1599,7 +1609,7 @@ retry:
ext4_journal_stop(handle);
if (pos + len > inode->i_size) {
- ext4_truncate(inode);
+ ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
* still be on the orphan list; we need to
@@ -1709,7 +1719,7 @@ static int ext4_ordered_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
- ext4_truncate(inode);
+ ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@@ -1751,7 +1761,7 @@ static int ext4_writeback_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
- ext4_truncate(inode);
+ ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@@ -1814,7 +1824,7 @@ static int ext4_journalled_write_end(struct file *file,
if (!ret)
ret = ret2;
if (pos + len > inode->i_size) {
- ext4_truncate(inode);
+ ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@@ -3091,7 +3101,7 @@ retry:
* i_size_read because we hold i_mutex.
*/
if (pos + len > inode->i_size)
- ext4_truncate(inode);
+ ext4_truncate_failed_write(inode);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
--
1.6.4.2
On Wed, Dec 02, 2009 at 08:16:48PM +0100, Jan Kara wrote:
> When ext4_write_begin fails after allocating some blocks or
> generic_perform_write fails to copy data to write, we truncate blocks already
> instantiated beyond i_size. Although these blocks were never inside i_size, we
> have to truncate pagecache of these blocks so that corresponding buffers get
> unmapped. Otherwise subsequent __block_prepare_write (called because we are
> retrying the write) will find the buffers mapped, not call ->get_block, and
> thus the page will be backed by already freed blocks leading to filesystem and
> data corruption.
Added to the ext4 patch queue.
- Ted
Hi,
I came across a data corruption bug when using ext3, and this patch fixed
it. The bug exists in 2.6.31 and 2.6.32.
saeed
On Wed, Dec 2, 2009 at 9:16 PM, Jan Kara <[email protected]> wrote:
> When ext3_write_begin fails after allocating some blocks or
> generic_perform_write fails to copy data to write, we truncate blocks already
> instantiated beyond i_size. Although these blocks were never inside i_size, we
> have to truncate pagecache of these blocks so that corresponding buffers get
> unmapped. Otherwise subsequent __block_prepare_write (called because we are
> retrying the write) will find the buffers mapped, not call ->get_block, and
> thus the page will be backed by already freed blocks leading to filesystem and
> data corruption.
>
> CC: [email protected]
> Reported-by: James Y Knight <[email protected]>
> Signed-off-by: Jan Kara <[email protected]>
> ---
> fs/ext3/inode.c | 18 ++++++++++++++----
> 1 files changed, 14 insertions(+), 4 deletions(-)
>
> I will take care of merging this patch. I'm just sending it for completeness...
>
> diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> index 354ed3b..f9d6937 100644
> --- a/fs/ext3/inode.c
> +++ b/fs/ext3/inode.c
> @@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
> return ext3_journal_get_write_access(handle, bh);
> }
>
> +/*
> + * Truncate blocks that were not used by write. We have to truncate the
> + * pagecache as well so that corresponding buffers get properly unmapped.
> + */
> +static void ext3_truncate_failed_write(struct inode *inode)
> +{
> + truncate_inode_pages(inode->i_mapping, inode->i_size);
> + ext3_truncate(inode);
> +}
> +
> static int ext3_write_begin(struct file *file, struct address_space *mapping,
> loff_t pos, unsigned len, unsigned flags,
> struct page **pagep, void **fsdata)
> @@ -1209,7 +1219,7 @@ write_begin_failed:
> unlock_page(page);
> page_cache_release(page);
> if (pos + len > inode->i_size)
> - ext3_truncate(inode);
> + ext3_truncate_failed_write(inode);
> }
> if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
> goto retry;
> @@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
> page_cache_release(page);
>
> if (pos + len > inode->i_size)
> - ext3_truncate(inode);
> + ext3_truncate_failed_write(inode);
> return ret ? ret : copied;
> }
>
> @@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
> page_cache_release(page);
>
> if (pos + len > inode->i_size)
> - ext3_truncate(inode);
> + ext3_truncate_failed_write(inode);
> return ret ? ret : copied;
> }
>
> @@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
> page_cache_release(page);
>
> if (pos + len > inode->i_size)
> - ext3_truncate(inode);
> + ext3_truncate_failed_write(inode);
> return ret ? ret : copied;
> }
>
> --
> 1.6.4.2
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
Hi,
On Wed 09-12-09 17:42:12, saeed bishara wrote:
> I came across a data corruption bug when using ext3, and this patch fixed
> it. The bug exists in 2.6.31 and 2.6.32.
Yes, I plan to send the fix to [email protected] so that it gets fixed in
the stable releases for these kernels as well. Thanks for your notice.
Honza
> On Wed, Dec 2, 2009 at 9:16 PM, Jan Kara <[email protected]> wrote:
> > When ext3_write_begin fails after allocating some blocks or
> > generic_perform_write fails to copy data to write, we truncate blocks already
> > instantiated beyond i_size. Although these blocks were never inside i_size, we
> > have to truncate pagecache of these blocks so that corresponding buffers get
> > unmapped. Otherwise subsequent __block_prepare_write (called because we are
> > retrying the write) will find the buffers mapped, not call ->get_block, and
> > thus the page will be backed by already freed blocks leading to filesystem and
> > data corruption.
> >
> > CC: [email protected]
> > Reported-by: James Y Knight <[email protected]>
> > Signed-off-by: Jan Kara <[email protected]>
> > ---
> >  fs/ext3/inode.c |   18 ++++++++++++++----
> >  1 files changed, 14 insertions(+), 4 deletions(-)
> >
> > I will take care of merging this patch. I'm just sending it for completeness...
> >
> > diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
> > index 354ed3b..f9d6937 100644
> > --- a/fs/ext3/inode.c
> > +++ b/fs/ext3/inode.c
> > @@ -1151,6 +1151,16 @@ static int do_journal_get_write_access(handle_t *handle,
> >        return ext3_journal_get_write_access(handle, bh);
> >  }
> >
> > +/*
> > + * Truncate blocks that were not used by write. We have to truncate the
> > + * pagecache as well so that corresponding buffers get properly unmapped.
> > + */
> > +static void ext3_truncate_failed_write(struct inode *inode)
> > +{
> > +       truncate_inode_pages(inode->i_mapping, inode->i_size);
> > +       ext3_truncate(inode);
> > +}
> > +
> >  static int ext3_write_begin(struct file *file, struct address_space *mapping,
> >                                loff_t pos, unsigned len, unsigned flags,
> >                                struct page **pagep, void **fsdata)
> > @@ -1209,7 +1219,7 @@ write_begin_failed:
> >                unlock_page(page);
> >                page_cache_release(page);
> >                if (pos + len > inode->i_size)
> > -                       ext3_truncate(inode);
> > +                       ext3_truncate_failed_write(inode);
> >        }
> >        if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
> >                goto retry;
> > @@ -1304,7 +1314,7 @@ static int ext3_ordered_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > @@ -1330,7 +1340,7 @@ static int ext3_writeback_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > @@ -1383,7 +1393,7 @@ static int ext3_journalled_write_end(struct file *file,
> >        page_cache_release(page);
> >
> >        if (pos + len > inode->i_size)
> > -               ext3_truncate(inode);
> > +               ext3_truncate_failed_write(inode);
> >        return ret ? ret : copied;
> >  }
> >
> > --
> > 1.6.4.2
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at http://www.tux.org/lkml/
> >
--
Jan Kara <[email protected]>
SUSE Labs, CR