2008-03-07 10:53:25

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

ext4_fallocate need to update file size in each transaction. Otherwise
ife we crash the file size won't be updated. We were also not marking
the inode dirty after updating file size before. Also when we try to
retry allocation due to ENOSPC make sure we reset the variable ret so
that we actually do a retry.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
1 files changed, 31 insertions(+), 47 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dcdf92a..09dd3c5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
return needed;
}

+static void ext4_falloc_update_inode(struct inode *inode,
+ int mode, loff_t new_size)
+{
+ struct timespec now;
+
+ now = current_fs_time(inode->i_sb);
+ if (!timespec_equal(&inode->i_ctime, &now))
+ inode->i_ctime = now;
+ /*
+ * Update only when preallocation was requested beyond
+ * the file size.
+ */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ new_size > i_size_read(inode)) {
+ i_size_write(inode, new_size);
+ EXT4_I(inode)->i_disksize = i_size_read(inode);
+ }
+
+}
+
/*
* preallocate space for a file. This implements ext4's fallocate inode
* operation, which gets called from sys_fallocate system call.
@@ -2794,8 +2814,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
{
handle_t *handle;
ext4_lblk_t block;
+ loff_t new_size;
unsigned long max_blocks;
- ext4_fsblk_t nblocks = 0;
int ret = 0;
int ret2 = 0;
int retries = 0;
@@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
return -ENODEV;

block = offset >> blkbits;
- max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - block;
+ max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;

/*
* credits to insert 1 extent into extent tree + buffers to be able to
@@ -2848,28 +2867,13 @@ retry:
ret2 = ext4_journal_stop(handle);
break;
}
- if (ret > 0) {
- /* check wrap through sign-bit/zero here */
- if ((block + ret) < 0 || (block + ret) < block) {
- ret = -EIO;
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- break;
- }
- if (buffer_new(&map_bh) && ((block + ret) >
- (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
- >> blkbits)))
- nblocks = nblocks + ret;
- }
+ if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+ blkbits) >> blkbits))
+ new_size = offset + len;
+ else
+ new_size = (block + ret) << blkbits;

- /* Update ctime if new blocks get allocated */
- if (nblocks) {
- struct timespec now;
-
- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_ctime, &now))
- inode->i_ctime = now;
- }
+ ext4_falloc_update_inode(inode, mode, new_size);

ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
@@ -2877,30 +2881,10 @@ retry:
break;
}

- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ ret = 0;
goto retry;
-
- /*
- * Time to update the file size.
- * Update only when preallocation was requested beyond the file size.
- */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- (offset + len) > i_size_read(inode)) {
- if (ret > 0) {
- /*
- * if no error, we assume preallocation succeeded
- * completely
- */
- i_size_write(inode, offset + len);
- EXT4_I(inode)->i_disksize = i_size_read(inode);
- } else if (ret < 0 && nblocks) {
- /* Handle partial allocation scenario */
- loff_t newsize;


2008-03-07 11:31:37

by Mingming Cao

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, 2008-03-07 at 16:23 +0530, Aneesh Kumar K.V wrote:
> ext4_fallocate need to update file size in each transaction. Otherwise
> ife we crash the file size won't be updated. We were also not marking
> the inode dirty after updating file size before. Also when we try to
> retry allocation due to ENOSPC make sure we reset the variable ret so
> that we actually do a retry.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
> 1 files changed, 31 insertions(+), 47 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index dcdf92a..09dd3c5 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
> return needed;
> }
>
> +static void ext4_falloc_update_inode(struct inode *inode,
> + int mode, loff_t new_size)
> +{
> + struct timespec now;
> +
> + now = current_fs_time(inode->i_sb);
> + if (!timespec_equal(&inode->i_ctime, &now))
> + inode->i_ctime = now;
> + /*
> + * Update only when preallocation was requested beyond
> + * the file size.
> + */
> + if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> + new_size > i_size_read(inode)) {
> + i_size_write(inode, new_size);
> + EXT4_I(inode)->i_disksize = i_size_read(inode);

Just a minor thing, we could store new_size directly to i_disksize to
avoid calling the i_size_read() instead.


> + }
> +
> +}
> +
> /*
> * preallocate space for a file. This implements ext4's fallocate inode
> * operation, which gets called from sys_fallocate system call.
> @@ -2794,8 +2814,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> {
> handle_t *handle;
> ext4_lblk_t block;
> + loff_t new_size;
> unsigned long max_blocks;
> - ext4_fsblk_t nblocks = 0;
> int ret = 0;
> int ret2 = 0;
> int retries = 0;
> @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> return -ENODEV;
>
> block = offset >> blkbits;
> - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> - - block;
> + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
>

Not sure about this change... Other than this looks fine to me

MIngming



2008-03-07 11:44:20

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, Mar 07, 2008 at 03:30:57AM -0800, Mingming Cao wrote:
> On Fri, 2008-03-07 at 16:23 +0530, Aneesh Kumar K.V wrote:
> > ext4_fallocate need to update file size in each transaction. Otherwise
> > ife we crash the file size won't be updated. We were also not marking
> > the inode dirty after updating file size before. Also when we try to
> > retry allocation due to ENOSPC make sure we reset the variable ret so
> > that we actually do a retry.
> >
> > Signed-off-by: Aneesh Kumar K.V <[email protected]>
> > ---
> > fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
> > 1 files changed, 31 insertions(+), 47 deletions(-)
> >
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index dcdf92a..09dd3c5 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
> > return needed;
> > }
> >
> > +static void ext4_falloc_update_inode(struct inode *inode,
> > + int mode, loff_t new_size)
> > +{
> > + struct timespec now;
> > +
> > + now = current_fs_time(inode->i_sb);
> > + if (!timespec_equal(&inode->i_ctime, &now))
> > + inode->i_ctime = now;
> > + /*
> > + * Update only when preallocation was requested beyond
> > + * the file size.
> > + */
> > + if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> > + new_size > i_size_read(inode)) {
> > + i_size_write(inode, new_size);
> > + EXT4_I(inode)->i_disksize = i_size_read(inode);
>
> Just a minor thing, we could store new_size directly to i_disksize to
> avoid calling the i_size_read() instead.
>

OK

>
> > + }
> > +
> > +}
> > +
> > /*
> > * preallocate space for a file. This implements ext4's fallocate inode
> > * operation, which gets called from sys_fallocate system call.
> > @@ -2794,8 +2814,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> > {
> > handle_t *handle;
> > ext4_lblk_t block;
> > + loff_t new_size;
> > unsigned long max_blocks;
> > - ext4_fsblk_t nblocks = 0;
> > int ret = 0;
> > int ret2 = 0;
> > int retries = 0;
> > @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> > return -ENODEV;
> >
> > block = offset >> blkbits;
> > - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> > - - block;
> > + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
> >
>
> Not sure about this change... Other than this looks fine to me
>

max_blocks actually represented the number of blocks that we are
requesting. The above change makes it simple.

-aneesh

2008-03-07 15:52:35

by Eric Sandeen

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

Aneesh Kumar K.V wrote:
> ext4_fallocate need to update file size in each transaction. Otherwise
> ife we crash the file size won't be updated. We were also not marking
> the inode dirty after updating file size before.

This led to losing the size update when unmounted...

> Also when we try to
> retry allocation due to ENOSPC make sure we reset the variable ret so
> that we actually do a retry.

That's a few alsos. Should this be more than one patch when committed?

-Eric

> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
> 1 files changed, 31 insertions(+), 47 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index dcdf92a..09dd3c5 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
> return needed;
> }
>
> +static void ext4_falloc_update_inode(struct inode *inode,
> + int mode, loff_t new_size)
> +{
> + struct timespec now;
> +
> + now = current_fs_time(inode->i_sb);
> + if (!timespec_equal(&inode->i_ctime, &now))
> + inode->i_ctime = now;

This used to depend on blocks actually being allocated; it looks like it
doesn't anymore?

> + /*
> + * Update only when preallocation was requested beyond
> + * the file size.
> + */
> + if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> + new_size > i_size_read(inode)) {
> + i_size_write(inode, new_size);
> + EXT4_I(inode)->i_disksize = i_size_read(inode);
> + }
> +
> +}

Ok, I think this is all cleaner...

> /*
> * preallocate space for a file. This implements ext4's fallocate inode
> * operation, which gets called from sys_fallocate system call.
> @@ -2794,8 +2814,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> {
> handle_t *handle;
> ext4_lblk_t block;
> + loff_t new_size;
> unsigned long max_blocks;
> - ext4_fsblk_t nblocks = 0;
> int ret = 0;
> int ret2 = 0;
> int retries = 0;
> @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> return -ENODEV;
>
> block = offset >> blkbits;
> - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> - - block;
> + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;

Ok, this makes much more sense IMHO.

>
> /*
> * credits to insert 1 extent into extent tree + buffers to be able to
> @@ -2848,28 +2867,13 @@ retry:
> ret2 = ext4_journal_stop(handle);
> break;
> }
> - if (ret > 0) {
> - /* check wrap through sign-bit/zero here */
> - if ((block + ret) < 0 || (block + ret) < block) {
> - ret = -EIO;
> - ext4_mark_inode_dirty(handle, inode);
> - ret2 = ext4_journal_stop(handle);
> - break;
> - }
> - if (buffer_new(&map_bh) && ((block + ret) >
> - (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
> - >> blkbits)))
> - nblocks = nblocks + ret;
> - }
> + if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
> + blkbits) >> blkbits))
> + new_size = offset + len;
> + else
> + new_size = (block + ret) << blkbits;

Since we called ext4_get_blocks_wrap with max_blocks, will we really end
up with more blocks allocated than we asked for? And do we no longer
need the wrap checks? (I'd expect that to be tested higher up with the
original offset+len requested, no?)

> - /* Update ctime if new blocks get allocated */
> - if (nblocks) {
> - struct timespec now;
> -
> - now = current_fs_time(inode->i_sb);
> - if (!timespec_equal(&inode->i_ctime, &now))
> - inode->i_ctime = now;
> - }
> + ext4_falloc_update_inode(inode, mode, new_size);
>
> ext4_mark_inode_dirty(handle, inode);
> ret2 = ext4_journal_stop(handle);
> @@ -2877,30 +2881,10 @@ retry:
> break;
> }
>
> - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
> + if (ret == -ENOSPC &&
> + ext4_should_retry_alloc(inode->i_sb, &retries)) {
> + ret = 0;
> goto retry;
> -
> - /*
> - * Time to update the file size.
> - * Update only when preallocation was requested beyond the file size.
> - */
> - if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> - (offset + len) > i_size_read(inode)) {
> - if (ret > 0) {
> - /*
> - * if no error, we assume preallocation succeeded
> - * completely
> - */
> - i_size_write(inode, offset + len);
> - EXT4_I(inode)->i_disksize = i_size_read(inode);
> - } else if (ret < 0 && nblocks) {
> - /* Handle partial allocation scenario */
> - loff_t newsize;
> -
> - newsize = (nblocks << blkbits) + i_size_read(inode);
> - i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
> - EXT4_I(inode)->i_disksize = i_size_read(inode);
> - }
> }
>
> mutex_unlock(&inode->i_mutex);


2008-03-07 15:58:22

by Eric Sandeen

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

Eric Sandeen wrote:
> Aneesh Kumar K.V wrote:
>> ext4_fallocate need to update file size in each transaction. Otherwise
>> ife we crash the file size won't be updated. We were also not marking
>> the inode dirty after updating file size before.
>
> This led to losing the size update when unmounted...
>
>> Also when we try to
>> retry allocation due to ENOSPC make sure we reset the variable ret so
>> that we actually do a retry.
>
> That's a few alsos. Should this be more than one patch when committed?
>
> -Eric

Oops that "-Eric" was too soon, there are more comments below ;)

>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>> ---
>> fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
>> 1 files changed, 31 insertions(+), 47 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index dcdf92a..09dd3c5 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
>> return needed;
>> }
>>
>> +static void ext4_falloc_update_inode(struct inode *inode,
>> + int mode, loff_t new_size)
>> +{
>> + struct timespec now;
>> +
>> + now = current_fs_time(inode->i_sb);
>> + if (!timespec_equal(&inode->i_ctime, &now))
>> + inode->i_ctime = now;
>
> This used to depend on blocks actually being allocated; it looks like it
> doesn't anymore?
>
>> + /*
>> + * Update only when preallocation was requested beyond
>> + * the file size.
>> + */
>> + if (!(mode & FALLOC_FL_KEEP_SIZE) &&
>> + new_size > i_size_read(inode)) {
>> + i_size_write(inode, new_size);
>> + EXT4_I(inode)->i_disksize = i_size_read(inode);
>> + }
>> +
>> +}
>
> Ok, I think this is all cleaner...
>
>> /*
>> * preallocate space for a file. This implements ext4's fallocate inode
>> * operation, which gets called from sys_fallocate system call.
>> @@ -2794,8 +2814,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
>> {
>> handle_t *handle;
>> ext4_lblk_t block;
>> + loff_t new_size;
>> unsigned long max_blocks;
>> - ext4_fsblk_t nblocks = 0;
>> int ret = 0;
>> int ret2 = 0;
>> int retries = 0;
>> @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
>> return -ENODEV;
>>
>> block = offset >> blkbits;
>> - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
>> - - block;
>> + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
>
> Ok, this makes much more sense IMHO.
>
>>
>> /*
>> * credits to insert 1 extent into extent tree + buffers to be able to
>> @@ -2848,28 +2867,13 @@ retry:
>> ret2 = ext4_journal_stop(handle);
>> break;
>> }
>> - if (ret > 0) {
>> - /* check wrap through sign-bit/zero here */
>> - if ((block + ret) < 0 || (block + ret) < block) {
>> - ret = -EIO;
>> - ext4_mark_inode_dirty(handle, inode);
>> - ret2 = ext4_journal_stop(handle);
>> - break;
>> - }
>> - if (buffer_new(&map_bh) && ((block + ret) >
>> - (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
>> - >> blkbits)))
>> - nblocks = nblocks + ret;
>> - }
>> + if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
>> + blkbits) >> blkbits))
>> + new_size = offset + len;
>> + else
>> + new_size = (block + ret) << blkbits;
>
> Since we called ext4_get_blocks_wrap with max_blocks, will we really end
> up with more blocks allocated than we asked for? And do we no longer
> need the wrap checks? (I'd expect that to be tested higher up with the
> original offset+len requested, no?)
>
>> - /* Update ctime if new blocks get allocated */
>> - if (nblocks) {
>> - struct timespec now;
>> -
>> - now = current_fs_time(inode->i_sb);
>> - if (!timespec_equal(&inode->i_ctime, &now))
>> - inode->i_ctime = now;
>> - }
>> + ext4_falloc_update_inode(inode, mode, new_size);
>>
>> ext4_mark_inode_dirty(handle, inode);
>> ret2 = ext4_journal_stop(handle);
>> @@ -2877,30 +2881,10 @@ retry:
>> break;
>> }
>>
>> - if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
>> + if (ret == -ENOSPC &&
>> + ext4_should_retry_alloc(inode->i_sb, &retries)) {
>> + ret = 0;
>> goto retry;
>> -
>> - /*
>> - * Time to update the file size.
>> - * Update only when preallocation was requested beyond the file size.
>> - */
>> - if (!(mode & FALLOC_FL_KEEP_SIZE) &&
>> - (offset + len) > i_size_read(inode)) {
>> - if (ret > 0) {
>> - /*
>> - * if no error, we assume preallocation succeeded
>> - * completely
>> - */
>> - i_size_write(inode, offset + len);
>> - EXT4_I(inode)->i_disksize = i_size_read(inode);
>> - } else if (ret < 0 && nblocks) {
>> - /* Handle partial allocation scenario */
>> - loff_t newsize;
>> -
>> - newsize = (nblocks << blkbits) + i_size_read(inode);
>> - i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
>> - EXT4_I(inode)->i_disksize = i_size_read(inode);
>> - }
>> }
>>
>> mutex_unlock(&inode->i_mutex);
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2008-03-07 16:03:25

by Dave Kleikamp

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, 2008-03-07 at 17:14 +0530, Aneesh Kumar K.V wrote:
> On Fri, Mar 07, 2008 at 03:30:57AM -0800, Mingming Cao wrote:
> > On Fri, 2008-03-07 at 16:23 +0530, Aneesh Kumar K.V wrote:

> > > @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> > > return -ENODEV;
> > >
> > > block = offset >> blkbits;
> > > - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> > > - - block;
> > > + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
> > >
> >
> > Not sure about this change... Other than this looks fine to me
> >
>
> max_blocks actually represented the number of blocks that we are
> requesting. The above change makes it simple.

I think it's too simple. :-(

Take an example of an empty file (no allocated blocks), offset=0xC00,
length = 0x800, block_size = 0x1000.

The existing code will result in max_blocks = 2, accounting for both the
blocks that the allocation spans, but your new code will result in
max_blocks = 1.
--
David Kleikamp
IBM Linux Technology Center


2008-03-07 17:59:54

by Mingming Cao

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, 2008-03-07 at 10:03 -0600, Dave Kleikamp wrote:
> On Fri, 2008-03-07 at 17:14 +0530, Aneesh Kumar K.V wrote:
> > On Fri, Mar 07, 2008 at 03:30:57AM -0800, Mingming Cao wrote:
> > > On Fri, 2008-03-07 at 16:23 +0530, Aneesh Kumar K.V wrote:
>
> > > > @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> > > > return -ENODEV;
> > > >
> > > > block = offset >> blkbits;
> > > > - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> > > > - - block;
> > > > + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
> > > >
> > >
> > > Not sure about this change... Other than this looks fine to me
> > >
> >
> > max_blocks actually represented the number of blocks that we are
> > requesting. The above change makes it simple.
>
> I think it's too simple. :-(
>
> Take an example of an empty file (no allocated blocks), offset=0xC00,
> length = 0x800, block_size = 0x1000.
>
> The existing code will result in max_blocks = 2, accounting for both the
> blocks that the allocation spans, but your new code will result in
> max_blocks = 1.

I agree with you.

Mingming


2008-03-08 16:12:30

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, Mar 07, 2008 at 09:45:06AM -0600, Eric Sandeen wrote:
> Aneesh Kumar K.V wrote:
> > ext4_fallocate need to update file size in each transaction. Otherwise
> > ife we crash the file size won't be updated. We were also not marking
> > the inode dirty after updating file size before.
>
> This led to losing the size update when unmounted...
>
> > Also when we try to
> > retry allocation due to ENOSPC make sure we reset the variable ret so
> > that we actually do a retry.
>
> That's a few alsos. Should this be more than one patch when committed?
>
> -Eric
>
> > Signed-off-by: Aneesh Kumar K.V <[email protected]>
> > ---
> > fs/ext4/extents.c | 78 +++++++++++++++++++++--------------------------------
> > 1 files changed, 31 insertions(+), 47 deletions(-)
> >
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index dcdf92a..09dd3c5 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -2783,6 +2783,26 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
> > return needed;
> > }
> >
> > +static void ext4_falloc_update_inode(struct inode *inode,
> > + int mode, loff_t new_size)
> > +{
> > + struct timespec now;
> > +
> > + now = current_fs_time(inode->i_sb);
> > + if (!timespec_equal(&inode->i_ctime, &now))
> > + inode->i_ctime = now;
>
> This used to depend on blocks actually being allocated; it looks like it
> doesn't anymore?

I am still not clear about the requirement here. The earlier code
check for greater than current file size, which was confusing because we
could very well convert a hole to a prealloc area. How about updating
i_ctime if the buffer head is new ?.


>
> > + /*
> > + * Update only when preallocation was requested beyond
> > + * the file size.
> > + */
> > + if (!(mode & FALLOC_FL_KEEP_SIZE) &&
> > break;

.....

> > }
> > - if (ret > 0) {
> > - /* check wrap through sign-bit/zero here */
> > - if ((block + ret) < 0 || (block + ret) < block) {
> > - ret = -EIO;
> > - ext4_mark_inode_dirty(handle, inode);
> > - ret2 = ext4_journal_stop(handle);
> > - break;
> > - }
> > - if (buffer_new(&map_bh) && ((block + ret) >
> > - (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
> > - >> blkbits)))
> > - nblocks = nblocks + ret;
> > - }
> > + if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
> > + blkbits) >> blkbits))
> > + new_size = offset + len;
> > + else
> > + new_size = (block + ret) << blkbits;
>
> Since we called ext4_get_blocks_wrap with max_blocks, will we really end
> up with more blocks allocated than we asked for? And do we no longer
> need the wrap checks? (I'd expect that to be tested higher up with the
> original offset+len requested, no?)

ext4_get_blocks_wrap had confusing returns. Mingming actually fixed it
in the previous patch. We can actually return more bytes than requested
if we are calling ext4_get_blocks_wrap in read mode for an falloc area.
Well that is not really the case here. But having in >= is ok.


The wrap check there was not needed. The needed wrap check is to make sure
whether we are crossing the allowed max file size. That is done in
sys_fallocate.



>
> > - /* Update ctime if new blocks get allocated */
> > - if (nblocks) {
> > - struct timespec now;
> > -
> > - now = current_fs_time(inode->i_sb);

...

-aneesh

2008-03-08 16:13:00

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC PATCH] ext4: Fix fallocate to update the file size in each transaction.

On Fri, Mar 07, 2008 at 10:03:23AM -0600, Dave Kleikamp wrote:
> On Fri, 2008-03-07 at 17:14 +0530, Aneesh Kumar K.V wrote:
> > On Fri, Mar 07, 2008 at 03:30:57AM -0800, Mingming Cao wrote:
> > > On Fri, 2008-03-07 at 16:23 +0530, Aneesh Kumar K.V wrote:
>
> > > > @@ -2814,8 +2834,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
> > > > return -ENODEV;
> > > >
> > > > block = offset >> blkbits;
> > > > - max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
> > > > - - block;
> > > > + max_blocks = EXT4_BLOCK_ALIGN(len, blkbits) >> blkbits;
> > > >
> > >
> > > Not sure about this change... Other than this looks fine to me
> > >
> >
> > max_blocks actually represented the number of blocks that we are
> > requesting. The above change makes it simple.
>
> I think it's too simple. :-(
>
> Take an example of an empty file (no allocated blocks), offset=0xC00,
> length = 0x800, block_size = 0x1000.
>
> The existing code will result in max_blocks = 2, accounting for both the
> blocks that the allocation spans, but your new code will result in
> max_blocks = 1.

Fixed with the above example added to comment.

Thanks
-aneesh