ext4: delalloc ENOSPC handling core
From: Mingming cao <[email protected]>
Core part of delalloc ENOSPC handling (block reservation).
Data/meta blocks are reserved on write_begin(), and per-inode reserved counters
are updated after block allocation.
Signed-off-by: Mingming cao <[email protected]>
---
fs/ext4/inode.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 66 insertions(+), 3 deletions(-)
Index: linux-2.6.26-rc4/fs/ext4/inode.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/inode.c 2008-06-01 14:26:13.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/inode.c 2008-06-01 15:04:06.000000000 -0700
@@ -38,6 +38,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include "ext4_extents.h"
static void ext4_invalidatepage(struct page *page, unsigned long offset);
@@ -1410,6 +1411,61 @@ static int ext4_journalled_write_end(str
return ret ? ret : copied;
}
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned long md_needed, mdblocks, total = 0;
+
+ /*
+ * calculate the amount of metadata blocks to reserve
+ * in order to allocate nrblocks
+ * worst case is one extent per block
+ */
+ total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+ mdblocks = ext4_ext_calc_metadata_amount(inode, total);
+ BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+ md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+ total = md_needed + nrblocks;
+
+ if (ext4_has_free_blocks(sbi, total) < total)
+ return -ENOSPC;
+
+ /* reduce fs free blocks counter */
+ percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+ EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
+
+ return 0; /* success */
+}
+
+static void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, release;
+
+ /* calculate the number of metablocks that still need to be reserved */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ mdb = ext4_ext_calc_metadata_amount(inode, total);
+
+ /* figure out how many metablocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ release = to_free + mdb;
+
+ /* update fs free blocks counter for truncate case */
+ percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+ /* update per-inode reservations */
+ BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= used + to_free;
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ EXT4_I(inode)->i_reserved_meta_blocks -= mdb;
+}
+
/*
* this is a special callback for ->write_begin() only
* it's intention is to return mapped block or reserve space
@@ -1428,13 +1484,17 @@ static int ext4_da_get_block_prep(struct
* the same as allocated blocks.
*/
ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
- if (ret == 0) {
- /* the block isn't allocated yet, let's reserve space */
- /* XXX: call reservation here */
+ if ((ret == 0)&& !buffer_delay(bh_result)) {
+ /* the block isn't (pre)allocated yet, let's reserve space */
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
+ ret = ext4_da_reserve_space(inode, 1);
+ if (ret)
+ /* not enough space to reserve */
+ return ret;
+
map_bh(bh_result, inode->i_sb, 0);
set_buffer_new(bh_result);
set_buffer_delay(bh_result);
@@ -1463,6 +1523,9 @@ static int ext4_da_get_block_write(struc
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
+ /* release reserved-but-unused meta blocks */
+ ext4_da_release_space(inode, ret, 0);
+
/*
* Update on-disk size along with block allocation
* we don't use 'extend_disksize' as size may change
On Sun, Jun 01, 2008 at 04:35:55PM -0700, Mingming Cao wrote:
> ext4: delalloc ENOSPC handling core
>
> From: Mingming cao <[email protected]>
>
> Core part of delaloc ENOSPC (block reservation.)
> data/meta blocks are reserved on write_begin(), and per-inode reserved counters
> are updated after block allocation.
>
> Signed-off-by: Mingming cao <[email protected]>
>
> ---
> fs/ext4/inode.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 66 insertions(+), 3 deletions(-)
>
> Index: linux-2.6.26-rc4/fs/ext4/inode.c
> ===================================================================
> --- linux-2.6.26-rc4.orig/fs/ext4/inode.c 2008-06-01 14:26:13.000000000 -0700
> +++ linux-2.6.26-rc4/fs/ext4/inode.c 2008-06-01 15:04:06.000000000 -0700
> @@ -38,6 +38,7 @@
> #include "ext4_jbd2.h"
> #include "xattr.h"
> #include "acl.h"
> +#include "ext4_extents.h"
>
> static void ext4_invalidatepage(struct page *page, unsigned long offset);
>
> @@ -1410,6 +1411,61 @@ static int ext4_journalled_write_end(str
> return ret ? ret : copied;
> }
>
> +static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
> +{
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + unsigned long md_needed, mdblocks, total = 0;
> +
> + /*
> + * calculate the amount of metadata blocks to reserve
recalculate the amount of metadata blocks to reserve.
> + * in order to allocate nrblocks
> + * worse case is one extent per block
> + */
> + total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
> + mdblocks = ext4_ext_calc_metadata_amount(inode, total);
> + BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
> +
> + md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> + total = md_needed + nrblocks;
> +
> + if (ext4_has_free_blocks(sbi, total) < total)
> + return -ENOSPC;
> +
> + /* reduce fs free blocks counter */
> + percpu_counter_sub(&sbi->s_freeblocks_counter, total);
> +
> + EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> + EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
Better written as
EXT4_I(inode)->i_reserved_meta_blocks += mdblocks;
> +
> + return 0; /* success */
> +}
> +
> +static void ext4_da_release_space(struct inode *inode, int used, int to_free)
> +{
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + int total, mdb, release;
int total, mdb, release, mdb_to_free;
> +
> + /* calculate the number of metablocks still need to be reserved */
> + total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
> + mdb = ext4_ext_calc_metadata_amount(inode, total);
> +
> + /* figure out how many metablocks to release */
> + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
> + mdb = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
mdb_to_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
> +
> + release = to_free + mdb;
> +
release = to_free + mdb_to_free;
> + /* update fs free blocks counter for truncate case */
> + percpu_counter_add(&sbi->s_freeblocks_counter, release);
> +
> + /* update per-inode reservations */
> + BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
> + EXT4_I(inode)->i_reserved_data_blocks -= used + to_free;
> +
> + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
> + EXT4_I(inode)->i_reserved_meta_blocks -= mdb;
/*
* update the meta block reservation
*/
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
> +}
> +
> /*
> * this is a special callback for ->write_begin() only
> * it's intention is to return mapped block or reserve space
> @@ -1428,13 +1484,17 @@ static int ext4_da_get_block_prep(struct
> * the same as allocated blocks.
> */
> ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
> - if (ret == 0) {
> - /* the block isn't allocated yet, let's reserve space */
> - /* XXX: call reservation here */
> + if ((ret == 0)&& !buffer_delay(bh_result)) {
> + /* the block isn't (pre)allocated yet, let's reserve space */
> /*
> * XXX: __block_prepare_write() unmaps passed block,
> * is it OK?
> */
> + ret = ext4_da_reserve_space(inode, 1);
> + if (ret)
> + /* not enough space to reserve */
> + return ret;
> +
> map_bh(bh_result, inode->i_sb, 0);
> set_buffer_new(bh_result);
> set_buffer_delay(bh_result);
> @@ -1463,6 +1523,9 @@ static int ext4_da_get_block_write(struc
> if (ret > 0) {
> bh_result->b_size = (ret << inode->i_blkbits);
>
> + /* release reserved-but-unused meta blocks */
> + ext4_da_release_space(inode, ret, 0);
> +
> /*
> * Update on-disk size along with block allocation
> * we don't use 'extend_disksize' as size may change
>
>
-aneesh
On Mon, Jun 02, 2008 at 12:14:33PM +0530, Aneesh Kumar K.V wrote:
> On Sun, Jun 01, 2008 at 04:35:55PM -0700, Mingming Cao wrote:
> > ext4: delalloc ENOSPC handling core
> >
> > From: Mingming cao <[email protected]>
> >
> > Core part of delaloc ENOSPC (block reservation.)
> > data/meta blocks are reserved on write_begin(), and per-inode reserved counters
> > are updated after block allocation.
> >
> > Signed-off-by: Mingming cao <[email protected]>
> >
> > ---
> > fs/ext4/inode.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
> > 1 file changed, 66 insertions(+), 3 deletions(-)
> >
> > Index: linux-2.6.26-rc4/fs/ext4/inode.c
> > ===================================================================
> > --- linux-2.6.26-rc4.orig/fs/ext4/inode.c 2008-06-01 14:26:13.000000000 -0700
> > +++ linux-2.6.26-rc4/fs/ext4/inode.c 2008-06-01 15:04:06.000000000 -0700
> > @@ -38,6 +38,7 @@
> > #include "ext4_jbd2.h"
> > #include "xattr.h"
> > #include "acl.h"
> > +#include "ext4_extents.h"
> >
> > static void ext4_invalidatepage(struct page *page, unsigned long offset);
> >
> > @@ -1410,6 +1411,61 @@ static int ext4_journalled_write_end(str
> > return ret ? ret : copied;
> > }
> >
> > +static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
> > +{
> > + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> > + unsigned long md_needed, mdblocks, total = 0;
> > +
> > + /*
> > + * calculate the amount of metadata blocks to reserve
>
> recalculate the amount of metadata blocks to reserve.
>
>
>
>
> > + * in order to allocate nrblocks
> > + * worse case is one extent per block
> > + */
> > + total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
> > + mdblocks = ext4_ext_calc_metadata_amount(inode, total);
> > + BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
> > +
> > + md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
> > + total = md_needed + nrblocks;
> > +
> > + if (ext4_has_free_blocks(sbi, total) < total)
> > + return -ENOSPC;
> > +
> > + /* reduce fs free blocks counter */
> > + percpu_counter_sub(&sbi->s_freeblocks_counter, total);
> > +
> > + EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
> > + EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
>
> Better written as
> EXT4_I(inode)->i_reserved_meta_blocks += mdblocks;
>
>
Correction to my suggestion above -- that should be a plain assignment:
EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-aneesh
On Sun, Jun 01, 2008 at 04:35:55PM -0700, Mingming Cao wrote:
> ext4: delalloc ENOSPC handling core
>
> From: Mingming cao <[email protected]>
>
> Core part of delaloc ENOSPC (block reservation.)
> data/meta blocks are reserved on write_begin(), and per-inode reserved counters
> are updated after block allocation.
I am not clear about ext4_da_release_space(). Are we accounting for the
meta-data blocks that actually get allocated? In
ext4_da_get_block_write() we get the number of data blocks allocated,
and we don't update sbi->s_freeblocks_counter in the block allocator
any more. So requests for meta-data blocks do not update
sbi->s_freeblocks_counter. In ext4_da_release_space() we update
sbi->s_freeblocks_counter without taking the above into account. Am I
missing something?
-aneesh