2008-06-01 23:36:16

by Mingming Cao

[permalink] [raw]
Subject: [RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

ext4: delalloc block reservation avoid double accounting

From: Mingming cao <[email protected]>

Since the fs free blocks counter is already reduced at block reservation
time, we need to let the underlying block allocator know, so that it avoids
decreasing the free blocks counter again when the real block allocation finishes.

Signed-off-by: Mingming cao <[email protected]>
---
fs/ext4/dir.c | 3 ++-
fs/ext4/ext4.h | 6 +++++-
fs/ext4/ext4_i.h | 1 +
fs/ext4/extents.c | 2 +-
fs/ext4/inode.c | 25 ++++++++++++++++++-------
fs/ext4/mballoc.c | 13 ++++++++++++-
fs/ext4/super.c | 2 ++
7 files changed, 41 insertions(+), 11 deletions(-)

Index: linux-2.6.26-rc4/fs/ext4/ext4.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4.h 2008-06-01 14:22:03.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4.h 2008-06-01 15:04:14.000000000 -0700
@@ -74,6 +74,9 @@
#define EXT4_MB_HINT_GOAL_ONLY 256
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED 1024
+

struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -1039,6 +1042,7 @@ extern void ext4_mb_free_blocks(handle_t


/* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1231,7 +1235,7 @@ extern long ext4_fallocate(struct inode
extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
sector_t block, unsigned long max_blocks,
struct buffer_head *bh, int create,
- int extend_disksize);
+ int extend_disksize, int flag);
#endif /* __KERNEL__ */

#endif /* _EXT4_H */
Index: linux-2.6.26-rc4/fs/ext4/inode.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/inode.c 2008-06-01 15:04:06.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/inode.c 2008-06-01 15:04:14.000000000 -0700
@@ -973,7 +973,7 @@ out:
*/
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
- int create, int extend_disksize)
+ int create, int extend_disksize, int flag)
{
int retval;

@@ -1014,6 +1014,15 @@ int ext4_get_blocks_wrap(handle_t *handl
* with create == 1 flag.
*/
down_write((&EXT4_I(inode)->i_data_sem));
+
+ /*
+ * if the caller is from delayed allocation writeout path
+ * we have already reserved fs blocks for allocation
+ * let the underlying get_block() function know to
+ * avoid double accounting
+ */
+ if (flag)
+ EXT4_I(inode)->i_delalloc_reserved_flag = 1;
/*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
@@ -1035,6 +1044,8 @@ int ext4_get_blocks_wrap(handle_t *handl
~EXT4_EXT_MIGRATE;
}
}
+ if (flag)
+ EXT4_I(inode)->i_delalloc_reserved_flag = 0;
up_write((&EXT4_I(inode)->i_data_sem));
return retval;
}
@@ -1060,7 +1071,7 @@ static int ext4_get_block(struct inode *
}

ret = ext4_get_blocks_wrap(handle, inode, iblock,
- max_blocks, bh_result, create, 0);
+ max_blocks, bh_result, create, 0, 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
@@ -1086,7 +1097,7 @@ struct buffer_head *ext4_getblk(handle_t
dummy.b_blocknr = -1000;
buffer_trace_init(&dummy.b_history);
err = ext4_get_blocks_wrap(handle, inode, block, 1,
- &dummy, create, 1);
+ &dummy, create, 1, 0);
/*
* ext4_get_blocks_handle() returns number of blocks
* mapped. 0 in case of a HOLE.
@@ -1440,7 +1451,7 @@ static int ext4_da_reserve_space(struct
return 0; /* success */
}

-static void ext4_da_release_space(struct inode *inode, int used, int to_free)
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, release;
@@ -1483,7 +1494,7 @@ static int ext4_da_get_block_prep(struct
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
+ ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
if ((ret == 0)&& !buffer_delay(bh_result)) {
/* the block isn't (pre)allocated yet, let's reserve space */
/*
@@ -1505,7 +1516,7 @@ static int ext4_da_get_block_prep(struct

return ret;
}
-
+#define EXT4_DELALLOC_RSVED 1
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -1519,7 +1530,7 @@ static int ext4_da_get_block_write(struc
BUG_ON(create == 0);

ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, create, 0);
+ bh_result, create, 0, EXT4_DELALLOC_RSVED);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);

Index: linux-2.6.26-rc4/fs/ext4/ext4_i.h
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/ext4_i.h 2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/ext4_i.h 2008-06-01 15:04:14.000000000 -0700
@@ -166,6 +166,7 @@ struct ext4_inode_info {
/* allocation reservation info for delalloc */
unsigned long i_reserved_data_blocks;
unsigned long i_reserved_meta_blocks;
+ unsigned short i_delalloc_reserved_flag;
};

#endif /* _EXT4_I */
Index: linux-2.6.26-rc4/fs/ext4/super.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/super.c 2008-06-01 14:26:14.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/super.c 2008-06-01 15:04:14.000000000 -0700
@@ -574,6 +574,7 @@ static struct inode *ext4_alloc_inode(st
spin_lock_init(&ei->i_prealloc_lock);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
+ ei->i_delalloc_reserved_flag = 0;
return &ei->vfs_inode;
}

@@ -1328,6 +1329,7 @@ set_qf_format:
sbi->s_stripe = option;
break;
case Opt_delalloc:
+ printk("delayed allocation enabled\n");
set_opt(sbi->s_mount_opt, DELALLOC);
break;
default:
Index: linux-2.6.26-rc4/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/mballoc.c 2008-06-01 14:22:02.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/mballoc.c 2008-06-01 15:04:14.000000000 -0700
@@ -2831,7 +2831,15 @@ ext4_mb_mark_diskspace_used(struct ext4_
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
- percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+ /*
+ * free blocks account has already be reduced/reserved
+ * at write_begin() time for delayed allocation
+ * do not double accounting
+ */
+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+ percpu_counter_sub(&sbi->s_freeblocks_counter,
+ ac->ac_b_ex.fe_len);

if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4055,6 +4063,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t
}
inquota = ar->len;

+ if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) {
ar->len = 0;
Index: linux-2.6.26-rc4/fs/ext4/dir.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/dir.c 2008-06-01 13:06:07.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/dir.c 2008-06-01 15:04:14.000000000 -0700
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * fi
struct buffer_head *bh = NULL;

map_bh.b_state = 0;
- err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+ err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+ 0, 0, 0);
if (err > 0) {
pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
Index: linux-2.6.26-rc4/fs/ext4/extents.c
===================================================================
--- linux-2.6.26-rc4.orig/fs/ext4/extents.c 2008-06-01 14:57:57.000000000 -0700
+++ linux-2.6.26-rc4/fs/ext4/extents.c 2008-06-01 15:04:14.000000000 -0700
@@ -2934,7 +2934,7 @@ retry:
}
ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
- EXT4_CREATE_UNINITIALIZED_EXT, 0);
+ EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
if (ret <= 0) {
#ifdef EXT4FS_DEBUG
WARN_ON(ret <= 0);




2008-06-02 06:52:14

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

On Sun, Jun 01, 2008 at 04:36:10PM -0700, Mingming Cao wrote:
> ext4: delalloc block reservation avoid double accounting
>
> From: Mingming cao <[email protected]>
>
> Since fs free blocks counters are already reduced at the block reservation
> time, we need to let the underlying block allocation know to avoid
> decrease the free blocks counter again when real block allocation finished.
>
> Signed-off-by: Mingming cao <[email protected]>
> ---
> fs/ext4/dir.c | 3 ++-
> fs/ext4/ext4.h | 6 +++++-
> fs/ext4/ext4_i.h | 1 +
> fs/ext4/extents.c | 2 +-
> fs/ext4/inode.c | 25 ++++++++++++++++++-------
> fs/ext4/mballoc.c | 13 ++++++++++++-
> fs/ext4/super.c | 2 ++
> 7 files changed, 41 insertions(+), 11 deletions(-)
>

I guess you would need a similar change for balloc.c

-aneesh

2008-06-02 07:12:42

by Mingming Cao

[permalink] [raw]
Subject: Re: [RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

On Mon, 2008-06-02 at 12:21 +0530, Aneesh Kumar K.V wrote:
> On Sun, Jun 01, 2008 at 04:36:10PM -0700, Mingming Cao wrote:
> > ext4: delalloc block reservation avoid double accounting
> >
> > From: Mingming cao <[email protected]>
> >
> > Since fs free blocks counters are already reduced at the block reservation
> > time, we need to let the underlying block allocation know to avoid
> > decrease the free blocks counter again when real block allocation finished.
> >
> > Signed-off-by: Mingming cao <[email protected]>
> > ---
> > fs/ext4/dir.c | 3 ++-
> > fs/ext4/ext4.h | 6 +++++-
> > fs/ext4/ext4_i.h | 1 +
> > fs/ext4/extents.c | 2 +-
> > fs/ext4/inode.c | 25 ++++++++++++++++++-------
> > fs/ext4/mballoc.c | 13 ++++++++++++-
> > fs/ext4/super.c | 2 ++
> > 7 files changed, 41 insertions(+), 11 deletions(-)
> >
>
> I guess you would need a similar change for balloc.c
>

I haven't tried delalloc on non-extent files yet. That's a todo for after
this series has settled down.

It first needs a similar function to calculate the total number of
metadata blocks to reserve for non-extent files, then similar accounting
handling for balloc.

Mingming


2008-06-02 07:59:57

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [RFC][PATCH 4/6] delalloc ENOSPC: avoid free blocks double booking

On Mon, Jun 02, 2008 at 12:12:36AM -0700, Mingming Cao wrote:
> On Mon, 2008-06-02 at 12:21 +0530, Aneesh Kumar K.V wrote:
> > On Sun, Jun 01, 2008 at 04:36:10PM -0700, Mingming Cao wrote:
> > > ext4: delalloc block reservation avoid double accounting
> > >
> > > From: Mingming cao <[email protected]>
> > >
> > > Since fs free blocks counters are already reduced at the block reservation
> > > time, we need to let the underlying block allocation know to avoid
> > > decrease the free blocks counter again when real block allocation finished.
> > >
> > > Signed-off-by: Mingming cao <[email protected]>
> > > ---
> > > fs/ext4/dir.c | 3 ++-
> > > fs/ext4/ext4.h | 6 +++++-
> > > fs/ext4/ext4_i.h | 1 +
> > > fs/ext4/extents.c | 2 +-
> > > fs/ext4/inode.c | 25 ++++++++++++++++++-------
> > > fs/ext4/mballoc.c | 13 ++++++++++++-
> > > fs/ext4/super.c | 2 ++
> > > 7 files changed, 41 insertions(+), 11 deletions(-)
> > >
> >
> > I guess you would need a similar change for balloc.c
> >
>
> I haven't try delaloc on non-extent files yet. that's todo after this
> series is settle down.

how about mount -o nomballoc ?

-aneesh