2008-05-19 20:34:33

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH -v2] ext4: Use inode preallocation with -o noextents

When mouting ext4 with -o noextents, request for
file data blocks from inode prealloc space.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
Signed-off-by: Mingming Cao <[email protected]>
---
fs/ext4/balloc.c | 36 +++++++++++++++++++++++++-
fs/ext4/ext4.h | 7 +++-
fs/ext4/extents.c | 2 +-
fs/ext4/inode.c | 72 +++++++++++++++++++++++++++++++++++++++-------------
fs/ext4/xattr.c | 2 +-
5 files changed, 95 insertions(+), 24 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 30494c5..bc7e1cd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1862,7 +1862,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
return 0;
}

-ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp)
{
struct ext4_allocation_request ar;
@@ -1881,9 +1881,29 @@ ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
ret = ext4_mb_new_blocks(handle, &ar, errp);
return ret;
}
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, unsigned long *count, int *errp)
+{
+ struct ext4_allocation_request ar;
+ ext4_fsblk_t ret;
+
+ if (!test_opt(inode->i_sb, MBALLOC)) {
+ ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
+ return ret;
+ }
+
+ memset(&ar, 0, sizeof(ar));
+ ar.inode = inode;
+ ar.goal = goal;
+ ar.len = *count;
+ ret = ext4_mb_new_blocks(handle, &ar, errp);
+ *count = ar.len;
+ return ret;
+}

ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned long *count, int *errp)
+ ext4_lblk_t iblock, ext4_fsblk_t goal,
+ unsigned long *count, int *errp)
{
struct ext4_allocation_request ar;
ext4_fsblk_t ret;
@@ -1894,9 +1914,21 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
}

memset(&ar, 0, sizeof(ar));
+ /* Fill with neighbour allocated blocks */
+ ar.lleft = 0;
+ ar.pleft = 0;
+ ar.lright = 0;
+ ar.pright = 0;
+
ar.inode = inode;
ar.goal = goal;
ar.len = *count;
+ ar.logical = iblock;
+ if (S_ISREG(inode->i_mode))
+ ar.flags = EXT4_MB_HINT_DATA;
+ else
+ /* disable in-core preallocation for non-regular files */
+ ar.flags = 0;
ret = ext4_mb_new_blocks(handle, &ar, errp);
*count = ar.len;
return ret;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8158083..899406b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -958,10 +958,13 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
ext4_group_t group);
-extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp);
-extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock, ext4_fsblk_t goal,
+ unsigned long *count, int *errp);
extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 47929c4..5ba81b3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -188,7 +188,7 @@ ext4_ext_new_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, newblock;

goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
- newblock = ext4_new_block(handle, inode, goal, err);
+ newblock = ext4_new_meta_block(handle, inode, goal, err);
return newblock;
}

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8d97077..3f4182f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -508,11 +508,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
* direct blocks
*/
static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, int indirect_blks, int blks,
- ext4_fsblk_t new_blocks[4], int *err)
+ ext4_lblk_t iblock, ext4_fsblk_t goal,
+ int indirect_blks, int blks,
+ ext4_fsblk_t new_blocks[4], int *err)
{
int target, i;
- unsigned long count = 0;
+ long count = 0, blk_allocated = 0;
int index = 0;
ext4_fsblk_t current_block = 0;
int ret = 0;
@@ -525,12 +526,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
* the first direct block of this branch. That's the
* minimum number of blocks need to allocate(required)
*/
- target = blks + indirect_blks;
-
- while (1) {
+ /* first we try to allocate the indirect blocks */
+ target = indirect_blks;
+ while (target > 0) {
count = target;
/* allocating blocks for indirect blocks and direct blocks */
- current_block = ext4_new_blocks(handle,inode,goal,&count,err);
+ current_block = ext4_new_meta_blocks(handle, inode,
+ goal, &count, err);
if (*err)
goto failed_out;

@@ -540,16 +542,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
new_blocks[index++] = current_block++;
count--;
}
-
- if (count > 0)
+ if (count > 0) {
+ /*
+ * save the new block number
+ * for the first direct block
+ */
+ new_blocks[index] = current_block;
+ printk(KERN_INFO "%s returned more blocks than "
+ "requested\n", __func__);
+ WARN_ON(1);
break;
+ }
}

- /* save the new block number for the first direct block */
- new_blocks[index] = current_block;
-
+ target = blks - count ;
+ blk_allocated = count;
+ if (!target)
+ goto allocated;
+ /* Now allocate data blocks */
+ count = target;
+ /* allocating blocks for indirect blocks and direct blocks */
+ current_block = ext4_new_blocks(handle, inode, iblock,
+ goal, &count, err);
+ if (*err && (target == blks)) {
+ /*
+ * if the allocation failed and we didn't allocate
+ * any blocks before
+ */
+ goto failed_out;
+ }
+ if (!*err) {
+ if (target == blks) {
+ /*
+ * save the new block number
+ * for the first direct block
+ */
+ new_blocks[index] = current_block;
+ }
+ blk_allocated += count;
+ }
+allocated:
/* total number of blocks allocated for direct blocks */
- ret = count;
+ ret = blk_allocated;
*err = 0;
return ret;
failed_out:
@@ -584,8 +618,9 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
* as described above and return 0.
*/
static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
- int indirect_blks, int *blks, ext4_fsblk_t goal,
- ext4_lblk_t *offsets, Indirect *branch)
+ ext4_lblk_t iblock, int indirect_blks,
+ int *blks, ext4_fsblk_t goal,
+ ext4_lblk_t *offsets, Indirect *branch)
{
int blocksize = inode->i_sb->s_blocksize;
int i, n = 0;
@@ -595,7 +630,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
ext4_fsblk_t new_blocks[4];
ext4_fsblk_t current_block;

- num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
+ num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
*blks, new_blocks, &err);
if (err)
return err;
@@ -855,8 +890,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
/*
* Block out ext4_truncate while we alter the tree
*/
- err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
- offsets + (partial - chain), partial);
+ err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
+ &count, goal,
+ offsets + (partial - chain), partial);

/*
* The ext4_splice_branch call will free and forget any buffers
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index ff08633..93c5fdc 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -810,7 +810,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
/* We need to allocate a new block */
ext4_fsblk_t goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
- ext4_fsblk_t block = ext4_new_block(handle, inode,
+ ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
goal, &error);
if (error)
goto cleanup;
--
1.5.5.1.211.g65ea3.dirty



2008-06-04 02:24:28

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Tue, May 20, 2008 at 02:04:22AM +0530, Aneesh Kumar K.V wrote:
> When mouting ext4 with -o noextents, request for
> file data blocks from inode prealloc space.

Aneesh, can you expand on this patch set? Why is this important?
What was it doing beforehand? Is this to replace the use of the block
reservations code that had been introduced by Mingming in ext3? Or is
that a long-term goal?

Also, it would be nice to add some comments at the beginning of the
changed functions, explaining what the functions do, what they are
intended for, what they assumptions they make (i.e., do they assume
that certain locks are taken), any side effects they make (i.e., this
function calls get_bh or put_bh to change the refcount on a passed-in
buffer head).

I assume there was a good reason that you renamed the function
ext4_new_block() to ext4_fsblk_t ext4_new_meta_block(), but why? Some
comments would really be useful.

I asked Mingming what this patch did when I was reviewing it this
afternoon, since we were both in New Hampshire at the btrfs
conference. I couldn't parse the english for the comments, and after
looking at the patch, it wasn't at all obvious what the patch was
trying to accomplish. Even though Mingming had reviewed it maybe two
weeks ago, she couldn't figure it out completely after looking over
the patch. Consider how a someone who isn't intimately familiar with
the code would be able to figure out either (a) the code, or (b)
looking over the changeset. Good code has to be long-term
maintainable, and some comments would really help in this department.

Since neither of us couldn't figure it out what the motivation of this
patch, we've decided to move it to the unstable portion of the patch
since without some better comments, it's probably not a good idea to
push it to Linus in this form.

- Ted

> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> Signed-off-by: Mingming Cao <[email protected]>
> ---
> fs/ext4/balloc.c | 36 +++++++++++++++++++++++++-
> fs/ext4/ext4.h | 7 +++-
> fs/ext4/extents.c | 2 +-
> fs/ext4/inode.c | 72 +++++++++++++++++++++++++++++++++++++++-------------
> fs/ext4/xattr.c | 2 +-
> 5 files changed, 95 insertions(+), 24 deletions(-)
>
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index 30494c5..bc7e1cd 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -1862,7 +1862,7 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
> return 0;
> }
>
> -ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
> +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, int *errp)
> {
> struct ext4_allocation_request ar;
> @@ -1881,9 +1881,29 @@ ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
> ret = ext4_mb_new_blocks(handle, &ar, errp);
> return ret;
> }
> +ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
> + ext4_fsblk_t goal, unsigned long *count, int *errp)
> +{
> + struct ext4_allocation_request ar;
> + ext4_fsblk_t ret;
> +
> + if (!test_opt(inode->i_sb, MBALLOC)) {
> + ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
> + return ret;
> + }
> +
> + memset(&ar, 0, sizeof(ar));
> + ar.inode = inode;
> + ar.goal = goal;
> + ar.len = *count;
> + ret = ext4_mb_new_blocks(handle, &ar, errp);
> + *count = ar.len;
> + return ret;
> +}
>
> ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> - ext4_fsblk_t goal, unsigned long *count, int *errp)
> + ext4_lblk_t iblock, ext4_fsblk_t goal,
> + unsigned long *count, int *errp)
> {
> struct ext4_allocation_request ar;
> ext4_fsblk_t ret;
> @@ -1894,9 +1914,21 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> }
>
> memset(&ar, 0, sizeof(ar));
> + /* Fill with neighbour allocated blocks */
> + ar.lleft = 0;
> + ar.pleft = 0;
> + ar.lright = 0;
> + ar.pright = 0;
> +
> ar.inode = inode;
> ar.goal = goal;
> ar.len = *count;
> + ar.logical = iblock;
> + if (S_ISREG(inode->i_mode))
> + ar.flags = EXT4_MB_HINT_DATA;
> + else
> + /* disable in-core preallocation for non-regular files */
> + ar.flags = 0;
> ret = ext4_mb_new_blocks(handle, &ar, errp);
> *count = ar.len;
> return ret;
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 8158083..899406b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -958,10 +958,13 @@ extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
> extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
> extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
> ext4_group_t group);
> -extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
> +extern ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, int *errp);
> -extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
> +extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp);
> +extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> + ext4_lblk_t iblock, ext4_fsblk_t goal,
> + unsigned long *count, int *errp);
> extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp);
> extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 47929c4..5ba81b3 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -188,7 +188,7 @@ ext4_ext_new_block(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, newblock;
>
> goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
> - newblock = ext4_new_block(handle, inode, goal, err);
> + newblock = ext4_new_meta_block(handle, inode, goal, err);
> return newblock;
> }
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 8d97077..3f4182f 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -508,11 +508,12 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
> * direct blocks
> */
> static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
> - ext4_fsblk_t goal, int indirect_blks, int blks,
> - ext4_fsblk_t new_blocks[4], int *err)
> + ext4_lblk_t iblock, ext4_fsblk_t goal,
> + int indirect_blks, int blks,
> + ext4_fsblk_t new_blocks[4], int *err)
> {
> int target, i;
> - unsigned long count = 0;
> + long count = 0, blk_allocated = 0;
> int index = 0;
> ext4_fsblk_t current_block = 0;
> int ret = 0;
> @@ -525,12 +526,13 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
> * the first direct block of this branch. That's the
> * minimum number of blocks need to allocate(required)
> */
> - target = blks + indirect_blks;
> -
> - while (1) {
> + /* first we try to allocate the indirect blocks */
> + target = indirect_blks;
> + while (target > 0) {
> count = target;
> /* allocating blocks for indirect blocks and direct blocks */
> - current_block = ext4_new_blocks(handle,inode,goal,&count,err);
> + current_block = ext4_new_meta_blocks(handle, inode,
> + goal, &count, err);
> if (*err)
> goto failed_out;
>
> @@ -540,16 +542,48 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
> new_blocks[index++] = current_block++;
> count--;
> }
> -
> - if (count > 0)
> + if (count > 0) {
> + /*
> + * save the new block number
> + * for the first direct block
> + */
> + new_blocks[index] = current_block;
> + printk(KERN_INFO "%s returned more blocks than "
> + "requested\n", __func__);
> + WARN_ON(1);
> break;
> + }
> }
>
> - /* save the new block number for the first direct block */
> - new_blocks[index] = current_block;
> -
> + target = blks - count ;
> + blk_allocated = count;
> + if (!target)
> + goto allocated;
> + /* Now allocate data blocks */
> + count = target;
> + /* allocating blocks for indirect blocks and direct blocks */
> + current_block = ext4_new_blocks(handle, inode, iblock,
> + goal, &count, err);
> + if (*err && (target == blks)) {
> + /*
> + * if the allocation failed and we didn't allocate
> + * any blocks before
> + */
> + goto failed_out;
> + }
> + if (!*err) {
> + if (target == blks) {
> + /*
> + * save the new block number
> + * for the first direct block
> + */
> + new_blocks[index] = current_block;
> + }
> + blk_allocated += count;
> + }
> +allocated:
> /* total number of blocks allocated for direct blocks */
> - ret = count;
> + ret = blk_allocated;
> *err = 0;
> return ret;
> failed_out:
> @@ -584,8 +618,9 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
> * as described above and return 0.
> */
> static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
> - int indirect_blks, int *blks, ext4_fsblk_t goal,
> - ext4_lblk_t *offsets, Indirect *branch)
> + ext4_lblk_t iblock, int indirect_blks,
> + int *blks, ext4_fsblk_t goal,
> + ext4_lblk_t *offsets, Indirect *branch)
> {
> int blocksize = inode->i_sb->s_blocksize;
> int i, n = 0;
> @@ -595,7 +630,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
> ext4_fsblk_t new_blocks[4];
> ext4_fsblk_t current_block;
>
> - num = ext4_alloc_blocks(handle, inode, goal, indirect_blks,
> + num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
> *blks, new_blocks, &err);
> if (err)
> return err;
> @@ -855,8 +890,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
> /*
> * Block out ext4_truncate while we alter the tree
> */
> - err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal,
> - offsets + (partial - chain), partial);
> + err = ext4_alloc_branch(handle, inode, iblock, indirect_blks,
> + &count, goal,
> + offsets + (partial - chain), partial);
>
> /*
> * The ext4_splice_branch call will free and forget any buffers
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index ff08633..93c5fdc 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -810,7 +810,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> /* We need to allocate a new block */
> ext4_fsblk_t goal = ext4_group_first_block_no(sb,
> EXT4_I(inode)->i_block_group);
> - ext4_fsblk_t block = ext4_new_block(handle, inode,
> + ext4_fsblk_t block = ext4_new_meta_block(handle, inode,
> goal, &error);
> if (error)
> goto cleanup;
> --
> 1.5.5.1.211.g65ea3.dirty
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2008-06-04 04:01:16

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Tue, Jun 03, 2008 at 10:23:56PM -0400, Theodore Tso wrote:
> On Tue, May 20, 2008 at 02:04:22AM +0530, Aneesh Kumar K.V wrote:
> > When mouting ext4 with -o noextents, request for
> > file data blocks from inode prealloc space.
>
> Aneesh, can you expand on this patch set? Why is this important?
> What was it doing beforehand? Is this to replace the use of the block
> reservations code that had been introduced by Mingming in ext3? Or is
> that a long-term goal?
>
> Also, it would be nice to add some comments at the beginning of the
> changed functions, explaining what the functions do, what they are
> intended for, what they assumptions they make (i.e., do they assume
> that certain locks are taken), any side effects they make (i.e., this
> function calls get_bh or put_bh to change the refcount on a passed-in
> buffer head).
>
> I assume there was a good reason that you renamed the function
> ext4_new_block() to ext4_fsblk_t ext4_new_meta_block(), but why? Some
> comments would really be useful.
>
> I asked Mingming what this patch did when I was reviewing it this
> afternoon, since we were both in New Hampshire at the btrfs
> conference. I couldn't parse the english for the comments, and after
> looking at the patch, it wasn't at all obvious what the patch was
> trying to accomplish. Even though Mingming had reviewed it maybe two
> weeks ago, she couldn't figure it out completely after looking over
> the patch. Consider how a someone who isn't intimately familiar with
> the code would be able to figure out either (a) the code, or (b)
> looking over the changeset. Good code has to be long-term
> maintainable, and some comments would really help in this department.
>
> Since neither of us couldn't figure it out what the motivation of this
> patch, we've decided to move it to the unstable portion of the patch
> since without some better comments, it's probably not a good idea to
> push it to Linus in this form.
>
> - Ted

When we mount the filesystem with -o noextents currently the block
allocation requests are not using the preallocation feature of mballoc.
mballoc consider the allocation request that doesn't have
ar.flags = EXT4_MB_HINT_DATA set as meta-data allocation and force
them to get the blocks via regular allocator ext4_mb_regular_allocator.
In order to make sure -o noextents (or ext3 format) block allocation
use the preallocation feature we need to set the EXT4_MB_HINT_DATA
while requesting for the blocks. mballoc also needs the
logical block number of the requested block so that it call nicely place
it in the inode prealloc space. This means that we need to differentiate
between data allocation request and meta-data allocation request. The
meta-data allocation is done without setting EXT4_MB_HINT_DATA flag
and they would use the regular allocator where as the data blocks
are requested with EXT4_MB_HINT_DATA set and along with logical block
number so that they can allocate the blocks nicely from the inode prealloc
space. Now with -o noextents we request for blocks via
ext4_alloc_blocks. Earlier we request for both the data and meta-block
together. Now we can't do that since the data-blocks are allocated based
on the logical block number of the request. So the changes was to split
it such that we first request for meta-data blocks and then request
for data-blocks with the logical block number.


-aneesh

2008-06-05 03:22:47

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

when I moved this patch to the beginning of the unstable patch queue,
it didn't apply. When I tried to look at it, my head started
spinning. The patch applied to the wrong function, apparently,
because there is so much code duplication "patch" got confused. I
can't blame it, though, because *I* got confused.

fs/ext4/balloc.c is a complete disaster right now. We have:

ext4_new_blocks_old()
ext4_new_meta_block()
ext4_new_meta_blocks()
ext4_new_blocks()

... and without any comments, it is extremely impenetrable. Someone
needs to document what the heck all of the various functions have to
do with each other, when they get used (i.e., with which mount options).

Why aren't we factoring out the last three into a single function?

- Ted

2008-06-05 08:47:13

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
> when I moved this patch to the beginning of the unstable patch queue,
> it didn't apply. When I tried to look at it, my head started
> spinning. The patch applied to the wrong function, apparently,
> because there is so much code duplication "patch" got confused. I
> can't blame it, though, because *I* got confused.
>
> fs/ext4/balloc.c is a complete disaster right now. We have:
>
> ext4_new_blocks_old()
> ext4_new_meta_block()
> ext4_new_meta_blocks()
> ext4_new_blocks()
>
> ... and without any comments, it is extremely impenetrable. Someone
> needs to document what the heck all of the various functions have to
> do with each other, when they get used (i.e., with which mount options).
>
> Why aren't we factoring out the last three into a single function?

Something like below ? . I will send a final patch once I get the
patchqueu updated. I am not able to reach repo.or.cz currently.

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd18ceb..abbc500 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1656,7 +1656,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
}

/**
- * ext4_new_blocks_old() -- core block(s) allocation function
+ * ext4_orlov_new_blocks() -- core block(s) allocation function
* @handle: handle to this transaction
* @inode: file inode
* @goal: given target block(filesystem wide)
@@ -1669,7 +1669,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
* any specific goal block.
*
*/
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+ext4_fsblk_t ext4_orlov_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp)
{
struct buffer_head *bitmap_bh = NULL;
@@ -1942,88 +1942,68 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
return 0;
}

-ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, int *errp)
+static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock, ext4_fsblk_t goal,
+ unsigned long *count, int *errp, int meta)
{
struct ext4_allocation_request ar;
ext4_fsblk_t ret;

if (!test_opt(inode->i_sb, MBALLOC)) {
- unsigned long count = 1;
- ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
+ ret = ext4_orlov_new_blocks(handle, inode, goal, count, errp);
return ret;
}

memset(&ar, 0, sizeof(ar));
+ /* Fill with neighbour allocated blocks */
+ ar.lleft = 0;
+ ar.pleft = 0;
+ ar.lright = 0;
+ ar.pright = 0;
+
ar.inode = inode;
ar.goal = goal;
- ar.len = 1;
+ ar.len = *count;
+ ar.logical = iblock;
+ if (S_ISREG(inode->i_mode) && !meta)
+ ar.flags = EXT4_MB_HINT_DATA;
+ else
+ /* disable in-core preallocation for non-regular files */
+ ar.flags = 0;
ret = ext4_mb_new_blocks(handle, &ar, errp);
+ *count = ar.len;
/*
* Account for the allocated meta blocks
*/
- if (!(*errp)) {
+ if (!(*errp) && meta) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
-
return ret;
}
+
+
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, int *errp)
+{
+ unsigned long count = 1;
+ return do_blk_alloc(handle, inode, 0, goal, &count, errp, 1);
+}
+
ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp)
{
- struct ext4_allocation_request ar;
- ext4_fsblk_t ret;
-
- if (!test_opt(inode->i_sb, MBALLOC)) {
- ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
- return ret;
- }
-
- memset(&ar, 0, sizeof(ar));
- ar.inode = inode;
- ar.goal = goal;
- ar.len = *count;
- ret = ext4_mb_new_blocks(handle, &ar, errp);
- *count = ar.len;
- return ret;
+ return do_blk_alloc(handle, inode, 0, goal, count, errp, 1);
}

ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
unsigned long *count, int *errp)
{
- struct ext4_allocation_request ar;
- ext4_fsblk_t ret;
-
- if (!test_opt(inode->i_sb, MBALLOC)) {
- ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
- return ret;
- }
-
- memset(&ar, 0, sizeof(ar));
- /* Fill with neighbour allocated blocks */
- ar.lleft = 0;
- ar.pleft = 0;
- ar.lright = 0;
- ar.pright = 0;
-
- ar.inode = inode;
- ar.goal = goal;
- ar.len = *count;
- ar.logical = iblock;
- if (S_ISREG(inode->i_mode))
- ar.flags = EXT4_MB_HINT_DATA;
- else
- /* disable in-core preallocation for non-regular files */
- ar.flags = 0;
- ret = ext4_mb_new_blocks(handle, &ar, errp);
- *count = ar.len;
- return ret;
+ return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
}

-
/**
* ext4_count_free_blocks() -- count filesystem free blocks
* @sb: superblock
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19d48dd..c401253 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1049,7 +1049,7 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
unsigned long *count, int *errp);
-extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+extern ext4_fsblk_t ext4_orlov_new_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
ext4_fsblk_t nblocks);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 09922ae..a810a21 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sbi = EXT4_SB(sb);

if (!test_opt(sb, MBALLOC)) {
- block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
+ block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
&(ar->len), errp);
return block;
}

2008-06-05 14:55:50

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
> > when I moved this patch to the beginning of the unstable patch queue,
> > it didn't apply. When I tried to look at it, my head started
> > spinning. The patch applied to the wrong function, apparently,
> > because there is so much code duplication "patch" got confused. I
> > can't blame it, though, because *I* got confused.
> >
> > fs/ext4/balloc.c is a complete disaster right now. We have:
> >
> > ext4_new_blocks_old()
> > ext4_new_meta_block()
> > ext4_new_meta_blocks()
> > ext4_new_blocks()
> >
> > ... and without any comments, it is extremely impenetrable. Someone
> > needs to document what the heck all of the various functions have to
> > do with each other, when they get used (i.e., with which mount options).
> >

One more thing, I feel we should clean up inode.c, move the functions
related to non extent file allocation from inode.c into balloc.c, and
try to keep balloc.c the single file to handle allocation for non extent
files.

> > Why aren't we factoring out the last three into a single function?
>
> Something like below ? . I will send a final patch once I get the
> patchqueu updated. I am not able to reach repo.or.cz currently.
>
looks good, a few comment
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index bd18ceb..abbc500 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -1656,7 +1656,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
> }
>
> /**
> - * ext4_new_blocks_old() -- core block(s) allocation function
> + * ext4_orlov_new_blocks() -- core block(s) allocation function

what is orlov means? this is core function for non extent based without
mballoc block allocation, right?

> * @handle: handle to this transaction
> * @inode: file inode
> * @goal: given target block(filesystem wide)
> @@ -1669,7 +1669,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
> * any specific goal block.
> *
> */
> -ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
> +ext4_fsblk_t ext4_orlov_new_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp)
> {
> struct buffer_head *bitmap_bh = NULL;
> @@ -1942,88 +1942,68 @@ ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
> return 0;
> }
>
> -ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
> - ext4_fsblk_t goal, int *errp)
> +static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
> + ext4_lblk_t iblock, ext4_fsblk_t goal,
> + unsigned long *count, int *errp, int meta)
> {
> struct ext4_allocation_request ar;
> ext4_fsblk_t ret;
>
> if (!test_opt(inode->i_sb, MBALLOC)) {
> - unsigned long count = 1;
> - ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
> + ret = ext4_orlov_new_blocks(handle, inode, goal, count, errp);
> return ret;
> }
>
> memset(&ar, 0, sizeof(ar));
> + /* Fill with neighbour allocated blocks */
> + ar.lleft = 0;
> + ar.pleft = 0;
> + ar.lright = 0;
> + ar.pright = 0;
> +
> ar.inode = inode;
> ar.goal = goal;
> - ar.len = 1;
> + ar.len = *count;
> + ar.logical = iblock;
> + if (S_ISREG(inode->i_mode) && !meta)
> + ar.flags = EXT4_MB_HINT_DATA;
> + else
> + /* disable in-core preallocation for non-regular files */
> + ar.flags = 0;
> ret = ext4_mb_new_blocks(handle, &ar, errp);
> + *count = ar.len;
> /*
> * Account for the allocated meta blocks
> */
> - if (!(*errp)) {
> + if (!(*errp) && meta) {
> spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
> EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
> spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
> }
> -
> return ret;
> }
> +
> +
> +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
> + ext4_fsblk_t goal, int *errp)
> +{
> + unsigned long count = 1;
> + return do_blk_alloc(handle, inode, 0, goal, &count, errp, 1);
> +}
> +
> ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp)
> {
> - struct ext4_allocation_request ar;
> - ext4_fsblk_t ret;
> -
> - if (!test_opt(inode->i_sb, MBALLOC)) {
> - ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
> - return ret;
> - }
> -
> - memset(&ar, 0, sizeof(ar));
> - ar.inode = inode;
> - ar.goal = goal;
> - ar.len = *count;
> - ret = ext4_mb_new_blocks(handle, &ar, errp);
> - *count = ar.len;
> - return ret;
> + return do_blk_alloc(handle, inode, 0, goal, count, errp, 1);
> }
>
> ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> ext4_lblk_t iblock, ext4_fsblk_t goal,
> unsigned long *count, int *errp)
> {
> - struct ext4_allocation_request ar;
> - ext4_fsblk_t ret;
> -
> - if (!test_opt(inode->i_sb, MBALLOC)) {
> - ret = ext4_new_blocks_old(handle, inode, goal, count, errp);
> - return ret;
> - }
> -
> - memset(&ar, 0, sizeof(ar));
> - /* Fill with neighbour allocated blocks */
> - ar.lleft = 0;
> - ar.pleft = 0;
> - ar.lright = 0;
> - ar.pright = 0;
> -
> - ar.inode = inode;
> - ar.goal = goal;
> - ar.len = *count;
> - ar.logical = iblock;
> - if (S_ISREG(inode->i_mode))
> - ar.flags = EXT4_MB_HINT_DATA;
> - else
> - /* disable in-core preallocation for non-regular files */
> - ar.flags = 0;
> - ret = ext4_mb_new_blocks(handle, &ar, errp);
> - *count = ar.len;
> - return ret;
> + return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
> }
>
> -
> /**
> * ext4_count_free_blocks() -- count filesystem free blocks
> * @sb: superblock
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 19d48dd..c401253 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1049,7 +1049,7 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
> extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> ext4_lblk_t iblock, ext4_fsblk_t goal,
> unsigned long *count, int *errp);
> -extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
> +extern ext4_fsblk_t ext4_orlov_new_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, unsigned long *count, int *errp);
> extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
> ext4_fsblk_t nblocks);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 09922ae..a810a21 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
> sbi = EXT4_SB(sb);
>
> if (!test_opt(sb, MBALLOC)) {
> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
> + block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
> &(ar->len), errp);
> return block;
> }

when we get to ext4_mb_new_blocks, don't we already tested MBALLOC is
turned on?


2008-06-05 15:37:58

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, Jun 05, 2008 at 02:13:29PM +0530, Aneesh Kumar K.V wrote:
>
> Something like below ? . I will send a final patch once I get the
> patchqueu updated. I am not able to reach repo.or.cz currently.

This is better, but it still means that we are exporting a large
number of functions to the callers. It's not clear to me we need so
many different variants of ext4_new_blocks_* --- what is their
justification to exist?

For example, why not just have:

static ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
unsigned long *count, int *errp, int meta)

where if inode is NULL, then you're allocating a metadata block, and
if count is NULL, then you only want one block. Of course, this needs
to be carefully documented at the function.

- Ted

2008-06-05 18:25:01

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
> > /**
> > - * ext4_new_blocks_old() -- core block(s) allocation function
> > + * ext4_orlov_new_blocks() -- core block(s) allocation function
>
> what is orlov means? this is core function for non extent based without
> mballoc block allocation, right?

Orlov is the name of the INODE allocator, not the block allocator. I'm
not sure there is a name for the block allocator except "old" or "bitmap".
In the future I suspect we won't want to keep this version at all, using
the mballoc allocator even for block-mapped files, but it is useful for
now for performance comparisons.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-06-05 18:28:41

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Jun 05, 2008 11:37 -0400, Theodore Ts'o wrote:
> This is better, but it still means that we are exporting a large
> number of functions to the callers. It's not clear to me we need so
> many different variants of ext4_new_blocks_* --- what is their
> justification to exist?
>
> For example, why not just have:
>
> static ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> ext4_lblk_t iblock, ext4_fsblk_t goal,
> unsigned long *count, int *errp, int meta)
>
> where if inode is NULL, then you're allocating a metadata block, and
> if count is NULL, then you only want one block. Of course, this needs
> to be carefully documented at the function.

I don't necessarily agree that meta should be implied by inode != NULL.
We do want to cluster metadata allocations for a single inode if possible,
so keeping the inode information is useful. We may want to keep a separate
"metadata goal block" from the "data goal block" in the inode...

That said, it seems you still have a "meta" parameter here? I always hate
having an int for a boolean, and we may as well make this a "flags" so
that when we want to improve it later we don't need to rename it and change
all of the "1" parameters to "EXT4_META_BLOCK". Do it right the first time.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-06-05 18:46:45

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, Jun 05, 2008 at 12:24:55PM -0600, Andreas Dilger wrote:
> On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
> > > /**
> > > - * ext4_new_blocks_old() -- core block(s) allocation function
> > > + * ext4_orlov_new_blocks() -- core block(s) allocation function
> >
> > what is orlov means? this is core function for non extent based without
> > mballoc block allocation, right?
>
> Orlov is the name of the INODE allocator, not the block allocator. I'm
> not sure there is a name for the block allocator except "old" or "bitmap".

What do you suggest to rename it as ? I actually got the name from
http://lwn.net/Articles/14633/. But since the old allocator the block
location depends more closely on where the inodes are place i guess we
can name even the block as orlov allocator.


> In the future I suspect we won't want to keep this version at all, using
> the mballoc allocator even for block-mapped files, but it is useful for
> now for performance comparisons.
>

The patch which is marked in the subject is exactly doing that.


-aneesh

2008-06-05 19:13:13

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, Jun 05, 2008 at 12:28:30PM -0600, Andreas Dilger wrote:
> On Jun 05, 2008 11:37 -0400, Theodore Ts'o wrote:
> > This is better, but it still means that we are exporting a large
> > number of functions to the callers. It's not clear to me we need so
> > many different variants of ext4_new_blocks_* --- what is their
> > justification to exist?
> >
> > For example, why not just have:
> >
> > static ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> > ext4_lblk_t iblock, ext4_fsblk_t goal,
> > unsigned long *count, int *errp, int meta)
> >
> > where if inode is NULL, then you're allocating a metadata block, and
> > if count is NULL, then you only want one block. Of course, this needs
> > to be carefully documented at the function.
>
> I don't necessarily agree that meta should be implied by inode != NULL.
> We do want to cluster metadata allocations for a single inode if possible,
> so keeping the inode information is useful. We may want to keep a separate
> "metadata goal block" from the "data goal block" in the inode...
>
> That said, it seems you still have a "meta" parameter here? I always hate
> having an int for a boolean, and we may as well make this a "flags" so
> that when we want to improve it later we don't need to rename it and change
> all of the "1" parameters to "EXT4_META_BLOCK". Do it right the first time.
>

how about ?

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index c6e6a6f..014e5b5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1928,9 +1928,11 @@ ext4_fsblk_t ext4_orlov_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}

+#defin EXT4_META_BLOCK 0x1
+
static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, ext4_fsblk_t goal,
- unsigned long *count, int *errp, int meta)
+ unsigned long *count, int *errp, int flags)
{
struct ext4_allocation_request ar;
ext4_fsblk_t ret;
@@ -1950,7 +1952,7 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
ar.goal = goal;
ar.len = *count;
ar.logical = iblock;
- if (S_ISREG(inode->i_mode) && !meta)
+ if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
ar.flags = EXT4_MB_HINT_DATA;
else
/* disable in-core preallocation for non-regular files */
@@ -1965,13 +1967,15 @@ ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp)
{
unsigned long count = 1;
- return do_blk_alloc(handle, inode, 0, goal, &count, errp, 1);
+ return do_blk_alloc(handle, inode, 0, goal, &count,
+ errp, EXT4_META_BLOCK);
}

ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp)
{
- return do_blk_alloc(handle, inode, 0, goal, count, errp, 1);
+ return do_blk_alloc(handle, inode, 0, goal, count,
+ errp, EXT4_META_BLOCK);
}

ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,


2008-06-05 20:58:41

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Jun 06, 2008 00:42 +0530, Aneesh Kumar K.V wrote:
> On Thu, Jun 05, 2008 at 12:28:30PM -0600, Andreas Dilger wrote:
> > I don't necessarily agree that meta should be implied by inode != NULL.
> > We do want to cluster metadata allocations for a single inode if possible,
> > so keeping the inode information is useful. We may want to keep a separate
> > "metadata goal block" from the "data goal block" in the inode...
> >
> > That said, it seems you still have a "meta" parameter here? I always hate
> > having an int for a boolean, and we may as well make this a "flags" so
> > that when we want to improve it later we don't need to rename it and change
> > all of the "1" parameters to "EXT4_META_BLOCK". Do it right the first time.
> >
>
> how about ?
>
> +#defin EXT4_META_BLOCK 0x1

> @@ -1950,7 +1952,7 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
> - if (S_ISREG(inode->i_mode) && !meta)
> + if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))

I'm fine with this.

> + return do_blk_alloc(handle, inode, 0, goal, &count,
> + errp, EXT4_META_BLOCK);

Please follow the normal CodingStyle.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-06-06 16:26:49

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, Jun 05, 2008 at 11:37:01AM -0400, Theodore Tso wrote:
> On Thu, Jun 05, 2008 at 02:13:29PM +0530, Aneesh Kumar K.V wrote:
> >
> > Something like below ? . I will send a final patch once I get the
> > patchqueu updated. I am not able to reach repo.or.cz currently.
>
> This is better, but it still means that we are exporting a large
> number of functions to the callers. It's not clear to me we need so
> many different variants of ext4_new_blocks_* --- what is their
> justification to exist?
>
> For example, why not just have:
>
> static ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
> ext4_lblk_t iblock, ext4_fsblk_t goal,
> unsigned long *count, int *errp, int meta)
>

Now that we have moved all the code to do_blk_alloc, we can be assured
that we won't miss bug fixes to those allocation APIs when fixing one of
them. IMHO having separate APIs reduces the risk of misusing them

-aneesh

2008-06-06 21:33:17

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Thu, 2008-06-05 at 12:24 -0600, Andreas Dilger wrote:
> On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
> > > /**
> > > - * ext4_new_blocks_old() -- core block(s) allocation function
> > > + * ext4_orlov_new_blocks() -- core block(s) allocation function
> >
> > what is orlov means? this is core function for non extent based without
> > mballoc block allocation, right?
>
> Orlov is the name of the INODE allocator, not the block allocator. I'm
> not sure there is a name for the block allocator except "old" or "bitmap".

I think rename it as ext4_old_new_blocks() seems better, better than
orlov. This is the block allocator that is only used when mballoc is
turned off. I think we should update the function description.

> In the future I suspect we won't want to keep this version at all, using
> the mballoc allocator even for block-mapped files, but it is useful for
> now for performance comparisons.

In fact I found the mballoc is enabled even for block-mapped files. what
Aneesh's patch "Use inode preallocation with -o noextents" does is
enable the per-inode in-core preallocation for block allocation for
block-mapped files, whem mballoc is turned on. Without this patch, there
is not old ext3 block reservation(as mblloc turned off ext3 block
reservation automatically) and non in-core preallocation in the new
mballoc.

Anessh, Could you clarify this and update in the change log?

> Cheers, Andreas
> --
> Andreas Dilger
> Sr. Staff Engineer, Lustre Group
> Sun Microsystems of Canada, Inc.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2008-06-11 03:29:49

by Shen Feng

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents



Andreas Dilger Wrote:
> On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
>>> /**
>>> - * ext4_new_blocks_old() -- core block(s) allocation function
>>> + * ext4_orlov_new_blocks() -- core block(s) allocation function
>> what is orlov means? this is core function for non extent based without
>> mballoc block allocation, right?
>
> Orlov is the name of the INODE allocator, not the block allocator. I'm
> not sure there is a name for the block allocator except "old" or "bitmap".
> In the future I suspect we won't want to keep this version at all, using
> the mballoc allocator even for block-mapped files, but it is useful for
> now for performance comparisons.
>

Is that true?

I got the following from the kernel ext4 documentation.
orlov (*) This enables the new Orlov block allocator. It is
enabled by default.

oldalloc This disables the Orlov block allocator and enables
the old block allocator. Orlov should have better
performance - we'd like to get some feedback if it's
the contrary for you.

-Shen Feng

2008-06-11 03:47:48

by Shen Feng

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents



Mingming Cao Wrote:
> On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
>> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
>>> when I moved this patch to the beginning of the unstable patch queue,
>>> it didn't apply. When I tried to look at it, my head started
>>> spinning. The patch applied to the wrong function, apparently,
>>> because there is so much code duplication "patch" got confused. I
>>> can't blame it, though, because *I* got confused.
>>>
>>> fs/ext4/balloc.c is a complete disaster right now. We have:
>>>
>>> ext4_new_blocks_old()
>>> ext4_new_meta_block()
>>> ext4_new_meta_blocks()
>>> ext4_new_blocks()
>>>
>>> ... and without any comments, it is extremely impenetrable. Someone
>>> needs to document what the heck all of the various functions have to
>>> do with each other, when they get used (i.e., with which mount options).
>>>
>
> One more thing, I feel we should clean up inode.c, move the functions
> related to non extent file allocation from inode.c into balloc.c, and
> try to keep balloc.c the single file to handle allocation for non extent
> files.
>

I don't agree this.
balloc.c is for non mballoc allocation, not for non extent files.
Maybe we need a noextent.c for non extent file allocation, now it's done
in inode.c.

-Shen Feng

2008-06-12 09:34:35

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Jun 11, 2008 11:26 +0800, Shen Feng wrote:
> Andreas Dilger Wrote:
> > On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
> >>> /**
> >>> - * ext4_new_blocks_old() -- core block(s) allocation function
> >>> + * ext4_orlov_new_blocks() -- core block(s) allocation function
> >> what is orlov means? this is core function for non extent based without
> >> mballoc block allocation, right?
> >
> > Orlov is the name of the INODE allocator, not the block allocator. I'm
> > not sure there is a name for the block allocator except "old" or "bitmap".
> > In the future I suspect we won't want to keep this version at all, using
> > the mballoc allocator even for block-mapped files, but it is useful for
> > now for performance comparisons.
> >
>
> Is that true?
>
> I got the following from the kernel ext4 documentation.
> orlov (*) This enables the new Orlov block allocator. It is
> enabled by default.
>
> oldalloc This disables the Orlov block allocator and enables
> the old block allocator. Orlov should have better
> performance - we'd like to get some feedback if it's
> the contrary for you.

The documentation is incorrect then. The Orlov allocator is for inodes:

ialloc.c:
/*
* Orlov's allocator for directories.

struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
{
if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir);
else
group = find_group_orlov(sb, dir);


Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-06-12 13:43:29

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

Andreas Dilger wrote:
> On Jun 11, 2008 11:26 +0800, Shen Feng wrote:
>> Andreas Dilger Wrote:
>>> On Jun 05, 2008 07:55 -0700, Mingming Cao wrote:
>>>>> /**
>>>>> - * ext4_new_blocks_old() -- core block(s) allocation function
>>>>> + * ext4_orlov_new_blocks() -- core block(s) allocation function
>>>> what is orlov means? this is core function for non extent based without
>>>> mballoc block allocation, right?
>>> Orlov is the name of the INODE allocator, not the block allocator. I'm
>>> not sure there is a name for the block allocator except "old" or "bitmap".
>>> In the future I suspect we won't want to keep this version at all, using
>>> the mballoc allocator even for block-mapped files, but it is useful for
>>> now for performance comparisons.
>>>
>> Is that true?
>>
>> I got the following from the kernel ext4 documentation.
>> orlov (*) This enables the new Orlov block allocator. It is
>> enabled by default.
>>
>> oldalloc This disables the Orlov block allocator and enables
>> the old block allocator. Orlov should have better
>> performance - we'd like to get some feedback if it's
>> the contrary for you.
>
> The documentation is incorrect then. The Orlov allocator is for inodes:

Interestingly, though, the internets are full of references to "the
orlov block allocator for ext*" anyway. :) (it seems that the original
patches all referred to it this way):

http://lwn.net/Articles/14447/

Plus, some Orlov history for those interested ;)

http://web.archive.org/web/20070609035919/http://www.ptci.ru/gluk/dirpref/old/dirpref.html

-Eric


2008-06-16 03:45:13

by Shen Feng

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents



Mingming Cao Wrote:
> On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
>> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
>>> when I moved this patch to the beginning of the unstable patch queue,
>>> it didn't apply. When I tried to look at it, my head started
>>> spinning. The patch applied to the wrong function, apparently,
>>> because there is so much code duplication "patch" got confused. I
>>> can't blame it, though, because *I* got confused.
>>>

...snip...

>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index 09922ae..a810a21 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>> sbi = EXT4_SB(sb);
>>
>> if (!test_opt(sb, MBALLOC)) {
>> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
>> + block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
>> &(ar->len), errp);
>> return block;
>> }
>
> when we get to ext4_mb_new_blocks, don't we already tested MBALLOC is
> turned on?
>

ext4_ext_get_blocks calls ext4_mb_new_blocks. So we have to check this.
So maybe ext4_ext_get_blocks should call ext4_new_blocks and
we can remove this check.

-Shen Feng


2008-06-17 09:46:26

by Shen Feng

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents



Shen Feng Wrote:
>
> Mingming Cao Wrote:
>> On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
>>> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
>>>> when I moved this patch to the beginning of the unstable patch queue,
>>>> it didn't apply. When I tried to look at it, my head started
>>>> spinning. The patch applied to the wrong function, apparently,
>>>> because there is so much code duplication "patch" got confused. I
>>>> can't blame it, though, because *I* got confused.
>>>>
>
> ...snip...
>
>>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>>> index 09922ae..a810a21 100644
>>> --- a/fs/ext4/mballoc.c
>>> +++ b/fs/ext4/mballoc.c
>>> @@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>>> sbi = EXT4_SB(sb);
>>>
>>> if (!test_opt(sb, MBALLOC)) {
>>> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
>>> + block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
>>> &(ar->len), errp);
>>> return block;
>>> }
>> when we get to ext4_mb_new_blocks, don't we already tested MBALLOC is
>> turned on?
>>
>
> ext4_ext_get_blocks calls ext4_mb_new_blocks. So we have to check this.
> So maybe ext4_ext_get_blocks should call ext4_new_blocks and
> we can remove this check.
>

How about this patch?
I tested it with bonnie++ and ltp fs test using mballoc and nonomballoc
options.

Signed-off-by: Shen Feng <[email protected]tsu.com>
---
fs/ext4/extents.c | 25 +++++++++----------------
fs/ext4/mballoc.c | 6 ------
2 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 47929c4..3f6be32 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2528,7 +2528,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, newblock;
int err = 0, depth, ret;
unsigned long allocated = 0;
- struct ext4_allocation_request ar;
+ ext4_lblk_t lleft, lright;
+ ext4_fsblk_t pleft, pright;

__clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %u/%lu requested for inode %u\n",
@@ -2653,12 +2654,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_init_block_alloc_info(inode);

/* find neighbour allocated blocks */
- ar.lleft = iblock;
- err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
+ lleft = iblock;
+ err = ext4_ext_search_left(inode, path, &lleft, &pleft);
if (err)
goto out2;
- ar.lright = iblock;
- err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
+ lright = iblock;
+ err = ext4_ext_search_right(inode, path, &lright, &pright);
if (err)
goto out2;

@@ -2685,16 +2686,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
allocated = max_blocks;

/* allocate new block */
- ar.inode = inode;
- ar.goal = ext4_ext_find_goal(inode, path, iblock);
- ar.logical = iblock;
- ar.len = allocated;
- if (S_ISREG(inode->i_mode))
- ar.flags = EXT4_MB_HINT_DATA;
- else
- /* disable in-core preallocation for non-regular files */
- ar.flags = 0;
- newblock = ext4_mb_new_blocks(handle, &ar, &err);
+ goal = ext4_ext_find_goal(inode, path, iblock);
+ newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
if (!newblock)
goto out2;
ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
@@ -2702,7 +2695,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,

/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
- newex.ee_len = cpu_to_le16(ar.len);
+ newex.ee_len = cpu_to_le16(allocated);
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex);
err = ext4_ext_insert_extent(handle, inode, path, &newex);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c9900aa..bc82d39 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4010,12 +4010,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
sb = ar->inode->i_sb;
sbi = EXT4_SB(sb);

- if (!test_opt(sb, MBALLOC)) {
- block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
- &(ar->len), errp);
- return block;
- }

2008-06-17 10:48:39

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents

On Tue, Jun 17, 2008 at 05:42:57PM +0800, Shen Feng wrote:
>
>
> Shen Feng Wrote:
> >
> > Mingming Cao Wrote:
> >> On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
> >>> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
> >>>> when I moved this patch to the beginning of the unstable patch queue,
> >>>> it didn't apply. When I tried to look at it, my head started
> >>>> spinning. The patch applied to the wrong function, apparently,
> >>>> because there is so much code duplication "patch" got confused. I
> >>>> can't blame it, though, because *I* got confused.
> >>>>
> >
> > ...snip...
> >
> >>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> >>> index 09922ae..a810a21 100644
> >>> --- a/fs/ext4/mballoc.c
> >>> +++ b/fs/ext4/mballoc.c
> >>> @@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
> >>> sbi = EXT4_SB(sb);
> >>>
> >>> if (!test_opt(sb, MBALLOC)) {
> >>> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
> >>> + block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
> >>> &(ar->len), errp);
> >>> return block;
> >>> }
> >> when we get to ext4_mb_new_blocks, don't we already tested MBALLOC is
> >> turned on?
> >>
> >
> > ext4_ext_get_blocks calls ext4_mb_new_blocks. So we have to check this.
> > So maybe ext4_ext_get_blocks should call ext4_new_blocks and
> > we can remove this check.
> >
>
> How about this patch?
> I tested it with bonnie++ and ltp fs test using mballoc and nonomballoc
> options.

NACK. you need the ar.lleft/pleft and ar.lright/pright values so that
mballoc can merge the requests properly. Look at
ext4_mb_normalize_request . So you can't do the below change.


>
> Signed-off-by: Shen Feng <[email protected]>
> ---
> fs/ext4/extents.c | 25 +++++++++----------------
> fs/ext4/mballoc.c | 6 ------
> 2 files changed, 9 insertions(+), 22 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 47929c4..3f6be32 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2528,7 +2528,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> ext4_fsblk_t goal, newblock;
> int err = 0, depth, ret;
> unsigned long allocated = 0;
> - struct ext4_allocation_request ar;
> + ext4_lblk_t lleft, lright;
> + ext4_fsblk_t pleft, pright;
>
> __clear_bit(BH_New, &bh_result->b_state);
> ext_debug("blocks %u/%lu requested for inode %u\n",
> @@ -2653,12 +2654,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> ext4_init_block_alloc_info(inode);
>
> /* find neighbour allocated blocks */
> - ar.lleft = iblock;
> - err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
> + lleft = iblock;
> + err = ext4_ext_search_left(inode, path, &lleft, &pleft);
> if (err)
> goto out2;
> - ar.lright = iblock;
> - err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
> + lright = iblock;
> + err = ext4_ext_search_right(inode, path, &lright, &pright);
> if (err)
> goto out2;
>
> @@ -2685,16 +2686,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
> allocated = max_blocks;
>
> /* allocate new block */
> - ar.inode = inode;
> - ar.goal = ext4_ext_find_goal(inode, path, iblock);
> - ar.logical = iblock;
> - ar.len = allocated;
> - if (S_ISREG(inode->i_mode))
> - ar.flags = EXT4_MB_HINT_DATA;
> - else
> - /* disable in-core preallocation for non-regular files */
> - ar.flags = 0;
> - newblock = ext4_mb_new_blocks(handle, &ar, &err);
> + goal = ext4_ext_find_goal(inode, path, iblock);
> + newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
> if (!newblock)
> goto out2;
> ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
> @@ -2702,7 +2695,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>
> /* try to insert new extent into found leaf and return */
> ext4_ext_store_pblock(&newex, newblock);
> - newex.ee_len = cpu_to_le16(ar.len);
> + newex.ee_len = cpu_to_le16(allocated);
> if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
> ext4_ext_mark_uninitialized(&newex);
> err = ext4_ext_insert_extent(handle, inode, path, &newex);
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index c9900aa..bc82d39 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4010,12 +4010,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
> sb = ar->inode->i_sb;
> sbi = EXT4_SB(sb);
>
> - if (!test_opt(sb, MBALLOC)) {
> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
> - &(ar->len), errp);
> - return block;
> - }
> -
> while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
> ar->flags |= EXT4_MB_HINT_NOPREALLOC;
> ar->len--;
> -- 1.5.5.1

2008-06-18 01:46:51

by Shen Feng

[permalink] [raw]
Subject: Re: [PATCH -v2] ext4: Use inode preallocation with -o noextents



Aneesh Kumar K.V Wrote:
> On Tue, Jun 17, 2008 at 05:42:57PM +0800, Shen Feng wrote:
>>
>> Shen Feng Wrote:
>>> Mingming Cao Wrote:
>>>> On Thu, 2008-06-05 at 14:13 +0530, Aneesh Kumar K.V wrote:
>>>>> On Wed, Jun 04, 2008 at 11:22:20PM -0400, Theodore Tso wrote:
>>>>>> when I moved this patch to the beginning of the unstable patch queue,
>>>>>> it didn't apply. When I tried to look at it, my head started
>>>>>> spinning. The patch applied to the wrong function, apparently,
>>>>>> because there is so much code duplication "patch" got confused. I
>>>>>> can't blame it, though, because *I* got confused.
>>>>>>
>>> ...snip...
>>>
>>>>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>>>>> index 09922ae..a810a21 100644
>>>>> --- a/fs/ext4/mballoc.c
>>>>> +++ b/fs/ext4/mballoc.c
>>>>> @@ -4048,7 +4048,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>>>>> sbi = EXT4_SB(sb);
>>>>>
>>>>> if (!test_opt(sb, MBALLOC)) {
>>>>> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
>>>>> + block = ext4_orlov_new_blocks(handle, ar->inode, ar->goal,
>>>>> &(ar->len), errp);
>>>>> return block;
>>>>> }
>>>> when we get to ext4_mb_new_blocks, don't we already tested MBALLOC is
>>>> turned on?
>>>>
>>> ext4_ext_get_blocks calls ext4_mb_new_blocks. So we have to check this.
>>> So maybe ext4_ext_get_blocks should call ext4_new_blocks and
>>> we can remove this check.
>>>
>> How about this patch?
>> I tested it with bonnie++ and ltp fs test using mballoc and nonomballoc
>> options.
>
> NACK. you need the ar.lleft/pleft and ar.lright/pright values so that
> mballoc can merge the requests properly. Look at
> ext4_mb_normalize_request . So you can't do the below change.
>

Thank you for your explanation.

OK. Now I understand why the call to ext4_new_blocks_old in ext4_mb_new_blocks
is necessary.

The ar.lleft/pleft and ar.lright/pright values are very useful in the case
that extent and mballoc are used at the same time.
But in other case, i.e. noextent & mballoc, extent & nomballoc, noextent &
nomballoc, these values are useless.

Is that right?

-Shen Feng

>
>> Signed-off-by: Shen Feng <[email protected]>
>> ---
>> fs/ext4/extents.c | 25 +++++++++----------------
>> fs/ext4/mballoc.c | 6 ------
>> 2 files changed, 9 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index 47929c4..3f6be32 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -2528,7 +2528,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>> ext4_fsblk_t goal, newblock;
>> int err = 0, depth, ret;
>> unsigned long allocated = 0;
>> - struct ext4_allocation_request ar;
>> + ext4_lblk_t lleft, lright;
>> + ext4_fsblk_t pleft, pright;
>>
>> __clear_bit(BH_New, &bh_result->b_state);
>> ext_debug("blocks %u/%lu requested for inode %u\n",
>> @@ -2653,12 +2654,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>> ext4_init_block_alloc_info(inode);
>>
>> /* find neighbour allocated blocks */
>> - ar.lleft = iblock;
>> - err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
>> + lleft = iblock;
>> + err = ext4_ext_search_left(inode, path, &lleft, &pleft);
>> if (err)
>> goto out2;
>> - ar.lright = iblock;
>> - err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
>> + lright = iblock;
>> + err = ext4_ext_search_right(inode, path, &lright, &pright);
>> if (err)
>> goto out2;
>>
>> @@ -2685,16 +2686,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>> allocated = max_blocks;
>>
>> /* allocate new block */
>> - ar.inode = inode;
>> - ar.goal = ext4_ext_find_goal(inode, path, iblock);
>> - ar.logical = iblock;
>> - ar.len = allocated;
>> - if (S_ISREG(inode->i_mode))
>> - ar.flags = EXT4_MB_HINT_DATA;
>> - else
>> - /* disable in-core preallocation for non-regular files */
>> - ar.flags = 0;
>> - newblock = ext4_mb_new_blocks(handle, &ar, &err);
>> + goal = ext4_ext_find_goal(inode, path, iblock);
>> + newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
>> if (!newblock)
>> goto out2;
>> ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
>> @@ -2702,7 +2695,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
>>
>> /* try to insert new extent into found leaf and return */
>> ext4_ext_store_pblock(&newex, newblock);
>> - newex.ee_len = cpu_to_le16(ar.len);
>> + newex.ee_len = cpu_to_le16(allocated);
>> if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
>> ext4_ext_mark_uninitialized(&newex);
>> err = ext4_ext_insert_extent(handle, inode, path, &newex);
>> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
>> index c9900aa..bc82d39 100644
>> --- a/fs/ext4/mballoc.c
>> +++ b/fs/ext4/mballoc.c
>> @@ -4010,12 +4010,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>> sb = ar->inode->i_sb;
>> sbi = EXT4_SB(sb);
>>
>> - if (!test_opt(sb, MBALLOC)) {
>> - block = ext4_new_blocks_old(handle, ar->inode, ar->goal,
>> - &(ar->len), errp);
>> - return block;
>> - }
>> -
>> while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) {
>> ar->flags |= EXT4_MB_HINT_NOPREALLOC;
>> ar->len--;
>> -- 1.5.5.1
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>