2008-03-04 12:46:09

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH] ext4: zero out small extents when writing to prealloc area.

If the preallocated area is small, zero out the full extent
instead of splitting it. This should avoid the "write
every alternate block" problem that could grow the number
of extents dramatically.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 63 insertions(+), 0 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 839caf2..dcdf92a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2210,6 +2210,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
return ret;
}

+#define EXT4_EXT_ZERO_LEN 7
+
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -2252,6 +2254,18 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
+ /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ return le16_to_cpu(ex->ee_len);
+ }

/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
@@ -2270,6 +2284,38 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* ex3: to ee_block + ee_len : uninitialised */
if (allocated > max_blocks) {
unsigned int newdepth;
+ /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
+ if (allocated <= EXT4_EXT_ZERO_LEN) {
+ /* Mark first half uninitialized.
+ * Mark second half initialized and zero out the
+ * initialized extent
+ */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = cpu_to_le16(ee_len - allocated);
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+
+ ex3 = &newex;
+ ex3->ee_block = cpu_to_le32(iblock);
+ ext4_ext_store_pblock(ex3, newblock);
+ ex3->ee_len = cpu_to_le16(allocated);
+ err = ext4_ext_insert_extent(handle, inode, path, ex3);
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ return le16_to_cpu(ex->ee_len);
+
+ } else if (err)
+ goto fix_extent_len;
+
+ return allocated;
+ }
ex3 = &newex;
ex3->ee_block = cpu_to_le32(iblock + max_blocks);
ext4_ext_store_pblock(ex3, newblock + max_blocks);
@@ -2318,6 +2364,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
}
allocated = max_blocks;
+
+ /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
+ * to insert a extent in the middle zerout directly
+ * otherwise give the extent a chance to merge to left
+ */
+ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
+ iblock != ee_block) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ return le16_to_cpu(ex->ee_len);
+ }
}
/*
* If there was a change of depth as part of the
--
1.5.4.3.422.g34cd6.dirty



2008-03-05 00:52:00

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH] ext4: zero out small extents when writing to prealloc area.

On Tue, 2008-03-04 at 18:16 +0530, Aneesh Kumar K.V wrote:
> If the preallocated area is small zero out the full extent
> instead of splitting them. This should avoid the "write
> every alternate block" problem that could grow the number
> of extents dramatically.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> fs/ext4/extents.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 63 insertions(+), 0 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 839caf2..dcdf92a 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2210,6 +2210,8 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
> return ret;
> }
>
> +#define EXT4_EXT_ZERO_LEN 7
> +
> /*
> * This function is called by ext4_ext_get_blocks() if someone tries to write
> * to an uninitialized extent. It may result in splitting the uninitialized
> @@ -2252,6 +2254,18 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> err = ext4_ext_get_access(handle, inode, path + depth);
> if (err)
> goto out;
> + /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */

Hmm, here the range is extended to 2*EXT4_EXT_ZERO_LEN? I am a
little more biased toward keeping the threshold constant at
EXT4_EXT_ZERO_LEN in all places...

> + if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
> + err = ext4_ext_zeroout(inode, &orig_ex);
> + if (err)
> + goto fix_extent_len;
> + /* update the extent length and mark as initialized */
> + ex->ee_block = orig_ex.ee_block;
> + ex->ee_len = orig_ex.ee_len;
> + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> + ext4_ext_dirty(handle, inode, path + depth);
> + return le16_to_cpu(ex->ee_len);
> + }
>
> /* ex1: ee_block to iblock - 1 : uninitialized */
> if (iblock > ee_block) {
> @@ -2270,6 +2284,38 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> /* ex3: to ee_block + ee_len : uninitialised */
> if (allocated > max_blocks) {
> unsigned int newdepth;
> + /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
> + if (allocated <= EXT4_EXT_ZERO_LEN) {
> + /* Mark first half uninitialized.
> + * Mark second half initialized and zero out the
> + * initialized extent
> + */
> + ex->ee_block = orig_ex.ee_block;
> + ex->ee_len = cpu_to_le16(ee_len - allocated);
> + ext4_ext_mark_uninitialized(ex);
> + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> + ext4_ext_dirty(handle, inode, path + depth);
> +
> + ex3 = &newex;
> + ex3->ee_block = cpu_to_le32(iblock);
> + ext4_ext_store_pblock(ex3, newblock);
> + ex3->ee_len = cpu_to_le16(allocated);
> + err = ext4_ext_insert_extent(handle, inode, path, ex3);
> + if (err == -ENOSPC) {
> + err = ext4_ext_zeroout(inode, &orig_ex);
> + if (err)
> + goto fix_extent_len;
> + ex->ee_block = orig_ex.ee_block;
> + ex->ee_len = orig_ex.ee_len;
> + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> + ext4_ext_dirty(handle, inode, path + depth);
> + return le16_to_cpu(ex->ee_len);
> +
> + } else if (err)
> + goto fix_extent_len;
> +
> + return allocated;
> + }
> ex3 = &newex;
> ex3->ee_block = cpu_to_le32(iblock + max_blocks);
> ext4_ext_store_pblock(ex3, newblock + max_blocks);
> @@ -2318,6 +2364,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> goto out;
> }
> allocated = max_blocks;
> +
> + /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
> + * to insert a extent in the middle zerout directly
> + * otherwise give the extent a chance to merge to left
> + */
> + if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
> + iblock != ee_block) {
> + err = ext4_ext_zeroout(inode, &orig_ex);
> + if (err)
> + goto fix_extent_len;
> + /* update the extent length and mark as initialized */
> + ex->ee_block = orig_ex.ee_block;
> + ex->ee_len = orig_ex.ee_len;
> + ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
> + ext4_ext_dirty(handle, inode, path + depth);
> + return le16_to_cpu(ex->ee_len);
> + }
> }
> /*
> * If there was a change of depth as part of the


2008-03-05 06:45:52

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] ext4: zero out small extents when writing to prealloc area.

On Mar 04, 2008 16:51 -0800, Mingming Cao wrote:
> On Tue, 2008-03-04 at 18:16 +0530, Aneesh Kumar K.V wrote:
> > + /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
> > + if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
>
> Hmm, here this is range is extended to 2*EXT4_EXT_ZERO_LEN? I am a
> little more biased to keep the threshold constant as EXT4_EXT_ZERO_LEN
> around all places...

I initially thought this also, but it makes sense. If the extent is
<= 2x EXT4_EXT_ZERO_LEN and it is split anywhere then the resulting
parts will be < EXT4_EXT_ZERO_LEN themselves, unless the split is very
close to one edge. Even then it is better to keep a single initialized
extent instead of 3 different ones.

Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.