2011-11-09 11:17:24

by Robin Dong

Subject: [PATCH 0/9 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster

From: Robin Dong <[email protected]>

Hi,

This patch series changes the unit of an extent's ee_block and ee_len from "block" to "cluster",
which reduces the space occupied by metadata; a sketch of the unit arithmetic follows the notes below.

This patch series applies on top of Ted's bigalloc patches. It does not yet support:
1. delayed allocation
2. 1k/2k block sizes
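
For reference, block/cluster conversion in bigalloc is a simple shift by
s_cluster_bits. The following is a minimal, self-contained user-space sketch of
that arithmetic (the helper names and plain integer types are illustrative
assumptions; in the kernel this is done by the EXT4_B2C()/EXT4_C2B() macros):

    #include <stdio.h>

    /* Sketch: mirrors the shifts behind the kernel's EXT4_B2C()/EXT4_C2B(). */
    static unsigned int b2c(unsigned int block, unsigned int cluster_bits)
    {
            return block >> cluster_bits;   /* block number -> cluster number */
    }

    static unsigned int c2b(unsigned int cluster, unsigned int cluster_bits)
    {
            return cluster << cluster_bits; /* cluster number -> its first block */
    }

    int main(void)
    {
            unsigned int cluster_bits = 4;  /* 16 blocks per cluster (example) */

            /* With ee_len counted in clusters, one extent entry covers 16x
             * the range for the same 16-bit length field, which is where
             * the metadata saving comes from. */
            printf("block 35 lives in cluster %u\n", b2c(35, cluster_bits));
            printf("cluster 2 starts at block %u\n", c2b(2, cluster_bits));
            return 0;
    }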



2011-11-09 11:17:26

by Robin Dong

Subject: [PATCH 1/9 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks

From: Robin Dong <[email protected]>

Since ee_len's unit has changed to clusters, callers that need a length in
blocks must convert from clusters to blocks via the new function
ext4_ext_get_actual_blocks().
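
To make the conversion concrete (a sketch, assuming s_cluster_ratio = 16,
i.e. s_cluster_bits = 4): for an initialized extent whose on-disk ee_len is 4,

    ext4_ext_get_actual_len(ex);            /* -> 4 (clusters) */
    ext4_ext_get_actual_blocks(ex, sb);     /* -> 4 << 4 = 64 (blocks) */

i.e. actual_blocks == EXT4_C2B(EXT4_SB(sb), actual_len), with the
uninitialized-extent bias (EXT_INIT_MAX_LEN) stripped first, exactly as the
new helper in the ext4_extents.h hunk below does.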

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 5 ++
fs/ext4/ext4_extents.h | 16 ++++++-
fs/ext4/extents.c | 123 +++++++++++++++++++++++++++---------------------
3 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fba951b..1dea3e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -276,6 +276,11 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Translate a block number to a cluster number by inode */
+#define EXT4_INODE_B2C(inode, block) (EXT4_B2C(EXT4_SB(inode->i_sb), (block)))
+/* Translate a cluster number to a block number by inode */
+#define EXT4_INODE_C2B(inode, cluster) (EXT4_C2B(EXT4_SB(inode->i_sb), \
+ (cluster)))

/*
* Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a52db3a..30c5ce1 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -70,8 +70,10 @@
* It's used at the bottom of the tree.
*/
struct ext4_extent {
- __le32 ee_block; /* first logical block extent covers */
- __le16 ee_len; /* number of blocks covered by extent */
+ __le32 ee_block; /* first logical block (or cluster) *
+ * extent covers */
+ __le16 ee_len; /* number of blocks (or clusters) *
+ * covered by extent */
__le16 ee_start_hi; /* high 16 bits of physical block */
__le32 ee_start_lo; /* low 32 bits of physical block */
};
@@ -212,6 +214,16 @@ static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}

+static inline int ext4_ext_get_actual_blocks(struct ext4_extent *ext,
+ struct super_block *sb)
+{
+ int res = (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+ le16_to_cpu(ext->ee_len) :
+ (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
+
+ return EXT4_C2B(EXT4_SB(sb), res);
+}
+
static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4c38262..597ebcb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -304,7 +304,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
- int len = ext4_ext_get_actual_len(ext);
+ int len = ext4_ext_get_actual_blocks(ext, inode->i_sb);

return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -417,7 +417,8 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext),
+ ext4_ext_get_actual_blocks(path->p_ext,
+ inode->i_sb),
ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
@@ -443,7 +444,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
+ ext4_ext_pblock(ex));
}
ext_debug("\n");
}
@@ -474,7 +476,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
le32_to_cpu(ex->ee_block),
ext4_ext_pblock(ex),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
newblock);
ex++;
}
@@ -599,7 +601,7 @@ ext4_ext_binsearch(struct inode *inode,
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext));
+ ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));

#ifdef CHECK_BINSEARCH
{
@@ -1222,7 +1224,7 @@ static int ext4_ext_search_left(struct inode *inode,
* first one in the file */

ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (*logical < le32_to_cpu(ex->ee_block)) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
@@ -1292,7 +1294,7 @@ static int ext4_ext_search_right(struct inode *inode,
* first one in the file */

ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (*logical < le32_to_cpu(ex->ee_block)) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
@@ -1506,7 +1508,8 @@ int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
- unsigned short ext1_ee_len, ext2_ee_len, max_len;
+ /* unit: cluster */
+ unsigned int ext1_ee_len, ext2_ee_len, max_len;

/*
* Make sure that either both extents are uninitialized, or
@@ -1539,7 +1542,8 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
#endif

- if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
+ if (ext4_ext_pblock(ex1) + EXT4_INODE_C2B(inode, ext1_ee_len)
+ == ext4_ext_pblock(ex2))
return 1;
return 0;
}
@@ -1633,7 +1637,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
unsigned int ret = 0;

b1 = le32_to_cpu(newext->ee_block);
- len1 = ext4_ext_get_actual_len(newext);
+ len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
@@ -1654,13 +1658,13 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
/* check for wrap through zero on extent logical start block*/
if (b1 + len1 < b1) {
len1 = EXT_MAX_BLOCKS - b1;
- newext->ee_len = cpu_to_le16(len1);
+ newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, len1));
ret = 1;
}

/* check for overlap */
if (b1 + len1 > b2) {
- newext->ee_len = cpu_to_le16(b2 - b1);
+ newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, b2 - b1));
ret = 1;
}
out:
@@ -1702,10 +1706,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext, inode->i_sb),
le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -1780,7 +1784,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext));
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
} else if (le32_to_cpu(newext->ee_block)
> le32_to_cpu(nearex->ee_block)) {
@@ -1794,7 +1799,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 2, nearex + 1, len);
}
@@ -1808,7 +1814,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb),
nearex, len, nearex, nearex + 1);
memmove(nearex + 1, nearex, len);
path[depth].p_ext = nearex;
@@ -1891,7 +1898,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
if (block + num < end)
end = block + num;
} else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
/* need to allocate space after found extent */
start = block;
end = block + num;
@@ -1904,7 +1911,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
*/
start = block;
end = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (block + num < end)
end = block + num;
exists = 1;
@@ -1915,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,

if (!exists) {
cbex.ec_block = start;
- cbex.ec_len = end - start;
+ cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
cbex.ec_start = 0;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
@@ -1947,7 +1954,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
path = NULL;
}

- block = cbex.ec_block + cbex.ec_len;
+ block = cbex.ec_block + EXT4_INODE_C2B(inode, cbex.ec_len);
}

if (path) {
@@ -1968,7 +1975,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
trace_ext4_ext_put_in_cache(inode, block, len, start);
cex = &EXT4_I(inode)->i_cached_extent;
cex->ec_block = block;
- cex->ec_len = len;
+ cex->ec_len = EXT4_INODE_B2C(inode, len);
cex->ec_start = start;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1999,17 +2006,17 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext_debug("cache gap(before): %u [%u:%u]",
block,
le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));
} else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
ext4_lblk_t next;
lblock = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb);

next = ext4_ext_next_allocated_block(path);
ext_debug("cache gap(after): [%u:%u] %u",
le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
block);
BUG_ON(next == lblock);
len = next - lblock;
@@ -2207,7 +2214,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t from, ext4_lblk_t to)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
+ unsigned int ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
ext4_fsblk_t pblk;
int flags = EXT4_FREE_BLOCKS_FORGET;

@@ -2319,7 +2326,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_lblk_t a, b, block;
unsigned num;
ext4_lblk_t ex_ee_block;
- unsigned short ex_ee_len;
+ unsigned int ex_ee_len;
unsigned uninitialized = 0;
struct ext4_extent *ex;
struct ext4_map_blocks map;
@@ -2337,7 +2344,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex = EXT_LAST_EXTENT(eh);

ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
ex_ee_len, *partial_cluster);
@@ -2364,7 +2371,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (end <= ex_ee_block) {
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);
continue;
} else if (a != ex_ee_block &&
b != ex_ee_block + ex_ee_len - 1) {
@@ -2399,7 +2407,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (err < 0)
goto out;

- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);

b = ex_ee_block+ex_ee_len - 1 < end ?
ex_ee_block+ex_ee_len - 1 : end;
@@ -2485,7 +2494,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
}

ex->ee_block = cpu_to_le32(block);
- ex->ee_len = cpu_to_le16(num);
+ ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
/*
* Do not mark uninitialized if all the blocks in the
* extent have been removed.
@@ -2523,7 +2532,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
}

if (correct_index && eh->eh_entries)
@@ -2706,7 +2715,7 @@ again:
flags |= EXT4_FREE_BLOCKS_METADATA;

ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(EXT4_SB(sb), partial_cluster),
+ EXT4_INODE_C2B(inode, partial_cluster),
EXT4_SB(sb)->s_cluster_ratio, flags);
partial_cluster = 0;
}
@@ -2793,7 +2802,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
unsigned int ee_len;
int ret;

- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
ee_pblock = ext4_ext_pblock(ex);

ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
@@ -2854,7 +2863,7 @@ static int ext4_split_extent_at(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
newblock = split - ee_block + ext4_ext_pblock(ex);

BUG_ON(split < ee_block || split >= (ee_block + ee_len));
@@ -2883,7 +2892,7 @@ static int ext4_split_extent_at(handle_t *handle,

/* case a */
memcpy(&orig_ex, ex, sizeof(orig_ex));
- ex->ee_len = cpu_to_le16(split - ee_block);
+ ex->ee_len = cpu_to_le16(EXT4_INODE_B2C(inode, split - ee_block));
if (split_flag & EXT4_EXT_MARK_UNINIT1)
ext4_ext_mark_uninitialized(ex);

@@ -2897,7 +2906,8 @@ static int ext4_split_extent_at(handle_t *handle,

ex2 = &newex;
ex2->ee_block = cpu_to_le32(split);
- ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
+ ex2->ee_len = cpu_to_le16(
+ EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
ext4_ext_store_pblock(ex2, newblock);
if (split_flag & EXT4_EXT_MARK_UNINIT2)
ext4_ext_mark_uninitialized(ex2);
@@ -2908,7 +2918,7 @@ static int ext4_split_extent_at(handle_t *handle,
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le32(ee_len);
+ ex->ee_len = cpu_to_le32(EXT4_INODE_B2C(inode, ee_len));
ext4_ext_try_to_merge(inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
@@ -2953,7 +2963,7 @@ static int ext4_split_extent(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
uninitialized = ext4_ext_is_uninitialized(ex);

if (map->m_lblk + map->m_len < ee_block + ee_len) {
@@ -3028,7 +3038,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
allocated = ee_len - (map->m_lblk - ee_block);

WARN_ON(map->m_lblk < ee_block);
@@ -3070,7 +3080,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* case 3 */
zero_ex.ee_block =
cpu_to_le32(map->m_lblk);
- zero_ex.ee_len = cpu_to_le16(allocated);
+ zero_ex.ee_len = cpu_to_le16(
+ EXT4_INODE_B2C(inode, allocated));
ext4_ext_store_pblock(&zero_ex,
ext4_ext_pblock(ex) + map->m_lblk - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3084,8 +3095,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* case 2 */
if (map->m_lblk != ee_block) {
zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(map->m_lblk -
- ee_block);
+ zero_ex.ee_len =
+ cpu_to_le16(EXT4_INODE_B2C(inode,
+ map->m_lblk - ee_block));
ext4_ext_store_pblock(&zero_ex,
ext4_ext_pblock(ex));
err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3157,7 +3169,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
@@ -3180,7 +3192,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));

err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3242,7 +3254,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* function immediately.
*/
if (lblk + len < le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex))
+ ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
return 0;
/*
* If the caller does appear to be planning to write at or
@@ -3645,7 +3657,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
ext4_lblk_t rr_cluster_start, rr_cluster_end;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
+ unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);

/* The extent passed in that we are trying to match */
ex_cluster_start = EXT4_B2C(sbi, ee_block);
@@ -3761,7 +3773,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
- le32_to_cpu(newex.ee_block)
+ ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
- allocated = ext4_ext_get_actual_len(&newex) -
+ allocated = ext4_ext_get_actual_blocks(&newex,
+ inode->i_sb) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
goto out;
}
@@ -3796,13 +3809,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
ext4_fsblk_t partial_cluster = 0;
- unsigned short ee_len;
+ unsigned int ee_len;

/*
* Uninitialized extents are treated as holes, except that
* we split out initialized portions during a write.
*/
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);

@@ -3880,7 +3893,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);
ee_block = le32_to_cpu(ex->ee_block);
ee_start = ext4_ext_pblock(ex);

@@ -4064,13 +4078,14 @@ got_allocated_blocks:
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), fb_flags);
+ ext4_ext_get_actual_blocks(&newex, inode->i_sb),
+ fb_flags);
goto out2;
}

/* previous routine could use block we allocated */
newblock = ext4_ext_pblock(&newex);
- allocated = ext4_ext_get_actual_len(&newex);
+ allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
if (allocated > map->m_len)
allocated = map->m_len;
map->m_flags |= EXT4_MAP_NEW;
--
1.7.3.2


2011-11-09 11:17:30

by Robin Dong

Subject: [PATCH 2/9 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks

From: Robin Dong <[email protected]>

We need to align the allocation to a cluster boundary even when users allocate
just one block.
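
The alignment arithmetic relies on s_cluster_ratio being a power of two; a
sketch with illustrative values (ratio 16, request at logical block 35):

    unsigned int ratio   = 16;                      /* sbi->s_cluster_ratio */
    unsigned int lblk    = 35;                      /* map->m_lblk */
    unsigned int aligned = lblk & ~(ratio - 1);     /* 32: cluster start */
    unsigned int offset  = lblk & (ratio - 1);      /* 3: offset in cluster */

The patch stores the aligned value in newex.ee_block and adds the offset back
when filling map->m_pblk at the end of ext4_ext_map_blocks().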

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 45 ++++++++++++---------------------------------
1 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 597ebcb..3430ddf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3949,20 +3949,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* Okay, we need to do block allocation.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
- newex.ee_block = cpu_to_le32(map->m_lblk);
+ newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

- /*
- * If we are doing bigalloc, check to see if the extent returned
- * by ext4_ext_find_extent() implies a cluster we can use.
- */
- if (cluster_offset && ex &&
- get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
+ if (ex)
+ BUG_ON((le32_to_cpu(ex->ee_block) +
+ EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
+ (map->m_lblk & ~(sbi->s_cluster_ratio-1)));

/* find neighbour allocated blocks */
ar.lleft = map->m_lblk;
@@ -3975,16 +3968,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (err)
goto out2;

- /* Check if the extent after searching to the right implies a
- * cluster we can use. */
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
-
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent. For an initialized extent this limit is
@@ -3999,7 +3982,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = EXT_UNINIT_MAX_LEN;

/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
- newex.ee_len = cpu_to_le16(map->m_len);
+ newex.ee_len = cpu_to_le16(EXT4_NUM_B2C(sbi, map->m_len));
err = ext4_ext_check_overlap(sbi, inode, &newex, path);
if (err)
allocated = ext4_ext_get_actual_len(&newex);
@@ -4036,14 +4019,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.goal, newblock, allocated);
free_on_err = 1;
allocated_clusters = ar.len;
- ar.len = EXT4_C2B(sbi, ar.len) - offset;
- if (ar.len > allocated)
- ar.len = allocated;

got_allocated_blocks:
/* try to insert new extent into found leaf and return */
- ext4_ext_store_pblock(&newex, newblock + offset);
- newex.ee_len = cpu_to_le16(ar.len);
+ ext4_ext_store_pblock(&newex, newblock);
+ newex.ee_len = cpu_to_le16(allocated_clusters);
/* Mark uninitialized */
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
ext4_ext_mark_uninitialized(&newex);
@@ -4066,7 +4046,8 @@ got_allocated_blocks:
map->m_flags |= EXT4_MAP_UNINIT;
}

- err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+ EXT4_C2B(sbi, allocated_clusters));
if (!err)
err = ext4_ext_insert_extent(handle, inode, path,
&newex, flags);
@@ -4086,8 +4067,6 @@ got_allocated_blocks:
/* previous routine could use block we allocated */
newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
- if (allocated > map->m_len)
- allocated = map->m_len;
map->m_flags |= EXT4_MAP_NEW;

/*
@@ -4174,7 +4153,7 @@ got_allocated_blocks:
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
+ ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -4183,7 +4162,7 @@ out:
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);
map->m_flags |= EXT4_MAP_MAPPED;
- map->m_pblk = newblock;
+ map->m_pblk = newblock + offset;
map->m_len = allocated;
out2:
if (path) {
--
1.7.3.2


2011-11-09 11:17:31

by Robin Dong

Subject: [PATCH 3/9 bigalloc] ext4: change unit of ee_block of extent to cluster

From: Robin Dong <[email protected]>

Change the unit of an extent's ee_block from blocks to clusters.
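
Since the on-disk ee_block is now a cluster number, every reader converts it
back to a logical block and every writer converts the other way; the idiom
repeated throughout the hunks below is (a sketch of the pattern, not new code):

    /* read: on-disk cluster number -> logical block number */
    ext4_lblk_t ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));

    /* write: cluster-aligned logical block number -> on-disk value */
    ex->ee_block = cpu_to_le32(EXT4_INODE_B2C(inode, block));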

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 286 ++++++++++++++++++++++++++++++++---------------------
1 files changed, 174 insertions(+), 112 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3430ddf..4f764ee 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -140,7 +140,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
ex = path[depth].p_ext;
if (ex) {
ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
- ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+ ext4_lblk_t ext_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));

if (block > ext_block)
return ext_pblk + (block - ext_block);
@@ -168,7 +169,8 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
{
ext4_fsblk_t goal, newblock;

- goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+ goal = ext4_ext_find_goal(inode, path,
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)));
newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
NULL, err);
return newblock;
@@ -411,11 +413,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug("path:");
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
- ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- ext4_idx_pblock(path->p_idx));
+ ext_debug(" %d->%llu", EXT4_INODE_C2B(inode,
+ le32_to_cpu(path->p_idx->ei_block)),
+ ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
- le32_to_cpu(path->p_ext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(path->p_ext->ee_block)),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_blocks(path->p_ext,
inode->i_sb),
@@ -442,7 +446,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
- ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+ ext_debug("%d:[%d]%d:%llu ",
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
@@ -461,7 +466,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
idx = path[level].p_idx;
while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", level,
- le32_to_cpu(idx->ei_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(idx->ei_block)),
ext4_idx_pblock(idx),
newblock);
idx++;
@@ -473,7 +479,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
- le32_to_cpu(ex->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
ext4_ext_pblock(ex),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
@@ -519,17 +526,19 @@ ext4_ext_binsearch_idx(struct inode *inode,
r = EXT_LAST_INDEX(eh);
while (l <= r) {
m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ei_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)))
r = m - 1;
else
l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
- m, le32_to_cpu(m->ei_block),
- r, le32_to_cpu(r->ei_block));
+ ext_debug("%p(%u):%p(%u):%p(%u) ",
+ l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ei_block)),
+ m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)),
+ r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ei_block)));
}

path->p_idx = l - 1;
- ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
+ ext_debug(" -> %d->%lld ",
+ EXT4_INODE_C2B(inode, le32_to_cpu(path->p_idx->ei_block)),
ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
@@ -545,12 +554,14 @@ ext4_ext_binsearch_idx(struct inode *inode,
"first=0x%p\n", k,
ix, EXT_FIRST_INDEX(eh));
printk(KERN_DEBUG "%u <= %u\n",
- le32_to_cpu(ix->ei_block),
- le32_to_cpu(ix[-1].ei_block));
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ix->ei_block)),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ix[-1].ei_block)));
}
BUG_ON(k && le32_to_cpu(ix->ei_block)
<= le32_to_cpu(ix[-1].ei_block));
- if (block < le32_to_cpu(ix->ei_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ix->ei_block)))
break;
chix = ix;
}
@@ -587,21 +598,22 @@ ext4_ext_binsearch(struct inode *inode,

while (l <= r) {
m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ee_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)))
r = m - 1;
else
l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
- m, le32_to_cpu(m->ee_block),
- r, le32_to_cpu(r->ee_block));
+ ext_debug("%p(%u):%p(%u):%p(%u) ",
+ l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ee_block)),
+ m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)),
+ r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ee_block)));
}

path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
- le32_to_cpu(path->p_ext->ee_block),
- ext4_ext_pblock(path->p_ext),
- ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));
+ EXT4_INODE_C2B(inode, le32_to_cpu(path->p_ext->ee_block)),
+ ext4_ext_pblock(path->p_ext),
+ ext4_ext_is_uninitialized(path->p_ext),
+ ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));

#ifdef CHECK_BINSEARCH
{
@@ -612,7 +624,8 @@ ext4_ext_binsearch(struct inode *inode,
for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
BUG_ON(k && le32_to_cpu(ex->ee_block)
<= le32_to_cpu(ex[-1].ee_block));
- if (block < le32_to_cpu(ex->ee_block))
+ if (block < EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)))
break;
chex = ex;
}
@@ -737,10 +750,13 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
if (err)
return err;

+ /* variable "logical" is in unit of cluster */
if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
EXT4_ERROR_INODE(inode,
"logical %d == ei_block %d!",
- logical, le32_to_cpu(curp->p_idx->ei_block));
+ logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(curp->p_idx->ei_block)));
return -EIO;
}

@@ -971,8 +987,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
EXT_LAST_INDEX(path[i].p_hdr))) {
EXT4_ERROR_INODE(inode,
- "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
- le32_to_cpu(path[i].p_ext->ee_block));
+ "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(path[i].p_ext->ee_block)));
err = -EIO;
goto cleanup;
}
@@ -1112,7 +1129,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
neh = ext_inode_hdr(inode);
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
- le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block)),
ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

neh->eh_depth = cpu_to_le16(path->p_depth + 1);
@@ -1158,7 +1176,8 @@ repeat:
/* refill path */
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+ (ext4_lblk_t)EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
path);
if (IS_ERR(path))
err = PTR_ERR(path);
@@ -1172,7 +1191,8 @@ repeat:
/* refill path */
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+ (ext4_lblk_t)EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
@@ -1225,11 +1245,13 @@ static int ext4_ext_search_left(struct inode *inode,

ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
- if (*logical < le32_to_cpu(ex->ee_block)) {
+ if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
"EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
- *logical, le32_to_cpu(ex->ee_block));
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)));
return -EIO;
}
while (--depth >= 0) {
@@ -1237,9 +1259,11 @@ static int ext4_ext_search_left(struct inode *inode,
if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
EXT4_ERROR_INODE(inode,
"ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
- ix != NULL ? ix->ei_block : 0,
+ ix != NULL ? EXT4_INODE_C2B(
+ inode, ix->ei_block) : 0,
EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
- EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+ EXT4_INODE_C2B(inode, EXT_FIRST_INDEX(
+ path[depth].p_hdr)->ei_block) : 0,
depth);
return -EIO;
}
@@ -1247,14 +1271,19 @@ static int ext4_ext_search_left(struct inode *inode,
return 0;
}

- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ if (unlikely(*logical <
+ (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
return -EIO;
}

- *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
+ *logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1;
*phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
@@ -1295,7 +1324,7 @@ static int ext4_ext_search_right(struct inode *inode,

ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
- if (*logical < le32_to_cpu(ex->ee_block)) {
+ if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
"first_extent(path[%d].p_hdr) != ex",
@@ -1314,10 +1343,14 @@ static int ext4_ext_search_right(struct inode *inode,
goto found_extent;
}

- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ if (unlikely(*logical <
+ (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
return -EIO;
}

@@ -1368,7 +1401,7 @@ got_index:
}
ex = EXT_FIRST_EXTENT(eh);
found_extent:
- *logical = le32_to_cpu(ex->ee_block);
+ *logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
*phys = ext4_ext_pblock(ex);
*ret_ex = ex;
if (bh)
@@ -1384,7 +1417,7 @@ found_extent:
* with leaves.
*/
static ext4_lblk_t
-ext4_ext_next_allocated_block(struct ext4_ext_path *path)
+ext4_ext_next_allocated_block(struct inode *inode, struct ext4_ext_path *path)
{
int depth;

@@ -1397,14 +1430,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
while (depth >= 0) {
if (depth == path->p_depth) {
/* leaf */
- if (path[depth].p_ext !=
- EXT_LAST_EXTENT(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_ext[1].ee_block);
+ if (path[depth].p_ext != EXT_LAST_EXTENT(
+ path[depth].p_hdr))
+ return EXT4_INODE_C2B(inode, le32_to_cpu(
+ path[depth].p_ext[1].ee_block));
} else {
/* index */
- if (path[depth].p_idx !=
- EXT_LAST_INDEX(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_idx[1].ei_block);
+ if (path[depth].p_idx != EXT_LAST_INDEX(
+ path[depth].p_hdr))
+ return EXT4_INODE_C2B(inode, le32_to_cpu(
+ path[depth].p_idx[1].ei_block));
}
depth--;
}
@@ -1416,7 +1451,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
* ext4_ext_next_leaf_block:
* returns first allocated block from next leaf or EXT_MAX_BLOCKS
*/
-static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
+static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
+ struct ext4_ext_path *path)
{
int depth;

@@ -1433,8 +1469,8 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
while (depth >= 0) {
if (path[depth].p_idx !=
EXT_LAST_INDEX(path[depth].p_hdr))
- return (ext4_lblk_t)
- le32_to_cpu(path[depth].p_idx[1].ei_block);
+ return (ext4_lblk_t) EXT4_INODE_C2B(inode,
+ le32_to_cpu(path[depth].p_idx[1].ei_block));
depth--;
}

@@ -1636,12 +1672,12 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
unsigned int depth, len1;
unsigned int ret = 0;

- b1 = le32_to_cpu(newext->ee_block);
+ b1 = EXT4_INODE_C2B(inode, le32_to_cpu(newext->ee_block));
len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+ b2 = EXT4_INODE_C2B(inode, le32_to_cpu(path[depth].p_ext->ee_block));
b2 &= ~(sbi->s_cluster_ratio - 1);

/*
@@ -1649,7 +1685,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
* is before the requested block(s)
*/
if (b2 < b1) {
- b2 = ext4_ext_next_allocated_block(path);
+ b2 = ext4_ext_next_allocated_block(inode, path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
b2 &= ~(sbi->s_cluster_ratio - 1);
@@ -1707,7 +1743,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext, inode->i_sb),
- le32_to_cpu(ex->ee_block),
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
@@ -1740,7 +1776,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
fex = EXT_LAST_EXTENT(eh);
next = EXT_MAX_BLOCKS;
if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
- next = ext4_ext_next_leaf_block(path);
+ next = ext4_ext_next_leaf_block(inode, path);
if (next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %d\n", next);
BUG_ON(npath != NULL);
@@ -1781,7 +1817,8 @@ has_space:
if (!nearex) {
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1796,7 +1833,8 @@ has_space:
len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1811,7 +1849,8 @@ has_space:
len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1883,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
break;
}
ex = path[depth].p_ext;
- next = ext4_ext_next_allocated_block(path);
+ next = ext4_ext_next_allocated_block(inode, path);

exists = 0;
if (!ex) {
@@ -1891,26 +1930,29 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
* all requested space */
start = block;
end = block + num;
- } else if (le32_to_cpu(ex->ee_block) > block) {
+ } else if (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ > block) {
/* need to allocate space before found extent */
start = block;
- end = le32_to_cpu(ex->ee_block);
+ end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
if (block + num < end)
end = block + num;
- } else if (block >= le32_to_cpu(ex->ee_block)
+ } else if (block >=
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
/* need to allocate space after found extent */
start = block;
end = block + num;
if (end >= next)
end = next;
- } else if (block >= le32_to_cpu(ex->ee_block)) {
+ } else if (block >= EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block))) {
/*
* some part of requested space is covered
* by found extent
*/
start = block;
- end = le32_to_cpu(ex->ee_block)
+ end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (block + num < end)
end = block + num;
@@ -1925,7 +1967,8 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
cbex.ec_start = 0;
} else {
- cbex.ec_block = le32_to_cpu(ex->ee_block);
+ cbex.ec_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext4_ext_pblock(ex);
}
@@ -2000,24 +2043,24 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
lblock = 0;
len = EXT_MAX_BLOCKS;
ext_debug("cache gap(whole file):");
- } else if (block < le32_to_cpu(ex->ee_block)) {
+ } else if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
lblock = block;
- len = le32_to_cpu(ex->ee_block) - block;
+ len = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) - block;
ext_debug("cache gap(before): %u [%u:%u]",
- block,
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_blocks(ex, inode->i_sb));
- } else if (block >= le32_to_cpu(ex->ee_block)
+ block,
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));
+ } else if (block >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
ext4_lblk_t next;
- lblock = le32_to_cpu(ex->ee_block)
+ lblock = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb);

- next = ext4_ext_next_allocated_block(path);
+ next = ext4_ext_next_allocated_block(inode, path);
ext_debug("cache gap(after): [%u:%u] %u",
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_blocks(ex, inode->i_sb),
- block);
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
+ block);
BUG_ON(next == lblock);
len = next - lblock;
} else {
@@ -2026,7 +2069,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}

ext_debug(" -> %u:%lu\n", lblock, len);
- ext4_ext_put_in_cache(inode, lblock, len, 0);
+ ext4_ext_put_in_cache(inode, EXT4_INODE_B2C(inode, lblock), len, 0);
}

/*
@@ -2062,11 +2105,14 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
if (cex->ec_len == 0)
goto errout;

- if (in_range(block, cex->ec_block, cex->ec_len)) {
+ if (in_range(block, EXT4_C2B(sbi, cex->ec_block),
+ EXT4_C2B(sbi, cex->ec_len))) {
memcpy(ex, cex, sizeof(struct ext4_ext_cache));
ext_debug("%u cached by %u:%u:%llu\n",
block,
- cex->ec_block, cex->ec_len, cex->ec_start);
+ EXT4_C2B(sbi, cex->ec_block),
+ EXT4_C2B(sbi, cex->ec_len),
+ EXT4_C2B(sbi, cex->ec_start));
ret = 1;
}
errout:
@@ -2229,9 +2275,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
*/
flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;

- trace_ext4_remove_blocks(inode, cpu_to_le32(ex->ee_block),
- ext4_ext_pblock(ex), ee_len, from,
- to, *partial_cluster);
+ trace_ext4_remove_blocks(inode,
+ cpu_to_le32(EXT4_INODE_C2B(inode, ex->ee_block)),
+ ext4_ext_pblock(ex), ee_len, from,
+ to, *partial_cluster);
/*
* If we have a partial cluster, and it's different from the
* cluster of the last block, we need to explicitly free the
@@ -2260,12 +2307,14 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
spin_unlock(&sbi->s_ext_stats_lock);
}
#endif
- if (from >= le32_to_cpu(ex->ee_block)
- && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
+ if (from >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ && to == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1) {
/* tail removal */
ext4_lblk_t num;

- num = le32_to_cpu(ex->ee_block) + ee_len - from;
+ num = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - from;
pblk = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, pblk);
ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
@@ -2282,8 +2331,9 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
*partial_cluster = EXT4_B2C(sbi, pblk);
else
*partial_cluster = 0;
- } else if (from == le32_to_cpu(ex->ee_block)
- && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
+ } else if (from == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ && to <= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1) {
/* head removal */
ext4_lblk_t num;
ext4_fsblk_t start;
@@ -2297,7 +2347,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
} else {
printk(KERN_INFO "strange request: removal(2) "
"%u-%u from %u:%u\n",
- from, to, le32_to_cpu(ex->ee_block), ee_len);
+ from, to,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
}
return 0;
}
@@ -2343,7 +2396,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* find where to start removing */
ex = EXT_LAST_EXTENT(eh);

- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
@@ -2370,7 +2423,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* If this extent is beyond the end of the hole, skip it */
if (end <= ex_ee_block) {
ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex,
inode->i_sb);
continue;
@@ -2493,7 +2547,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
}

- ex->ee_block = cpu_to_le32(block);
+ BUG_ON(block & (sbi->s_cluster_ratio-1));
+ ex->ee_block = cpu_to_le32(EXT4_B2C(sbi, block));
ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
/*
* Do not mark uninitialized if all the blocks in the
@@ -2531,7 +2586,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext_debug("new extent: %u:%u:%llu\n", block, num,
ext4_ext_pblock(ex));
ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
}

@@ -2862,7 +2917,7 @@ static int ext4_split_extent_at(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
newblock = split - ee_block + ext4_ext_pblock(ex);

@@ -2905,7 +2960,7 @@ static int ext4_split_extent_at(handle_t *handle,
goto fix_extent_len;

ex2 = &newex;
- ex2->ee_block = cpu_to_le32(split);
+ ex2->ee_block = cpu_to_le32(EXT4_INODE_B2C(inode, split));
ex2->ee_len = cpu_to_le16(
EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
ext4_ext_store_pblock(ex2, newblock);
@@ -2962,7 +3017,7 @@ static int ext4_split_extent(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
uninitialized = ext4_ext_is_uninitialized(ex);

@@ -3037,7 +3092,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
allocated = ee_len - (map->m_lblk - ee_block);

@@ -3078,8 +3133,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (allocated <= EXT4_EXT_ZERO_LEN &&
(EXT4_EXT_MAY_ZEROOUT & split_flag)) {
/* case 3 */
- zero_ex.ee_block =
- cpu_to_le32(map->m_lblk);
+ zero_ex.ee_block = cpu_to_le32(EXT4_INODE_B2C(inode,
+ map->m_lblk));
zero_ex.ee_len = cpu_to_le16(
EXT4_INODE_B2C(inode, allocated));
ext4_ext_store_pblock(&zero_ex,
@@ -3168,7 +3223,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
*/
depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
@@ -3191,7 +3246,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,

ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
+ (unsigned long long)EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
ext4_ext_get_actual_blocks(ex, inode->i_sb));

err = ext4_ext_get_access(handle, inode, path + depth);
@@ -3253,7 +3309,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* this turns out to be false, we can bail out from this
* function immediately.
*/
- if (lblk + len < le32_to_cpu(last_ex->ee_block) +
+ if (lblk + len < EXT4_INODE_C2B(inode, le32_to_cpu(last_ex->ee_block)) +
ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
return 0;
/*
@@ -3697,7 +3753,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
* |================|
*/
if (map->m_lblk > ee_block) {
- ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+ ext4_lblk_t next = 0;//ext4_ext_next_allocated_block(path);
map->m_len = min(map->m_len, next - map->m_lblk);
}

@@ -3770,12 +3826,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (sbi->s_cluster_ratio > 1)
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
newblock = map->m_lblk
- - le32_to_cpu(newex.ee_block)
+ - EXT4_C2B(sbi, le32_to_cpu(newex.ee_block))
+ ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_blocks(&newex,
inode->i_sb) -
- (map->m_lblk - le32_to_cpu(newex.ee_block));
+ (map->m_lblk - EXT4_C2B(sbi,
+ le32_to_cpu(newex.ee_block)));
goto out;
}
}
@@ -3806,7 +3863,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,

ex = path[depth].p_ext;
if (ex) {
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+ ext4_lblk_t ee_block = EXT4_C2B(sbi, le32_to_cpu(ex->ee_block));
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
ext4_fsblk_t partial_cluster = 0;
unsigned int ee_len;
@@ -3833,7 +3890,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* in the cache
*/
if (!ext4_ext_is_uninitialized(ex)) {
- ext4_ext_put_in_cache(inode, ee_block,
+ ext4_ext_put_in_cache(inode,
+ EXT4_B2C(sbi, ee_block),
ee_len, ee_start);
goto out;
}
@@ -3895,7 +3953,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex,
inode->i_sb);
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_C2B(sbi,
+ le32_to_cpu(ex->ee_block));
ee_start = ext4_ext_pblock(ex);

}
@@ -3949,11 +4008,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* Okay, we need to do block allocation.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
- newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
+ newex.ee_block = EXT4_B2C(sbi,
+ cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

if (ex)
- BUG_ON((le32_to_cpu(ex->ee_block) +
+ BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
(map->m_lblk & ~(sbi->s_cluster_ratio-1)));

@@ -4012,6 +4072,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+ printk(KERN_ERR "ar: %lu, %lu, %lu\n", ar.len, ar.goal, ar.logical);
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4153,7 +4214,8 @@ got_allocated_blocks:
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
+ ext4_ext_put_in_cache(inode, EXT4_B2C(sbi, ar.logical),
+ allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
--
1.7.3.2


2011-11-09 11:17:33

by Robin Dong

Subject: [PATCH 4/9] ext4: remove unused functions and tags

From: Robin Dong <[email protected]>

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 109 -----------------------------------------------------
1 files changed, 0 insertions(+), 109 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4f764ee..ccf12a0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3662,111 +3662,6 @@ out2:
}

/*
- * get_implied_cluster_alloc - check to see if the requested
- * allocation (in the map structure) overlaps with a cluster already
- * allocated in an extent.
- * @sb The filesystem superblock structure
- * @map The requested lblk->pblk mapping
- * @ex The extent structure which might contain an implied
- * cluster allocation
- *
- * This function is called by ext4_ext_map_blocks() after we failed to
- * find blocks that were already in the inode's extent tree. Hence,
- * we know that the beginning of the requested region cannot overlap
- * the extent from the inode's extent tree. There are three cases we
- * want to catch. The first is this case:
- *
- * |--- cluster # N--|
- * |--- extent ---| |---- requested region ---|
- * |==========|
- *
- * The second case that we need to test for is this one:
- *
- * |--------- cluster # N ----------------|
- * |--- requested region --| |------- extent ----|
- * |=======================|
- *
- * The third case is when the requested region lies between two extents
- * within the same cluster:
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- *
- * In each of the above cases, we need to set the map->m_pblk and
- * map->m_len so it corresponds to the return the extent labelled as
- * "|====|" from cluster #N, since it is already in use for data in
- * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
- * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
- * as a new "allocated" block region. Otherwise, we will return 0 and
- * ext4_ext_map_blocks() will then allocate one or more new clusters
- * by calling ext4_mb_new_blocks().
- */
-static int get_implied_cluster_alloc(struct super_block *sb,
- struct ext4_map_blocks *map,
- struct ext4_extent *ex,
- struct ext4_ext_path *path)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
- ext4_lblk_t ex_cluster_start, ex_cluster_end;
- ext4_lblk_t rr_cluster_start, rr_cluster_end;
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);
-
- /* The extent passed in that we are trying to match */
- ex_cluster_start = EXT4_B2C(sbi, ee_block);
- ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
-
- /* The requested region passed into ext4_map_blocks() */
- rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
- rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
-
- if ((rr_cluster_start == ex_cluster_end) ||
- (rr_cluster_start == ex_cluster_start)) {
- if (rr_cluster_start == ex_cluster_end)
- ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
- map->m_len = min(map->m_len,
- (unsigned) sbi->s_cluster_ratio - c_offset);
- /*
- * Check for and handle this case:
- *
- * |--------- cluster # N-------------|
- * |------- extent ----|
- * |--- requested region ---|
- * |===========|
- */
-
- if (map->m_lblk < ee_block)
- map->m_len = min(map->m_len, ee_block - map->m_lblk);
-
- /*
- * Check for the case where there is already another allocated
- * block to the right of 'ex' but before the end of the cluster.
- *
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- */
- if (map->m_lblk > ee_block) {
- ext4_lblk_t next = 0;//ext4_ext_next_allocated_block(path);
- map->m_len = min(map->m_len, next - map->m_lblk);
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
- return 1;
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
- return 0;
-}
-
-
-/*
* Block allocation/map/preallocation routine for extents based files
*
*
@@ -3798,7 +3693,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int result = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
- ext4_lblk_t cluster_offset;
struct ext4_map_blocks punch_map;

ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4010,7 +3904,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = EXT4_B2C(sbi,
cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

if (ex)
BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
@@ -4072,7 +3965,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
- printk(KERN_ERR "ar: %lu, %lu, %lu\n", ar.len, ar.goal, ar.logical);
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4081,7 +3973,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
free_on_err = 1;
allocated_clusters = ar.len;

-got_allocated_blocks:
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated_clusters);
--
1.7.3.2


2011-11-09 11:17:36

by Robin Dong

[permalink] [raw]
Subject: [PATCH 5/9 bigalloc] ext4: zero out extra pages when users write one page

From: Robin Dong <[email protected]>

When users write one page that lies in the middle of a cluster, we need to
zero out the other pages around it.
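
To make the page-index arithmetic concrete, here is a minimal userspace
sketch of how the sibling pages inside a cluster are chosen (sample values
are made up; it assumes blocksize == PAGE_CACHE_SIZE and a power-of-two
cluster ratio, mirroring ext4_prepare_cluster_left_pages() and
ext4_prepare_cluster_right_pages() below):

#include <stdio.h>

int main(void)
{
	unsigned long index = 5;  /* page being written (sample value) */
	unsigned long ratio = 4;  /* s_cluster_ratio, a power of two */
	unsigned long left = index & (ratio - 1);
	unsigned long right = ratio - left - 1;
	unsigned long i;

	/* pages before 'index' in the same cluster: here page 4 */
	for (i = index - left; i < index; i++)
		printf("zero page %lu\n", i);
	/* pages after 'index' in the same cluster: here pages 6 and 7 */
	for (i = index + 1; i <= index + right; i++)
		printf("zero page %lu\n", i);
	return 0;
}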

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 18 ++++
fs/ext4/inode.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 295 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1dea3e8..90ae8a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -675,6 +675,15 @@ struct move_extent {
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)

+#define EXT4_MAX_CLUSTERSIZE 1048576
+#define EXT4_MAX_CTXT_PAGES (EXT4_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
+
+/* tracking cluster write pages */
+struct ext4_write_cluster_ctxt {
+ unsigned long w_num_pages;
+ struct page *w_pages[EXT4_MAX_CTXT_PAGES];
+};
+
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
@@ -1849,6 +1858,15 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);

/* inode.c */
+int walk_page_buffers(handle_t *handle, struct buffer_head *head,
+ unsigned from, unsigned to, int *partial,
+ int (*fn)(handle_t *handle, struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh);
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void);
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc);
+int ext4_zero_cluster_page(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags);
+
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b83c3c..0ae546d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/swap.h>

#include "ext4_jbd2.h"
#include "xattr.h"
@@ -49,6 +50,31 @@

#define MPAGE_DA_EXTENT_TAIL 0x01

+static void ext4_write_cluster_add_page(struct ext4_write_cluster_ctxt *ewcc,
+ struct page *page)
+{
+ ewcc->w_pages[ewcc->w_num_pages] = page;
+ ewcc->w_num_pages++;
+}
+
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void)
+{
+ return kzalloc(sizeof(struct ext4_write_cluster_ctxt), GFP_NOFS);
+}
+
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc)
+{
+ int i;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ if (ewcc->w_pages[i]) {
+ unlock_page(ewcc->w_pages[i]);
+ mark_page_accessed(ewcc->w_pages[i]);
+ page_cache_release(ewcc->w_pages[i]);
+ }
+ }
+ kfree(ewcc);
+}
+
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
@@ -656,7 +682,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return NULL;
}

-static int walk_page_buffers(handle_t *handle,
+int walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
@@ -712,7 +738,7 @@ static int walk_page_buffers(handle_t *handle,
* is elevated. We'll still have enough credits for the tiny quotafile
* write.
*/
-static int do_journal_get_write_access(handle_t *handle,
+int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
{
int dirty = buffer_dirty(bh);
@@ -738,15 +764,176 @@ static int do_journal_get_write_access(handle_t *handle,

static int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+
+int ext4_cluster_write_begin(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block)
+{
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned to = from + len;
+ struct inode *inode = page->mapping->host;
+ unsigned block_start, block_end;
+ sector_t block;
+ int err = 0;
+ unsigned blocksize, bbits;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(from > PAGE_CACHE_SIZE);
+ BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > to);
+
+ blocksize = 1 << inode->i_blkbits;
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+ head = page_buffers(page);
+
+ bbits = inode->i_blkbits;
+ block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+
+ for (bh = head, block_start = 0; bh != head || !block_start;
+ block++, block_start = block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_end <= from || block_start >= to) {
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ }
+ continue;
+ }
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ if (!buffer_mapped(bh)) {
+ WARN_ON(bh->b_size != blocksize);
+ err = get_block(inode, block, bh, 1);
+ if (err)
+ break;
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ if (PageUptodate(page)) {
+ clear_buffer_new(bh);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ continue;
+ }
+ if (block_end > to || block_start < from)
+ zero_user_segments(page,
+ to, block_end,
+ block_start, from);
+ continue;
+ }
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+ !buffer_unwritten(bh) &&
+ (block_start < from || block_end > to)) {
+ ll_rw_block(READ, 1, &bh);
+ *wait_bh++ = bh;
+ }
+ }
+ /*
+ * If we issued read requests - let them complete.
+ */
+ while (wait_bh > wait) {
+ wait_on_buffer(*--wait_bh);
+ if (!buffer_uptodate(*wait_bh))
+ err = -EIO;
+ }
+ if (unlikely(err))
+ page_zero_new_buffers(page, from, to);
+ return err;
+}
+
+int ext4_zero_cluster_page(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ int ret = 0;
+ struct page *page;
+
+ page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ext4_write_cluster_add_page(ewcc, page);
+
+ /* if page is already uptodate and has buffers, don't get_block again
+ */
+ if (PageUptodate(page) && PagePrivate(page))
+ goto out;
+
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ if (ext4_should_dioread_nolock(inode))
+ ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, ext4_get_block_write);
+ else
+ ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, ext4_get_block);
+
+out:
+ return ret;
+}
+
+int ext4_prepare_cluster_left_pages(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret = 0;
+ int block;
+ sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+ sector_t begin;
+
+ if (left_offset) {
+ begin = index - left_offset;
+ for (block = begin; block < index; block++) {
+ ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int ext4_prepare_cluster_right_pages(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret = 0;
+ int block;
+ sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+ sector_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+ sector_t begin;
+
+ if (right_offset) {
+ begin = index + 1;
+ for (block = begin; block < index + right_offset + 1; block++) {
+ ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret, needed_blocks;
handle_t *handle;
- int retries = 0;
- struct page *page;
+ int retries = 0, uninit = 0;
+ struct page *page = NULL;
+ struct ext4_write_cluster_ctxt *ewcc;
pgoff_t index;
unsigned from, to;

@@ -761,6 +948,12 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
to = from + len;

retry:
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -771,27 +964,76 @@ retry:
* started */
flags |= AOP_FLAG_NOFS;

+ if (sbi->s_cluster_ratio > 1) {
+ /* We need to know whether the block is allocated already
+ */
+ struct ext4_map_blocks map;
+ map.m_lblk = index;
+ map.m_len = 1;
+ ret = ext4_map_blocks(handle, inode, &map, 0);
+ uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+ if (ret <= 0 || uninit) {
+ ret = ext4_prepare_cluster_left_pages(inode, index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ }
+ }
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
- ext4_journal_stop(handle);
ret = -ENOMEM;
- goto out;
+ goto err_out;
}
+
*pagep = page;

- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, pos, len, ext4_get_block_write);
- else
- ret = __block_write_begin(page, pos, len, ext4_get_block);
+ ext4_write_cluster_add_page(ewcc, page);
+
+ /* if the block is already allocated by cluster, we should use
+ * ext4_cluster_write_begin (it will not read buffer again)
+ */
+ if (sbi->s_cluster_ratio > 1 && pos + len > inode->i_size) {
+ if (ext4_should_dioread_nolock(inode))
+ ret = ext4_cluster_write_begin(page, pos, len,
+ ext4_get_block_write);
+ else
+ ret = ext4_cluster_write_begin(page, pos, len,
+ ext4_get_block);
+ } else {
+ if (ext4_should_dioread_nolock(inode))
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block_write);
+ else
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block);
+ }
+
+ if (sbi->s_cluster_ratio > 1 && uninit) {
+ ret = ext4_prepare_cluster_right_pages(inode, index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ }

if (!ret && ext4_should_journal_data(inode)) {
- ret = walk_page_buffers(handle, page_buffers(page),
+ int i;
+ unsigned long from, to;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ from = page->index << PAGE_CACHE_SHIFT;
+ to = from + PAGE_CACHE_SIZE;
+ ret = walk_page_buffers(handle, page_buffers(page),
from, to, NULL, do_journal_get_write_access);
+ if (ret)
+ break;
+ }
}

if (ret) {
- unlock_page(page);
- page_cache_release(page);
+ ext4_free_write_cluster_ctxt(ewcc);
/*
* __block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
@@ -819,8 +1061,15 @@ retry:

if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
+
+ *fsdata = ewcc;
out:
return ret;
+
+err_out:
+ ext4_free_write_cluster_ctxt(ewcc);
+ ext4_journal_stop(handle);
+ return ret;
}

/* For write_end() in data=journal mode */
@@ -837,11 +1086,24 @@ static int ext4_generic_write_end(struct file *file,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i_size_changed = 0;
+ int i_size_changed = 0, i;
struct inode *inode = mapping->host;
+ struct ext4_write_cluster_ctxt *ewcc = fsdata;
handle_t *handle = ext4_journal_current_handle();

copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ unsigned long pos;
+ struct page *cluster_page;
+ cluster_page = ewcc->w_pages[i];
+ if (!cluster_page)
+ break;
+ if (cluster_page == page)
+ continue;
+ pos = cluster_page->index << PAGE_CACHE_SHIFT;
+ block_write_end(file, mapping, pos, PAGE_CACHE_SIZE,
+ PAGE_CACHE_SIZE, cluster_page, fsdata);
+ }

/*
* No need to use i_size_read() here, the i_size
@@ -863,8 +1125,7 @@ static int ext4_generic_write_end(struct file *file,
ext4_update_i_disksize(inode, (pos + copied));
i_size_changed = 1;
}
- unlock_page(page);
- page_cache_release(page);
+ ext4_free_write_cluster_ctxt(ewcc);

/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
--
1.7.3.2


2011-11-09 11:17:41

by Robin Dong

[permalink] [raw]
Subject: [PATCH 8/9 bigalloc] ext4: align fallocate size to a whole cluster

From: Robin Dong <[email protected]>
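
The conversion and fallocate paths both need to round a block range out to
whole clusters. A minimal userspace sketch of that rounding with made-up
sample values (the kernel uses EXT4_NUM_B2C()/EXT4_C2B() rather than
open-coded arithmetic):

#include <stdio.h>

int main(void)
{
	unsigned int ratio = 4;          /* blocks per cluster (sample) */
	unsigned int lblk = 9, len = 3;  /* requested block range (sample) */
	unsigned int offset = lblk & (ratio - 1);
	unsigned int clusters = (offset + len + ratio - 1) / ratio;

	/* prints "convert from block 8 for 4 blocks" */
	printf("convert from block %u for %u blocks\n",
	       lblk - offset, clusters * ratio);
	return 0;
}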

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 34 ++++++++++++++++++++++++++++++++--
1 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f84c122..7e3d4c8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3533,8 +3533,11 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path, int flags,
unsigned int allocated, ext4_fsblk_t newblock)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_map_blocks convert_map;
int ret = 0;
int err = 0;
+ int offset;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;

ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
@@ -3598,8 +3601,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}

/* buffered write, writepage time, convert*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
+ offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ convert_map.m_len =
+ EXT4_C2B(sbi, EXT4_NUM_B2C(sbi, offset + map->m_len));
+ convert_map.m_lblk = map->m_lblk - offset;
+ ret = ext4_ext_convert_to_initialized(handle, inode,
+ &convert_map, path);
if (ret >= 0) {
+ ret = map->m_len;
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
map->m_len);
@@ -4318,8 +4327,9 @@ static void ext4_falloc_update_inode(struct inode *inode,
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
- loff_t new_size;
+ loff_t new_size, old_size;
unsigned int max_blocks;
int ret = 0;
int ret2 = 0;
@@ -4349,6 +4359,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
*/
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- map.m_lblk;
+
+ old_size = i_size_read(inode);
/*
* credits to insert 1 extent into extent tree
*/
@@ -4403,6 +4415,24 @@ retry:
goto retry;
}
mutex_unlock(&inode->i_mutex);
+
+ /* if the fallocate expand the file size, we need to zeroout
+ * extra pages in cluster */
+ if (len + offset > old_size) {
+ credits = ext4_chunk_trans_blocks(inode, sbi->s_cluster_ratio);
+ handle = ext4_journal_start(inode, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+ ext4_ext_truncate_zero_pages(handle, inode, old_size);
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ }
+
+out:
trace_ext4_fallocate_exit(inode, offset, max_blocks,
ret > 0 ? ret2 : ret);
return ret > 0 ? ret2 : ret;
--
1.7.3.2


2011-11-09 11:17:38

by Robin Dong

[permalink] [raw]
Subject: [PATCH 6/9 bigalloc] ext4: zero out extra pages when truncating a file

From: Robin Dong <[email protected]>

When truncating a file to a larger size, we need to zero out the pages
beyond the old i_size.
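
A minimal userspace sketch of how the pages to zero are derived from the
old size (sample numbers are hypothetical; it assumes 4k blocks,
blocksize == PAGE_CACHE_SIZE and a cluster ratio of 4, mirroring
ext4_ext_truncate_zero_pages() below):

#include <stdio.h>

int main(void)
{
	unsigned long blocksize = 4096, ratio = 4;
	unsigned long long old_size = 20480;    /* old i_size: 5 blocks */
	unsigned long last = (old_size + blocksize - 1) / blocksize - 1;
	unsigned long left = last & (ratio - 1);
	unsigned long right = ratio - left - 1;
	unsigned long i;

	/* zero the pages after the last used block, up to the end of
	 * its cluster: here pages 5, 6 and 7 */
	for (i = last + 1; i <= last + right; i++)
		printf("zero page %lu\n", i);
	return 0;
}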

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 4 +-
fs/ext4/extents.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/ext4/inode.c | 13 ++++----
fs/ext4/ioctl.c | 2 +-
fs/ext4/super.c | 2 +-
fs/ext4/truncate.h | 2 +-
6 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 90ae8a2..7d226af 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1886,7 +1886,7 @@ extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate(struct inode *);
+extern void ext4_truncate(struct inode *, loff_t oldsize);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
@@ -2267,7 +2267,7 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
int chunk);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(struct inode *);
+extern void ext4_ext_truncate(struct inode *, loff_t oldsize);
extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
loff_t length);
extern void ext4_ext_init(struct super_block *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ccf12a0..f84c122 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4131,10 +4131,76 @@ out2:
return err ? err : result;
}

-void ext4_ext_truncate(struct inode *inode)
+int ext4_ext_truncate_zero_pages(handle_t *handle, struct inode *inode,
+ loff_t old_size)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_write_cluster_ctxt *ewcc = NULL;
+ struct page *page;
+ ext4_lblk_t last_block = ((old_size + sb->s_blocksize - 1)
+ >> EXT4_BLOCK_SIZE_BITS(sb)) - 1;
+ ext4_lblk_t left_offset = last_block & (sbi->s_cluster_ratio - 1);
+ ext4_lblk_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+ ext4_lblk_t begin, index;
+ unsigned long i;
+ int ret = 0;
+ unsigned from, to;
+
+ if (sbi->s_cluster_ratio <= 1)
+ goto out;
+
+ if (right_offset) {
+ struct ext4_map_blocks map;
+ map.m_lblk = last_block;
+ map.m_len = 1;
+ if (ext4_map_blocks(handle, inode, &map, 0) <= 0
+ || map.m_flags & EXT4_MAP_UNWRITTEN)
+ goto out;
+
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ begin = last_block + 1;
+ for (index = begin; index < last_block + right_offset + 1;
+ index++) {
+ ret = ext4_zero_cluster_page(inode, index, ewcc,
+ mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ if (ret)
+ goto out;
+ }
+
+ if (ext4_should_journal_data(inode)) {
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ from = page->index << PAGE_CACHE_SHIFT;
+ to = from + PAGE_CACHE_SIZE;
+ ret = walk_page_buffers(handle,
+ page_buffers(page), from, to, NULL,
+ do_journal_get_write_access);
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (ewcc)
+ ext4_free_write_cluster_ctxt(ewcc);
+
+ return ret;
+}
+
+void ext4_ext_truncate(struct inode *inode, loff_t old_size)
{
struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_lblk_t last_block;
handle_t *handle;
int err = 0;
@@ -4156,6 +4222,9 @@ void ext4_ext_truncate(struct inode *inode)
if (inode->i_size & (sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);

+ if (ext4_ext_truncate_zero_pages(handle, inode, old_size))
+ goto out_stop;
+
if (ext4_orphan_add(handle, inode))
goto out_stop;

@@ -4176,6 +4245,13 @@ void ext4_ext_truncate(struct inode *inode)

last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+
+ if (sbi->s_cluster_ratio > 1 &&
+ (last_block & (sbi->s_cluster_ratio - 1))) {
+ last_block = (last_block & ~(sbi->s_cluster_ratio - 1)) +
+ sbi->s_cluster_ratio;
+ }
+
err = ext4_ext_remove_space(inode, last_block);

/* In a multi-transaction truncate, we only make the final
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0ae546d..be922ae 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -213,7 +213,7 @@ void ext4_evict_inode(struct inode *inode)
goto stop_handle;
}
if (inode->i_blocks)
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);

/*
* ext4_ext_truncate() doesn't reserve any slop when it
@@ -3438,7 +3438,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
* that's fine - as long as they are linked from the inode, the post-crash
* ext4_truncate() run will find them and release them.
*/
-void ext4_truncate(struct inode *inode)
+void ext4_truncate(struct inode *inode, loff_t old_size)
{
trace_ext4_truncate_enter(inode);

@@ -3451,7 +3451,7 @@ void ext4_truncate(struct inode *inode)
ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);

if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ext4_ext_truncate(inode);
+ ext4_ext_truncate(inode, old_size);
else
ext4_ind_truncate(inode);

@@ -4218,11 +4218,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}

if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode)) {
+ loff_t old_size = i_size_read(inode);
+ if (attr->ia_size != old_size) {
truncate_setsize(inode, attr->ia_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, old_size);
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
}

if (!rc) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 4a5081a..6eb2f4f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -100,7 +100,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
} else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);

handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2cf4ae0..beea7a1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2229,7 +2229,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
nr_truncates++;
} else {
ext4_msg(sb, KERN_DEBUG,
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba66..2be0783 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,7 +11,7 @@
static inline void ext4_truncate_failed_write(struct inode *inode)
{
truncate_inode_pages(inode->i_mapping, inode->i_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
}

/*
--
1.7.3.2


2011-11-09 11:17:40

by Robin Dong

[permalink] [raw]
Subject: [PATCH 7/9 bigalloc] ext4: allocate a cluster for a directory when it needs space

From: Robin Dong <[email protected]>
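
A directory now occupies a whole cluster up front, so a block that lies
past i_size but inside the allocated cluster has never been written and
can be served as zeroes instead of being read from disk. A small
userspace sketch of the ext4_bread() shortcut below (sample values are
hypothetical; the kernel additionally checks s_cluster_ratio > 1):

#include <stdio.h>

int main(void)
{
	unsigned int blkbits = 12;         /* 4k blocks (sample) */
	unsigned long long i_size = 4096;  /* one block in use (sample) */
	unsigned long block = 2;           /* block inside the cluster */

	if (((unsigned long long)block << blkbits) >= i_size)
		printf("block %lu: zero-fill, skip the read\n", block);
	else
		printf("block %lu: read from disk\n", block);
	return 0;
}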

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/inode.c | 9 +++++++++
fs/ext4/namei.c | 46 ++++++++++++++++++++++++++++++++--------------
2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be922ae..69d0031 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -673,6 +673,15 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return bh;
if (buffer_uptodate(bh))
return bh;
+ /* if the block has been allocated by cluster,
+ * do not need to read it
+ */
+ if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
+ ((block << inode->i_sb->s_blocksize_bits) >= inode->i_size)) {
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ return bh;
+ }
ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924fa..43f7dcb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1811,10 +1811,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
handle_t *handle;
struct inode *inode;
- struct buffer_head *dir_block = NULL;
+ struct buffer_head *first_block = NULL;
+ struct buffer_head **dir_block = NULL;
struct ext4_dir_entry_2 *de;
+ struct ext4_sb_info *sbi = EXT4_SB(dir->i_sb);
unsigned int blocksize = dir->i_sb->s_blocksize;
- int err, retries = 0;
+ int i, err, retries = 0;

if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
@@ -1824,6 +1826,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ sbi->s_cluster_ratio +
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1840,14 +1843,24 @@ retry:
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext4_bread(handle, inode, 0, 1, &err);
+
+ dir_block = kzalloc(sizeof(struct buffer_head *) * EXT4_MAX_CTXT_PAGES,
+ GFP_NOFS);
if (!dir_block)
- goto out_clear_inode;
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out_clear_inode;
- de = (struct ext4_dir_entry_2 *) dir_block->b_data;
+ goto out_stop;
+
+ for (i = 0; i < sbi->s_cluster_ratio; i++) {
+ dir_block[i] = ext4_getblk(handle, inode, i, 1, &err);
+ if (!dir_block[i])
+ goto out_clear_inode;
+ memset(dir_block[i]->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(dir_block[i]);
+ err = ext4_journal_get_write_access(handle, dir_block[i]);
+ if (err)
+ goto out_clear_inode;
+ }
+ first_block = dir_block[0];
+ de = (struct ext4_dir_entry_2 *) first_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
@@ -1862,10 +1875,13 @@ retry:
strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
- BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, dir, dir_block);
- if (err)
- goto out_clear_inode;
+ BUFFER_TRACE(first_block, "call ext4_handle_dirty_metadata");
+
+ for (i = 0; i < sbi->s_cluster_ratio; i++) {
+ err = ext4_handle_dirty_metadata(handle, dir, dir_block[i]);
+ if (err)
+ goto out_clear_inode;
+ }
err = ext4_mark_inode_dirty(handle, inode);
if (!err)
err = ext4_add_entry(handle, dentry, inode);
@@ -1885,7 +1901,9 @@ out_clear_inode:
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
- brelse(dir_block);
+ for (i = 0; i < sbi->s_cluster_ratio; i++)
+ brelse(dir_block[i]);
+ kfree(dir_block);
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
--
1.7.3.2


2011-11-09 11:17:45

by Robin Dong

[permalink] [raw]
Subject: [PATCH 9/9 bigalloc] ext4: make clusters work for mmap

From: Robin Dong <[email protected]>

When users write a page in an mmap region, we need to zero out the other
pages around it.
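
As in the write_begin path, the sibling pages only need zeroing when the
faulting block is not yet allocated or is still unwritten. A minimal
userspace sketch of that decision (the flag value is made up for the
sketch; the kernel tests EXT4_MAP_UNWRITTEN returned by
ext4_map_blocks()):

#include <stdio.h>

#define EXT4_MAP_UNWRITTEN 0x40  /* assumed value, for the sketch only */

int main(void)
{
	int ret = 0;            /* blocks mapped by ext4_map_blocks() */
	unsigned int m_flags = EXT4_MAP_UNWRITTEN;

	if (ret <= 0 || (m_flags & EXT4_MAP_UNWRITTEN))
		printf("zero the sibling pages in the cluster\n");
	else
		printf("cluster already initialized, write in place\n");
	return 0;
}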

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/inode.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 69d0031..4a25767 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4649,13 +4649,17 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page;
loff_t size;
unsigned long len;
- int ret;
+ int ret, i, uninit = 0;
struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
+ struct ext4_write_cluster_ctxt *ewcc = NULL;
handle_t *handle;
get_block_t *get_block;
int retries = 0;
+ unsigned int flags = AOP_FLAG_NOFS;
+ unsigned long from, to;

/*
* This check is racy but catches the common case. We rely on
@@ -4712,7 +4716,47 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
+
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (sbi->s_cluster_ratio > 1) {
+ /* We need to know whether the block is allocated already
+ */
+ struct ext4_map_blocks map;
+ map.m_lblk = page->index;
+ map.m_len = 1;
+ ret = ext4_map_blocks(handle, inode, &map, 0);
+ uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+ if (ret <= 0 || uninit) {
+ ret = ext4_prepare_cluster_left_pages(inode,
+ page->index, ewcc, flags);
+ if (ret)
+ goto err_out;
+ }
+ }
+
ret = __block_page_mkwrite(vma, vmf, get_block);
+ if (ret)
+ goto err_out;
+
+ if (sbi->s_cluster_ratio > 1 && uninit) {
+ ret = ext4_prepare_cluster_right_pages(inode, page->index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ if (!ewcc->w_pages[i] ||
+ !page_buffers(ewcc->w_pages[i]))
+ break;
+ block_commit_write(ewcc->w_pages[i],
+ 0, PAGE_CACHE_SIZE);
+ }
+ }
+
if (!ret && ext4_should_journal_data(inode)) {
if (walk_page_buffers(handle, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
@@ -4720,13 +4764,36 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
+
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ from = page->index << PAGE_CACHE_SHIFT;
+ to = from + PAGE_CACHE_SIZE;
+ ret = walk_page_buffers(handle, page_buffers(page),
+ from, to, NULL, do_journal_get_write_access);
+ if (ret) {
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ }
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
+
+err_out:
+ if (ewcc) {
+ ext4_free_write_cluster_ctxt(ewcc);
+ ewcc = NULL;
+ }
ext4_journal_stop(handle);
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
ret = block_page_mkwrite_return(ret);
+
out:
+ if (ewcc)
+ ext4_free_write_cluster_ctxt(ewcc);
return ret;
}
--
1.7.3.2