2011-11-18 10:43:14

by Robin Dong

Subject: [PATCH 0/9 v2 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster for bigalloc

From: Robin Dong <[email protected]>

Hi,

This patch series changes the unit of an extent's ee_block and ee_len from "block"
to "cluster", which reduces the space occupied by metadata (a conversion sketch
follows below).

This patch series applies on top of Ted's bigalloc patches. It does not yet support:
1. delayed allocation
2. 1k/2k block sizes
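
For reference, the block/cluster conversion the whole series leans on is a
power-of-two shift. A minimal sketch of that arithmetic (cluster_bits stands in
for sbi->s_cluster_bits; this mirrors the EXT4_B2C/EXT4_C2B macros rather than
quoting them):

    /* Sketch of the block<->cluster arithmetic assumed throughout the
     * series; cluster_bits mirrors sbi->s_cluster_bits. With 4k blocks
     * and 64-block clusters, cluster_bits is 6, so an ee_len counted in
     * clusters describes 64 times more data per extent than before,
     * which is where the metadata savings come from. */
    static inline unsigned long block_to_cluster(unsigned long block,
                                                 int cluster_bits)
    {
            return block >> cluster_bits;   /* cf. EXT4_B2C() */
    }

    static inline unsigned long cluster_to_block(unsigned long cluster,
                                                 int cluster_bits)
    {
            return cluster << cluster_bits; /* cf. EXT4_C2B() */
    }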



2011-11-18 10:43:16

by Robin Dong

Subject: [PATCH 1/9 v2 bigalloc] ext4: get blocks from ext4_ext_get_actual_blocks

From: Robin Dong <[email protected]>

Since ee_len's unit changes to clusters, callers need to convert from clusters
to blocks using the new function ext4_ext_get_actual_blocks().

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 5 ++
fs/ext4/ext4_extents.h | 16 ++++++-
fs/ext4/extents.c | 123 +++++++++++++++++++++++++++---------------------
3 files changed, 88 insertions(+), 56 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fba951b..1dea3e8 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -276,6 +276,11 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Translate a block number to a cluster number by inode */
+#define EXT4_INODE_B2C(inode, block) (EXT4_B2C(EXT4_SB(inode->i_sb), (block)))
+/* Translate a cluster number to a block number by inode */
+#define EXT4_INODE_C2B(inode, cluster) (EXT4_C2B(EXT4_SB(inode->i_sb), \
+ (cluster)))

/*
* Structure of a blocks group descriptor
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a52db3a..30c5ce1 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -70,8 +70,10 @@
* It's used at the bottom of the tree.
*/
struct ext4_extent {
- __le32 ee_block; /* first logical block extent covers */
- __le16 ee_len; /* number of blocks covered by extent */
+ __le32 ee_block; /* first logical block (or cluster) *
+ * extent covers */
+ __le16 ee_len; /* number of blocks (or clusters) *
+ * covered by extent */
__le16 ee_start_hi; /* high 16 bits of physical block */
__le32 ee_start_lo; /* low 32 bits of physical block */
};
@@ -212,6 +214,16 @@ static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
}

+static inline int ext4_ext_get_actual_blocks(struct ext4_extent *ext,
+ struct super_block *sb)
+{
+ int res = (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+ le16_to_cpu(ext->ee_len) :
+ (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
+
+ return EXT4_C2B(EXT4_SB(sb), res);
+}
+
static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
{
return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4c38262..597ebcb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -304,7 +304,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
- int len = ext4_ext_get_actual_len(ext);
+ int len = ext4_ext_get_actual_blocks(ext, inode->i_sb);

return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -417,7 +417,8 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext),
+ ext4_ext_get_actual_blocks(path->p_ext,
+ inode->i_sb),
ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
@@ -443,7 +444,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
+ ext4_ext_pblock(ex));
}
ext_debug("\n");
}
@@ -474,7 +476,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
le32_to_cpu(ex->ee_block),
ext4_ext_pblock(ex),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
newblock);
ex++;
}
@@ -599,7 +601,7 @@ ext4_ext_binsearch(struct inode *inode,
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_len(path->p_ext));
+ ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));

#ifdef CHECK_BINSEARCH
{
@@ -1222,7 +1224,7 @@ static int ext4_ext_search_left(struct inode *inode,
* first one in the file */

ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (*logical < le32_to_cpu(ex->ee_block)) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
@@ -1292,7 +1294,7 @@ static int ext4_ext_search_right(struct inode *inode,
* first one in the file */

ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (*logical < le32_to_cpu(ex->ee_block)) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
@@ -1506,7 +1508,8 @@ int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
struct ext4_extent *ex2)
{
- unsigned short ext1_ee_len, ext2_ee_len, max_len;
+ /* unit: cluster */
+ unsigned int ext1_ee_len, ext2_ee_len, max_len;

/*
* Make sure that either both extents are uninitialized, or
@@ -1539,7 +1542,8 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
return 0;
#endif

- if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
+ if (ext4_ext_pblock(ex1) + EXT4_INODE_C2B(inode, ext1_ee_len)
+ == ext4_ext_pblock(ex2))
return 1;
return 0;
}
@@ -1633,7 +1637,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
unsigned int ret = 0;

b1 = le32_to_cpu(newext->ee_block);
- len1 = ext4_ext_get_actual_len(newext);
+ len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
@@ -1654,13 +1658,13 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
/* check for wrap through zero on extent logical start block*/
if (b1 + len1 < b1) {
len1 = EXT_MAX_BLOCKS - b1;
- newext->ee_len = cpu_to_le16(len1);
+ newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, len1));
ret = 1;
}

/* check for overlap */
if (b1 + len1 > b2) {
- newext->ee_len = cpu_to_le16(b2 - b1);
+ newext->ee_len = cpu_to_le16(EXT4_B2C(sbi, b2 - b1));
ret = 1;
}
out:
@@ -1702,10 +1706,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext, inode->i_sb),
le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -1780,7 +1784,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext));
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
} else if (le32_to_cpu(newext->ee_block)
> le32_to_cpu(nearex->ee_block)) {
@@ -1794,7 +1799,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb),
nearex, len, nearex + 1, nearex + 2);
memmove(nearex + 2, nearex + 1, len);
}
@@ -1808,7 +1814,8 @@ has_space:
le32_to_cpu(newext->ee_block),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
+ ext4_ext_get_actual_blocks(newext,
+ inode->i_sb),
nearex, len, nearex, nearex + 1);
memmove(nearex + 1, nearex, len);
path[depth].p_ext = nearex;
@@ -1891,7 +1898,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
if (block + num < end)
end = block + num;
} else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
/* need to allocate space after found extent */
start = block;
end = block + num;
@@ -1904,7 +1911,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
*/
start = block;
end = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (block + num < end)
end = block + num;
exists = 1;
@@ -1915,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,

if (!exists) {
cbex.ec_block = start;
- cbex.ec_len = end - start;
+ cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
cbex.ec_start = 0;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
@@ -1947,7 +1954,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
path = NULL;
}

- block = cbex.ec_block + cbex.ec_len;
+ block = cbex.ec_block + EXT4_INODE_C2B(inode, cbex.ec_len);
}

if (path) {
@@ -1968,7 +1975,7 @@ ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
trace_ext4_ext_put_in_cache(inode, block, len, start);
cex = &EXT4_I(inode)->i_cached_extent;
cex->ec_block = block;
- cex->ec_len = len;
+ cex->ec_len = EXT4_INODE_B2C(inode, len);
cex->ec_start = start;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
@@ -1999,17 +2006,17 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
ext_debug("cache gap(before): %u [%u:%u]",
block,
le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));
} else if (block >= le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex)) {
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
ext4_lblk_t next;
lblock = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ + ext4_ext_get_actual_blocks(ex, inode->i_sb);

next = ext4_ext_next_allocated_block(path);
ext_debug("cache gap(after): [%u:%u] %u",
le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
block);
BUG_ON(next == lblock);
len = next - lblock;
@@ -2207,7 +2214,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t from, ext4_lblk_t to)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
+ unsigned int ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
ext4_fsblk_t pblk;
int flags = EXT4_FREE_BLOCKS_FORGET;

@@ -2319,7 +2326,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_lblk_t a, b, block;
unsigned num;
ext4_lblk_t ex_ee_block;
- unsigned short ex_ee_len;
+ unsigned int ex_ee_len;
unsigned uninitialized = 0;
struct ext4_extent *ex;
struct ext4_map_blocks map;
@@ -2337,7 +2344,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex = EXT_LAST_EXTENT(eh);

ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
ex_ee_len, *partial_cluster);
@@ -2364,7 +2371,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (end <= ex_ee_block) {
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);
continue;
} else if (a != ex_ee_block &&
b != ex_ee_block + ex_ee_len - 1) {
@@ -2399,7 +2407,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (err < 0)
goto out;

- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);

b = ex_ee_block+ex_ee_len - 1 < end ?
ex_ee_block+ex_ee_len - 1 : end;
@@ -2485,7 +2494,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
}

ex->ee_block = cpu_to_le32(block);
- ex->ee_len = cpu_to_le16(num);
+ ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
/*
* Do not mark uninitialized if all the blocks in the
* extent have been removed.
@@ -2523,7 +2532,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
- ex_ee_len = ext4_ext_get_actual_len(ex);
+ ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
}

if (correct_index && eh->eh_entries)
@@ -2706,7 +2715,7 @@ again:
flags |= EXT4_FREE_BLOCKS_METADATA;

ext4_free_blocks(handle, inode, NULL,
- EXT4_C2B(EXT4_SB(sb), partial_cluster),
+ EXT4_INODE_C2B(inode, partial_cluster),
EXT4_SB(sb)->s_cluster_ratio, flags);
partial_cluster = 0;
}
@@ -2793,7 +2802,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
unsigned int ee_len;
int ret;

- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
ee_pblock = ext4_ext_pblock(ex);

ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
@@ -2854,7 +2863,7 @@ static int ext4_split_extent_at(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
newblock = split - ee_block + ext4_ext_pblock(ex);

BUG_ON(split < ee_block || split >= (ee_block + ee_len));
@@ -2883,7 +2892,7 @@ static int ext4_split_extent_at(handle_t *handle,

/* case a */
memcpy(&orig_ex, ex, sizeof(orig_ex));
- ex->ee_len = cpu_to_le16(split - ee_block);
+ ex->ee_len = cpu_to_le16(EXT4_INODE_B2C(inode, split - ee_block));
if (split_flag & EXT4_EXT_MARK_UNINIT1)
ext4_ext_mark_uninitialized(ex);

@@ -2897,7 +2906,8 @@ static int ext4_split_extent_at(handle_t *handle,

ex2 = &newex;
ex2->ee_block = cpu_to_le32(split);
- ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
+ ex2->ee_len = cpu_to_le16(
+ EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
ext4_ext_store_pblock(ex2, newblock);
if (split_flag & EXT4_EXT_MARK_UNINIT2)
ext4_ext_mark_uninitialized(ex2);
@@ -2908,7 +2918,7 @@ static int ext4_split_extent_at(handle_t *handle,
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le32(ee_len);
+ ex->ee_len = cpu_to_le32(EXT4_INODE_B2C(inode, ee_len));
ext4_ext_try_to_merge(inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
@@ -2953,7 +2963,7 @@ static int ext4_split_extent(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
uninitialized = ext4_ext_is_uninitialized(ex);

if (map->m_lblk + map->m_len < ee_block + ee_len) {
@@ -3028,7 +3038,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
allocated = ee_len - (map->m_lblk - ee_block);

WARN_ON(map->m_lblk < ee_block);
@@ -3070,7 +3080,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* case 3 */
zero_ex.ee_block =
cpu_to_le32(map->m_lblk);
- zero_ex.ee_len = cpu_to_le16(allocated);
+ zero_ex.ee_len = cpu_to_le16(
+ EXT4_INODE_B2C(inode, allocated));
ext4_ext_store_pblock(&zero_ex,
ext4_ext_pblock(ex) + map->m_lblk - ee_block);
err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3084,8 +3095,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* case 2 */
if (map->m_lblk != ee_block) {
zero_ex.ee_block = ex->ee_block;
- zero_ex.ee_len = cpu_to_le16(map->m_lblk -
- ee_block);
+ zero_ex.ee_len =
+ cpu_to_le16(EXT4_INODE_B2C(inode,
+ map->m_lblk - ee_block));
ext4_ext_store_pblock(&zero_ex,
ext4_ext_pblock(ex));
err = ext4_ext_zeroout(inode, &zero_ex);
@@ -3157,7 +3169,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
depth = ext_depth(inode);
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
split_flag |= EXT4_EXT_MARK_UNINIT2;
@@ -3180,7 +3192,7 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));

err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
@@ -3242,7 +3254,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* function immediately.
*/
if (lblk + len < le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex))
+ ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
return 0;
/*
* If the caller does appear to be planning to write at or
@@ -3645,7 +3657,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
ext4_lblk_t rr_cluster_start, rr_cluster_end;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned short ee_len = ext4_ext_get_actual_len(ex);
+ unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);

/* The extent passed in that we are trying to match */
ex_cluster_start = EXT4_B2C(sbi, ee_block);
@@ -3761,7 +3773,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
- le32_to_cpu(newex.ee_block)
+ ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
- allocated = ext4_ext_get_actual_len(&newex) -
+ allocated = ext4_ext_get_actual_blocks(&newex,
+ inode->i_sb) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
goto out;
}
@@ -3796,13 +3809,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
ext4_fsblk_t partial_cluster = 0;
- unsigned short ee_len;
+ unsigned int ee_len;

/*
* Uninitialized extents are treated as holes, except that
* we split out initialized portions during a write.
*/
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);

@@ -3880,7 +3893,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_len = ext4_ext_get_actual_len(ex);
+ ee_len = ext4_ext_get_actual_blocks(ex,
+ inode->i_sb);
ee_block = le32_to_cpu(ex->ee_block);
ee_start = ext4_ext_pblock(ex);

@@ -4064,13 +4078,14 @@ got_allocated_blocks:
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), fb_flags);
+ ext4_ext_get_actual_blocks(&newex, inode->i_sb),
+ fb_flags);
goto out2;
}

/* previous routine could use block we allocated */
newblock = ext4_ext_pblock(&newex);
- allocated = ext4_ext_get_actual_len(&newex);
+ allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
if (allocated > map->m_len)
allocated = map->m_len;
map->m_flags |= EXT4_MAP_NEW;
--
1.7.3.2
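
To make the intended division of labor concrete, here is a hypothetical caller
(not part of the patch): ee_len stays in cluster units on disk, and the new
helper is what converts it to blocks, i.e. EXT4_C2B() applied to
ext4_ext_get_actual_len():

    /* Hypothetical illustration: the length of an extent in blocks
     * after this patch. With a cluster ratio of 16, ee_len == 3 covers
     * 48 blocks. Returns the same value as
     * ext4_ext_get_actual_blocks(ex, sb). */
    static int extent_len_in_blocks(struct super_block *sb,
                                    struct ext4_extent *ex)
    {
            return EXT4_C2B(EXT4_SB(sb), ext4_ext_get_actual_len(ex));
    }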


2011-11-18 10:43:18

by Robin Dong

Subject: [PATCH 2/9 v2 bigalloc] ext4: change ext4_ext_map_blocks to allocate clusters instead of blocks

From: Robin Dong <[email protected]>

We need to align the allocation to a cluster boundary even when users allocate
just one block.
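
The alignment in question is a round-down to the cluster boundary; a small
sketch of the arithmetic, assuming (as bigalloc does) that s_cluster_ratio is
a power of two:

    /* Sketch: with ratio 16, logical block 21 aligns down to 16
     * (offset 5). The patch applies exactly this mask to map->m_lblk
     * before allocating. */
    static ext4_lblk_t cluster_aligned(ext4_lblk_t lblk, unsigned int ratio)
    {
            return lblk & ~((ext4_lblk_t)ratio - 1);
    }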

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 45 ++++++++++++---------------------------------
1 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 597ebcb..3430ddf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3949,20 +3949,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* Okay, we need to do block allocation.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
- newex.ee_block = cpu_to_le32(map->m_lblk);
+ newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

- /*
- * If we are doing bigalloc, check to see if the extent returned
- * by ext4_ext_find_extent() implies a cluster we can use.
- */
- if (cluster_offset && ex &&
- get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
+ if (ex)
+ BUG_ON((le32_to_cpu(ex->ee_block) +
+ EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
+ (map->m_lblk & ~(sbi->s_cluster_ratio-1)));

/* find neighbour allocated blocks */
ar.lleft = map->m_lblk;
@@ -3975,16 +3968,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (err)
goto out2;

- /* Check if the extent after searching to the right implies a
- * cluster we can use. */
- if ((sbi->s_cluster_ratio > 1) && ex2 &&
- get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) {
- ar.len = allocated = map->m_len;
- newblock = map->m_pblk;
- map->m_flags |= EXT4_MAP_FROM_CLUSTER;
- goto got_allocated_blocks;
- }
-
/*
* See if request is beyond maximum number of blocks we can have in
* a single extent. For an initialized extent this limit is
@@ -3999,7 +3982,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_len = EXT_UNINIT_MAX_LEN;

/* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
- newex.ee_len = cpu_to_le16(map->m_len);
+ newex.ee_len = cpu_to_le16(EXT4_NUM_B2C(sbi, map->m_len));
err = ext4_ext_check_overlap(sbi, inode, &newex, path);
if (err)
allocated = ext4_ext_get_actual_len(&newex);
@@ -4036,14 +4019,11 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.goal, newblock, allocated);
free_on_err = 1;
allocated_clusters = ar.len;
- ar.len = EXT4_C2B(sbi, ar.len) - offset;
- if (ar.len > allocated)
- ar.len = allocated;

got_allocated_blocks:
/* try to insert new extent into found leaf and return */
- ext4_ext_store_pblock(&newex, newblock + offset);
- newex.ee_len = cpu_to_le16(ar.len);
+ ext4_ext_store_pblock(&newex, newblock);
+ newex.ee_len = cpu_to_le16(allocated_clusters);
/* Mark uninitialized */
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
ext4_ext_mark_uninitialized(&newex);
@@ -4066,7 +4046,8 @@ got_allocated_blocks:
map->m_flags |= EXT4_MAP_UNINIT;
}

- err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
+ err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
+ EXT4_C2B(sbi, allocated_clusters));
if (!err)
err = ext4_ext_insert_extent(handle, inode, path,
&newex, flags);
@@ -4086,8 +4067,6 @@ got_allocated_blocks:
/* previous routine could use block we allocated */
newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_blocks(&newex, inode->i_sb);
- if (allocated > map->m_len)
- allocated = map->m_len;
map->m_flags |= EXT4_MAP_NEW;

/*
@@ -4174,7 +4153,7 @@ got_allocated_blocks:
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock);
+ ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -4183,7 +4162,7 @@ out:
allocated = map->m_len;
ext4_ext_show_leaf(inode, path);
map->m_flags |= EXT4_MAP_MAPPED;
- map->m_pblk = newblock;
+ map->m_pblk = newblock + offset;
map->m_len = allocated;
out2:
if (path) {
--
1.7.3.2
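
Restating the invariant that the new BUG_ON in this patch enforces, as a
hedged helper (ee_block is still in block units at this point in the series,
while ee_len is already in clusters):

    /* Sketch of the invariant: the extent found in the tree must end
     * at or before the start of the cluster being allocated; otherwise
     * the earlier lookup would already have returned a mapping. */
    static int ext_ends_before_cluster(struct ext4_sb_info *sbi,
                                       struct ext4_extent *ex,
                                       ext4_lblk_t lblk)
    {
            ext4_lblk_t ext_end = le32_to_cpu(ex->ee_block) +
                    EXT4_C2B(sbi, ext4_ext_get_actual_len(ex));

            return ext_end <=
                   (lblk & ~((ext4_lblk_t)sbi->s_cluster_ratio - 1));
    }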


2011-11-18 10:43:25

by Robin Dong

Subject: [PATCH 5/9 v2 bigalloc] ext4: zero out extra pages when users write one page

From: Robin Dong <[email protected]>

When users write a page that sits in the middle of a cluster, we need to zero
out the other pages around it.
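
The page bookkeeping reduces to offsets within the cluster. A sketch of the
arithmetic behind ext4_prepare_cluster_left_pages() and
ext4_prepare_cluster_right_pages() below, assuming one block per page as the
patch itself does:

    /* Sketch: for a written page at 'index' in a cluster of 'ratio'
     * pages, left_offset pages before it and right_offset pages after
     * it must be zeroed so the whole cluster is initialized. E.g.
     * ratio 8, index 13: left_offset 5 (pages 8..12), right_offset 2
     * (pages 14..15). */
    static void cluster_page_span(pgoff_t index, unsigned int ratio,
                                  unsigned int *left_offset,
                                  unsigned int *right_offset)
    {
            *left_offset = index & (ratio - 1);
            *right_offset = ratio - *left_offset - 1;
    }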

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 18 ++++
fs/ext4/inode.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 297 insertions(+), 16 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1dea3e8..90ae8a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -675,6 +675,15 @@ struct move_extent {
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)

+#define EXT4_MAX_CLUSTERSIZE 1048576
+#define EXT4_MAX_CTXT_PAGES (EXT4_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
+
+/* tracking cluster write pages */
+struct ext4_write_cluster_ctxt {
+ unsigned long w_num_pages;
+ struct page *w_pages[EXT4_MAX_CTXT_PAGES];
+};
+
/*
* Extended fields will fit into an inode if the filesystem was formatted
* with large inodes (-I 256 or larger) and there are not currently any EAs
@@ -1849,6 +1858,15 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);

/* inode.c */
+int walk_page_buffers(handle_t *handle, struct buffer_head *head,
+ unsigned from, unsigned to, int *partial,
+ int (*fn)(handle_t *handle, struct buffer_head *bh));
+int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh);
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void);
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc);
+int ext4_zero_cluster_page(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags);
+
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b83c3c..f1c332d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/swap.h>

#include "ext4_jbd2.h"
#include "xattr.h"
@@ -49,6 +50,31 @@

#define MPAGE_DA_EXTENT_TAIL 0x01

+static void ext4_write_cluster_add_page(struct ext4_write_cluster_ctxt *ewcc,
+ struct page *page)
+{
+ ewcc->w_pages[ewcc->w_num_pages] = page;
+ ewcc->w_num_pages++;
+}
+
+struct ext4_write_cluster_ctxt *ext4_alloc_write_cluster_ctxt(void)
+{
+ return kzalloc(sizeof(struct ext4_write_cluster_ctxt), GFP_NOFS);
+}
+
+void ext4_free_write_cluster_ctxt(struct ext4_write_cluster_ctxt *ewcc)
+{
+ int i;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ if (ewcc->w_pages[i]) {
+ unlock_page(ewcc->w_pages[i]);
+ mark_page_accessed(ewcc->w_pages[i]);
+ page_cache_release(ewcc->w_pages[i]);
+ }
+ }
+ kfree(ewcc);
+}
+
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
@@ -656,7 +682,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return NULL;
}

-static int walk_page_buffers(handle_t *handle,
+int walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
unsigned to,
@@ -712,7 +738,7 @@ static int walk_page_buffers(handle_t *handle,
* is elevated. We'll still have enough credits for the tiny quotafile
* write.
*/
-static int do_journal_get_write_access(handle_t *handle,
+int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
{
int dirty = buffer_dirty(bh);
@@ -738,15 +764,176 @@ static int do_journal_get_write_access(handle_t *handle,

static int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
+
+int ext4_cluster_write_begin(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block)
+{
+ unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned to = from + len;
+ struct inode *inode = page->mapping->host;
+ unsigned block_start, block_end;
+ sector_t block;
+ int err = 0;
+ unsigned blocksize, bbits;
+ struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+
+ BUG_ON(!PageLocked(page));
+ BUG_ON(from > PAGE_CACHE_SIZE);
+ BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > to);
+
+ blocksize = 1 << inode->i_blkbits;
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+ head = page_buffers(page);
+
+ bbits = inode->i_blkbits;
+ block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+
+ for (bh = head, block_start = 0; bh != head || !block_start;
+ block++, block_start = block_end, bh = bh->b_this_page) {
+ block_end = block_start + blocksize;
+ if (block_end <= from || block_start >= to) {
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ }
+ continue;
+ }
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ if (!buffer_mapped(bh)) {
+ WARN_ON(bh->b_size != blocksize);
+ err = get_block(inode, block, bh, 1);
+ if (err)
+ break;
+ unmap_underlying_metadata(bh->b_bdev,
+ bh->b_blocknr);
+ if (PageUptodate(page)) {
+ clear_buffer_new(bh);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ continue;
+ }
+ if (block_end > to || block_start < from)
+ zero_user_segments(page,
+ to, block_end,
+ block_start, from);
+ continue;
+ }
+ if (PageUptodate(page)) {
+ if (!buffer_uptodate(bh))
+ set_buffer_uptodate(bh);
+ continue;
+ }
+ if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+ !buffer_unwritten(bh) &&
+ (block_start < from || block_end > to)) {
+ ll_rw_block(READ, 1, &bh);
+ *wait_bh++ = bh;
+ }
+ }
+ /*
+ * If we issued read requests - let them complete.
+ */
+ while (wait_bh > wait) {
+ wait_on_buffer(*--wait_bh);
+ if (!buffer_uptodate(*wait_bh))
+ err = -EIO;
+ }
+ if (unlikely(err))
+ page_zero_new_buffers(page, from, to);
+ return err;
+}
+
+int ext4_zero_cluster_page(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ int ret = 0;
+ struct page *page;
+
+ page = grab_cache_page_write_begin(inode->i_mapping, index, flags);
+ if (!page) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ext4_write_cluster_add_page(ewcc, page);
+
+ /* if page is already uptodate and has buffers, don't get_block again
+ */
+ if (PageUptodate(page) && PagePrivate(page))
+ goto out;
+
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ if (ext4_should_dioread_nolock(inode))
+ ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, ext4_get_block_write);
+ else
+ ret = ext4_cluster_write_begin(page, index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, ext4_get_block);
+
+out:
+ return ret;
+}
+
+int ext4_prepare_cluster_left_pages(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret = 0;
+ int block;
+ sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+ sector_t begin;
+
+ if (left_offset) {
+ begin = index - left_offset;
+ for (block = begin; block < index; block++) {
+ ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int ext4_prepare_cluster_right_pages(struct inode *inode, int index,
+ struct ext4_write_cluster_ctxt *ewcc, unsigned flags)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int ret = 0;
+ int block;
+ sector_t left_offset = index & (sbi->s_cluster_ratio - 1);
+ sector_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+ sector_t begin;
+
+ if (right_offset) {
+ begin = index + 1;
+ for (block = begin; block < index + right_offset + 1; block++) {
+ ret = ext4_zero_cluster_page(inode, block, ewcc, flags);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int ret, needed_blocks;
handle_t *handle;
- int retries = 0;
- struct page *page;
+ int retries = 0, uninit = 0;
+ struct page *page = NULL;
+ struct ext4_write_cluster_ctxt *ewcc;
pgoff_t index;
unsigned from, to;

@@ -761,6 +948,12 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
to = from + len;

retry:
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
handle = ext4_journal_start(inode, needed_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -771,27 +964,78 @@ retry:
* started */
flags |= AOP_FLAG_NOFS;

+ if (sbi->s_cluster_ratio > 1) {
+ /* We need to know whether the block is allocated already
+ */
+ struct ext4_map_blocks map;
+ map.m_lblk = index;
+ map.m_len = 1;
+ ret = ext4_map_blocks(handle, inode, &map, 0);
+ uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+ if (ret <= 0 || uninit) {
+ ret = ext4_prepare_cluster_left_pages(inode, index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ }
+ }
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
- ext4_journal_stop(handle);
ret = -ENOMEM;
- goto out;
+ goto err_out;
}
+
*pagep = page;

- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, pos, len, ext4_get_block_write);
- else
- ret = __block_write_begin(page, pos, len, ext4_get_block);
+ ext4_write_cluster_add_page(ewcc, page);
+
+ /* if the block is already allocated by cluster, we should use
+ * ext4_cluster_write_begin (it will not read buffer again)
+ */
+ if (sbi->s_cluster_ratio > 1 && (pos >> inode->i_blkbits) >
+ ((inode->i_size + inode->i_sb->s_blocksize - 1) >>
+ inode->i_blkbits) - 1) {
+ if (ext4_should_dioread_nolock(inode))
+ ret = ext4_cluster_write_begin(page, pos, len,
+ ext4_get_block_write);
+ else
+ ret = ext4_cluster_write_begin(page, pos, len,
+ ext4_get_block);
+ } else {
+ if (ext4_should_dioread_nolock(inode))
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block_write);
+ else
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block);
+ }
+
+ if (sbi->s_cluster_ratio > 1 && uninit) {
+ ret = ext4_prepare_cluster_right_pages(inode, index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ }

if (!ret && ext4_should_journal_data(inode)) {
- ret = walk_page_buffers(handle, page_buffers(page),
+ int i;
+ unsigned long from, to;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ from = page->index << PAGE_CACHE_SHIFT;
+ to = from + PAGE_CACHE_SIZE;
+ ret = walk_page_buffers(handle, page_buffers(page),
from, to, NULL, do_journal_get_write_access);
+ if (ret)
+ break;
+ }
}

if (ret) {
- unlock_page(page);
- page_cache_release(page);
+ ext4_free_write_cluster_ctxt(ewcc);
/*
* __block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need
@@ -819,8 +1063,15 @@ retry:

if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
+
+ *fsdata = ewcc;
out:
return ret;
+
+err_out:
+ ext4_free_write_cluster_ctxt(ewcc);
+ ext4_journal_stop(handle);
+ return ret;
}

/* For write_end() in data=journal mode */
@@ -837,11 +1088,24 @@ static int ext4_generic_write_end(struct file *file,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i_size_changed = 0;
+ int i_size_changed = 0, i;
struct inode *inode = mapping->host;
+ struct ext4_write_cluster_ctxt *ewcc = fsdata;
handle_t *handle = ext4_journal_current_handle();

copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ unsigned long pos;
+ struct page *cluster_page;
+ cluster_page = ewcc->w_pages[i];
+ if (!cluster_page)
+ break;
+ if (cluster_page == page)
+ continue;
+ pos = cluster_page->index << PAGE_CACHE_SHIFT;
+ block_write_end(file, mapping, pos, PAGE_CACHE_SIZE,
+ PAGE_CACHE_SIZE, cluster_page, fsdata);
+ }

/*
* No need to use i_size_read() here, the i_size
@@ -863,8 +1127,7 @@ static int ext4_generic_write_end(struct file *file,
ext4_update_i_disksize(inode, (pos + copied));
i_size_changed = 1;
}
- unlock_page(page);
- page_cache_release(page);
+ ext4_free_write_cluster_ctxt(ewcc);

/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
--
1.7.3.2


2011-11-18 10:43:23

by Robin Dong

Subject: [PATCH 4/9 v2 bigalloc] ext4: remove unused functions and tags

From: Robin Dong <[email protected]>

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 109 -----------------------------------------------------
1 files changed, 0 insertions(+), 109 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4f764ee..ccf12a0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3662,111 +3662,6 @@ out2:
}

/*
- * get_implied_cluster_alloc - check to see if the requested
- * allocation (in the map structure) overlaps with a cluster already
- * allocated in an extent.
- * @sb The filesystem superblock structure
- * @map The requested lblk->pblk mapping
- * @ex The extent structure which might contain an implied
- * cluster allocation
- *
- * This function is called by ext4_ext_map_blocks() after we failed to
- * find blocks that were already in the inode's extent tree. Hence,
- * we know that the beginning of the requested region cannot overlap
- * the extent from the inode's extent tree. There are three cases we
- * want to catch. The first is this case:
- *
- * |--- cluster # N--|
- * |--- extent ---| |---- requested region ---|
- * |==========|
- *
- * The second case that we need to test for is this one:
- *
- * |--------- cluster # N ----------------|
- * |--- requested region --| |------- extent ----|
- * |=======================|
- *
- * The third case is when the requested region lies between two extents
- * within the same cluster:
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- *
- * In each of the above cases, we need to set the map->m_pblk and
- * map->m_len so it corresponds to the return the extent labelled as
- * "|====|" from cluster #N, since it is already in use for data in
- * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
- * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
- * as a new "allocated" block region. Otherwise, we will return 0 and
- * ext4_ext_map_blocks() will then allocate one or more new clusters
- * by calling ext4_mb_new_blocks().
- */
-static int get_implied_cluster_alloc(struct super_block *sb,
- struct ext4_map_blocks *map,
- struct ext4_extent *ex,
- struct ext4_ext_path *path)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
- ext4_lblk_t ex_cluster_start, ex_cluster_end;
- ext4_lblk_t rr_cluster_start, rr_cluster_end;
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
- unsigned int ee_len = ext4_ext_get_actual_blocks(ex, sb);
-
- /* The extent passed in that we are trying to match */
- ex_cluster_start = EXT4_B2C(sbi, ee_block);
- ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
-
- /* The requested region passed into ext4_map_blocks() */
- rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
- rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
-
- if ((rr_cluster_start == ex_cluster_end) ||
- (rr_cluster_start == ex_cluster_start)) {
- if (rr_cluster_start == ex_cluster_end)
- ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
- map->m_len = min(map->m_len,
- (unsigned) sbi->s_cluster_ratio - c_offset);
- /*
- * Check for and handle this case:
- *
- * |--------- cluster # N-------------|
- * |------- extent ----|
- * |--- requested region ---|
- * |===========|
- */
-
- if (map->m_lblk < ee_block)
- map->m_len = min(map->m_len, ee_block - map->m_lblk);
-
- /*
- * Check for the case where there is already another allocated
- * block to the right of 'ex' but before the end of the cluster.
- *
- * |------------- cluster # N-------------|
- * |----- ex -----| |---- ex_right ----|
- * |------ requested region ------|
- * |================|
- */
- if (map->m_lblk > ee_block) {
- ext4_lblk_t next = 0;//ext4_ext_next_allocated_block(path);
- map->m_len = min(map->m_len, next - map->m_lblk);
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
- return 1;
- }
-
- trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
- return 0;
-}
-
-
-/*
* Block allocation/map/preallocation routine for extents based files
*
*
@@ -3798,7 +3693,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int result = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
- ext4_lblk_t cluster_offset;
struct ext4_map_blocks punch_map;

ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4010,7 +3904,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = EXT4_B2C(sbi,
cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1)));
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

if (ex)
BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
@@ -4072,7 +3965,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
- printk(KERN_ERR "ar: %lu, %lu, %lu\n", ar.len, ar.goal, ar.logical);
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4081,7 +3973,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
free_on_err = 1;
allocated_clusters = ar.len;

-got_allocated_blocks:
/* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated_clusters);
--
1.7.3.2


2011-11-18 10:43:28

by Robin Dong

Subject: [PATCH 6/9 v2 bigalloc] ext4: zero out extra page when users truncate a file

From: Robin Dong <[email protected]>

When truncating a file to a larger size, we need to zero out the pages beyond
the old i_size.
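
A sketch of the block arithmetic that the new ext4_ext_truncate_zero_pages()
below performs (pages_to_zero is a hypothetical helper; it assumes block size
== page size, matching the patch's use of page indexes as block numbers):

    /* Sketch: last_block is the final block covered by the old size;
     * the pages after it in the same cluster are the ones to zero.
     * E.g. 4k blocks, ratio 4, old_size 9000 -> last_block 2, one page
     * to zero. */
    static unsigned int pages_to_zero(loff_t old_size, unsigned int blkbits,
                                      unsigned int ratio)
    {
            unsigned long last_block = ((old_size + (1UL << blkbits) - 1)
                                        >> blkbits) - 1;
            unsigned int left_offset = last_block & (ratio - 1);

            return ratio - left_offset - 1;
    }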

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/ext4.h | 4 +-
fs/ext4/extents.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/ext4/inode.c | 13 ++++----
fs/ext4/ioctl.c | 2 +-
fs/ext4/super.c | 2 +-
fs/ext4/truncate.h | 2 +-
6 files changed, 89 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 90ae8a2..7d226af 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1886,7 +1886,7 @@ extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern int ext4_can_truncate(struct inode *inode);
-extern void ext4_truncate(struct inode *);
+extern void ext4_truncate(struct inode *, loff_t oldsize);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
@@ -2267,7 +2267,7 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
int chunk);
extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
-extern void ext4_ext_truncate(struct inode *);
+extern void ext4_ext_truncate(struct inode *, loff_t oldsize);
extern int ext4_ext_punch_hole(struct file *file, loff_t offset,
loff_t length);
extern void ext4_ext_init(struct super_block *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ccf12a0..7799973 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4131,10 +4131,76 @@ out2:
return err ? err : result;
}

-void ext4_ext_truncate(struct inode *inode)
+int ext4_ext_truncate_zero_pages(handle_t *handle, struct inode *inode,
+ loff_t old_size)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_write_cluster_ctxt *ewcc = NULL;
+ struct page *page;
+ ext4_lblk_t last_block = ((old_size + sb->s_blocksize - 1)
+ >> EXT4_BLOCK_SIZE_BITS(sb)) - 1;
+ ext4_lblk_t left_offset = last_block & (sbi->s_cluster_ratio - 1);
+ ext4_lblk_t right_offset = sbi->s_cluster_ratio - left_offset - 1;
+ ext4_lblk_t begin, index;
+ unsigned long i;
+ int ret = 0;
+ unsigned from, to;
+
+ if (sbi->s_cluster_ratio <= 1)
+ goto out;
+
+ if (right_offset) {
+ struct ext4_map_blocks map;
+ map.m_lblk = last_block;
+ map.m_len = 1;
+ if (ext4_map_blocks(handle, inode, &map, 0) <= 0
+ || map.m_flags & EXT4_MAP_UNWRITTEN)
+ goto out;
+
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ begin = last_block + 1;
+ for (index = begin; index < last_block + right_offset + 1;
+ index++) {
+ ret = ext4_zero_cluster_page(inode, index, ewcc,
+ mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+ if (ret)
+ goto out;
+ }
+
+ if (ext4_should_journal_data(inode)) {
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ from = page->index << PAGE_CACHE_SHIFT;
+ to = from + PAGE_CACHE_SIZE;
+ ret = walk_page_buffers(handle,
+ page_buffers(page), from, to, NULL,
+ do_journal_get_write_access);
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (ewcc)
+ ext4_free_write_cluster_ctxt(ewcc);
+
+ return ret;
+}
+
+void ext4_ext_truncate(struct inode *inode, loff_t old_size)
{
struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_lblk_t last_block;
handle_t *handle;
int err = 0;
@@ -4176,6 +4242,13 @@ void ext4_ext_truncate(struct inode *inode)

last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+
+ if (sbi->s_cluster_ratio > 1 &&
+ (last_block & (sbi->s_cluster_ratio - 1))) {
+ last_block = (last_block & ~(sbi->s_cluster_ratio - 1)) +
+ sbi->s_cluster_ratio;
+ }
+
err = ext4_ext_remove_space(inode, last_block);

/* In a multi-transaction truncate, we only make the final
@@ -4186,6 +4259,9 @@ void ext4_ext_truncate(struct inode *inode)

up_write(&EXT4_I(inode)->i_data_sem);

+ if (ext4_ext_truncate_zero_pages(handle, inode, old_size))
+ goto out_stop;
+
out_stop:
/*
* If this was a simple ftruncate() and the file will remain alive,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f1c332d..22b28bd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -213,7 +213,7 @@ void ext4_evict_inode(struct inode *inode)
goto stop_handle;
}
if (inode->i_blocks)
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);

/*
* ext4_ext_truncate() doesn't reserve any slop when it
@@ -3440,7 +3440,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
* that's fine - as long as they are linked from the inode, the post-crash
* ext4_truncate() run will find them and release them.
*/
-void ext4_truncate(struct inode *inode)
+void ext4_truncate(struct inode *inode, loff_t old_size)
{
trace_ext4_truncate_enter(inode);

@@ -3453,7 +3453,7 @@ void ext4_truncate(struct inode *inode)
ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);

if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- ext4_ext_truncate(inode);
+ ext4_ext_truncate(inode, old_size);
else
ext4_ind_truncate(inode);

@@ -4220,11 +4220,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}

if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != i_size_read(inode)) {
+ loff_t old_size = i_size_read(inode);
+ if (attr->ia_size != old_size) {
truncate_setsize(inode, attr->ia_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, old_size);
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
}

if (!rc) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 4a5081a..6eb2f4f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -100,7 +100,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
} else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);

handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2cf4ae0..beea7a1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2229,7 +2229,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
nr_truncates++;
} else {
ext4_msg(sb, KERN_DEBUG,
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba66..2be0783 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -11,7 +11,7 @@
static inline void ext4_truncate_failed_write(struct inode *inode)
{
truncate_inode_pages(inode->i_mapping, inode->i_size);
- ext4_truncate(inode);
+ ext4_truncate(inode, 0);
}

/*
--
1.7.3.2


2011-11-18 10:43:21

by Robin Dong

Subject: [PATCH 3/9 v2 bigalloc] ext4: change unit of extent's ee_block to cluster

From: Robin Dong <[email protected]>

Change the unit of an extent's ee_block from blocks to clusters.
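
With ee_block in cluster units, every comparison against a logical block has
to convert first. A hedged sketch of the convention the hunks below apply
(block_in_extent is a hypothetical helper, not part of the patch):

    /* Sketch: ee_block now stores a cluster number, so convert it back
     * to a block number via EXT4_INODE_C2B() before comparing with a
     * logical block, exactly as the binary-search hunks below do. */
    static int block_in_extent(struct inode *inode, struct ext4_extent *ex,
                               ext4_lblk_t lblk)
    {
            ext4_lblk_t start = EXT4_INODE_C2B(inode,
                                               le32_to_cpu(ex->ee_block));

            return lblk >= start &&
                   lblk < start +
                          ext4_ext_get_actual_blocks(ex, inode->i_sb);
    }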

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 286 ++++++++++++++++++++++++++++++++---------------------
1 files changed, 174 insertions(+), 112 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3430ddf..4f764ee 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -140,7 +140,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
ex = path[depth].p_ext;
if (ex) {
ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
- ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
+ ext4_lblk_t ext_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));

if (block > ext_block)
return ext_pblk + (block - ext_block);
@@ -168,7 +169,8 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
{
ext4_fsblk_t goal, newblock;

- goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+ goal = ext4_ext_find_goal(inode, path,
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)));
newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
NULL, err);
return newblock;
@@ -411,11 +413,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
ext_debug("path:");
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
- ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- ext4_idx_pblock(path->p_idx));
+ ext_debug(" %d->%llu", EXT4_INODE_C2B(inode,
+ le32_to_cpu(path->p_idx->ei_block)),
+ ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
- le32_to_cpu(path->p_ext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(path->p_ext->ee_block)),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_blocks(path->p_ext,
inode->i_sb),
@@ -442,7 +446,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino);

for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
- ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
+ ext_debug("%d:[%d]%d:%llu ",
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
@@ -461,7 +466,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
idx = path[level].p_idx;
while (idx <= EXT_MAX_INDEX(path[level].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", level,
- le32_to_cpu(idx->ei_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(idx->ei_block)),
ext4_idx_pblock(idx),
newblock);
idx++;
@@ -473,7 +479,8 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
while (ex <= EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
- le32_to_cpu(ex->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
ext4_ext_pblock(ex),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
@@ -519,17 +526,19 @@ ext4_ext_binsearch_idx(struct inode *inode,
r = EXT_LAST_INDEX(eh);
while (l <= r) {
m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ei_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)))
r = m - 1;
else
l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
- m, le32_to_cpu(m->ei_block),
- r, le32_to_cpu(r->ei_block));
+ ext_debug("%p(%u):%p(%u):%p(%u) ",
+ l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ei_block)),
+ m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ei_block)),
+ r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ei_block)));
}

path->p_idx = l - 1;
- ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
+ ext_debug(" -> %d->%lld ",
+ EXT4_INODE_C2B(inode, le32_to_cpu(path->p_idx->ei_block)),
ext4_idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
@@ -545,12 +554,14 @@ ext4_ext_binsearch_idx(struct inode *inode,
"first=0x%p\n", k,
ix, EXT_FIRST_INDEX(eh));
printk(KERN_DEBUG "%u <= %u\n",
- le32_to_cpu(ix->ei_block),
- le32_to_cpu(ix[-1].ei_block));
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ix->ei_block)),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ix[-1].ei_block)));
}
BUG_ON(k && le32_to_cpu(ix->ei_block)
<= le32_to_cpu(ix[-1].ei_block));
- if (block < le32_to_cpu(ix->ei_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ix->ei_block)))
break;
chix = ix;
}
@@ -587,21 +598,22 @@ ext4_ext_binsearch(struct inode *inode,

while (l <= r) {
m = l + (r - l) / 2;
- if (block < le32_to_cpu(m->ee_block))
+ if (block < EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)))
r = m - 1;
else
l = m + 1;
- ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
- m, le32_to_cpu(m->ee_block),
- r, le32_to_cpu(r->ee_block));
+ ext_debug("%p(%u):%p(%u):%p(%u) ",
+ l, EXT4_INODE_C2B(inode, le32_to_cpu(l->ee_block)),
+ m, EXT4_INODE_C2B(inode, le32_to_cpu(m->ee_block)),
+ r, EXT4_INODE_C2B(inode, le32_to_cpu(r->ee_block)));
}

path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
- le32_to_cpu(path->p_ext->ee_block),
- ext4_ext_pblock(path->p_ext),
- ext4_ext_is_uninitialized(path->p_ext),
- ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));
+ EXT4_INODE_C2B(inode, le32_to_cpu(path->p_ext->ee_block)),
+ ext4_ext_pblock(path->p_ext),
+ ext4_ext_is_uninitialized(path->p_ext),
+ ext4_ext_get_actual_blocks(path->p_ext, inode->i_sb));

#ifdef CHECK_BINSEARCH
{
@@ -612,7 +624,8 @@ ext4_ext_binsearch(struct inode *inode,
for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
BUG_ON(k && le32_to_cpu(ex->ee_block)
<= le32_to_cpu(ex[-1].ee_block));
- if (block < le32_to_cpu(ex->ee_block))
+ if (block < EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)))
break;
chex = ex;
}
@@ -737,10 +750,13 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
if (err)
return err;

+ /* variable "logical" is in unit of cluster */
if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
EXT4_ERROR_INODE(inode,
"logical %d == ei_block %d!",
- logical, le32_to_cpu(curp->p_idx->ei_block));
+ logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(curp->p_idx->ei_block)));
return -EIO;
}

@@ -971,8 +987,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
EXT_LAST_INDEX(path[i].p_hdr))) {
EXT4_ERROR_INODE(inode,
- "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
- le32_to_cpu(path[i].p_ext->ee_block));
+ "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(path[i].p_ext->ee_block)));
err = -EIO;
goto cleanup;
}
@@ -1112,7 +1129,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
neh = ext_inode_hdr(inode);
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
- le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block)),
ext4_idx_pblock(EXT_FIRST_INDEX(neh)));

neh->eh_depth = cpu_to_le16(path->p_depth + 1);
@@ -1158,7 +1176,8 @@ repeat:
/* refill path */
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+ (ext4_lblk_t)EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
path);
if (IS_ERR(path))
err = PTR_ERR(path);
@@ -1172,7 +1191,8 @@ repeat:
/* refill path */
ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode,
- (ext4_lblk_t)le32_to_cpu(newext->ee_block),
+ (ext4_lblk_t)EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
path);
if (IS_ERR(path)) {
err = PTR_ERR(path);
@@ -1225,11 +1245,13 @@ static int ext4_ext_search_left(struct inode *inode,

ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
- if (*logical < le32_to_cpu(ex->ee_block)) {
+ if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
"EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
- *logical, le32_to_cpu(ex->ee_block));
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)));
return -EIO;
}
while (--depth >= 0) {
@@ -1237,9 +1259,11 @@ static int ext4_ext_search_left(struct inode *inode,
if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
EXT4_ERROR_INODE(inode,
"ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
- ix != NULL ? ix->ei_block : 0,
+ ix != NULL ? EXT4_INODE_C2B(inode,
+ le32_to_cpu(ix->ei_block)) : 0,
EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
- EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+ EXT4_INODE_C2B(inode, le32_to_cpu(EXT_FIRST_INDEX(
+ path[depth].p_hdr)->ei_block)) : 0,
depth);
return -EIO;
}
@@ -1247,14 +1271,19 @@ static int ext4_ext_search_left(struct inode *inode,
return 0;
}

- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ if (unlikely(*logical <
+ (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
return -EIO;
}

- *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
+ *logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1;
*phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
@@ -1295,7 +1324,7 @@ static int ext4_ext_search_right(struct inode *inode,

ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
- if (*logical < le32_to_cpu(ex->ee_block)) {
+ if (*logical < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
EXT4_ERROR_INODE(inode,
"first_extent(path[%d].p_hdr) != ex",
@@ -1314,10 +1343,14 @@ static int ext4_ext_search_right(struct inode *inode,
goto found_extent;
}

- if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ if (unlikely(*logical <
+ (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) + ee_len))) {
EXT4_ERROR_INODE(inode,
"logical %d < ee_block %d + ee_len %d!",
- *logical, le32_to_cpu(ex->ee_block), ee_len);
+ *logical,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
return -EIO;
}

@@ -1368,7 +1401,7 @@ got_index:
}
ex = EXT_FIRST_EXTENT(eh);
found_extent:
- *logical = le32_to_cpu(ex->ee_block);
+ *logical = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
*phys = ext4_ext_pblock(ex);
*ret_ex = ex;
if (bh)
@@ -1384,7 +1417,7 @@ found_extent:
* with leaves.
*/
static ext4_lblk_t
-ext4_ext_next_allocated_block(struct ext4_ext_path *path)
+ext4_ext_next_allocated_block(struct inode *inode, struct ext4_ext_path *path)
{
int depth;

@@ -1397,14 +1430,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
while (depth >= 0) {
if (depth == path->p_depth) {
/* leaf */
- if (path[depth].p_ext !=
- EXT_LAST_EXTENT(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_ext[1].ee_block);
+ if (path[depth].p_ext != EXT_LAST_EXTENT(
+ path[depth].p_hdr))
+ return EXT4_INODE_C2B(inode, le32_to_cpu(
+ path[depth].p_ext[1].ee_block));
} else {
/* index */
- if (path[depth].p_idx !=
- EXT_LAST_INDEX(path[depth].p_hdr))
- return le32_to_cpu(path[depth].p_idx[1].ei_block);
+ if (path[depth].p_idx != EXT_LAST_INDEX(
+ path[depth].p_hdr))
+ return EXT4_INODE_C2B(inode, le32_to_cpu(
+ path[depth].p_idx[1].ei_block));
}
depth--;
}
@@ -1416,7 +1451,8 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
* ext4_ext_next_leaf_block:
* returns first allocated block from next leaf or EXT_MAX_BLOCKS
*/
-static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
+static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
+ struct ext4_ext_path *path)
{
int depth;

@@ -1433,8 +1469,8 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
while (depth >= 0) {
if (path[depth].p_idx !=
EXT_LAST_INDEX(path[depth].p_hdr))
- return (ext4_lblk_t)
- le32_to_cpu(path[depth].p_idx[1].ei_block);
+ return (ext4_lblk_t) EXT4_INODE_C2B(inode,
+ le32_to_cpu(path[depth].p_idx[1].ei_block));
depth--;
}

@@ -1636,12 +1672,12 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
unsigned int depth, len1;
unsigned int ret = 0;

- b1 = le32_to_cpu(newext->ee_block);
+ b1 = EXT4_INODE_C2B(inode, le32_to_cpu(newext->ee_block));
len1 = ext4_ext_get_actual_blocks(newext, inode->i_sb);
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
+ b2 = EXT4_INODE_C2B(inode, le32_to_cpu(path[depth].p_ext->ee_block));
b2 &= ~(sbi->s_cluster_ratio - 1);

/*
@@ -1649,7 +1685,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
* is before the requested block(s)
*/
if (b2 < b1) {
- b2 = ext4_ext_next_allocated_block(path);
+ b2 = ext4_ext_next_allocated_block(inode, path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
b2 &= ~(sbi->s_cluster_ratio - 1);
@@ -1707,7 +1743,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext, inode->i_sb),
- le32_to_cpu(ex->ee_block),
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
ext4_ext_is_uninitialized(ex),
ext4_ext_get_actual_blocks(ex, inode->i_sb),
ext4_ext_pblock(ex));
@@ -1740,7 +1776,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
fex = EXT_LAST_EXTENT(eh);
next = EXT_MAX_BLOCKS;
if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
- next = ext4_ext_next_leaf_block(path);
+ next = ext4_ext_next_leaf_block(inode, path);
if (next != EXT_MAX_BLOCKS) {
ext_debug("next leaf block - %d\n", next);
BUG_ON(npath != NULL);
@@ -1781,7 +1817,8 @@ has_space:
if (!nearex) {
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1796,7 +1833,8 @@ has_space:
len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1811,7 +1849,8 @@ has_space:
len = len < 0 ? 0 : len;
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
- le32_to_cpu(newext->ee_block),
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(newext->ee_block)),
ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_blocks(newext,
@@ -1883,7 +1922,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
break;
}
ex = path[depth].p_ext;
- next = ext4_ext_next_allocated_block(path);
+ next = ext4_ext_next_allocated_block(inode, path);

exists = 0;
if (!ex) {
@@ -1891,26 +1930,29 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
* all requested space */
start = block;
end = block + num;
- } else if (le32_to_cpu(ex->ee_block) > block) {
+ } else if (EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ > block) {
/* need to allocate space before found extent */
start = block;
- end = le32_to_cpu(ex->ee_block);
+ end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
if (block + num < end)
end = block + num;
- } else if (block >= le32_to_cpu(ex->ee_block)
+ } else if (block >=
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
/* need to allocate space after found extent */
start = block;
end = block + num;
if (end >= next)
end = next;
- } else if (block >= le32_to_cpu(ex->ee_block)) {
+ } else if (block >= EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block))) {
/*
* some part of requested space is covered
* by found extent
*/
start = block;
- end = le32_to_cpu(ex->ee_block)
+ end = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb);
if (block + num < end)
end = block + num;
@@ -1925,7 +1967,8 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_len = EXT4_INODE_B2C(inode, end - start);
cbex.ec_start = 0;
} else {
- cbex.ec_block = le32_to_cpu(ex->ee_block);
+ cbex.ec_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));
cbex.ec_len = ext4_ext_get_actual_len(ex);
cbex.ec_start = ext4_ext_pblock(ex);
}
@@ -2000,24 +2043,24 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
lblock = 0;
len = EXT_MAX_BLOCKS;
ext_debug("cache gap(whole file):");
- } else if (block < le32_to_cpu(ex->ee_block)) {
+ } else if (block < EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))) {
lblock = block;
- len = le32_to_cpu(ex->ee_block) - block;
+ len = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)) - block;
ext_debug("cache gap(before): %u [%u:%u]",
- block,
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_blocks(ex, inode->i_sb));
- } else if (block >= le32_to_cpu(ex->ee_block)
+ block,
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb));
+ } else if (block >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb)) {
ext4_lblk_t next;
- lblock = le32_to_cpu(ex->ee_block)
+ lblock = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ ext4_ext_get_actual_blocks(ex, inode->i_sb);

- next = ext4_ext_next_allocated_block(path);
+ next = ext4_ext_next_allocated_block(inode, path);
ext_debug("cache gap(after): [%u:%u] %u",
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_blocks(ex, inode->i_sb),
- block);
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+ ext4_ext_get_actual_blocks(ex, inode->i_sb),
+ block);
BUG_ON(next == lblock);
len = next - lblock;
} else {
@@ -2026,7 +2069,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}

ext_debug(" -> %u:%lu\n", lblock, len);
- ext4_ext_put_in_cache(inode, lblock, len, 0);
+ ext4_ext_put_in_cache(inode, EXT4_INODE_B2C(inode, lblock), len, 0);
}

/*
@@ -2062,11 +2105,14 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
if (cex->ec_len == 0)
goto errout;

- if (in_range(block, cex->ec_block, cex->ec_len)) {
+ if (in_range(block, EXT4_C2B(sbi, cex->ec_block),
+ EXT4_C2B(sbi, cex->ec_len))) {
memcpy(ex, cex, sizeof(struct ext4_ext_cache));
ext_debug("%u cached by %u:%u:%llu\n",
block,
- cex->ec_block, cex->ec_len, cex->ec_start);
+ EXT4_C2B(sbi, cex->ec_block),
+ EXT4_C2B(sbi, cex->ec_len),
+ cex->ec_start);
ret = 1;
}
errout:
@@ -2229,9 +2275,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
*/
flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;

- trace_ext4_remove_blocks(inode, cpu_to_le32(ex->ee_block),
- ext4_ext_pblock(ex), ee_len, from,
- to, *partial_cluster);
+ trace_ext4_remove_blocks(inode,
+ EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block)),
+ ext4_ext_pblock(ex), ee_len, from,
+ to, *partial_cluster);
/*
* If we have a partial cluster, and it's different from the
* cluster of the last block, we need to explicitly free the
@@ -2260,12 +2307,14 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
spin_unlock(&sbi->s_ext_stats_lock);
}
#endif
- if (from >= le32_to_cpu(ex->ee_block)
- && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
+ if (from >= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ && to == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1) {
/* tail removal */
ext4_lblk_t num;

- num = le32_to_cpu(ex->ee_block) + ee_len - from;
+ num = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - from;
pblk = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, pblk);
ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
@@ -2282,8 +2331,9 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
*partial_cluster = EXT4_B2C(sbi, pblk);
else
*partial_cluster = 0;
- } else if (from == le32_to_cpu(ex->ee_block)
- && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
+ } else if (from == EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ && to <= EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block))
+ + ee_len - 1) {
/* head removal */
ext4_lblk_t num;
ext4_fsblk_t start;
@@ -2297,7 +2347,10 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
} else {
printk(KERN_INFO "strange request: removal(2) "
"%u-%u from %u:%u\n",
- from, to, le32_to_cpu(ex->ee_block), ee_len);
+ from, to,
+ EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
+ ee_len);
}
return 0;
}
@@ -2343,7 +2396,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* find where to start removing */
ex = EXT_LAST_EXTENT(eh);

- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

trace_ext4_ext_rm_leaf(inode, start, ex_ee_block, ext4_ext_pblock(ex),
@@ -2370,7 +2423,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
/* If this extent is beyond the end of the hole, skip it */
if (end <= ex_ee_block) {
ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex,
inode->i_sb);
continue;
@@ -2493,7 +2547,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
}

- ex->ee_block = cpu_to_le32(block);
+ BUG_ON(block & (sbi->s_cluster_ratio-1));
+ ex->ee_block = cpu_to_le32(EXT4_B2C(sbi, block));
ex->ee_len = cpu_to_le16(EXT4_B2C(sbi, num));
/*
* Do not mark uninitialized if all the blocks in the
@@ -2531,7 +2586,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ext_debug("new extent: %u:%u:%llu\n", block, num,
ext4_ext_pblock(ex));
ex--;
- ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ex_ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
}

@@ -2862,7 +2917,7 @@ static int ext4_split_extent_at(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
newblock = split - ee_block + ext4_ext_pblock(ex);

@@ -2905,7 +2960,7 @@ static int ext4_split_extent_at(handle_t *handle,
goto fix_extent_len;

ex2 = &newex;
- ex2->ee_block = cpu_to_le32(split);
+ ex2->ee_block = cpu_to_le32(EXT4_INODE_B2C(inode, split));
ex2->ee_len = cpu_to_le16(
EXT4_INODE_B2C(inode, ee_len - (split - ee_block)));
ext4_ext_store_pblock(ex2, newblock);
@@ -2962,7 +3017,7 @@ static int ext4_split_extent(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
uninitialized = ext4_ext_is_uninitialized(ex);

@@ -3037,7 +3092,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,

depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);
allocated = ee_len - (map->m_lblk - ee_block);

@@ -3078,8 +3133,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (allocated <= EXT4_EXT_ZERO_LEN &&
(EXT4_EXT_MAY_ZEROOUT & split_flag)) {
/* case 3 */
- zero_ex.ee_block =
- cpu_to_le32(map->m_lblk);
+ zero_ex.ee_block = cpu_to_le32(EXT4_INODE_B2C(inode,
+ map->m_lblk));
zero_ex.ee_len = cpu_to_le16(
EXT4_INODE_B2C(inode, allocated));
ext4_ext_store_pblock(&zero_ex,
@@ -3168,7 +3223,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
*/
depth = ext_depth(inode);
ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_INODE_C2B(inode, le32_to_cpu(ex->ee_block));
ee_len = ext4_ext_get_actual_blocks(ex, inode->i_sb);

split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
@@ -3191,7 +3246,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,

ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
- (unsigned long long)le32_to_cpu(ex->ee_block),
+ (unsigned long long)EXT4_INODE_C2B(inode,
+ le32_to_cpu(ex->ee_block)),
ext4_ext_get_actual_blocks(ex, inode->i_sb));

err = ext4_ext_get_access(handle, inode, path + depth);
@@ -3253,7 +3309,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
* this turns out to be false, we can bail out from this
* function immediately.
*/
- if (lblk + len < le32_to_cpu(last_ex->ee_block) +
+ if (lblk + len < EXT4_INODE_C2B(inode, le32_to_cpu(last_ex->ee_block)) +
ext4_ext_get_actual_blocks(last_ex, inode->i_sb))
return 0;
/*
@@ -3697,7 +3753,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
* |================|
*/
if (map->m_lblk > ee_block) {
- ext4_lblk_t next = ext4_ext_next_allocated_block(path);
+ ext4_lblk_t next = ext4_ext_next_allocated_block(inode, path);
map->m_len = min(map->m_len, next - map->m_lblk);
}

@@ -3770,12 +3826,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (sbi->s_cluster_ratio > 1)
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
newblock = map->m_lblk
- - le32_to_cpu(newex.ee_block)
+ - EXT4_C2B(sbi, le32_to_cpu(newex.ee_block))
+ ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_blocks(&newex,
inode->i_sb) -
- (map->m_lblk - le32_to_cpu(newex.ee_block));
+ (map->m_lblk - EXT4_C2B(sbi,
+ le32_to_cpu(newex.ee_block)));
goto out;
}
}
@@ -3806,7 +3863,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,

ex = path[depth].p_ext;
if (ex) {
- ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
+ ext4_lblk_t ee_block = EXT4_C2B(sbi, le32_to_cpu(ex->ee_block));
ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
ext4_fsblk_t partial_cluster = 0;
unsigned int ee_len;
@@ -3833,7 +3890,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* in the cache
*/
if (!ext4_ext_is_uninitialized(ex)) {
- ext4_ext_put_in_cache(inode, ee_block,
+ ext4_ext_put_in_cache(inode,
+ EXT4_B2C(sbi, ee_block),
ee_len, ee_start);
goto out;
}
@@ -3895,7 +3953,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_blocks(ex,
inode->i_sb);
- ee_block = le32_to_cpu(ex->ee_block);
+ ee_block = EXT4_C2B(sbi,
+ le32_to_cpu(ex->ee_block));
ee_start = ext4_ext_pblock(ex);

}
@@ -3949,11 +4008,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* Okay, we need to do block allocation.
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
- newex.ee_block = cpu_to_le32(map->m_lblk & ~(sbi->s_cluster_ratio-1));
+ newex.ee_block = cpu_to_le32(EXT4_B2C(sbi,
+ map->m_lblk & ~(sbi->s_cluster_ratio-1)));
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);

if (ex)
- BUG_ON((le32_to_cpu(ex->ee_block) +
+ BUG_ON((EXT4_C2B(sbi, le32_to_cpu(ex->ee_block)) +
EXT4_C2B(sbi, ext4_ext_get_actual_len(ex))) >
(map->m_lblk & ~(sbi->s_cluster_ratio-1)));

@@ -4012,6 +4072,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock)
goto out2;
@@ -4153,7 +4214,8 @@ got_allocated_blocks:
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
- ext4_ext_put_in_cache(inode, ar.logical, allocated, newblock);
+ ext4_ext_put_in_cache(inode, EXT4_B2C(sbi, ar.logical),
+ allocated, newblock);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
--
1.7.3.2
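
For readers tracing the conversions in the hunks above: EXT4_B2C() and
EXT4_C2B() are plain shifts by s_cluster_bits, and EXT4_NUM_B2C() is the
round-up variant defined earlier in this patch's ext4.h hunk. A minimal
user-space sketch of the arithmetic (B2C/C2B/NUM_B2C are local stand-ins
for the kernel macros; a 16-block cluster, i.e. 4k blocks with 64k
clusters, is assumed):

#include <assert.h>

#define CLUSTER_BITS	4			/* s_cluster_bits */
#define CLUSTER_RATIO	(1 << CLUSTER_BITS)	/* s_cluster_ratio */

#define B2C(blk)	((blk) >> CLUSTER_BITS)		/* block -> cluster */
#define C2B(cluster)	((cluster) << CLUSTER_BITS)	/* cluster -> block */
#define NUM_B2C(blks)	(((blks) + CLUSTER_RATIO - 1) >> CLUSTER_BITS)

int main(void)
{
	unsigned int ee_block = 3;	/* on-disk value, now in clusters */

	assert(C2B(ee_block) == 48);	/* first logical block it covers */
	assert(B2C(50) == 3);		/* block 50 falls inside cluster 3 */
	assert(NUM_B2C(17) == 2);	/* 17 blocks occupy 2 clusters */

	/* why the BUG_ON(block & (sbi->s_cluster_ratio-1)) added in
	 * ext4_ext_rm_leaf holds: a start block must be cluster-aligned
	 * for B2C to store it in ee_block without losing the offset */
	assert(C2B(B2C(48)) == 48);
	return 0;
}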


2011-11-18 10:43:30

by Robin Dong

[permalink] [raw]
Subject: [PATCH 7/9 v2 bigalloc] ext4: allocate a cluster for a directory when it needs space

From: Robin Dong <[email protected]>

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/inode.c | 9 +++++++++
fs/ext4/namei.c | 46 ++++++++++++++++++++++++++++++++--------------
2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 22b28bd..4331d07 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -673,6 +673,15 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return bh;
if (buffer_uptodate(bh))
return bh;
+ /* If the block has been allocated as part of a cluster,
+ * there is no need to read it from disk.
+ */
+ if ((EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) &&
+ (((loff_t)block << inode->i_sb->s_blocksize_bits) >= inode->i_size)) {
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ return bh;
+ }
ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1c924fa..43f7dcb 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1811,10 +1811,12 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
handle_t *handle;
struct inode *inode;
- struct buffer_head *dir_block = NULL;
+ struct buffer_head *first_block = NULL;
+ struct buffer_head **dir_block = NULL;
struct ext4_dir_entry_2 *de;
+ struct ext4_sb_info *sbi = EXT4_SB(dir->i_sb);
unsigned int blocksize = dir->i_sb->s_blocksize;
- int err, retries = 0;
+ int i, err, retries = 0;

if (EXT4_DIR_LINK_MAX(dir))
return -EMLINK;
@@ -1824,6 +1826,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ sbi->s_cluster_ratio +
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1840,14 +1843,24 @@ retry:
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext4_bread(handle, inode, 0, 1, &err);
+
+ dir_block = kzalloc(sizeof(struct buffer_head *) * EXT4_MAX_CTXT_PAGES,
+ GFP_NOFS);
if (!dir_block)
- goto out_clear_inode;
- BUFFER_TRACE(dir_block, "get_write_access");
- err = ext4_journal_get_write_access(handle, dir_block);
- if (err)
- goto out_clear_inode;
- de = (struct ext4_dir_entry_2 *) dir_block->b_data;
+ goto out_stop;
+
+ for (i = 0; i < sbi->s_cluster_ratio; i++) {
+ dir_block[i] = ext4_getblk(handle, inode, i, 1, &err);
+ if (!dir_block[i])
+ goto out_clear_inode;
+ memset(dir_block[i]->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(dir_block[i]);
+ err = ext4_journal_get_write_access(handle, dir_block[i]);
+ if (err)
+ goto out_clear_inode;
+ }
+ first_block = dir_block[0];
+ de = (struct ext4_dir_entry_2 *) first_block->b_data;
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
@@ -1862,10 +1875,13 @@ retry:
strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
- BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
- err = ext4_handle_dirty_metadata(handle, dir, dir_block);
- if (err)
- goto out_clear_inode;
+ BUFFER_TRACE(first_block, "call ext4_handle_dirty_metadata");
+
+ for (i = 0; i < sbi->s_cluster_ratio; i++) {
+ err = ext4_handle_dirty_metadata(handle, dir, dir_block[i]);
+ if (err)
+ goto out_clear_inode;
+ }
err = ext4_mark_inode_dirty(handle, inode);
if (!err)
err = ext4_add_entry(handle, dentry, inode);
@@ -1885,7 +1901,9 @@ out_clear_inode:
d_instantiate(dentry, inode);
unlock_new_inode(inode);
out_stop:
- brelse(dir_block);
+ /* dir_block is NULL if inode creation or the kzalloc failed */
+ if (dir_block) {
+ for (i = 0; i < sbi->s_cluster_ratio; i++)
+ brelse(dir_block[i]);
+ kfree(dir_block);
+ }
ext4_journal_stop(handle);
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
goto retry;
--
1.7.3.2
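
The ext4_bread() hunk above skips the disk read for blocks that lie wholly
beyond i_size: such blocks only exist because the enclosing cluster was
just allocated, so their on-disk contents are garbage. A stand-alone
sketch of that predicate (a hypothetical helper, not code from the patch;
note the loff_t cast so the shift cannot overflow 32 bits):

#include <stdbool.h>
#include <stdint.h>

/* Return true if logical block @block starts at or past EOF and so was
 * only allocated as bigalloc cluster padding; callers can zero-fill the
 * buffer instead of reading it. */
static bool block_is_cluster_padding(uint32_t block, unsigned int blkbits,
				     int64_t i_size,
				     unsigned int cluster_ratio)
{
	if (cluster_ratio <= 1)
		return false;		/* no bigalloc: read as usual */
	return ((int64_t)block << blkbits) >= i_size;
}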


2011-11-18 10:43:32

by Robin Dong

[permalink] [raw]
Subject: [PATCH 8/9 v2 bigalloc] ext4: align fallocate size to a whole cluster

From: Robin Dong <[email protected]>

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/extents.c | 34 ++++++++++++++++++++++++++++++++--
1 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7799973..c7dc340 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3533,8 +3533,11 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path, int flags,
unsigned int allocated, ext4_fsblk_t newblock)
{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct ext4_map_blocks convert_map;
int ret = 0;
int err = 0;
+ int offset;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;

ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
@@ -3598,8 +3601,14 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
}

/* buffered write, writepage time, convert*/
- ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
+ offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ convert_map.m_len =
+ EXT4_C2B(sbi, EXT4_NUM_B2C(sbi, offset + map->m_len));
+ convert_map.m_lblk = map->m_lblk - offset;
+ ret = ext4_ext_convert_to_initialized(handle, inode,
+ &convert_map, path);
if (ret >= 0) {
+ ret = map->m_len;
ext4_update_inode_fsync_trans(handle, inode, 1);
err = check_eofblocks_fl(handle, inode, map->m_lblk, path,
map->m_len);
@@ -4318,8 +4327,9 @@ static void ext4_falloc_update_inode(struct inode *inode,
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
handle_t *handle;
- loff_t new_size;
+ loff_t new_size, old_size;
unsigned int max_blocks;
int ret = 0;
int ret2 = 0;
@@ -4349,6 +4359,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
*/
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- map.m_lblk;
+
+ old_size = i_size_read(inode);
/*
* credits to insert 1 extent into extent tree
*/
@@ -4403,6 +4415,24 @@ retry:
goto retry;
}
mutex_unlock(&inode->i_mutex);
+
+ /* If the fallocate expands the file size, we need to zero out
+ * the extra pages in the last cluster */
+ if (len + offset > old_size) {
+ credits = ext4_chunk_trans_blocks(inode, sbi->s_cluster_ratio);
+ handle = ext4_journal_start(inode, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+ ext4_ext_truncate_zero_pages(handle, inode, old_size);
+ if (IS_SYNC(inode))
+ ext4_handle_sync(handle);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ }
+
+out:
trace_ext4_fallocate_exit(inode, offset, max_blocks,
ret > 0 ? ret2 : ret);
return ret > 0 ? ret2 : ret;
--
1.7.3.2
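
The convert_map computation above widens the caller's range to whole
clusters before converting it to initialized, so an uninitialized extent
is never split inside a cluster. The arithmetic, pulled out into a
stand-alone sketch (illustrative names, not the patch's code):

/* Round the block range [lblk, lblk + len) out to cluster boundaries;
 * ratio is s_cluster_ratio, always a power of two. */
static void cluster_align_range(unsigned int lblk, unsigned int len,
				unsigned int ratio,
				unsigned int *out_lblk,
				unsigned int *out_len)
{
	unsigned int offset = lblk & (ratio - 1); /* blocks into cluster */

	*out_lblk = lblk - offset;		  /* round start down */
	/* round the widened length up to a multiple of ratio; this is
	 * what EXT4_C2B(EXT4_NUM_B2C(...)) computes in the hunk above */
	*out_len = (offset + len + ratio - 1) & ~(ratio - 1);
}

With a 16-block cluster, for example, a 3-block request at logical block
18 becomes a 16-block range starting at logical block 16.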


2011-11-18 10:43:34

by Robin Dong

[permalink] [raw]
Subject: [PATCH 9/9 v2 bigalloc] ext4: make clusters work for mmap

From: Robin Dong <[email protected]>

When a user writes a page in an mmap region, the other pages in the
same cluster need to be zeroed out as well.

Signed-off-by: Robin Dong <[email protected]>
---
fs/ext4/inode.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4331d07..f965ed1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4651,13 +4651,17 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page;
loff_t size;
unsigned long len;
- int ret;
+ int ret, i, uninit = 0;
struct file *file = vma->vm_file;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
+ struct ext4_write_cluster_ctxt *ewcc = NULL;
handle_t *handle;
get_block_t *get_block;
int retries = 0;
+ unsigned int flags = AOP_FLAG_NOFS;

/*
* This check is racy but catches the common case. We rely on
@@ -4714,7 +4718,47 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
+
+ ewcc = ext4_alloc_write_cluster_ctxt();
+ if (!ewcc) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
+
+ if (sbi->s_cluster_ratio > 1) {
+ /* We need to know whether the block is allocated already
+ */
+ struct ext4_map_blocks map;
+ map.m_lblk = page->index;
+ map.m_len = 1;
+ ret = ext4_map_blocks(handle, inode, &map, 0);
+ uninit = map.m_flags & EXT4_MAP_UNWRITTEN;
+ if (ret <= 0 || uninit) {
+ ret = ext4_prepare_cluster_left_pages(inode,
+ page->index, ewcc, flags);
+ if (ret)
+ goto err_out;
+ }
+ }
+
ret = __block_page_mkwrite(vma, vmf, get_block);
+ if (ret)
+ goto err_out;
+
+ if (sbi->s_cluster_ratio > 1 && uninit) {
+ ret = ext4_prepare_cluster_right_pages(inode, page->index,
+ ewcc, flags);
+ if (ret)
+ goto err_out;
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ if (!ewcc->w_pages[i] ||
+ !page_buffers(ewcc->w_pages[i]))
+ break;
+ block_commit_write(ewcc->w_pages[i],
+ 0, PAGE_CACHE_SIZE);
+ }
+ }
+
if (!ret && ext4_should_journal_data(inode)) {
if (walk_page_buffers(handle, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
@@ -4722,13 +4766,36 @@ retry_alloc:
ret = VM_FAULT_SIGBUS;
goto out;
}
+
+ for (i = 0; i < ewcc->w_num_pages; i++) {
+ page = ewcc->w_pages[i];
+ if (!page || !page_buffers(page))
+ continue;
+ ret = walk_page_buffers(handle, page_buffers(page),
+ 0, PAGE_CACHE_SIZE, NULL,
+ do_journal_get_write_access);
+ if (ret) {
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ }
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
}
+
+err_out:
+ if (ewcc) {
+ ext4_free_write_cluster_ctxt(ewcc);
+ ewcc = NULL;
+ }
ext4_journal_stop(handle);
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
ret = block_page_mkwrite_return(ret);
+
out:
+ if (ewcc)
+ ext4_free_write_cluster_ctxt(ewcc);
return ret;
}
--
1.7.3.2
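
The mkwrite path needs the set of sibling pages that share the written
page's cluster; the ewcc helpers that actually read and zero them come
from earlier patches in this series and are not shown here. The
page-index arithmetic reduces to the following sketch (it assumes block
size == page size, i.e. 4k blocks on 4k-page systems, the case this
series targets):

/* First and last page index of the cluster containing @index,
 * assuming one block per page. */
static void cluster_page_span(unsigned long index, unsigned int ratio,
			      unsigned long *first, unsigned long *last)
{
	*first = index & ~((unsigned long)ratio - 1);
	*last = *first + ratio - 1;
}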


2011-11-19 04:22:25

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 0/9 v2 bigalloc] ext4: change unit of extent's ee_block and ee_len from block to cluster for bigalloc

On Fri, Nov 18, 2011 at 06:42:55PM +0800, Robin Dong wrote:
> From: Robin Dong <[email protected]>
>
> This patch series change ee_block and ee_len (of extent) 's unit
> from "block" to "cluster", since it could reduce the space occupied
> by meta data.
>
> This patch series should be used after Ted's bigalloc-patchs and it
> now can't support:
> 1. delay allocation
> 2. 1k/2k blocksize

It *can't* support delayed allocation or sub-4k block sizes? That's
only with your modified bigalloc enabled, I presume, right?

If we are going to support this modified bigalloc, I think it only
makes sense to do it as a new file system feature, so we can support
both extents which are denominated in blocks, as well as extents which
are denominated in clusters.

But it may be that we're better off biting the bullet and supporting
a 2nd extent format, which looks like this:

struct ext4_extent2 {
__le64 ee_block; /* first logical block extent covers */
__le64 ee_start; /* starting physical block */
__le32 ee_len; /* number of blocks covered by extent */
__le32 ee_flags; /* flags and future extension */
};

This is 24 bytes, which means we can only fit two extents in the inode
(12 bytes header, plus two 24 byte extents). But it expands the size
of files we can support, as well as supporting larger volumes.
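
To spell out the arithmetic: the in-inode extent area (the 60-byte i_block
array) holds a 12-byte header plus four 12-byte extents today; with
24-byte entries exactly two fit. A compile-time sketch of the claim
(user-space fixed-width types standing in for the __le variants):

#include <stdint.h>

struct ext4_extent2 {
	uint64_t ee_block;	/* first logical block extent covers */
	uint64_t ee_start;	/* starting physical block */
	uint32_t ee_len;	/* number of blocks covered by extent */
	uint32_t ee_flags;	/* flags and future extension */
};

_Static_assert(sizeof(struct ext4_extent2) == 24, "24-byte extent");
_Static_assert(12 + 2 * sizeof(struct ext4_extent2) == 60,
	       "header plus two extents exactly fill i_block");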

Yes, using units of clusters could support this as well, but the fact
that sparse blocks have to get zeroed out to the nearest cluster
boundary means that it's only going to work for cluster sizes of 128k
at most, since the requirement that clusters get zeroed out when doing
a partial write is going to get pretty onerous much beyond that size.
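
To put rough numbers on that cost: a one-block write into an otherwise
unwritten cluster forces the rest of the cluster to be zeroed. Assuming
4k blocks (a back-of-the-envelope model, not measured data):

/* Blocks that must be zeroed for a single 4k write into an
 * otherwise-unwritten cluster of the given size. */
static unsigned int zeroed_blocks(unsigned int cluster_kb)
{
	return cluster_kb / 4 - 1;
}

/* 64k cluster  -> 15 extra blocks zeroed per partial write
 * 128k cluster -> 31 extra blocks
 * 1M cluster   -> 255 extra blocks: clearly onerous */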

- Ted