2008-10-24 10:10:07

by Akira Fujita

[permalink] [raw]
Subject: [RFC][PATCH 2/9]ext4: allocate new contiguous blocks with mballoc

ext4: online defrag -- Allocate new contiguous blocks with mballoc.

From: Akira Fujita <[email protected]>

Search for contiguous free blocks with the multi-block allocator (mballoc)
and allocate them to the temporary inode.

Signed-off-by: Akira Fujita <[email protected]>
Signed-off-by: Takashi Sato <[email protected]>
---
fs/ext4/defrag.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++-
fs/ext4/ext4.h | 3 +
fs/ext4/ext4_extents.h | 3 +
fs/ext4/extents.c | 4 +-
4 files changed, 231 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 1e36193..729f001 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -91,6 +91,84 @@ err:
}

/**
+ * ext4_defrag_fill_ar - Fill in a multi-block allocation request for tmp inode
+ *
+ * @org_inode:		original inode (not referenced by this helper)
+ * @dest_inode:		temporary inode recorded as the owner in the request
+ * @ar:			allocation request to fill in
+ * @org_path:		indicating the original inode's extent (not referenced
+ *			by this helper)
+ * @dest_path:		indicating the temporary inode's extent; used to
+ *			compute the allocation goal
+ * @req_blocks:		contiguous blocks count we need
+ * @iblock:		target file offset
+ *
+ * The neighbour hints (lleft/pleft/lright/pright) are cleared and the goal
+ * is taken from ext4_ext_find_goal() on the temporary inode.
+ */
+static void
+ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode,
+		    struct ext4_allocation_request *ar,
+		    struct ext4_ext_path *org_path,
+		    struct ext4_ext_path *dest_path,
+		    ext4_fsblk_t req_blocks, ext4_lblk_t iblock)
+{
+	/* Physical goal derived from the temporary inode's extent path */
+	ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+
+	/* No left/right neighbour hints are supplied */
+	ar->lleft = ar->lright = 0;
+	ar->pleft = ar->pright = 0;
+
+	/* What is requested: req_blocks blocks at iblock of dest_inode */
+	ar->flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED |
+		    EXT4_MB_HINT_NOPREALLOC;
+	ar->logical = iblock;
+	ar->len = req_blocks;
+	ar->inode = dest_inode;
+}
+
+/**
+ * ext4_defrag_alloc_blocks - Allocate contiguous blocks to temporary inode
+ *
+ * @handle:		journal handle
+ * @org_inode:		original inode (used only to reach the super block)
+ * @dest_inode:		temporary inode for multiple block allocation
+ * @ar:			allocation request for multiple block allocation
+ * @dest_path:		indicating the temporary inode's extent
+ * @newblock:		set to the start of the allocated contiguous blocks
+ *
+ * The journal handle is restarted with the credits computed for a single
+ * extent of ar->len blocks before ext4_mb_new_blocks() is called.
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_alloc_blocks(handle_t *handle, struct inode *org_inode,
+		struct inode *dest_inode, struct ext4_allocation_request *ar,
+		struct ext4_ext_path *dest_path, ext4_fsblk_t *newblock)
+{
+	struct super_block *sb = org_inode->i_sb;
+	int err, i, credits;
+
+	credits = ext4_ext_calc_credits_for_single_extent(dest_inode,
+							  ar->len, dest_path);
+	err = ext4_ext_journal_restart(handle, credits);
+	if (err)
+		return err;
+
+	*newblock = ext4_mb_new_blocks(handle, ar, &err);
+	if (err)
+		return err;
+
+	/*
+	 * Dirty buffer_head causes the overwriting
+	 * if ext4_mb_new_blocks() allocates the block
+	 * which used to be the metadata block.
+	 * We should call unmap_underlying_metadata()
+	 * to clear the dirty flag.
+	 *
+	 * unmap_underlying_metadata() looks the buffer up itself, so no
+	 * separate sb_find_get_block() is done here: taking that reference
+	 * without a matching brelse() would leak a buffer_head reference
+	 * for every allocated block.
+	 */
+	for (i = 0; i < ar->len; i++)
+		unmap_underlying_metadata(sb->s_bdev, *newblock + i);
+
+	return 0;
+}
+
+/**
* ext4_defrag_partial - Defrag a file per page
*
* @tmp_inode: temporary inode
@@ -111,6 +189,69 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
}

/**
+ * ext4_defrag_comp_ext_count - Check whether fragments are improved or not
+ *
+ * @org_inode:		original inode
+ * @org_path:		the structure holding some info about
+ *			original extent tree
+ * @tar_end:		the last block number of the allocated blocks
+ * @sum_tmp:		the extents count in the allocated blocks
+ *
+ * Counts the original extents from org_path's leaf extent up to the one
+ * that reaches @tar_end (or the last extent) and compares that count with
+ * @sum_tmp.
+ *
+ * This function returns the values as below.
+ *	0 (improved: the original range has more extents than @sum_tmp)
+ *	1 (not improved: both counts are equal)
+ *	-ENOSPC (the new allocation has more extents than the original)
+ *	other negative value (error from ext4_defrag_next_extent())
+ */
+static int
+ext4_defrag_comp_ext_count(struct inode *org_inode,
+			   struct ext4_ext_path *org_path, ext4_lblk_t tar_end,
+			   int sum_tmp)
+{
+	int depth = ext_depth(org_inode);
+	struct ext4_extent *ext = org_path[depth].p_ext;
+	int is_last = 0;
+	int sum_org = 0;
+
+	/* Count the original extents that cover the range up to tar_end */
+	for (;;) {
+		if (!is_last)
+			sum_org++;
+
+		/* Stop once this extent reaches tar_end or none is left */
+		if (tar_end <= (le32_to_cpu(ext->ee_block) +
+				le16_to_cpu(ext->ee_len) - 1) ||
+		    is_last)
+			break;
+
+		is_last = ext4_defrag_next_extent(org_inode, org_path, &ext);
+		if (is_last < 0)
+			return is_last;
+	}
+
+	/* Same number of extents: not improved */
+	if (sum_org == sum_tmp)
+		return 1;
+
+	/* More extents than before: fragment increased */
+	if (sum_org < sum_tmp) {
+		printk(KERN_ERR "ext4 defrag: "
+				"Insufficient free blocks\n");
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+/**
* ext4_defrag_new_extent_tree - Get contiguous blocks and build an extent tree
*
* @org_inode: original inode
@@ -130,7 +271,88 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode,
struct ext4_ext_path *org_path, ext4_lblk_t req_start,
ext4_lblk_t req_blocks, ext4_lblk_t iblock)
{
- return 0;
+ handle_t *handle;
+ struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb);
+ struct ext4_extent_header *eh = NULL;
+ struct ext4_allocation_request ar;
+ struct ext4_ext_path *dest_path = NULL;
+ struct ext4_extent newex;
+ ext4_fsblk_t alloc_total = 0; /* blocks allocated so far */
+ ext4_fsblk_t newblock = 0; /* first physical block of the latest allocation */
+ ext4_lblk_t req_end = req_start + req_blocks - 1; /* last logical block of the requested range */
+ ext4_lblk_t rest_blocks = 0; /* requested blocks not allocated yet */
+ int sum_tmp = 0; /* number of extents inserted into tmp_inode */
+ int metadata = 1; /* passed as the last argument of ext4_free_blocks() */
+ int ret;
+
+ eh = ext_inode_hdr(tmp_inode);
+ eh->eh_depth = 0; /* reset the depth in tmp_inode's extent header */
+
+ dest_path = ext4_ext_find_extent(tmp_inode, iblock, NULL);
+ if (IS_ERR(dest_path)) {
+ ret = PTR_ERR(dest_path);
+ dest_path = NULL; /* keeps out2 from dropping an error pointer */
+ goto out2;
+ }
+
+ /* Fill struct ext4_allocation_request with the goal, length and flags */
+ ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path,
+ dest_path, req_blocks, iblock);
+
+ handle = ext4_journal_start(tmp_inode, 0); /* started with 0 credits; ext4_defrag_alloc_blocks() restarts it with what it needs */
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out2;
+ }
+
+ while (alloc_total != req_blocks) { /* one extent per iteration until the whole request is covered */
+ /* Allocate blocks; ar.len is read back below as the allocated length */
+ ret = ext4_defrag_alloc_blocks(handle, org_inode, tmp_inode,
+ &ar, dest_path, &newblock);
+ if (ret < 0)
+ goto out;
+ /* Claimed blocks are already reserved */
+ EXT4_I(ar.inode)->i_delalloc_reserved_flag = 1;
+
+ alloc_total += ar.len;
+ rest_blocks = req_blocks - alloc_total;
+
+ newex.ee_block = cpu_to_le32(alloc_total - ar.len); /* logical start = blocks allocated before this extent */
+ ext4_ext_store_pblock(&newex, newblock);
+ newex.ee_len = cpu_to_le16(ar.len);
+
+ ret = ext4_ext_insert_extent(handle, tmp_inode,
+ dest_path, &newex);
+ if (ret < 0)
+ goto out;
+
+ ar.goal = newblock + ar.len; /* next goal: the block right after this allocation */
+ ar.len = req_blocks - alloc_total; /* next request: the blocks still missing */
+ sum_tmp++;
+ }
+
+ ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end,
+ sum_tmp); /* 0: improved, 1: not improved, negative: error */
+
+out:
+ if (ret < 0 && ar.len) /* on error, free ar.len blocks starting at newblock */
+ ext4_free_blocks(handle, tmp_inode, newblock, ar.len, metadata);
+ /*
+ * Update the dirty-blocks counter if we could not allocate all of
+ * the requested blocks: the unallocated remainder is subtracted.
+ */
+ if (rest_blocks)
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, rest_blocks);
+
+ ext4_journal_stop(handle);
+
+out2:
+ if (dest_path) {
+ ext4_ext_drop_refs(dest_path);
+ kfree(dest_path);
+ }
+
+ return ret;
}

/**
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 30e3195..aa3b639 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -326,6 +326,7 @@ struct ext4_ext_defrag_data {
ext4_fsblk_t goal; /* block offset for allocation */
};

+#define EXT4_TRANS_META_BLOCKS 4 /* bitmap + group desc + sb + inode */

/*
* Mount options
@@ -1151,6 +1152,8 @@ extern void ext4_inode_bitmap_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
extern void ext4_inode_table_set(struct super_block *sb,
struct ext4_group_desc *bg, ext4_fsblk_t blk);
+/* extents.c */
+extern int ext4_ext_journal_restart(handle_t *handle, int needed);
/* defrag.c */
extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
ext4_lblk_t defrag_size);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a28d8d2..39e043b 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -247,6 +247,9 @@ extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
+extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t block);

#endif /* _EXT4_EXTENTS */

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 406cab9..dde234f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -93,7 +93,7 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

-static int ext4_ext_journal_restart(handle_t *handle, int needed)
+int ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;

@@ -142,7 +142,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
return err;
}

-static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
+ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t block)
{