From: Akira Fujita Subject: [RFC][PATCH 4/9]ext4: Inside extents management and relevant defrag Date: Fri, 24 Oct 2008 19:09:48 +0900 Message-ID: <49019EEC.6090203@rs.jp.nec.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Cc: linux-fsdevel@vger.kernel.org To: linux-ext4@vger.kernel.org, Theodore Tso , Mingming Cao Return-path: Received: from TYO202.gate.nec.co.jp ([202.32.8.206]:63843 "EHLO tyo202.gate.nec.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753115AbYJXKKP (ORCPT ); Fri, 24 Oct 2008 06:10:15 -0400 Sender: linux-ext4-owner@vger.kernel.org List-ID: ext4: online defrag -- Exchange the blocks between two inodes. From: Akira Fujita Merge the extents of the target inode, whose number is increased by defragmentation. For relevant defrag, add a goal argument to the related functions so that data blocks are allocated at the specified offset. Signed-off-by: Akira Fujita Signed-off-by: Takashi Sato --- fs/ext4/defrag.c | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/ext4/ext4.h | 2 +- fs/ext4/ioctl.c | 12 ++- 3 files changed, 275 insertions(+), 14 deletions(-) diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c index 0b90d4d..891e599 100644 --- a/fs/ext4/defrag.c +++ b/fs/ext4/defrag.c @@ -91,6 +91,188 @@ err: } /** + * ext4_defrag_merge_across_blocks - Merge extents across leaf blocks + * + * @handle: journal handle + * @org_inode: original inode + * @o_start: first original extent to be defragmented + * @o_end: last original extent to be defragmented + * @start_ext: first new extent to be merged + * @new_ext: middle of new extent to be merged + * @end_ext: last new extent to be merged + * + * This function returns 0 on success, otherwise returns an error value. 
+ */ +static int +ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *org_inode, + struct ext4_extent *o_start, struct ext4_extent *o_end, + struct ext4_extent *start_ext, struct ext4_extent *new_ext, + struct ext4_extent *end_ext) +{ + struct ext4_ext_path *org_path = NULL; + ext4_lblk_t eblock = 0; + int new_flag = 0; + int end_flag = 0; + int err; + + if (le16_to_cpu(start_ext->ee_len) && + le16_to_cpu(new_ext->ee_len) && + le16_to_cpu(end_ext->ee_len)) { + + if (o_start == o_end) { + + /* start_ext new_ext end_ext + * dest |---------|-----------|--------| + * org |------------------------------| + */ + + end_flag = 1; + } else { + + /* start_ext new_ext end_ext + * dest |---------|----------|---------| + * org |---------------|--------------| + */ + + o_end->ee_block = end_ext->ee_block; + o_end->ee_len = end_ext->ee_len; + ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + } + + o_start->ee_len = start_ext->ee_len; + new_flag = 1; + + } else if (le16_to_cpu(start_ext->ee_len) && + le16_to_cpu(new_ext->ee_len) && + !le16_to_cpu(end_ext->ee_len) && + o_start == o_end) { + + /* start_ext new_ext + * dest |--------------|---------------| + * org |------------------------------| + */ + + o_start->ee_len = start_ext->ee_len; + new_flag = 1; + + } else if (!le16_to_cpu(start_ext->ee_len) && + le16_to_cpu(new_ext->ee_len) && + le16_to_cpu(end_ext->ee_len) && + o_start == o_end) { + + /* new_ext end_ext + * dest |--------------|---------------| + * org |------------------------------| + */ + + o_end->ee_block = end_ext->ee_block; + o_end->ee_len = end_ext->ee_len; + ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + + /* + * Set 0 to the extent block if new_ext was + * the first block. 
+ */ + if (!new_ext->ee_block) + eblock = 0; + else + eblock = le32_to_cpu(new_ext->ee_block); + + new_flag = 1; + } else { + printk(KERN_ERR "ext4 defrag: Unexpected merge case\n"); + return -EIO; + } + + if (new_flag) { + org_path = ext4_ext_find_extent(org_inode, eblock, NULL); + if (IS_ERR(org_path)) { + err = PTR_ERR(org_path); + org_path = NULL; + goto out; + } + err = ext4_ext_insert_extent(handle, org_inode, + org_path, new_ext); + if (err) + goto out; + } + + if (end_flag) { + org_path = ext4_ext_find_extent(org_inode, + le32_to_cpu(end_ext->ee_block) - 1, org_path); + if (IS_ERR(org_path)) { + err = PTR_ERR(org_path); + org_path = NULL; + goto out; + } + err = ext4_ext_insert_extent(handle, org_inode, + org_path, end_ext); + if (err) + goto out; + } +out: + if (org_path) { + ext4_ext_drop_refs(org_path); + kfree(org_path); + } + + return err; + +} + +/** + * ext4_defrag_merge_inside_block - Merge new extent to the extent block + * + * @o_start: first original extent to be merged + * @o_end: last original extent to be merged + * @start_ext: first new extent to be merged + * @new_ext: middle of new extent to be merged + * @end_ext: last new extent to be merged + * @eh: extent header of target leaf block + * @replaced: the number of blocks which will be replaced with new_ext + * @range_to_move: used to decide how to merge + * + * This function always returns 0. 
+ */ +static int +ext4_defrag_merge_inside_block(struct ext4_extent *o_start, + struct ext4_extent *o_end, struct ext4_extent *start_ext, + struct ext4_extent *new_ext, struct ext4_extent *end_ext, + struct ext4_extent_header *eh, ext4_fsblk_t replaced, + int range_to_move) +{ + int i = 0; + unsigned len; + + /* Move the existing extents */ + if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) { + len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) - + (unsigned long)(o_end + 1); + memmove(o_end + 1 + range_to_move, o_end + 1, len); + } + + /* Insert start entry */ + if (le16_to_cpu(start_ext->ee_len)) + o_start[i++].ee_len = start_ext->ee_len; + + /* Insert new entry */ + if (le16_to_cpu(new_ext->ee_len)) { + o_start[i].ee_block = new_ext->ee_block; + o_start[i].ee_len = cpu_to_le16(replaced); + ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); + } + + /* Insert end entry */ + if (end_ext->ee_len) + o_start[i] = *end_ext; + + /* Increment the total entries counter on the extent block */ + le16_add_cpu(&eh->eh_entries, range_to_move); + + return 0; +} + +/** * ext4_defrag_merge_extents - Merge new extent * * @handle: journal handle @@ -112,7 +294,65 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *org_inode, struct ext4_extent *start_ext, struct ext4_extent *new_ext, struct ext4_extent *end_ext, ext4_fsblk_t replaced) { + struct ext4_extent_header *eh; + unsigned need_slots, slots_range; + int range_to_move, depth, ret; + + /* + * The extents need to be inserted + * start_extent + new_extent + end_extent. + */ + need_slots = (le16_to_cpu(start_ext->ee_len) ? 1 : 0) + + (le16_to_cpu(end_ext->ee_len) ? 1 : 0) + + (le16_to_cpu(new_ext->ee_len) ? 
1 : 0); + + /* The number of slots between start and end */ + slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1) + / sizeof(struct ext4_extent); + + /* Range to move the end of extent */ + range_to_move = need_slots - slots_range; + depth = org_path->p_depth; + org_path += depth; + eh = org_path->p_hdr; + + if (depth) { + /* Register to journal */ + ret = ext4_journal_get_write_access(handle, org_path->p_bh); + if (ret) + return ret; + } + + /* Expansion */ + if (range_to_move > 0 && + (range_to_move > le16_to_cpu(eh->eh_max) + - le16_to_cpu(eh->eh_entries))) { + + ret = ext4_defrag_merge_across_blocks(handle, org_inode, + o_start, o_end, start_ext, new_ext, + end_ext); + if (ret < 0) + return ret; + } else { + ret = ext4_defrag_merge_inside_block(o_start, o_end, + start_ext, new_ext, end_ext, eh, + replaced, range_to_move); + if (ret < 0) + return ret; + } + + if (depth) { + ret = ext4_journal_dirty_metadata(handle, org_path->p_bh); + if (ret) + return ret; + } else { + ret = ext4_mark_inode_dirty(handle, org_inode); + if (ret < 0) + return ret; + } + return 0; + } /** @@ -455,6 +695,7 @@ out: * @dest_path: indicating the temporary inode's extent * @req_blocks: contiguous blocks count we need * @iblock: target file offset + * @goal: goal offset * */ static void @@ -462,7 +703,8 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode, struct ext4_allocation_request *ar, struct ext4_ext_path *org_path, struct ext4_ext_path *dest_path, - ext4_fsblk_t req_blocks, ext4_lblk_t iblock) + ext4_fsblk_t req_blocks, ext4_lblk_t iblock, + ext4_fsblk_t goal) { ar->inode = dest_inode; ar->len = req_blocks; @@ -474,7 +716,10 @@ ext4_defrag_fill_ar(struct inode *org_inode, struct inode *dest_inode, ar->lright = 0; ar->pright = 0; - ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock); + if (goal) + ar->goal = goal; + else + ar->goal = ext4_ext_find_goal(dest_inode, dest_path, iblock); } /** @@ -671,6 +916,7 @@ out: * original extent 
tree * @tar_end: the last block number of the allocated blocks * @sum_tmp: the extents count in the allocated blocks + * @goal: block offset for allocation * * This function returns the values as below. * 0 (improved) @@ -680,7 +926,7 @@ out: static int ext4_defrag_comp_ext_count(struct inode *org_inode, struct ext4_ext_path *org_path, ext4_lblk_t tar_end, - int sum_tmp) + int sum_tmp, ext4_fsblk_t goal) { struct ext4_extent *ext = NULL; int depth = ext_depth(org_inode); @@ -704,7 +950,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode, * If the goal has not been set and the fragmentation * is not improved any more, defrag fails. */ - if (sum_org == sum_tmp) { + if (sum_org == sum_tmp && !goal) { /* Not improved */ ret = 1; } else if (sum_org < sum_tmp) { @@ -735,6 +981,7 @@ ext4_defrag_comp_ext_count(struct inode *org_inode, * @req_start: starting offset to allocate in blocks * @req_blocks: the number of blocks to allocate * @iblock: file related offset + * @goal: block offset for allocation * * This function returns the value as below: * 0 (succeed) @@ -744,7 +991,8 @@ ext4_defrag_comp_ext_count(struct inode *org_inode, static int ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, struct ext4_ext_path *org_path, ext4_lblk_t req_start, - ext4_lblk_t req_blocks, ext4_lblk_t iblock) + ext4_lblk_t req_blocks, ext4_lblk_t iblock, + ext4_fsblk_t goal) { handle_t *handle; struct ext4_sb_info *sbi = EXT4_SB(org_inode->i_sb); @@ -772,7 +1020,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, /* Fill struct ext4_allocation_request with necessary info */ ext4_defrag_fill_ar(org_inode, tmp_inode, &ar, org_path, - dest_path, req_blocks, iblock); + dest_path, req_blocks, iblock, goal); handle = ext4_journal_start(tmp_inode, 0); if (IS_ERR(handle)) { @@ -807,7 +1055,7 @@ ext4_defrag_new_extent_tree(struct inode *org_inode, struct inode *tmp_inode, } ret = ext4_defrag_comp_ext_count(org_inode, org_path, req_end, - 
sum_tmp); + sum_tmp, goal); out: if (ret < 0 && ar.len) @@ -835,11 +1083,13 @@ out2: * * @org_inode: original inode * @defrag_size: size of defrag in blocks + * @goal: pointer to block offset for allocation * * This function returns 0 if succeed, otherwise returns error value. */ static int -ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size) +ext4_defrag_check(struct inode *org_inode, ext4_lblk_t defrag_size, + ext4_fsblk_t *goal) { /* Ext4 online defrag supports only extent based file */ @@ -940,13 +1190,14 @@ out: * @filp: pointer to file * @block_start: starting offset to defrag in blocks * @defrag_size: size of defrag in blocks + * @goal: block offset for allocation * * This function returns the number of blocks if succeed, otherwise * returns error value. */ int ext4_defrag(struct file *filp, ext4_lblk_t block_start, - ext4_lblk_t defrag_size) + ext4_lblk_t defrag_size, ext4_fsblk_t goal) { struct inode *org_inode = filp->f_dentry->d_inode, *tmp_inode = NULL; struct ext4_ext_path *org_path = NULL, *holecheck_path = NULL; @@ -961,7 +1212,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start, int block_len_in_page; /* Check the filesystem environment whether defrag can be done */ - ret = ext4_defrag_check(org_inode, defrag_size); + ret = ext4_defrag_check(org_inode, defrag_size, &goal); if (ret < 0) return ret; @@ -1071,14 +1322,14 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start, } /* Found an isolated block */ - if (seq_extents == 1) { + if (seq_extents == 1 && !goal) { seq_start = le32_to_cpu(ext_cur->ee_block); goto CLEANUP; } ret = ext4_defrag_new_extent_tree(org_inode, tmp_inode, org_path, seq_start, seq_blocks, - block_start); + block_start, goal); if (ret < 0) { break; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index aa3b639..ccea421 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1156,7 +1156,7 @@ extern void ext4_inode_table_set(struct super_block *sb, extern int ext4_ext_journal_restart(handle_t *handle, int 
needed); /* defrag.c */ extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start, - ext4_lblk_t defrag_size); + ext4_lblk_t defrag_size, ext4_fsblk_t goal); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 78d9174..d5e1fe7 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -217,6 +217,7 @@ setversion_out: case EXT4_IOC_DEFRAG: { struct ext4_ext_defrag_data defrag; + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; int err; if (!capable(CAP_DAC_OVERRIDE)) { @@ -231,8 +232,17 @@ setversion_out: sizeof(defrag))) return -EFAULT; + /* Check goal offset if goal offset was given from userspace */ + if (defrag.goal != -1 && ext4_blocks_count(es) + <= defrag.goal) { + printk(KERN_ERR "ext4 defrag: Invalid goal offset" + " %llu, you can set goal offset up to %llu\n", + defrag.goal, ext4_blocks_count(es) - 1); + return -EINVAL; + } + err = ext4_defrag(filp, defrag.start_offset, - defrag.defrag_size); + defrag.defrag_size, defrag.goal); return err; }