ext4: online defrag-- Move victim files for the target file (-f mode)
From: Akira Fujita <[email protected]>
Move victim files to make sufficient free space, then reallocate
contiguous blocks for the target file.
Signed-off-by: Akira Fujita <[email protected]>
Signed-off-by: Takashi Sato <[email protected]>
---
fs/ext4/balloc.c | 10 +-
fs/ext4/defrag.c | 427 +++++++++++++++++++++++++++++++++++++++++++++---
fs/ext4/ext4.h | 29 +++-
fs/ext4/ext4_extents.h | 5 +
fs/ext4/extents.c | 54 +++++--
fs/ext4/ioctl.c | 5 +-
fs/ext4/mballoc.c | 5 +
fs/ext4/mballoc.h | 1 +
8 files changed, 494 insertions(+), 42 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 49b099c..3e22d69 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -383,7 +383,7 @@ restart:
* If the goal block is within the reservation window, return 1;
* otherwise, return 0;
*/
-static int
+int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
ext4_group_t group, struct super_block *sb)
{
@@ -488,7 +488,7 @@ void ext4_rsv_window_add(struct super_block *sb,
* from the filesystem reservation window rb tree. Must be called with
* rsv_lock hold.
*/
-static void rsv_window_remove(struct super_block *sb,
+void rsv_window_remove(struct super_block *sb,
struct ext4_reserve_window_node *rsv)
{
rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -503,7 +503,7 @@ static void rsv_window_remove(struct super_block *sb,
*
* returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
*/
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
+inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
/* a valid reservation end block could not be 0 */
return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -1239,7 +1239,7 @@ static int find_next_reservable_window(
* @bitmap_bh: the block group block bitmap
*
*/
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
ext4_grpblk_t grp_goal, struct super_block *sb,
ext4_group_t group, struct buffer_head *bitmap_bh)
{
@@ -1383,7 +1383,7 @@ retry:
* expand the reservation window size if necessary on a best-effort
* basis before ext4_new_blocks() tries to allocate blocks,
*/
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
+void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
struct super_block *sb, int size)
{
struct ext4_reserve_window_node *next_rsv;
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index 6b6b873..728e8fb 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -217,6 +217,268 @@ out:
}
/**
+ * ext4_defrag_reserve_blocks - Reserve blocks for defrag
+ *
+ * @inode	target inode (must be a regular file)
+ * @goal	block reservation goal
+ * @len		blocks count to reserve
+ *
+ * Sets up a reservation window for @inode in the block group that holds
+ * @goal and, when @len exceeds EXT4_DEFAULT_RESERVE_BLOCKS, tries to
+ * extend the window by the difference.
+ *
+ * This function returns 0 if succeeded, otherwise
+ * returns error value.
+ */
+static int
+ext4_defrag_reserve_blocks(struct inode *inode, ext4_fsblk_t goal, int len)
+{
+	struct super_block *sb = inode->i_sb;
+	handle_t *handle;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext4_block_alloc_info *block_i;
+	struct ext4_reserve_window_node *my_rsv;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_target_blk;
+	int err;
+
+	/* Reject bad requests before taking any lock or journal handle */
+	if (!S_ISREG(inode->i_mode)) {
+		printk(KERN_ERR "ext4 defrag: Invalid file type\n");
+		return -EINVAL;
+	}
+	if (!sb) {
+		printk(KERN_ERR "ext4 defrag: Non-existent device\n");
+		return -ENXIO;
+	}
+
+	/*
+	 * Start the handle before taking i_data_sem so we never block on
+	 * the journal with the semaphore held (NOTE(review): confirm order).
+	 */
+	handle = ext4_journal_start(inode, EXT4_RESERVE_TRANS_BLOCKS);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	if (!EXT4_I(inode)->i_block_alloc_info)
+		ext4_init_block_alloc_info(inode);
+	block_i = EXT4_I(inode)->i_block_alloc_info;
+	if (!block_i) {
+		/* Still unset after the init call: fail instead of BUG() */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	my_rsv = &block_i->rsv_window_node;
+	if (!my_rsv->rsv_goal_size) {
+		/* Without a goal size there is no window to reserve */
+		err = -EINVAL;
+		goto out;
+	}
+
+	ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
+	bitmap_bh = read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+
+	err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
+					group_no, bitmap_bh);
+	if (err < 0) {
+		printk(KERN_ERR "ext4 defrag: Block reservation failed. "
+				"offset [%d], bg[%lu]\n",
+				grp_target_blk, group_no);
+		ext4_discard_reservation(inode);
+		goto out;
+	}
+
+	if (len > EXT4_DEFAULT_RESERVE_BLOCKS)
+		try_to_extend_reservation(my_rsv, sb,
+				len - EXT4_DEFAULT_RESERVE_BLOCKS);
+
+out:
+	up_write(&EXT4_I(inode)->i_data_sem);
+	/* bitmap_bh is still NULL if we bailed out before reading it */
+	if (bitmap_bh) {
+		ext4_journal_release_buffer(handle, bitmap_bh);
+		brelse(bitmap_bh);
+	}
+	ext4_journal_stop(handle);
+	return err;
+}
+
+/**
+ * ext4_defrag_block_within_rsv - Is the extent inside the reserved window?
+ *
+ * @inode	inode of target file
+ * @ex_start	start physical block number of the extent to check
+ * @ex_len	block length of that extent
+ *
+ * Both the first and the last block of the extent must fall inside the
+ * inode's reservation window.
+ *
+ * Returns 0 if they do, -ENOSPC if the window is empty or does not
+ * cover the extent.
+ */
+static int ext4_defrag_block_within_rsv(struct inode *inode,
+				ext4_fsblk_t ex_start, int ex_len)
+{
+	struct ext4_block_alloc_info *alloc_info =
+				EXT4_I(inode)->i_block_alloc_info;
+	struct ext4_reserve_window *window;
+	ext4_group_t grp;
+	ext4_grpblk_t first, last;
+
+	/* Block reservation should be enabled */
+	BUG_ON(!alloc_info);
+
+	/* Goal size should be set */
+	BUG_ON(!alloc_info->rsv_window_node.rsv_goal_size);
+
+	window = &alloc_info->rsv_window_node.rsv_window;
+	if (rsv_is_empty(window)) {
+		printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
+		return -ENOSPC;
+	}
+
+	ext4_get_group_no_and_offset(inode->i_sb, ex_start, &grp, &first);
+	last = first + ex_len - 1;
+
+	if (goal_in_my_reservation(window, first, grp, inode->i_sb) &&
+	    goal_in_my_reservation(window, last, grp, inode->i_sb))
+		return 0;
+
+	printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
+			"not in rsv_window\n", first, last, grp);
+	return -ENOSPC;
+}
+
+/*
+ * ext4_defrag_reserve_fblocks - Reserve free blocks for defrag
+ *
+ * @inode:	inode that receives the reservation
+ * @ext_info:	user-supplied free extents; ext[0..entries-1] are reserved
+ *
+ * Returns 0 on success, -EINVAL if entries exceeds DEFRAG_MAX_ENT (assumed
+ * capacity of ext[], TODO confirm), else the failing step's error.
+ */
+static int ext4_defrag_reserve_fblocks(struct inode *inode,
+				struct ext4_extents_info *ext_info)
+{
+	ext4_fsblk_t ex_start;
+	int i, len, ret = 0;
+
+	if (ext_info->entries > DEFRAG_MAX_ENT)	/* untrusted user count */
+		return -EINVAL;
+
+	for (i = 0; i < ext_info->entries; i++) {
+		ex_start = ext_info->ext[i].start;
+		len = ext_info->ext[i].len;
+		ret = ext4_defrag_reserve_blocks(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Block reservation failed. offset [%llu], "
+				"length [%d]\n", ex_start, len);
+			goto err;
+		}
+		ret = ext4_defrag_block_within_rsv(inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Reservation window is not set. "
+				"offset [%llu], length [%d]\n", ex_start, len);
+			goto err;
+		}
+	}
+	return ret;
+err:
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ext4_discard_reservation(inode);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	return ret;
+}
+
+/**
+ * ext4_defrag_move_victim - Create free space for defrag
+ *
+ * @target_filp	target file
+ * @ext_info	user-supplied victim inode number, goal and extents to move
+ *
+ * Returns 0 if succeeded, -EINVAL if @ext_info->entries exceeds
+ * DEFRAG_MAX_ENT (assumed capacity of ext[]), otherwise error value.
+ */
+static int ext4_defrag_move_victim(struct file *target_filp,
+				struct ext4_extents_info *ext_info)
+{
+	struct inode *target_inode = target_filp->f_dentry->d_inode;
+	struct super_block *sb = target_inode->i_sb;
+	struct file victim_file;
+	struct dentry victim_dent;
+	struct inode *victim_inode;
+	ext4_fsblk_t goal = ext_info->goal;
+	struct ext4_extent_data ext;
+	ext4_group_t group;
+	ext4_grpblk_t grp_off;
+	int i, ret;
+
+	if (ext_info->entries > DEFRAG_MAX_ENT)	/* untrusted user count */
+		return -EINVAL;
+
+	/* Setup dummy extent data */
+	ext.len = 0;
+	/* NOTE(review): ino is user-supplied and not access-checked here */
+	victim_inode = ext4_iget(sb, ext_info->ino);
+	if (IS_ERR(victim_inode))
+		return PTR_ERR(victim_inode);
+
+	/* Setup file for the victim file */
+	victim_dent.d_inode = victim_inode;
+	victim_file.f_dentry = &victim_dent;
+	victim_file.f_mapping = victim_inode->i_mapping;
+
+	/* No goal given: aim at the group after the first extent's group */
+	if (goal == -1) {
+		ext4_get_group_no_and_offset(victim_inode->i_sb,
+			ext_info->ext[0].start, &group, &grp_off);
+		goal = ext4_group_first_block_no(sb, group + 1);
+	}
+
+	for (i = 0; i < ext_info->entries; i++) {
+		/* Move original blocks to another block group */
+		ret = ext4_defrag(&victim_file, ext_info->ext[i].block,
+			ext_info->ext[i].len, goal, DEFRAG_FORCE_VICTIM, &ext);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Moving victim file failed. ino [%llu]\n",
+				ext_info->ino);
+			goto err;
+		}
+		/* Sync journal blocks before reservation */
+		ret = ext4_force_commit(sb);
+		if (ret) {
+			printk(KERN_ERR "ext4 defrag: "
+				"ext4_force_commit failed(%d)\n", ret);
+			goto err;
+		}
+	}
+
+	iput(victim_inode);
+	return 0;
+err:
+	down_write(&EXT4_I(target_inode)->i_data_sem);
+	ext4_discard_reservation(target_inode);
+	up_write(&EXT4_I(target_inode)->i_data_sem);
+	iput(victim_inode);
+	return ret;
+}
+
+/**
* ext4_defrag_fblocks_distribution - Search free blocks distribution
*
* @inode target file
@@ -379,6 +641,29 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
&ext_info, sizeof(ext_info)))
return -EFAULT;
}
+ } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_reserve_fblocks(inode, &ext_info);
+ } else if (cmd == EXT4_IOC_MOVE_VICTIM) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_move_victim(filp, &ext_info);
+
+ } else if (cmd == EXT4_IOC_BLOCK_RELEASE) {
+ down_write(&EXT4_I(inode)->i_data_sem);
+ ext4_discard_reservation(inode);
+ up_write(&EXT4_I(inode)->i_data_sem);
} else if (cmd == EXT4_IOC_DEFRAG) {
struct ext4_ext_defrag_data defrag;
@@ -387,7 +672,8 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
sizeof(defrag)))
return -EFAULT;
err = ext4_defrag(filp, defrag.start_offset,
- defrag.defrag_size, defrag.goal);
+ defrag.defrag_size, defrag.goal, defrag.flag,
+ &defrag.ext);
}
return err;
@@ -403,6 +689,7 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
* @start_ext first new extent to be merged
* @new_ext middle of new extent to be merged
* @end_ext last new extent to be merged
+ * @flag defrag mode (e.g. -f)
*
* This function returns 0 if succeed, otherwise returns error value.
*/
@@ -410,13 +697,20 @@ static int
ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *o_start,
struct ext4_extent *o_end, struct ext4_extent *start_ext,
- struct ext4_extent *new_ext, struct ext4_extent *end_ext)
+ struct ext4_extent *new_ext, struct ext4_extent *end_ext,
+ int flag)
{
struct ext4_ext_path *org_path = NULL;
ext4_lblk_t eblock = 0;
int err = 0;
int new_flag = 0;
int end_flag = 0;
+ int defrag_flag;
+
+ if (flag == DEFRAG_FORCE_VICTIM)
+ defrag_flag = 1;
+ else
+ defrag_flag = 0;
if (le16_to_cpu(start_ext->ee_len) &&
le16_to_cpu(new_ext->ee_len) &&
@@ -494,7 +788,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
org_path = NULL;
goto out;
}
- err = ext4_ext_insert_extent(handle, inode, org_path, new_ext);
+ err = ext4_ext_insert_extent_defrag(handle, inode,
+ org_path, new_ext, defrag_flag);
if (err)
goto out;
}
@@ -507,7 +802,8 @@ ext4_defrag_merge_across_blocks(handle_t *handle, struct inode *inode,
org_path = NULL;
goto out;
}
- err = ext4_ext_insert_extent(handle, inode, org_path, end_ext);
+ err = ext4_ext_insert_extent_defrag(handle, inode,
+ org_path, end_ext, defrag_flag);
if (err)
goto out;
}
@@ -588,6 +884,7 @@ ext4_defrag_merge_inside_block(handle_t *handle, struct inode *inode,
* @new_ext middle of new extent to be merged
* @end_ext last new extent to be merged
* @replaced the number of blocks which will be replaced with new_ext
+ * @flag defrag mode (e.g. -f)
*
* This function returns 0 if succeed, otherwise returns error value.
*/
@@ -596,7 +893,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *org_path,
struct ext4_extent *o_start, struct ext4_extent *o_end,
struct ext4_extent *start_ext, struct ext4_extent *new_ext,
- struct ext4_extent *end_ext, ext4_fsblk_t replaced)
+ struct ext4_extent *end_ext, ext4_fsblk_t replaced, int flag)
{
struct ext4_extent_header *eh;
unsigned need_slots, slots_range;
@@ -634,7 +931,7 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
ret = ext4_defrag_merge_across_blocks(handle, inode, o_start,
o_end, start_ext, new_ext,
- end_ext);
+ end_ext, flag);
if (ret < 0)
return ret;
} else {
@@ -667,13 +964,14 @@ ext4_defrag_merge_extents(handle_t *handle, struct inode *inode,
* @org_path path indicates first extent to be defraged
* @dext destination extent
* @from start offset on the target file
+ * @flag defrag mode (e.g. -f)
*
* This function returns 0 if succeed, otherwise returns error value.
*/
static int
ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
struct ext4_ext_path *org_path, struct ext4_extent *dext,
- ext4_lblk_t *from)
+ ext4_lblk_t *from, int flag)
{
unsigned long depth;
ext4_fsblk_t replaced = 0;
@@ -774,7 +1072,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
+ le16_to_cpu(oext->ee_len) - 1) {
ret = ext4_defrag_merge_extents(handle, org_inode,
org_path, o_start, o_end, &start_ext,
- &new_ext, &end_ext, replaced);
+ &new_ext, &end_ext, replaced, flag);
if (ret < 0)
return ret;
@@ -835,6 +1133,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
* @from_page page offset of org_inode
* @dest_from_page page offset of dest_inode
* @count_page page count to be replaced
+ * @flag defrag mode (e.g. -f)
*
* This function returns 0 if succeed, otherwise returns error value.
* Replace extents for blocks from "from" to "from + count - 1".
@@ -842,7 +1141,7 @@ ext4_defrag_leaf_block(handle_t *handle, struct inode *org_inode,
static int
ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
struct inode *dest_inode, pgoff_t from_page,
- pgoff_t dest_from_page, pgoff_t count_page)
+ pgoff_t dest_from_page, pgoff_t count_page, int flag)
{
struct ext4_ext_path *org_path = NULL;
struct ext4_ext_path *dest_path = NULL;
@@ -910,7 +1209,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
/* Loop for the original extent blocks */
err = ext4_defrag_leaf_block(handle, org_inode,
- org_path, dext, &from);
+ org_path, dext, &from, flag);
if (err < 0)
goto out;
@@ -920,7 +1219,7 @@ ext4_defrag_replace_branches(handle_t *handle, struct inode *org_inode,
* e.g. ext4_defrag_merge_extents()
*/
err = ext4_defrag_leaf_block(handle, dest_inode,
- dest_path, swap_ext, &dest_off);
+ dest_path, swap_ext, &dest_off, -1);
if (err < 0)
goto out;
@@ -1013,13 +1312,14 @@ out:
* @iblock file related offset
* @total_blocks contiguous blocks count
* @goal block offset for allocation
+ * @phase phase of the force defrag mode
*
* If succeed, fuction returns count of extent we got,
* otherwise returns err.
*/
static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
struct inode *org_inode, ext4_lblk_t iblock,
- ext4_fsblk_t total_blocks, ext4_fsblk_t goal)
+ ext4_fsblk_t total_blocks, ext4_fsblk_t goal, int phase)
{
handle_t *handle = NULL;
struct ext4_ext_path *dest_path = NULL;
@@ -1032,8 +1332,9 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
ext4_fsblk_t rest = total_blocks;
ext4_fsblk_t alloc_total = 0;
unsigned long org_len;
- ext4_group_t dest_grp_no;
- ext4_grpblk_t dest_blk_off;
+ ext4_group_t dest_grp_no, org_grp_no, goal_grp_no;
+ ext4_grpblk_t dest_blk_off, org_blk_off, goal_blk_off;
+ int org_depth = ext_depth(org_inode);
int metadata = 1;
int count = 0;
int credits = 0;
@@ -1044,6 +1345,22 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
ar.len = total_blocks;
org_len = ar.len;
+ /* Calculate group number of org_inode block */
+ if (phase == DEFRAG_FORCE_VICTIM) {
+ org_path = ext4_ext_find_extent(org_inode, iblock, org_path);
+ if (IS_ERR(org_path)) {
+ err = PTR_ERR(org_path);
+ org_path = NULL;
+ goto out2;
+ }
+ ext4_get_group_no_and_offset(org_inode->i_sb,
+ ext_pblock(org_path[org_depth].p_ext),
+ &org_grp_no, &org_blk_off);
+ ar.excepted_group = org_grp_no;
+ } else {
+ ar.excepted_group = -1;
+ }
+
/* Find first extent */
dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
if (IS_ERR(dest_path)) {
@@ -1087,6 +1404,13 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
if (err) {
/* Failed to get the contiguous blocks */
goto out;
+ } else if ((ar.len != org_len) &&
+ (phase == DEFRAG_FORCE_TRY)) {
+ ext4_free_blocks(handle, org_inode, newblock,
+ ar.len, metadata);
+ /* -ENOSPC triggers DEFRAG_FORCE_VICTIM phase. */
+ err = -ENOSPC;
+ goto out;
} else {
/*
* Dirty buffer_head causes the overwriting
@@ -1104,13 +1428,51 @@ static int ext4_defrag_alloc_blocks(struct inode *dest_inode,
alloc_total += ar.len;
ext4_get_group_no_and_offset(dest_inode->i_sb,
+ goal, &goal_grp_no, &goal_blk_off);
+ ext4_get_group_no_and_offset(dest_inode->i_sb,
newblock, &dest_grp_no, &dest_blk_off);
+ /* Extra checks that apply only in the force defrag mode */
+ switch (phase) {
+ case DEFRAG_FORCE_VICTIM:
+ /*
+ * We can't allocate new blocks in the same
+ * block group.
+ */
+ if (dest_grp_no == org_grp_no) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Failed to allocate victim file"
+ " to other block group\n");
+ ext4_free_blocks(handle, org_inode,
+ newblock, ar.len, metadata);
+ err = -ENOSPC;
+ goto out;
+ }
+ break;
+ case DEFRAG_FORCE_GATHER:
+ /*
+ * Maybe reserved blocks are already used by
+ * other process.
+ */
+ if (dest_grp_no != goal_grp_no
+ || alloc_total != total_blocks) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Reserved blocks are already "
+ "used by other process\n");
+ ext4_free_blocks(handle, org_inode,
+ newblock, ar.len, metadata);
+ err = -EIO;
+ goto out;
+ }
+ break;
+ }
+
newex.ee_block = cpu_to_le32(alloc_total - ar.len);
ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(ar.len);
- ar.goal = newblock + ar.len;
+ if (!phase)
+ ar.goal = newblock + ar.len;
rest = rest - ar.len;
ar.len = rest;
@@ -1158,12 +1520,13 @@ out2:
* @filp: pointer to file
* @org_offset: page index on original file
* @dest_offset: page index on temporary file
+ * @flag: defrag mode (e.g. -f)
*
* This function returns 0 if succeeded, otherwise returns error value.
*/
static int
ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
- pgoff_t org_offset, pgoff_t dest_offset)
+ pgoff_t org_offset, pgoff_t dest_offset, int flag)
{
struct inode *inode = filp->f_dentry->d_inode;
struct address_space *mapping = inode->i_mapping;
@@ -1233,7 +1596,7 @@ ext4_defrag_partial(struct inode *tmp_inode, struct file *filp,
/* Release old bh and drop refs */
try_to_release_page(page, 0);
ret = ext4_defrag_replace_branches(handle, inode, tmp_inode,
- org_offset, dest_offset, 1);
+ org_offset, dest_offset, 1, flag);
if (ret < 0)
goto out;
@@ -1282,6 +1645,7 @@ out:
* @tar_blocks: the number of blocks to allocate
* @iblock: file related offset
* @goal: block offset for allocaton
+ * @flag: phase of the force defrag mode
*
* This function returns the value as below:
* 0(succeeded)
@@ -1292,7 +1656,7 @@ static int
ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
struct ext4_ext_path *path, ext4_lblk_t tar_start,
ext4_lblk_t tar_blocks, ext4_lblk_t iblock,
- ext4_fsblk_t goal)
+ ext4_fsblk_t goal, int flag)
{
struct ext4_extent *ext = NULL;
struct ext4_extent_header *eh = NULL;
@@ -1306,7 +1670,7 @@ ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
/* Allocate contiguous blocks */
sum_tmp = ext4_defrag_alloc_blocks(tmp_inode, inode, iblock,
- tar_blocks, goal);
+ tar_blocks, goal, flag);
if (sum_tmp < 0) {
ret = sum_tmp;
goto out;
@@ -1328,7 +1692,8 @@ ext4_defrag_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
ret = ext4_ext_remove_space(tmp_inode, 0);
if (!ret)
ret = 1;
- } else if (sum_org < sum_tmp) {
+ } else if (sum_org < sum_tmp &&
+ flag != DEFRAG_FORCE_VICTIM) {
/* Fragment increased */
ret = ext4_ext_remove_space(tmp_inode, 0);
if (!ret)
@@ -1355,13 +1720,16 @@ out:
* @block_start: starting offset to defrag in blocks
* @defrag_size: size of defrag in blocks
* @goal: block offset for allocation
+ * @flag: phase of the force defrag mode
+ * @ext: extent to be moved (only -f)
*
* This function returns the number of blocks if succeeded, otherwise
* returns error value.
*/
int
ext4_defrag(struct file *filp, ext4_lblk_t block_start,
- ext4_lblk_t defrag_size, ext4_fsblk_t goal)
+ ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+ int flag, struct ext4_extent_data *ext)
{
struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
@@ -1397,6 +1765,17 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
return -EINVAL;
}
+ if (ext->len) {
+ /* Setup for the force defrag mode */
+ if (ext->len < defrag_size) {
+ printk(KERN_ERR "ext4 defrag: "
+ "Invalid length of extent\n");
+ return -EINVAL;
+ }
+ flag = DEFRAG_FORCE_GATHER;
+ goal = ext->start;
+ }
+
if (file_end < block_end)
defrag_size -= block_end - file_end;
@@ -1520,11 +1899,11 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
}
ret = ext4_defrag_new_extent_tree(inode, tmp_inode, path,
- seq_start, seq_blocks, block_start, goal);
+ seq_start, seq_blocks, block_start, goal, flag);
if (ret < 0) {
break;
- } else if (ret == 1) {
+ } else if ((ret == 1) && (!goal || (goal && !flag))) {
ret = 0;
seq_start = le32_to_cpu(ext_cur->ee_block);
goto CLEANUP;
@@ -1549,7 +1928,7 @@ ext4_defrag(struct file *filp, ext4_lblk_t block_start,
while (page_offset <= seq_end_page) {
/* Swap original branches with new branches */
ret = ext4_defrag_partial(tmp_inode, filp,
- page_offset, dest_offset);
+ page_offset, dest_offset, flag);
if (ret < 0)
goto out;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ad553e1..6795fe3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -94,6 +94,11 @@ struct ext4_allocation_request {
unsigned long len;
/* flags. see above EXT4_MB_HINT_* */
unsigned long flags;
+ /*
+ * for ext4 online defrag:
+ * block group the allocator must skip, or -1 to skip none
+ */
+ long long excepted_group;
};
/*
@@ -303,6 +308,9 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_INFO _IOW('f', 11, struct ext4_group_data_info)
#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 12, struct ext4_extents_info)
#define EXT4_IOC_EXTENTS_INFO _IOW('f', 13, struct ext4_extents_info)
+#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 14, struct ext4_extents_info)
+#define EXT4_IOC_MOVE_VICTIM _IOW('f', 15, struct ext4_extents_info)
+#define EXT4_IOC_BLOCK_RELEASE _IO('f', 8)
/*
* ioctl commands in 32 bit emulation
@@ -331,8 +339,15 @@ struct ext4_new_group_data {
*
* DEFRAG_MAX_ENT: the maximum number of extents for exchanging between
* kernel-space and user-space per an ioctl
+ * DEFRAG_FORCE_TRY: check whether we have free space fragmentation or not
+ * DEFRAG_FORCE_VICTIM: move victim extents to make sufficient space
+ * DEFRAG_FORCE_GATHER: move the target file into the free space made in the
+ * DEFRAG_FORCE_VICTIM phase
*/
#define DEFRAG_MAX_ENT 32
+#define DEFRAG_FORCE_TRY 1
+#define DEFRAG_FORCE_VICTIM 2
+#define DEFRAG_FORCE_GATHER 3
struct ext4_extent_data {
ext4_lblk_t block; /* start logical block number */
@@ -344,6 +359,8 @@ struct ext4_ext_defrag_data {
ext4_lblk_t start_offset; /* start offset to defrag in blocks */
ext4_lblk_t defrag_size; /* size of defrag in blocks */
ext4_fsblk_t goal; /* block offset for allocation */
+ int flag; /* free space mode flag */
+ struct ext4_extent_data ext;
};
struct ext4_group_data_info {
@@ -1037,8 +1054,17 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern void ext4_init_block_alloc_info(struct inode *);
extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
+ struct super_block *, int);
+extern int alloc_new_reservation(struct ext4_reserve_window_node *,
+ ext4_grpblk_t, struct super_block *,
+ ext4_group_t, struct buffer_head *);
extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
struct buffer_head *, ext4_grpblk_t);
+extern int rsv_is_empty(struct ext4_reserve_window *rsv);
+extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
+ ext4_grpblk_t grp_goal, ext4_group_t group,
+ struct super_block *sb);
/* dir.c */
extern int ext4_check_dir_entry(const char *, struct inode *,
@@ -1164,7 +1190,8 @@ extern void ext4_inode_table_set(struct super_block *sb,
extern handle_t *ext4_ext_journal_restart(handle_t *handle, int needed);
/* defrag.c */
extern int ext4_defrag(struct file *filp, ext4_lblk_t block_start,
- ext4_lblk_t defrag_size, ext4_fsblk_t goal);
+ ext4_lblk_t defrag_size, ext4_fsblk_t goal,
+ int flag, struct ext4_extent_data *ext);
extern int ext4_defrag_ioctl(struct inode *, struct file *, unsigned int,
unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 734c1c7..d9a6a73 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -233,5 +233,10 @@ extern void ext4_ext_drop_refs(struct ext4_ext_path *path);
extern ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t block);
+extern int ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int defrag);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
+
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e60e51b..a455c08 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -183,11 +183,17 @@ ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
static ext4_fsblk_t
ext4_ext_new_block(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *ex, int *err)
+ struct ext4_extent *ex, int *err,
+ ext4_fsblk_t defrag_goal)
{
ext4_fsblk_t goal, newblock;
- goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+ if (defrag_goal) {
+ goal = defrag_goal;
+ } else {
+ goal = ext4_ext_find_goal(inode, path,
+ le32_to_cpu(ex->ee_block));
+ }
newblock = ext4_new_block(handle, inode, goal, err);
return newblock;
}
@@ -638,7 +644,8 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
*/
static int ext4_ext_split(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext, int at)
+ struct ext4_extent *newext, int at,
+ ext4_fsblk_t defrag_goal)
{
struct buffer_head *bh = NULL;
int depth = ext_depth(inode);
@@ -688,7 +695,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* allocate all needed blocks */
ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
for (a = 0; a < depth - at; a++) {
- newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ newblock = ext4_ext_new_block(handle, inode, path,
+ newext, &err, defrag_goal);
if (newblock == 0)
goto cleanup;
ablocks[a] = newblock;
@@ -875,7 +883,8 @@ cleanup:
*/
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext)
+ struct ext4_extent *newext,
+ ext4_fsblk_t defrag_goal)
{
struct ext4_ext_path *curp = path;
struct ext4_extent_header *neh;
@@ -884,7 +893,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext4_fsblk_t newblock;
int err = 0;
- newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ newblock = ext4_ext_new_block(handle, inode, path,
+ newext, &err, defrag_goal);
if (newblock == 0)
return err;
@@ -960,7 +970,8 @@ out:
*/
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
- struct ext4_extent *newext)
+ struct ext4_extent *newext,
+ ext4_fsblk_t defrag_goal)
{
struct ext4_ext_path *curp;
int depth, i, err = 0;
@@ -980,7 +991,8 @@ repeat:
if (EXT_HAS_FREE_INDEX(curp)) {
/* if we found index with free entry, then use that
* entry: create all needed subtree and add new leaf */
- err = ext4_ext_split(handle, inode, path, newext, i);
+ err = ext4_ext_split(handle, inode, path,
+ newext, i, defrag_goal);
/* refill path */
ext4_ext_drop_refs(path);
@@ -991,7 +1003,8 @@ repeat:
err = PTR_ERR(path);
} else {
/* tree is full, time to grow in depth */
- err = ext4_ext_grow_indepth(handle, inode, path, newext);
+ err = ext4_ext_grow_indepth(handle, inode, path,
+ newext, defrag_goal);
if (err)
goto out;
@@ -1171,7 +1184,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
* allocated block. Thus, index entries have to be consistent
* with leaves.
*/
-static ext4_lblk_t
+ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
int depth;
@@ -1437,6 +1450,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *newext)
{
+ return ext4_ext_insert_extent_defrag(handle, inode, path, newext, 0);
+}
+
+/*
+ * ext4_ext_insert_extent_defrag:
+ * Same as ext4_ext_insert_extent(), except that when @defrag is non-zero
+ * the first block of newext is the goal for any newly allocated tree block.
+ */
+int
+ext4_ext_insert_extent_defrag(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int defrag)
+{
struct ext4_extent_header * eh;
struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */
@@ -1444,6 +1470,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
int depth, len, err;
ext4_lblk_t next;
unsigned uninitialized = 0;
+ ext4_fsblk_t defrag_goal;
BUG_ON(ext4_ext_get_actual_len(newext) == 0);
depth = ext_depth(inode);
@@ -1504,11 +1531,16 @@ repeat:
le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
}
+ if (defrag)
+ defrag_goal = ext_pblock(newext);
+ else
+ defrag_goal = 0;
/*
* There is no free space in the found leaf.
* We're gonna add a new leaf in the tree.
*/
- err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+ err = ext4_ext_create_new_leaf(handle, inode, path,
+ newext, defrag_goal);
if (err)
goto cleanup;
depth = ext_depth(inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index f216caa..6051901 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -235,7 +235,10 @@ flags_err:
case EXT4_IOC_DEFRAG:
case EXT4_IOC_GROUP_INFO:
case EXT4_IOC_FREE_BLOCKS_INFO:
- case EXT4_IOC_EXTENTS_INFO: {
+ case EXT4_IOC_EXTENTS_INFO:
+ case EXT4_IOC_RESERVE_BLOCK:
+ case EXT4_IOC_MOVE_VICTIM:
+ case EXT4_IOC_BLOCK_RELEASE: {
return ext4_defrag_ioctl(inode, filp, cmd, arg);
}
case EXT4_IOC_GROUP_ADD: {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519e87b..1589dbc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1750,6 +1750,10 @@ repeat:
if (group == EXT4_SB(sb)->s_groups_count)
group = 0;
+ if (ac->ac_excepted_group != -1 &&
+ group == ac->ac_excepted_group)
+ continue;
+
/* quick check to skip empty groups */
grp = ext4_get_group_info(ac->ac_sb, group);
if (grp->bb_free == 0)
@@ -3939,6 +3943,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ac->ac_bitmap_page = NULL;
ac->ac_buddy_page = NULL;
ac->ac_lg = NULL;
+ ac->ac_excepted_group = ar->excepted_group;
/* we have to define context: we'll we work with a file or
* locality group. this is a policy, actually */
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add..1141ad5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -205,6 +205,7 @@ struct ext4_allocation_context {
struct page *ac_buddy_page;
struct ext4_prealloc_space *ac_pa;
struct ext4_locality_group *ac_lg;
+ long long ac_excepted_group;
};
#define AC_STATUS_CONTINUE 1