2007-12-27 11:11:41

by Akira Fujita

[permalink] [raw]
Subject: [RFC][PATCH 2/5] Allocate new contiguous blocks with mballoc

Search for contiguous free blocks with multi-block allocation (mballoc)
and allocate them for the temporary inode.

*This patch applies on top of the
ext4 git tree (linux-2.6.24-rc5):
http://repo.or.cz/r/ext4-patch-queue.git

Signed-off-by: Takashi Sato <[email protected]>
Signed-off-by: Akira Fujita <[email protected]>
---
diff -X linux-2.6.24-rc5-defrag/Documentation/dontdiff -upNr linux-2.6.24-rc5-alloc-block/fs/ext4/defrag.c linux-2.6.24-rc5-move-data/fs/ext4/defrag.c
--- linux-2.6.24-rc5-alloc-block/fs/ext4/defrag.c 1970-01-01 09:00:00.000000000 +0900
+++ linux-2.6.24-rc5-move-data/fs/ext4/defrag.c 2007-12-25 20:47:03.000000000 +0900
@@ -0,0 +1,767 @@
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/jbd2.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/falloc.h>
+#include <linux/ext4_fs_extents.h>
+#include <asm/uaccess.h>
+#include "group.h"
+
+/*
+ * this structure is used to gather extents from the tree via ioctl
+ * (EXT4_IOC_GET_EXTENTS); it is copied in from user space and then
+ * updated by ext4_ext_store_extent_cb() for every extent reported.
+ */
+struct ext4_extent_buf {
+ ext4_fsblk_t start; /* first block passed to ext4_ext_walk_space() */
+ int buflen; /* capacity of @buffer in bytes */
+ void *buffer; /* base of the destination buffer, written with copy_to_user() */
+ void *cur; /* next write position inside @buffer */
+ int err; /* negative errno on failure, otherwise count of records stored */
+};
+
+/*
+ * this structure is used to collect stats info about the tree;
+ * it is filled in for EXT4_IOC_GET_TREE_STATS and copied to user space.
+ */
+struct ext4_extent_tree_stats {
+ int depth; /* ext_depth() of the inode */
+ int extents_num; /* number of extents seen by the tree walk */
+ int leaf_num; /* number of extents that are the first entry of their leaf */
+};
+
+/*
+ * ext4_ext_walk_space() callback for EXT4_IOC_GET_EXTENTS.
+ *
+ * Copies one cached extent record to the user buffer described by @buf and
+ * advances the write cursor.  buf->err counts the records stored so far and
+ * becomes -EFAULT if the copy to user space fails.
+ *
+ * Returns EXT_CONTINUE to keep walking, EXT_BREAK to stop the walk.
+ */
+static int
+ext4_ext_store_extent_cb(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_ext_cache *newex,
+			struct ext4_extent_buf *buf)
+{
+	const size_t rec_size = sizeof(*newex);
+
+	/* Only real extents are reported; other cache entries are skipped. */
+	if (newex->ec_type != EXT4_EXT_CACHE_EXTENT)
+		return EXT_CONTINUE;
+
+	/* Stop after an earlier failure or when the next record won't fit. */
+	if (buf->err < 0 ||
+	    buf->cur - buf->buffer + rec_size > buf->buflen)
+		return EXT_BREAK;
+
+	if (copy_to_user(buf->cur, newex, rec_size)) {
+		buf->err = -EFAULT;
+		return EXT_BREAK;
+	}
+
+	buf->err++;
+	buf->cur += rec_size;
+	return EXT_CONTINUE;
+}
+
+/*
+ * ext4_ext_walk_space() callback for EXT4_IOC_GET_TREE_STATS.
+ *
+ * Counts every real extent in buf->extents_num, and counts it in
+ * buf->leaf_num as well when it is the first extent of its leaf block.
+ * Always returns EXT_CONTINUE.
+ */
+static int
+ext4_ext_collect_stats_cb(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_ext_cache *ex,
+			struct ext4_extent_tree_stats *buf)
+{
+	struct ext4_ext_path *leaf;
+
+	if (ex->ec_type == EXT4_EXT_CACHE_EXTENT) {
+		leaf = &path[ext_depth(inode)];
+		buf->extents_num++;
+		if (leaf->p_ext == EXT_FIRST_EXTENT(leaf->p_hdr))
+			buf->leaf_num++;
+	}
+	return EXT_CONTINUE;
+}
+
+/*
+ * Read the block that path[pos].p_idx points to and install it as
+ * path[pos + 1], releasing the buffer that was held there before.
+ * Returns 0 on success or -EIO when the block cannot be read.
+ */
+static int
+ext4_ext_load_child(struct inode *inode, struct ext4_ext_path *path, int pos)
+{
+	struct ext4_ext_path *parent = &path[pos];
+	struct ext4_ext_path *child = &path[pos + 1];
+
+	parent->p_block = idx_pblock(parent->p_idx);
+	if (child->p_bh)
+		brelse(child->p_bh);
+	child->p_bh = sb_bread(inode->i_sb, parent->p_block);
+	if (!child->p_bh)
+		return -EIO;
+	child->p_hdr = ext_block_hdr(child->p_bh);
+	return 0;
+}
+
+/**
+ * ext4_ext_next_extent - search for next extent and set it to "extent"
+ * @inode:	inode the path belongs to (only its super block is used,
+ *		to read tree blocks)
+ * @path:	path to the current extent; updated in place to describe
+ *		the next one
+ * @extent:	set to the next extent when one exists
+ *
+ * This function returns 0 when @extent was advanced, 1 when the current
+ * extent is already the last one (@extent is left untouched), and -EIO
+ * when a tree block could not be read.
+ */
+static int
+ext4_ext_next_extent(struct inode *inode,
+			struct ext4_ext_path *path,
+			struct ext4_extent **extent)
+{
+	int leaf = path->p_depth;
+	int level;
+	int lower;
+
+	/* Fast path: the current leaf still has an extent to the right. */
+	if (path[leaf].p_ext < EXT_LAST_EXTENT(path[leaf].p_hdr)) {
+		*extent = ++path[leaf].p_ext;
+		return 0;
+	}
+
+	/* Climb until an index block has an unvisited entry to the right. */
+	for (level = leaf - 1; level >= 0; level--) {
+		if (path[level].p_idx >= EXT_LAST_INDEX(path[level].p_hdr))
+			continue;
+
+		/* Step to the next index entry and load the block below it. */
+		path[level].p_idx++;
+		if (ext4_ext_load_child(inode, path, level))
+			return -EIO;
+
+		/* Follow the first entry of each intermediate index block. */
+		for (lower = level + 1; lower < leaf; lower++) {
+			path[lower].p_idx = EXT_FIRST_INDEX(path[lower].p_hdr);
+			if (ext4_ext_load_child(inode, path, lower))
+				return -EIO;
+		}
+
+		/* The next extent is the first one of the new leaf. */
+		*extent = EXT_FIRST_EXTENT(path[leaf].p_hdr);
+		path[leaf].p_ext = *extent;
+		return 0;
+	}
+
+	/* last_extent */
+	return 1;
+}
+
+/**
+ * ext4_ext_ioctl - handle the extent and defrag related ioctls
+ * @inode:	inode the ioctl was issued on
+ * @filp:	open file the ioctl was issued on
+ * @cmd:	ioctl command number
+ * @arg:	user-space argument; a user pointer for every command handled
+ *		here except EXT4_IOC_GET_TREE_DEPTH, which does not use it
+ *
+ * Every command except EXT4_IOC_FIBMAP requires an extent-mapped inode
+ * (EXT4_EXTENTS_FL); otherwise -EINVAL is returned.
+ *
+ * Return value by command:
+ *   EXT4_IOC_GET_EXTENTS     number of records stored, or a negative error
+ *   EXT4_IOC_GET_TREE_STATS  0, or a negative error
+ *   EXT4_IOC_GET_TREE_DEPTH  depth of the extent tree
+ *   EXT4_IOC_FIBMAP          result of put_user(), or -EFAULT
+ *   EXT4_IOC_DEFRAG          result of ext4_ext_defrag(), or -EFAULT
+ * Commands that are not handled here return 0.
+ */
+int ext4_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
+			unsigned long arg)
+{
+	int err = 0;
+
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL ||
+	      cmd == EXT4_IOC_FIBMAP))
+		return -EINVAL;
+
+	switch (cmd) {
+	case EXT4_IOC_GET_EXTENTS: {
+		struct ext4_extent_buf buf;
+
+		if (copy_from_user(&buf, (void __user *)arg, sizeof(buf)))
+			return -EFAULT;
+
+		/* The cursor and status are kernel state; never trust them. */
+		buf.cur = buf.buffer;
+		buf.err = 0;
+		mutex_lock(&EXT4_I(inode)->truncate_mutex);
+		err = ext4_ext_walk_space(inode, buf.start, EXT_MAX_BLOCK,
+				(void *)ext4_ext_store_extent_cb, &buf);
+		mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+		if (err == 0)
+			err = buf.err;
+		break;
+	}
+	case EXT4_IOC_GET_TREE_STATS: {
+		struct ext4_extent_tree_stats buf;
+
+		mutex_lock(&EXT4_I(inode)->truncate_mutex);
+		buf.depth = ext_depth(inode);
+		buf.extents_num = 0;
+		buf.leaf_num = 0;
+		err = ext4_ext_walk_space(inode, 0, EXT_MAX_BLOCK,
+				(void *)ext4_ext_collect_stats_cb, &buf);
+		mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy, which must not leak out as the ioctl result.
+		 */
+		if (!err && copy_to_user((void __user *)arg, &buf, sizeof(buf)))
+			err = -EFAULT;
+		break;
+	}
+	case EXT4_IOC_GET_TREE_DEPTH:
+		mutex_lock(&EXT4_I(inode)->truncate_mutex);
+		err = ext_depth(inode);
+		mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+		break;
+	case EXT4_IOC_FIBMAP: {
+		ext4_fsblk_t __user *p = (ext4_fsblk_t __user *)arg;
+		ext4_fsblk_t block = 0;
+		struct address_space *mapping = filp->f_mapping;
+
+		if (copy_from_user(&block, p, sizeof(block)))
+			return -EFAULT;
+
+		lock_kernel();
+		block = ext4_bmap(mapping, block);
+		unlock_kernel();
+
+		return put_user(block, p);
+	}
+	case EXT4_IOC_DEFRAG: {
+		struct ext4_ext_defrag_data defrag;
+
+		if (copy_from_user(&defrag,
+				(struct ext4_ext_defrag_data __user *)arg,
+				sizeof(defrag)))
+			return -EFAULT;
+		err = ext4_ext_defrag(filp, defrag.start_offset,
+				defrag.defrag_size, defrag.goal, defrag.flag,
+				&defrag.ext);
+		break;
+	}
+	default:
+		/* Not handled in this file: report success like before. */
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * ext4_ext_alloc_blocks - allocate contiguous blocks to temporary inode
+ * @dest_inode:		temporary inode for multiple block allocation
+ * @org_inode:		original inode
+ * @iblock:		file related offset
+ * @total_blocks:	contiguous blocks count
+ * @goal:		block offset for allocation (0: derive it from the
+ *			temporary inode's extent tree)
+ * @phase:		phase of create free space mode
+ *
+ * Allocates @total_blocks blocks with ext4_mb_new_blocks() and inserts
+ * every chunk obtained as one extent into @dest_inode, starting at
+ * logical block 0.  When anything fails, the extents already inserted
+ * are removed again with ext4_ext_remove_space().
+ *
+ * If it succeeds, the function returns the count of extents we got,
+ * otherwise it returns a negative error.
+ */
+static int ext4_ext_alloc_blocks(struct inode *dest_inode,
+		struct inode *org_inode, ext4_fsblk_t iblock,
+		ext4_fsblk_t total_blocks, ext4_fsblk_t goal, int phase)
+{
+	handle_t *handle = NULL;
+	struct ext4_ext_path *dest_path = NULL;
+	struct ext4_ext_path *org_path = NULL;
+	struct ext4_extent newex;
+	struct ext4_allocation_request ar;
+	struct super_block *org_sb = org_inode->i_sb;
+	ext4_fsblk_t newblock = 0;
+	ext4_fsblk_t rest = total_blocks;
+	ext4_fsblk_t alloc_total = 0;
+	unsigned long dest_grp_no, org_grp_no = 0, org_len, goal_grp_no;
+	unsigned long allocated;
+	ext4_grpblk_t dest_blk_off, org_blk_off, goal_blk_off;
+	int org_depth = ext_depth(org_inode);
+	int metadata = 1;
+	int count = 0;
+	int credits = 0;
+	int err = 0;
+	int err2 = 0;
+	int len_cnt = 0;
+
+	ar.len = total_blocks;
+	org_len = ar.len;
+
+	/* Calculate group number of org_inode block */
+	if (phase == DEFRAG_FORCE_VICTIM) {
+		org_path = ext4_ext_find_extent(org_inode, iblock, org_path);
+		if (IS_ERR(org_path)) {
+			/* Nothing is held yet: no path, no journal handle. */
+			return PTR_ERR(org_path);
+		}
+		ext4_get_group_no_and_offset(org_inode->i_sb,
+				ext_pblock(org_path[org_depth].p_ext),
+				&org_grp_no, &org_blk_off);
+		ar.excepted_group = org_grp_no;
+	} else {
+		ar.excepted_group = -1;
+	}
+
+	/* Find first extent. */
+	dest_path = ext4_ext_find_extent(dest_inode, iblock, dest_path);
+	if (IS_ERR(dest_path)) {
+		err = PTR_ERR(dest_path);
+		dest_path = NULL;
+		goto out_paths;
+	}
+
+	ar.inode = dest_inode;
+	ar.flags = EXT4_MB_HINT_DATA | EXT4_MB_HINT_RESERVED
+			| EXT4_MB_HINT_NOPREALLOC;
+	if (goal)
+		ar.goal = goal;
+	else
+		ar.goal = ext4_ext_find_goal(dest_inode, dest_path, iblock);
+
+	ar.logical = iblock;
+	ar.lleft = 0;
+	ar.pleft = 0;
+	ar.lright = 0;
+	ar.pright = 0;
+
+	handle = ext4_journal_start(dest_inode, credits);
+	if (IS_ERR(handle)) {
+		/* An ERR_PTR handle must never reach ext4_journal_stop(). */
+		err = PTR_ERR(handle);
+		goto out_paths;
+	}
+
+	while (alloc_total != total_blocks) {
+		credits = ext4_ext_calc_credits_for_insert(dest_inode,
+							dest_path);
+		handle = ext4_ext_journal_restart(handle,
+				credits + EXT4_TRANS_META_BLOCKS);
+		if (IS_ERR(handle)) {
+			/* No usable handle is left, only the paths to free. */
+			err = PTR_ERR(handle);
+			goto out_paths;
+		}
+
+		newblock = ext4_mb_new_blocks(handle, &ar, &err);
+		if (err) {
+			/* Failed to get the contiguous blocks */
+			goto out;
+		}
+
+		/*
+		 * The code below relies on ext4_mb_new_blocks() reporting the
+		 * length it really handed out in ar.len.  Keep a copy, because
+		 * ar.len is reused for the next request further down.
+		 */
+		allocated = ar.len;
+
+		if (allocated != org_len && phase == DEFRAG_FORCE_TRY) {
+			/*
+			 * NOTE(review): the blocks were requested for
+			 * dest_inode but are given back through org_inode,
+			 * as in the original code - confirm the accounting.
+			 */
+			ext4_free_blocks(handle, org_inode, newblock,
+						allocated, metadata);
+			/* go to force mode */
+			err = -ENOSPC;
+			goto out;
+		}
+
+		/*
+		 * If ext4_mb_new_blocks() allocates
+		 * the block which used to be the metadata block,
+		 * its dirty buffer_head causes the overwriting
+		 * with old metadata.
+		 * We should call unmap_underlying_metadata()
+		 * to clear the dirty flag.  It looks the buffer up by
+		 * itself, so no separate sb_find_get_block() is needed
+		 * (the old one took a reference that was never released).
+		 */
+		for (len_cnt = 0; len_cnt < allocated; len_cnt++)
+			unmap_underlying_metadata(org_sb->s_bdev,
+						newblock + len_cnt);
+
+		alloc_total += allocated;
+		ext4_get_group_no_and_offset(dest_inode->i_sb,
+				goal, &goal_grp_no, &goal_blk_off);
+		ext4_get_group_no_and_offset(dest_inode->i_sb,
+				newblock, &dest_grp_no, &dest_blk_off);
+
+		switch (phase) {
+		case DEFRAG_FORCE_VICTIM:
+			/* We can't allocate at the same block group */
+			if (dest_grp_no == org_grp_no) {
+				printk(KERN_ERR "defrag: Can't allocate"
+					" in same block group\n");
+				ext4_free_blocks(handle, org_inode,
+					newblock, allocated, metadata);
+				err = -ENOSPC;
+				goto out;
+			}
+			break;
+		case DEFRAG_FORCE_GATHER:
+			/* Maybe reserved blocks are already used by
+			   other process */
+			if (dest_grp_no != goal_grp_no
+				|| alloc_total != total_blocks) {
+				printk(KERN_ERR "defrag: Already used"
+					" the specified blocks\n");
+				ext4_free_blocks(handle, org_inode,
+					newblock, allocated, metadata);
+				err = -EIO;
+				goto out;
+			}
+			break;
+		default:
+			break;
+		}
+
+		newex.ee_block = cpu_to_le32(alloc_total - allocated);
+		ext4_ext_store_pblock(&newex, newblock);
+		newex.ee_len = cpu_to_le16(allocated);
+
+		/* Set up the request for whatever is still missing. */
+		if (!phase)
+			ar.goal = newblock + allocated;
+		rest = rest - allocated;
+		ar.len = rest;
+
+		err = ext4_ext_insert_extent(handle, dest_inode,
+					dest_path, &newex);
+		if (err) {
+			/*
+			 * Give back exactly the chunk that could not be
+			 * inserted; ar.len already holds the remaining
+			 * request size and must not be used here.
+			 */
+			ext4_free_blocks(handle, org_inode,
+					newblock, allocated, metadata);
+			goto out;
+		}
+		count++;
+	}
+
+out:
+	/* Failed case: We have to remove halfway blocks */
+	if (err)
+		err2 = ext4_ext_remove_space(dest_inode, 0);
+
+	ext4_journal_stop(handle);
+
+out_paths:
+	if (dest_path) {
+		ext4_ext_drop_refs(dest_path);
+		kfree(dest_path);
+	}
+	if (org_path) {
+		ext4_ext_drop_refs(org_path);
+		kfree(org_path);
+	}
+
+	if (err2)
+		return err2;
+	if (err)
+		return err;
+
+	/* return extents count */
+	return count;
+}
+
+/**
+ * ext4_ext_new_extent_tree - allocate contiguous blocks
+ * @inode:		inode of the original file
+ * @tmp_inode:		inode of the temporary file
+ * @path:		the structure holding some info about
+ *			original extent tree
+ * @tar_start:		starting offset to allocate in blocks
+ * @tar_blocks:		the number of blocks to allocate
+ * @iblock:		file related offset
+ * @goal:		block offset for allocation
+ * @flag:		phase of create free space mode
+ *
+ * Allocates @tar_blocks blocks for @tmp_inode, then counts how many
+ * extents of the original file cover the target range and compares that
+ * with the number of extents the allocation needed.  When the new layout
+ * is not better, the blocks of @tmp_inode are released again.
+ *
+ * This function returns the value as below:
+ *	0(succeeded)
+ *	1(not improved)
+ *	negative value(error)
+ */
+static int
+ext4_ext_new_extent_tree(struct inode *inode, struct inode *tmp_inode,
+			struct ext4_ext_path *path, ext4_fsblk_t tar_start,
+			ext4_fsblk_t tar_blocks, ext4_fsblk_t iblock,
+			ext4_fsblk_t goal, int flag)
+{
+	struct ext4_extent *ext = NULL;
+	struct ext4_extent_header *eh = NULL;
+	ext4_fsblk_t tar_end = tar_start + tar_blocks - 1;
+	int sum_org = 0, sum_tmp = 0;
+	int ret = 0, depth;
+	int last_extent = 0;
+
+	eh = ext_inode_hdr(tmp_inode);
+	eh->eh_depth = 0;
+
+	/* allocate contiguous blocks */
+	sum_tmp = ext4_ext_alloc_blocks(tmp_inode, inode, iblock,
+					tar_blocks, goal, flag);
+	if (sum_tmp < 0)
+		return sum_tmp;
+
+	depth = ext_depth(inode);
+	ext = path[depth].p_ext;
+	while (1) {
+		if (!last_extent)
+			++sum_org;
+
+		/*
+		 * ee_len is a 16-bit little-endian field, so it has to be
+		 * decoded with le16_to_cpu(); le32_to_cpu() gives a wrong
+		 * length on big-endian machines.
+		 */
+		if (tar_end <= le32_to_cpu(ext->ee_block) +
+			       le16_to_cpu(ext->ee_len) - 1 ||
+		    last_extent) {
+			if ((sum_org == sum_tmp) && !goal) {
+				/* not improved */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = 1;
+			} else if (sum_org < sum_tmp &&
+				   flag != DEFRAG_FORCE_VICTIM) {
+				/* fragment increased */
+				ret = ext4_ext_remove_space(tmp_inode, 0);
+				if (!ret)
+					ret = -ENOSPC;
+				printk("defrag failed due to no space\n");
+			}
+			break;
+		}
+
+		/*
+		 * @path describes the tree of the original file, so walk it
+		 * with @inode.  ext4_ext_next_extent() only uses the inode
+		 * to reach the super block.
+		 */
+		last_extent = ext4_ext_next_extent(inode, path, &ext);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * ext4_ext_defrag - defrag whole file
+ * @filp:		pointer to file
+ * @block_start:	starting offset to defrag in blocks
+ * @defrag_size:	size of defrag in blocks
+ * @goal:		block offset for allocation
+ * @flag:		phase of create free space mode
+ * @ext:		extent to be moved (only -f)
+ *
+ * Walks the extents of the file inside the requested range and groups
+ * logically adjacent extents into sequences.  For each sequence a
+ * temporary inode gets newly allocated blocks, and the data is then moved
+ * over page by page with ext4_ext_defrag_partial() or
+ * ext4_ext_defrag_partial2().
+ *
+ * i_mutex and truncate_mutex of the file are held while the extent tree
+ * is walked.
+ *
+ * This function returns the number of blocks if succeeded, otherwise
+ * returns error value
+ */
+int
+ext4_ext_defrag(struct file *filp, ext4_fsblk_t block_start,
+		ext4_fsblk_t defrag_size, ext4_fsblk_t goal,
+		int flag, struct ext4_extent_data *ext)
+{
+	struct inode *inode = filp->f_dentry->d_inode, *tmp_inode = NULL;
+	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	struct ext4_ext_path *path = NULL, *holecheck_path = NULL;
+	struct ext4_extent *ext_prev = NULL, *ext_cur = NULL, *ext_dummy = NULL;
+	handle_t *handle;
+	ext4_fsblk_t block_end = block_start + defrag_size - 1;
+	ext4_fsblk_t seq_blocks = 0, seq_start = 0;
+	ext4_fsblk_t add_blocks = 0;
+	ext4_fsblk_t file_end = (inode->i_size - 1) >> inode->i_blkbits;
+	pgoff_t page_offset = 0;
+	pgoff_t dest_offset = 0;
+	pgoff_t seq_end_page = 0;
+	int ret = 0, depth = 0, last_extent = 0, seq_extents = 0;
+
+	/* Check goal offset if goal offset was given from userspace. */
+	if (((0 < goal) && (ext4_blocks_count(es) < goal)) && (goal != -1)) {
+		printk(KERN_ERR "defrag: incorrect goal number %llu, "
+				"you can set goal until %llu\n", goal,
+				ext4_blocks_count(es));
+		/* No lock is held yet, so do not go through the unlock path. */
+		return -EINVAL;
+	}
+
+	/* Setup for fixed blocks mode */
+	if (ext->len) {
+		if (ext->len < defrag_size) {
+			printk("Cannot defrag due to the insufficient"
+					" specified free blocks\n");
+			return -EINVAL;
+		}
+		flag = DEFRAG_FORCE_GATHER;
+		goal = ext->start;
+	}
+
+	if (file_end < block_end)
+		defrag_size -= block_end - file_end;
+
+	mutex_lock(&inode->i_mutex);
+	mutex_lock(&EXT4_I(inode)->truncate_mutex);
+
+	path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(path)) {
+		ret = PTR_ERR(path);
+		path = NULL;
+		goto ERR2;
+	}
+
+	/* get path structure to check hole */
+	holecheck_path = ext4_ext_find_extent(inode, block_start, NULL);
+	if (IS_ERR(holecheck_path)) {
+		ret = PTR_ERR(holecheck_path);
+		holecheck_path = NULL;
+		goto ERR2;
+	}
+
+	depth = ext_depth(inode);
+	ext_cur = holecheck_path[depth].p_ext;
+	if (ext_cur == NULL)
+		goto ERR2;
+
+	/*
+	 * if block_start was within the hole, get proper extent whose ee_block
+	 * is beyond block_start
+	 */
+	if (le32_to_cpu(ext_cur->ee_block) +
+			le32_to_cpu(ext_cur->ee_len) - 1 < block_start) {
+		last_extent = ext4_ext_next_extent(inode, holecheck_path,
+						&ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto ERR2;
+		}
+		last_extent = ext4_ext_next_extent(inode, path, &ext_dummy);
+		if (last_extent < 0) {
+			ret = last_extent;
+			goto ERR2;
+		}
+	}
+	seq_extents = 1;
+	/* On-disk extent fields are little-endian: convert before use. */
+	seq_start = le32_to_cpu(ext_cur->ee_block);
+
+	/* no blocks existed within designated range */
+	if (le32_to_cpu(ext_cur->ee_block) > block_end) {
+		printk("nothing done due to the lack of contiguous blocks\n");
+		goto ERR2;
+	}
+
+	/* adjust start blocks */
+	add_blocks = min((ext4_fsblk_t)(le32_to_cpu(ext_cur->ee_block) +
+			 le16_to_cpu(ext_cur->ee_len)), block_end + 1) -
+		     max((ext4_fsblk_t)le32_to_cpu(ext_cur->ee_block),
+			 block_start);
+
+	while (!last_extent &&
+	       le32_to_cpu(ext_cur->ee_block) <= block_end) {
+		seq_blocks += add_blocks;
+
+		handle = ext4_journal_start(inode,
+			EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+			EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+			2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) + 1);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			/* ERR2, not ERR1: both paths must be released. */
+			goto ERR2;
+		}
+		tmp_inode = ext4_new_inode(handle,
+				inode->i_sb->s_root->d_inode, S_IFREG);
+		if (IS_ERR(tmp_inode)) {
+			/* Report the real reason instead of a fixed -ENOMEM. */
+			ret = PTR_ERR(tmp_inode);
+			ext4_journal_stop(handle);
+			tmp_inode = NULL;
+			goto ERR2;
+		}
+
+		i_size_write(tmp_inode, i_size_read(inode));
+		tmp_inode->i_nlink = 0;
+		ext4_ext_tree_init(handle, tmp_inode);
+		ext4_orphan_add(handle, tmp_inode);
+		ext4_journal_stop(handle);
+
+		/* adjust tail blocks */
+		if (seq_start + seq_blocks - 1 > block_end)
+			seq_blocks = block_end - seq_start + 1;
+
+		ext_prev = ext_cur;
+		last_extent = ext4_ext_next_extent(inode, holecheck_path,
+						&ext_cur);
+		if (last_extent < 0) {
+			ret = last_extent;
+			break;
+		}
+		if (!last_extent)
+			seq_extents++;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+
+		/*
+		 * The next extent directly continues the previous one and is
+		 * still inside the range: keep extending the current sequence.
+		 * Otherwise we found a hole or reached the tail of either the
+		 * designated range or the file.
+		 */
+		if ((le32_to_cpu(ext_prev->ee_block) +
+				le16_to_cpu(ext_prev->ee_len) ==
+				le32_to_cpu(ext_cur->ee_block) &&
+				block_end >= le32_to_cpu(ext_cur->ee_block) &&
+				!last_extent)) {
+			if (tmp_inode) {
+				iput(tmp_inode);
+				tmp_inode = NULL;
+			}
+			continue;
+		}
+
+		/* found an isolated block */
+		if ((seq_extents == 1) && !goal) {
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		ret = ext4_ext_new_extent_tree(inode, tmp_inode, path,
+			seq_start, seq_blocks, block_start, goal, flag);
+
+		if (ret < 0) {
+			break;
+		} else if ((ret == 1) && (!goal || (goal && !flag))) {
+			ret = 0;
+			seq_start = le32_to_cpu(ext_cur->ee_block);
+			goto CLEANUP;
+		}
+
+		page_offset = seq_start >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+		seq_end_page = (seq_start + seq_blocks - 1) >>
+				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+		dest_offset = 0;
+		seq_start = le32_to_cpu(ext_cur->ee_block);
+
+		/* Discard all preallocations.
+		 * This is provisional solution.
+		 * When true ext4_mb_return_to_preallocation() is
+		 * implemented, this will be removed.
+		 */
+		ext4_mb_discard_inode_preallocations(inode);
+
+		if (inode->i_mapping->a_ops->write_begin) {
+			while (page_offset <= seq_end_page) {
+				/* replace original branches for new branches */
+				ret = ext4_ext_defrag_partial2(tmp_inode,
+						filp, page_offset,
+						dest_offset, flag);
+				if (ret < 0)
+					goto ERR2;
+
+				page_offset++;
+				dest_offset++;
+			}
+		} else {
+			while (page_offset <= seq_end_page) {
+				/* replace original branches for new branches */
+				ret = ext4_ext_defrag_partial(tmp_inode,
+						filp, page_offset,
+						dest_offset, flag);
+				if (ret < 0)
+					goto ERR2;
+
+				page_offset++;
+				dest_offset++;
+			}
+		}
+
+		/* decrease buffer counter */
+		if (holecheck_path)
+			ext4_ext_drop_refs(holecheck_path);
+		holecheck_path =
+			ext4_ext_find_extent(inode, seq_start, holecheck_path);
+		if (IS_ERR(holecheck_path)) {
+			ret = PTR_ERR(holecheck_path);
+			holecheck_path = NULL;
+			break;
+		}
+		depth = holecheck_path->p_depth;
+
+CLEANUP:
+		/* decrease buffer counter */
+		if (path)
+			ext4_ext_drop_refs(path);
+		path = ext4_ext_find_extent(inode, seq_start, path);
+		if (IS_ERR(path)) {
+			ret = PTR_ERR(path);
+			path = NULL;
+			break;
+		}
+
+		/* Start a new sequence at the extent we stopped on. */
+		ext_cur = holecheck_path[depth].p_ext;
+		add_blocks = le16_to_cpu(ext_cur->ee_len);
+		seq_blocks = 0;
+		dest_offset = 0;
+		seq_extents = 1;
+
+		if (tmp_inode) {
+			iput(tmp_inode);
+			tmp_inode = NULL;
+		}
+	}
+
+ERR2:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	if (holecheck_path) {
+		ext4_ext_drop_refs(holecheck_path);
+		kfree(holecheck_path);
+	}
+
+	mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+	mutex_unlock(&inode->i_mutex);
+
+	if (tmp_inode)
+		iput(tmp_inode);
+
+	return (ret ? ret : defrag_size);
+}
diff -X linux-2.6.24-rc5-defrag/Documentation/dontdiff -upNr linux-2.6.24-rc5-alloc-block/fs/ext4/extents.c linux-2.6.24-rc5-move-
data/fs/ext4/extents.c
--- linux-2.6.24-rc5-alloc-block/fs/ext4/extents.c 2007-12-25 20:57:40.000000000 +0900
+++ linux-2.6.24-rc5-move-data/fs/ext4/extents.c 2007-12-25 20:47:33.000000000 +0900
@@ -48,7 +48,7 @@
* ext_pblock:
* combine low and high parts of physical block number into ext4_fsblk_t
*/
-static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
+ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;

@@ -92,7 +92,7 @@ static void ext4_idx_store_pblock(struct
ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

-static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;

@@ -142,7 +142,7 @@ static int ext4_ext_dirty(handle_t *hand
return err;
}

-static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
+ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_ext_path *path,
ext4_lblk_t block)
{
@@ -1948,7 +1948,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path
return 1;
}

-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
+int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
{
struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode);
diff -X linux-2.6.24-rc5-defrag/Documentation/dontdiff -upNr linux-2.6.24-rc5-alloc-block/fs/ext4/inode.c linux-2.6.24-rc5-move-data/fs/ext4/inode.c
--- linux-2.6.24-rc5-alloc-block/fs/ext4/inode.c 2007-12-25 20:58:02.000000000 +0900
+++ linux-2.6.24-rc5-move-data/fs/ext4/inode.c 2007-12-25 20:47:54.000000000 +0900
@@ -1464,7 +1464,7 @@ out:
* So, if we see any bmap calls here on a modified, data-journaled file,
* take extra steps to flush any blocks which might be in the cache.
*/
-static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+sector_t ext4_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
journal_t *journal;
diff -X linux-2.6.24-rc5-defrag/Documentation/dontdiff -upNr linux-2.6.24-rc5-alloc-block/fs/ext4/ioctl.c linux-2.6.24-rc5-move-data/fs/ext4/ioctl.c
--- linux-2.6.24-rc5-alloc-block/fs/ext4/ioctl.c 2007-12-25 20:58:29.000000000 +0900
+++ linux-2.6.24-rc5-move-data/fs/ext4/ioctl.c 2007-12-25 20:41:41.000000000 +0900
@@ -231,6 +231,19 @@ flags_err:

return err;
}
+ case EXT4_IOC_GET_EXTENTS:
+ case EXT4_IOC_GET_TREE_STATS:
+ case EXT4_IOC_GET_TREE_DEPTH:
+ case EXT4_IOC_FIBMAP:
+ case EXT4_IOC_DEFRAG:
+ case EXT4_IOC_GROUP_INFO:
+ case EXT4_IOC_FREE_BLOCKS_INFO:
+ case EXT4_IOC_EXTENTS_INFO:
+ case EXT4_IOC_RESERVE_BLOCK:
+ case EXT4_IOC_MOVE_VICTIM:
+ case EXT4_IOC_BLOCK_RELEASE: {
+ return ext4_ext_ioctl(inode, filp, cmd, arg);
+ }
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
struct super_block *sb = inode->i_sb;
diff -X linux-2.6.24-rc5-defrag/Documentation/dontdiff -upNr linux-2.6.24-rc5-alloc-block/fs/ext4/mballoc.c linux-2.6.24-rc5-move-
data/fs/ext4/mballoc.c
--- linux-2.6.24-rc5-alloc-block/fs/ext4/mballoc.c 2007-12-25 20:59:14.000000000 +0900
+++ linux-2.6.24-rc5-move-data/fs/ext4/mballoc.c 2007-12-25 20:41:41.000000000 +0900
@@ -411,6 +411,7 @@ struct ext4_allocation_context {
struct page *ac_buddy_page;
struct ext4_prealloc_space *ac_pa;
struct ext4_locality_group *ac_lg;
+ long long ac_excepted_group;
};

#define AC_STATUS_CONTINUE 1
@@ -1873,6 +1874,11 @@ repeat:
if (group == EXT4_SB(sb)->s_groups_count)
group = 0;

+ if (ac->ac_excepted_group != -1 &&
+ group == ac->ac_excepted_group) {
+ continue;
+ }
+
/* quick check to skip empty groups */
grp = EXT4_GROUP_INFO(ac->ac_sb, group);
if (grp->bb_free == 0)
@@ -4080,6 +4086,7 @@ static int ext4_mb_initialize_context(st
ac->ac_bitmap_page = NULL;
ac->ac_buddy_page = NULL;
ac->ac_lg = NULL;
+ ac->ac_excepted_group = ar->excepted_group;

/* we have to define context: we'll we work with a file or
* locality group. this is a policy, actually */