2007-06-28 09:09:51

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 1/2] ext4_get_blocks_wrap take the truncate_mutex early.

When migrating an inode from the ext3 (indirect-mapped) format to the ext4
(extent-mapped) format, the test of the inode type and the walk of the inode
data must both happen under the lock. To achieve this, take truncate_mutex
earlier, before checking i_flags.


This should also allow us to remove verify_chain().

I have not run any performance benchmarks with this change.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/extents.c | 7 +++--
fs/ext4/inode.c | 69 +++++-----------------------------------------
include/linux/ext4_fs.h | 15 +++++++---
3 files changed, 23 insertions(+), 68 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b9ce241..8193e97 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1977,6 +1977,10 @@ void ext4_ext_release(struct super_block *sb)
#endif
}

+/*
+ * Need to be called with
+ * mutex_lock(&EXT4_I(inode)->truncate_mutex);
+ */
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result,
@@ -1991,7 +1995,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
__clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock,
max_blocks, (unsigned) inode->i_ino);
- mutex_lock(&EXT4_I(inode)->truncate_mutex);

/* check in cache */
goal = ext4_ext_in_cache(inode, iblock, &newex);
@@ -2127,8 +2130,6 @@ out2:
ext4_ext_drop_refs(path);
kfree(path);
}
- mutex_unlock(&EXT4_I(inode)->truncate_mutex);
-
return err ? err : allocated;
}

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8416fa2..255db76 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -243,13 +243,6 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
p->bh = bh;
}

-static int verify_chain(Indirect *from, Indirect *to)
-{
- while (from <= to && from->key == *from->p)
- from++;
- return (from > to);
-}
-
/**
* ext4_block_to_path - parse the block number into array of offsets
* @inode: inode in question (we are only interested in its superblock)
@@ -344,10 +337,11 @@ static int ext4_block_to_path(struct inode *inode,
* (pointer to last triple returned, *@err == 0)
* or when it gets an IO error reading an indirect block
* (ditto, *@err == -EIO)
- * or when it notices that chain had been changed while it was reading
- * (ditto, *@err == -EAGAIN)
* or when it reads all @depth-1 indirect blocks successfully and finds
* the whole chain, all way to the data (returns %NULL, *err == 0).
+ *
+ * Need to be called with
+ * mutex_lock(&EXT4_I(inode)->truncate_mutex)
*/
static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
Indirect chain[4], int *err)
@@ -365,9 +359,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
bh = sb_bread(sb, le32_to_cpu(p->key));
if (!bh)
goto failure;
- /* Reader: pointers */
- if (!verify_chain(chain, p))
- goto changed;
add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
/* Reader: end */
if (!p->key)
@@ -375,10 +366,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
}
return NULL;

-changed:
- brelse(bh);
- *err = -EAGAIN;
- goto no_block;
failure:
*err = -EIO;
no_block:
@@ -782,6 +769,10 @@ err_out:
* return > 0, # of blocks mapped or allocated.
* return = 0, if plain lookup failed.
* return < 0, error case.
+ *
+ *
+ * Need to be called with
+ * mutex_lock(&EXT4_I(inode)->truncate_mutex)
*/
int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
sector_t iblock, unsigned long maxblocks,
@@ -819,18 +810,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
while (count < maxblocks && count <= blocks_to_boundary) {
ext4_fsblk_t blk;

- if (!verify_chain(chain, partial)) {
- /*
- * Indirect block might be removed by
- * truncate while we were reading it.
- * Handling of that case: forget what we've
- * got now. Flag the err as EAGAIN, so it
- * will reread.
- */
- err = -EAGAIN;
- count = 0;
- break;
- }
blk = le32_to_cpu(*(chain[depth-1].p + count));

if (blk == first_block + count)
@@ -838,44 +817,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
else
break;
}
- if (err != -EAGAIN)
- goto got_it;
+ goto got_it;
}

/* Next simple case - plain lookup or failed read of indirect block */
if (!create || err == -EIO)
goto cleanup;

- mutex_lock(&ei->truncate_mutex);
-
- /*
- * If the indirect block is missing while we are reading
- * the chain(ext4_get_branch() returns -EAGAIN err), or
- * if the chain has been changed after we grab the semaphore,
- * (either because another process truncated this branch, or
- * another get_block allocated this branch) re-grab the chain to see if
- * the request block has been allocated or not.
- *
- * Since we already block the truncate/other get_block
- * at this point, we will have the current copy of the chain when we
- * splice the branch into the tree.
- */
- if (err == -EAGAIN || !verify_chain(chain, partial)) {
- while (partial > chain) {
- brelse(partial->bh);
- partial--;
- }
- partial = ext4_get_branch(inode, depth, offsets, chain, &err);
- if (!partial) {
- count++;
- mutex_unlock(&ei->truncate_mutex);
- if (err)
- goto cleanup;
- clear_buffer_new(bh_result);
- goto got_it;
- }
- }
-
/*
* Okay, we need to do block allocation. Lazily initialize the block
* allocation info here if necessary
@@ -917,7 +865,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
if (!err && extend_disksize && inode->i_size > ei->i_disksize)
ei->i_disksize = inode->i_size;
- mutex_unlock(&ei->truncate_mutex);
if (err)
goto cleanup;

diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index de1f9f7..ebd6f86 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -988,11 +988,18 @@ ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
int create, int extend_disksize)
{
- if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_get_blocks(handle, inode, block, max_blocks,
+ int retval;
+ mutex_lock(&EXT4_I(inode)->truncate_mutex);
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+ retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
bh, create, extend_disksize);
- return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh,
- create, extend_disksize);
+ } else {
+ retval = ext4_get_blocks_handle(handle, inode, block, max_blocks,
+ bh, create, extend_disksize);
+ }
+ mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+
+ return retval;
}


--
1.5.2.2.571.ge1341-dirty


2007-06-28 09:10:57

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 2/2] Add EXT4_IOC_MIGRATE ioctl

From: Aneesh Kumar K.V <[email protected]>

The patch below adds an ioctl for migrating an ext3 indirect-block-mapped
inode to an ext4 extent-mapped inode.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
fs/ext4/Makefile | 2 +-
fs/ext4/ioctl.c | 3 +
fs/ext4/migrate.c | 655 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/ext4_fs.h | 4 +
4 files changed, 663 insertions(+), 1 deletions(-)
create mode 100644 fs/ext4/migrate.c

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index ae6e7e5..d5fd80b 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o

ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
- ext4_jbd2.o
+ ext4_jbd2.o migrate.o

ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 500567d..6a6d72b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -248,6 +248,9 @@ flags_err:
return err;
}

+ case EXT4_IOC_MIGRATE:
+ return ext4_ext_migrate(inode, filp, cmd, arg);
+
default:
return -ENOTTY;
}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
new file mode 100644
index 0000000..523de12
--- /dev/null
+++ b/fs/ext4/migrate.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright IBM Corporation, 2007
+ * Author Aneesh Kumar K.V <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/ext4_fs_extents.h>
+
+/*
+ * One pending run of blocks collected while the indirect map is walked.
+ * The run is contiguous both in file (logical) block numbers and in
+ * on-disk (physical) block numbers.  first_pblock == 0 means that no run
+ * is currently pending.
+ */
+struct list_blocks_struct {
+	int first_block;		/* first logical block of the run */
+	int last_block;			/* last logical block of the run */
+	ext4_fsblk_t first_pblock;	/* first physical block of the run */
+	ext4_fsblk_t last_pblock;	/* last physical block of the run */
+};
+
+/*
+ * Store physical block number @pb into extent @ex: the low 32 bits go to
+ * ee_start, the next 16 bits to ee_start_hi.
+ *
+ * The high part is obtained with two shifts (31, then 1) rather than one
+ * shift by 32 -- presumably so the expression stays well defined if
+ * ext4_fsblk_t is only 32 bits wide; confirm against its definition.
+ *
+ * Will go away.
+ */
+static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
+{
+	unsigned long low = (unsigned long) (pb & 0xffffffff);
+	unsigned long high = (unsigned long) ((pb >> 31) >> 1) & 0xffff;
+
+	ex->ee_start = cpu_to_le32(low);
+	ex->ee_start_hi = cpu_to_le16(high);
+}
+/*
+ * Estimate how many journal credits inserting one more extent into @inode
+ * may need.
+ *
+ * @path: lookup path for the insertion point, or NULL.
+ *
+ * Returns 0 when the leaf at the end of @path still has a free slot,
+ * otherwise a worst-case credit count for a tree that has to be split.
+ */
+static int ext4_get_credit(struct inode *inode, struct ext4_ext_path *path)
+{
+	/*
+	 * With 32-bit logical block numbers the tree can be at most 4
+	 * levels deep; one more level is added for imbalance.
+	 */
+	const int worst_depth = 5;
+	int needed;
+
+	if (path) {
+		int depth = ext_depth(inode);
+
+		/*
+		 * Room left in the leaf: all extents are added in one go, so
+		 * the credit for writing this block is already accounted for.
+		 */
+		if (le16_to_cpu(path[depth].p_hdr->eh_entries)
+				< le16_to_cpu(path[depth].p_hdr->eh_max))
+			return 0;
+	}
+
+	/* allocation of new data block(s) */
+	needed = 2;
+
+	/*
+	 * The tree may be full and have to grow in depth: one credit to
+	 * modify the old root; the new root is covered by the split
+	 * accounting below.
+	 */
+	needed += 1;
+
+	/*
+	 * Index split: allocate intermediate indexes (bitmap + group) and
+	 * change two blocks at each level, except the root (already
+	 * included above).
+	 */
+	needed += (worst_depth * 2) + (worst_depth * 2);
+
+	return needed;
+}
+
+/*
+ * Flush the run of blocks pending in @lb into the extent tree of @inode
+ * as a single extent.
+ *
+ * @handle: running journal handle; it may be extended or restarted here.
+ * @inode:  inode whose extent tree receives the extent (the temporary
+ *          inode during migration).
+ * @lb:     pending run; marked empty (first_pblock = 0) on return whether
+ *          or not the insert succeeded.
+ *
+ * Returns 0 on success or when no run is pending, a negative errno
+ * otherwise.
+ */
+static int finish_range(handle_t *handle, struct inode *inode,
+				struct list_blocks_struct *lb)
+{
+	int retval = 0, needed;
+	struct ext4_extent newext;
+	struct ext4_ext_path *path;
+
+	if (lb->first_pblock == 0)
+		return 0;
+
+	/* Build the extent that describes the pending run */
+	newext.ee_block = cpu_to_le32(lb->first_block);
+	newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
+	ext4_ext_store_pblock(&newext, lb->first_pblock);
+
+	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
+	if (IS_ERR(path)) {
+		retval = PTR_ERR(path);
+		/* nothing to release on the way out */
+		path = NULL;
+		goto err_out;
+	}
+
+	/* Calculate the credits needed for inserting this extent */
+	needed = ext4_get_credit(inode, path);
+
+	/*
+	 * Make sure the credits we have accumulated are not really high
+	 */
+	if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
+		/*
+		 * +2 is needed for the original inode write and
+		 * super block modification
+		 */
+		ext4_mark_inode_dirty(handle, inode);
+		retval = ext4_journal_restart(handle, needed + 2);
+		if (retval)
+			goto err_out;
+	}
+
+	if (needed) {
+		retval = ext4_journal_extend(handle, needed);
+		if (retval) {
+			/*
+			 * Could not extend the handle: restart it instead.
+			 *
+			 * +2 is needed for the original inode write and
+			 * super block modification
+			 */
+			ext4_mark_inode_dirty(handle, inode);
+			retval = ext4_journal_restart(handle, needed + 2);
+			if (retval)
+				goto err_out;
+		}
+	}
+
+	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
+
+err_out:
+	/*
+	 * The lookup path is owned by us: drop the buffer references it
+	 * holds and free the array, otherwise both leak on every extent.
+	 *
+	 * NOTE(review): ext4_ext_drop_refs() must be callable from outside
+	 * extents.c -- confirm it is declared in ext4_fs_extents.h.
+	 */
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	lb->first_pblock = 0;
+	return retval;
+}
+/*
+ * Account for one mapped block: logical block @blk_num lives at physical
+ * block @pblock.
+ *
+ * If the block continues the run pending in @lb (both numbers follow the
+ * run's last ones), the run simply grows.  Otherwise the pending run is
+ * flushed through finish_range() and a new one-block run is started; the
+ * new run is started even when the flush failed.
+ *
+ * Returns 0, or the error reported by finish_range().
+ */
+static int update_extent_range(handle_t *handle, struct inode *inode,
+				ext4_fsblk_t pblock, int blk_num,
+				struct list_blocks_struct *lb)
+{
+	int retval = 0;
+	int continues_run;
+
+	continues_run = lb->first_pblock &&
+			(lb->last_pblock + 1 == pblock) &&
+			(lb->last_block + 1 == blk_num);
+
+	if (!continues_run) {
+		/* Close the old run and open a new one at this block */
+		retval = finish_range(handle, inode, lb);
+		lb->first_pblock = pblock;
+		lb->first_block = blk_num;
+	}
+
+	/* In both cases this block is now the end of the pending run */
+	lb->last_pblock = pblock;
+	lb->last_block = blk_num;
+
+	return retval;
+}
+
+/*
+ * Walk one single-indirect block and feed every mapped data block to
+ * update_extent_range().
+ *
+ * @pblock:   physical block number of the indirect block, 0 for a hole.
+ * @blk_nump: in: logical block mapped by the first slot; out: advanced
+ *            past every slot that was visited (all of them for a hole).
+ *
+ * Returns 0 on success, -EIO if the indirect block cannot be read, or the
+ * error reported by update_extent_range().
+ */
+static int update_ind_extent_range(handle_t *handle, struct inode *inode,
+				ext4_fsblk_t pblock, int *blk_nump,
+				struct list_blocks_struct *lb)
+{
+	/* 32 bit block addresses: 4 bytes per slot */
+	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+	struct buffer_head *bh;
+	__le32 *slots;
+	int idx, retval = 0;
+	int lblk = *blk_nump;
+
+	if (!pblock) {
+		/* Hole: only the file block number moves on */
+		*blk_nump += max_entries;
+		return 0;
+	}
+
+	bh = sb_bread(inode->i_sb, pblock);
+	if (!bh)
+		return -EIO;
+
+	slots = (__le32 *)bh->b_data;
+
+	for (idx = 0; idx < max_entries; idx++, lblk++) {
+		if (!slots[idx])
+			continue;
+
+		retval = update_extent_range(handle, inode,
+					le32_to_cpu(slots[idx]), lblk, lb);
+		if (retval)
+			break;
+	}
+
+	brelse(bh);
+
+	/* Report how far the file block number has advanced */
+	*blk_nump = lblk;
+	return retval;
+}
+/*
+ * Walk one double-indirect block, descending into every single-indirect
+ * block it references via update_ind_extent_range().
+ *
+ * @pblock:   physical block number of the double-indirect block, 0 for a
+ *            hole.
+ * @blk_nump: in: logical block mapped by the first slot; out: advanced
+ *            past everything that was visited or skipped.
+ *
+ * Returns 0 on success, -EIO if the block cannot be read, or the error
+ * reported by the lower level.
+ */
+static int update_dind_extent_range(handle_t *handle, struct inode *inode,
+				ext4_fsblk_t pblock, int *blk_nump,
+				struct list_blocks_struct *lb)
+{
+	/* 32 bit block addresses: 4 bytes per slot */
+	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+	struct buffer_head *bh;
+	__le32 *slots;
+	int idx, retval = 0;
+	int lblk = *blk_nump;
+
+	if (!pblock) {
+		/* Hole: only the file block number moves on */
+		*blk_nump += max_entries * max_entries;
+		return 0;
+	}
+
+	bh = sb_bread(inode->i_sb, pblock);
+	if (!bh)
+		return -EIO;
+
+	slots = (__le32 *)bh->b_data;
+
+	for (idx = 0; idx < max_entries; idx++) {
+		if (!slots[idx]) {
+			/* Missing indirect block: skip what it would map */
+			lblk += max_entries;
+			continue;
+		}
+
+		retval = update_ind_extent_range(handle, inode,
+					le32_to_cpu(slots[idx]), &lblk, lb);
+		if (retval)
+			break;
+	}
+
+	brelse(bh);
+
+	/* Report how far the file block number has advanced */
+	*blk_nump = lblk;
+	return retval;
+}
+/*
+ * Walk the triple-indirect block, descending into every double-indirect
+ * block it references via update_dind_extent_range().
+ *
+ * @pblock:   physical block number of the triple-indirect block, 0 for a
+ *            hole.
+ * @blk_nump: in: logical block mapped by the first slot; out: advanced
+ *            past everything that was visited or skipped.
+ *
+ * Returns 0 on success, -EIO if the block cannot be read, or the error
+ * reported by the lower level.
+ */
+static int update_tind_extent_range(handle_t *handle, struct inode *inode,
+				ext4_fsblk_t pblock, int *blk_nump,
+				struct list_blocks_struct *lb)
+{
+	/* 32 bit block addresses: 4 bytes per slot */
+	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+	struct buffer_head *bh;
+	__le32 *slots;
+	int idx, retval = 0;
+	int lblk = *blk_nump;
+
+	if (!pblock) {
+		/* Hole: only the file block number moves on */
+		*blk_nump += max_entries * max_entries * max_entries;
+		return 0;
+	}
+
+	bh = sb_bread(inode->i_sb, pblock);
+	if (!bh)
+		return -EIO;
+
+	slots = (__le32 *)bh->b_data;
+
+	for (idx = 0; idx < max_entries; idx++) {
+		if (!slots[idx]) {
+			/* Missing double-indirect block: skip its range */
+			lblk += max_entries * max_entries;
+			continue;
+		}
+
+		retval = update_dind_extent_range(handle, inode,
+					le32_to_cpu(slots[idx]), &lblk, lb);
+		if (retval)
+			break;
+	}
+
+	brelse(bh);
+
+	/* Report how far the file block number has advanced */
+	*blk_nump = lblk;
+	return retval;
+}
+
+
+/*
+ * Free a double-indirect block together with every block its slots
+ * reference (the single-indirect blocks; what they point to is left
+ * alone).
+ *
+ * @i_data: on-disk (little-endian) block number of the double-indirect
+ *          block.
+ *
+ * Returns 0 on success or -EIO if the block cannot be read.
+ */
+static int free_dind_blocks(handle_t *handle,
+				struct inode *inode, __le32 i_data)
+{
+	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+	struct buffer_head *bh;
+	__le32 *slot, *end;
+
+	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
+	if (!bh)
+		return -EIO;
+
+	slot = (__le32 *)bh->b_data;
+	end = slot + max_entries;
+
+	for (; slot < end; slot++) {
+		if (!*slot)
+			continue;
+		ext4_free_blocks(handle, inode, le32_to_cpu(*slot), 1);
+	}
+	brelse(bh);
+
+	/* Finally release the double-indirect block itself */
+	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
+
+	return 0;
+}
+
+/*
+ * Free the triple-indirect block and, through free_dind_blocks(), every
+ * double-indirect block it references.
+ *
+ * @i_data: on-disk (little-endian) block number of the triple-indirect
+ *          block.
+ *
+ * Returns 0 on success, -EIO if the block cannot be read, or the error
+ * from free_dind_blocks(); in the error cases the triple-indirect block
+ * itself is not freed.
+ */
+static int free_tind_blocks(handle_t *handle,
+				struct inode *inode, __le32 i_data)
+{
+	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
+	struct buffer_head *bh;
+	__le32 *slots;
+	int idx, retval = 0;
+
+	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
+	if (!bh)
+		return -EIO;
+
+	slots = (__le32 *)bh->b_data;
+
+	for (idx = 0; idx < max_entries; idx++) {
+		if (!slots[idx])
+			continue;
+
+		retval = free_dind_blocks(handle, inode, slots[idx]);
+		if (retval)
+			break;
+	}
+	brelse(bh);
+
+	if (retval)
+		return retval;
+
+	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1);
+	return 0;
+}
+
+/*
+ * Free the indirect mapping blocks referenced from the i_data of @inode:
+ * the single-indirect block, then the double- and triple-indirect trees.
+ * Data blocks are not touched.
+ *
+ * Returns 0 on success or the first error from the lower-level helpers.
+ */
+static int free_ind_block(handle_t *handle, struct inode *inode)
+{
+	__le32 *map = EXT4_I(inode)->i_data;
+	int retval = 0;
+
+	if (map[EXT4_IND_BLOCK])
+		ext4_free_blocks(handle, inode,
+				le32_to_cpu(map[EXT4_IND_BLOCK]), 1);
+
+	if (map[EXT4_DIND_BLOCK])
+		retval = free_dind_blocks(handle, inode,
+					map[EXT4_DIND_BLOCK]);
+
+	/* Stop at the first failure, as soon as it is seen */
+	if (!retval && map[EXT4_TIND_BLOCK])
+		retval = free_tind_blocks(handle, inode,
+					map[EXT4_TIND_BLOCK]);
+
+	return retval;
+}
+/*
+ * Switch @inode over to the extent map that was built in @tmp_inode.
+ *
+ * The old indirect blocks of @inode are freed, EXT4_EXTENTS_FL is set and
+ * the i_data of @tmp_inode is copied into @inode.
+ *
+ * @retval: incoming value is ignored; it is overwritten immediately.
+ *
+ * Returns 0 on success or the error from free_ind_block(), in which case
+ * @inode is left unmodified by this function.
+ */
+static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
+				struct inode *tmp_inode, int retval)
+{
+	struct ext4_inode_info *dst = EXT4_I(inode);
+	struct ext4_inode_info *src = EXT4_I(tmp_inode);
+
+	retval = free_ind_block(handle, inode);
+	if (retval)
+		return retval;
+
+	/*
+	 * The extent map has been built in the tmp inode;
+	 * copy its i_data across.
+	 */
+	dst->i_flags |= EXT4_EXTENTS_FL;
+	memcpy(dst->i_data, src->i_data, sizeof(dst->i_data));
+
+	/*
+	 * Add the blocks that were allocated for extent index blocks
+	 * while the extents were inserted.
+	 *
+	 * The quota APIs need not be used for the original inode here:
+	 * the quota update already happened via tmp_inode.
+	 */
+	spin_lock(&inode->i_lock);
+	inode->i_blocks += tmp_inode->i_blocks;
+	spin_unlock(&inode->i_lock);
+
+	ext4_mark_inode_dirty(handle, inode);
+
+	return retval;
+}
+
+/*
+ * Return the physical block number an extent index entry points to,
+ * combining the low 32 bits (ei_leaf) with the high 16 bits (ei_leaf_hi).
+ *
+ * Will go away.
+ */
+static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
+{
+	ext4_fsblk_t low = le32_to_cpu(ix->ei_leaf);
+	ext4_fsblk_t high = (ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi);
+
+	/* shift in two steps, exactly as the value is stored */
+	return low | ((high << 31) << 1);
+}
+
+/*
+ * Free the extent tree block that index entry @ix points to and,
+ * recursively, every index/leaf block below it.
+ *
+ * The buffer of the block is released on every path, and an index block
+ * is itself freed once all of its children have been freed.  If freeing
+ * a child fails the block is left allocated, so that the part of the
+ * tree that was not freed stays referenced.
+ *
+ * Returns 0 on success, -EIO if a block cannot be read, or the error
+ * from a lower level.
+ */
+static int free_ext_idx(handle_t *handle, struct inode *inode,
+					struct ext4_extent_idx *ix)
+{
+	int i, retval = 0;
+	ext4_fsblk_t block;
+	struct buffer_head *bh;
+	struct ext4_extent_header *eh;
+
+	block = idx_pblock(ix);
+	bh = sb_bread(inode->i_sb, block);
+	if (!bh)
+		return -EIO;
+
+	eh = (struct ext4_extent_header *)bh->b_data;
+	if (eh->eh_depth != 0) {
+		/* Index block: free everything below it first */
+		ix = EXT_FIRST_INDEX(eh);
+		for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
+			retval = free_ext_idx(handle, inode, ix);
+			if (retval)
+				break;
+		}
+	}
+
+	/* The block contents are not needed past this point */
+	brelse(bh);
+
+	if (retval)
+		return retval;
+
+	ext4_free_blocks(handle, inode, block, 1);
+	return 0;
+}
+/*
+ * Free the extent meta data blocks of @inode only; the data blocks the
+ * extents describe are left alone.
+ *
+ * Returns 0 on success or the first error from free_ext_idx().
+ */
+static int free_ext_block(handle_t *handle, struct inode *inode)
+{
+	struct ext4_extent_header *eh =
+			(struct ext4_extent_header *)EXT4_I(inode)->i_data;
+	struct ext4_extent_idx *ix;
+	int i, retval = 0;
+
+	/* Depth 0: no extra blocks were allocated for extent meta data */
+	if (eh->eh_depth == 0)
+		return 0;
+
+	for (i = 0, ix = EXT_FIRST_INDEX(eh);
+			i < le16_to_cpu(eh->eh_entries); i++, ix++) {
+		retval = free_ext_idx(handle, inode, ix);
+		if (retval)
+			break;
+	}
+
+	return retval;
+}
+/*
+ * ext4_ext_migrate - convert an indirect-block mapped inode to extents
+ * @inode: inode to convert
+ * @filp:  unused
+ * @cmd:   unused
+ * @arg:   unused
+ *
+ * An extent tree describing the data blocks of @inode is built in a
+ * temporary inode; on success the indirect blocks of @inode are freed and
+ * the i_data of the temporary inode is copied into @inode.  The temporary
+ * inode is always dropped before returning.
+ *
+ * The whole conversion, including the EXT4_EXTENTS_FL test, runs under
+ * truncate_mutex of @inode so that the inode type cannot change between
+ * the test and the walk of its data.
+ *
+ * NOTE(review): the journal handles are started while truncate_mutex is
+ * held, whereas ext4_get_blocks_wrap() takes the mutex with a handle
+ * already running -- confirm that this ordering cannot deadlock.
+ *
+ * Returns 0 on success; -EINVAL if the filesystem is mounted without
+ * extents or the inode already uses them; otherwise a negative errno from
+ * the journal, inode allocation or extent code.
+ */
+int ext4_ext_migrate(struct inode *inode, struct file *filp,
+				unsigned int cmd, unsigned long arg)
+{
+	handle_t *handle;
+	int retval = 0, i;
+	__le32 *i_data;
+	int blk_count = 0;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct inode *tmp_inode = NULL;
+	struct list_blocks_struct lb;
+	unsigned long max_entries;
+
+	/* if mounted with noextents we don't allow the migrate */
+	if (!test_opt(inode->i_sb, EXTENTS))
+		return -EINVAL;
+
+	mutex_lock(&ei->truncate_mutex);
+
+	/* Tested under the mutex so that two migrations cannot race */
+	if (ei->i_flags & EXT4_EXTENTS_FL) {
+		retval = -EINVAL;
+		goto out_unlock;
+	}
+
+	handle = ext4_journal_start(inode,
+				EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+				EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+				2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
+				+ 1);
+	if (IS_ERR(handle)) {
+		/* no handle and no tmp inode yet: nothing to undo */
+		retval = PTR_ERR(handle);
+		goto out_unlock;
+	}
+
+	tmp_inode = ext4_new_inode(handle,
+				inode->i_sb->s_root->d_inode,
+				S_IFREG);
+	if (IS_ERR(tmp_inode)) {
+		/* report the real reason rather than a fixed -ENOMEM */
+		retval = PTR_ERR(tmp_inode);
+		tmp_inode = NULL;
+		ext4_journal_stop(handle);
+		goto out_unlock;
+	}
+
+	i_size_write(tmp_inode, i_size_read(inode));
+	/*
+	 * We don't want the inode to be reclaimed
+	 * if we got interrupted in between. We have
+	 * this tmp inode carrying reference to the
+	 * data blocks of the original file. We set
+	 * the i_nlink to zero at the last stage after
+	 * switching the original file to extent format
+	 */
+	tmp_inode->i_nlink = 1;
+
+	ext4_ext_tree_init(handle, tmp_inode);
+	ext4_orphan_add(handle, tmp_inode);
+	ext4_journal_stop(handle);
+
+	i_data = ei->i_data;
+	memset(&lb, 0, sizeof(lb));
+
+	/* 32 bit block address 4 bytes */
+	max_entries = inode->i_sb->s_blocksize >> 2;
+
+	/*
+	 * Start with one credit accounted for writing the
+	 * i_data field of the original inode and
+	 * one credit for superblock modification.
+	 *
+	 * For the tmp_inode we have already committed the
+	 * transaction that created the inode. Later, as and
+	 * when we add extents, we extend the journal handle.
+	 */
+	handle = ext4_journal_start(inode, 2);
+	if (IS_ERR(handle)) {
+		retval = PTR_ERR(handle);
+		/* the tmp inode still has to be dropped, without a handle */
+		handle = NULL;
+		goto drop_tmp_inode;
+	}
+
+	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
+		if (i_data[i]) {
+			retval = update_extent_range(handle, tmp_inode,
+						le32_to_cpu(i_data[i]),
+						blk_count, &lb);
+			if (retval)
+				goto err_out;
+		}
+	}
+
+	if (i_data[EXT4_IND_BLOCK]) {
+		retval = update_ind_extent_range(handle, tmp_inode,
+					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
+					&blk_count, &lb);
+		if (retval)
+			goto err_out;
+	} else {
+		blk_count += max_entries;
+	}
+
+	if (i_data[EXT4_DIND_BLOCK]) {
+		retval = update_dind_extent_range(handle, tmp_inode,
+					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
+					&blk_count, &lb);
+		if (retval)
+			goto err_out;
+	} else {
+		blk_count += max_entries * max_entries;
+	}
+
+	if (i_data[EXT4_TIND_BLOCK]) {
+		retval = update_tind_extent_range(handle, tmp_inode,
+					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
+					&blk_count, &lb);
+		if (retval)
+			goto err_out;
+	}
+
+	/*
+	 * Build the last extent
+	 */
+	retval = finish_range(handle, tmp_inode, &lb);
+
+err_out:
+	if (retval) {
+		/*
+		 * Failure case: delete the extent information held by
+		 * the tmp_inode
+		 */
+		free_ext_block(handle, tmp_inode);
+	} else {
+		retval = ext4_ext_swap_inode_data(handle, inode,
+						tmp_inode, retval);
+	}
+
+drop_tmp_inode:
+	/*
+	 * Mark the tmp_inode as of size zero
+	 */
+	i_size_write(tmp_inode, 0);
+
+	/*
+	 * set the i_blocks count to zero
+	 * so that the ext4_delete_inode does the
+	 * right job
+	 *
+	 * FIXME!! do we need to take the lock ?
+	 */
+	spin_lock(&tmp_inode->i_lock);
+	tmp_inode->i_blocks = 0;
+	spin_unlock(&tmp_inode->i_lock);
+
+	/* Reset the extent details; this needs a running handle */
+	if (handle)
+		ext4_ext_tree_init(handle, tmp_inode);
+
+	/*
+	 * Set the i_nlink to zero so that
+	 * generic_drop_inode really deletes the
+	 * inode
+	 */
+	tmp_inode->i_nlink = 0;
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+out_unlock:
+	mutex_unlock(&ei->truncate_mutex);
+
+	if (tmp_inode)
+		iput(tmp_inode);
+
+	return retval;
+}
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h
index ebd6f86..f66fb7c 100644
--- a/include/linux/ext4_fs.h
+++ b/include/linux/ext4_fs.h
@@ -242,6 +242,7 @@ struct ext4_new_group_data {
#endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
+#define EXT4_IOC_MIGRATE _IO('f', 7)

/*
* ioctl commands in 32 bit emulation
@@ -872,6 +873,9 @@ extern int ext4_ioctl (struct inode *, struct file *, unsigned int,
unsigned long);
extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);

+/* migrate.c */
+extern int ext4_ext_migrate (struct inode *, struct file *, unsigned int,
+ unsigned long);
/* namei.c */
extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
--
1.5.2.2.571.ge1341-dirty