2008-09-27 07:27:14

by Akira Fujita

[permalink] [raw]
Subject: [RFC][PATCH 9/12]ext4: Add the EXT4_IOC_RESERVE_BLOCK ioctl

ext4: online defrag -- Add the EXT4_IOC_RESERVE_BLOCK ioctl.

From: Akira Fujita <[email protected]>

The EXT4_IOC_RESERVE_BLOCK ioctl reserves the specified range of
contiguous free blocks using the ext4 block reservation function.
This ioctl is used only by the force defrag mode (-f).

Block reservation and multi-block allocation are mutually
exclusive, so this ioctl will be removed in the next version.

Signed-off-by: Akira Fujita <[email protected]>
Signed-off-by: Takashi Sato <[email protected]>
---
fs/ext4/balloc.c | 10 ++--
fs/ext4/defrag.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/ext4.h | 10 +++
fs/ext4/ioctl.c | 3 +-
4 files changed, 213 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 64ec04c..2344a96 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -433,7 +433,7 @@ restart:
* If the goal block is within the reservation window, return 1;
* otherwise, return 0;
*/
-static int
+int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
ext4_group_t group, struct super_block *sb)
{
@@ -538,7 +538,7 @@ void ext4_rsv_window_add(struct super_block *sb,
* from the filesystem reservation window rb tree. Must be called with
* rsv_lock hold.
*/
-static void rsv_window_remove(struct super_block *sb,
+void rsv_window_remove(struct super_block *sb,
struct ext4_reserve_window_node *rsv)
{
rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -553,7 +553,7 @@ static void rsv_window_remove(struct super_block *sb,
*
* returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
*/
-static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
+inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
/* a valid reservation end block could not be 0 */
return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
@@ -1289,7 +1289,7 @@ static int find_next_reservable_window(
* @bitmap_bh: the block group block bitmap
*
*/
-static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
+int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
ext4_grpblk_t grp_goal, struct super_block *sb,
ext4_group_t group, struct buffer_head *bitmap_bh)
{
@@ -1433,7 +1433,7 @@ retry:
* expand the reservation window size if necessary on a best-effort
* basis before ext4_new_blocks() tries to allocate blocks,
*/
-static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
+void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
struct super_block *sb, int size)
{
struct ext4_reserve_window_node *next_rsv;
diff --git a/fs/ext4/defrag.c b/fs/ext4/defrag.c
index f7c99de..26fb4a6 100644
--- a/fs/ext4/defrag.c
+++ b/fs/ext4/defrag.c
@@ -193,6 +193,193 @@ out:
}

/**
+ * ext4_defrag_reserve_blocks - Reserve blocks for defrag
+ *
+ * @org_inode:	original inode; must be a regular file
+ * @goal:	filesystem block number the reservation window should start at
+ * @len:	number of blocks we need to reserve
+ *
+ * Under @org_inode's i_data_sem and a journal handle, this reads the block
+ * bitmap of the group containing @goal, gets undo access to it and asks
+ * alloc_new_reservation() for a window at @goal.  If @len is larger than
+ * EXT4_DEFAULT_RESERVE_BLOCKS the window is then extended on a best-effort
+ * basis by try_to_extend_reservation().
+ *
+ * This function returns 0 if succeed, otherwise returns error value.
+ */
+static int
+ext4_defrag_reserve_blocks(struct inode *org_inode, ext4_fsblk_t goal, int len)
+{
+	struct super_block *sb = org_inode->i_sb;
+	handle_t *handle;
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext4_block_alloc_info *block_i;
+	struct ext4_reserve_window_node *my_rsv;
+	ext4_group_t group_no;
+	ext4_grpblk_t grp_target_blk;
+	int undo_access = 0;
+	int err = 0;
+
+	/* Reject unsupported file types before taking any lock or handle */
+	if (!S_ISREG(org_inode->i_mode)) {
+		printk(KERN_ERR "ext4 defrag: Invalid file type\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * NOTE(review): i_data_sem is taken before the journal handle is
+	 * started, as in the original patch - confirm this matches the
+	 * lock ordering used by the rest of ext4.
+	 */
+	down_write(&EXT4_I(org_inode)->i_data_sem);
+
+	handle = ext4_journal_start(org_inode, EXT4_RESERVE_TRANS_BLOCKS);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		handle = NULL;
+		goto out;
+	}
+
+	if (!EXT4_I(org_inode)->i_block_alloc_info)
+		ext4_init_block_alloc_info(org_inode);
+
+	block_i = EXT4_I(org_inode)->i_block_alloc_info;
+	if (!block_i) {
+		/*
+		 * Still unset after the init call: presumably the allocation
+		 * failed.  Report it instead of crashing with BUG_ON().
+		 */
+		err = -ENOMEM;
+		goto out;
+	}
+
+	my_rsv = &block_i->rsv_window_node;
+	if (!my_rsv->rsv_goal_size) {
+		/*
+		 * No goal size means no window can be built.  This path is
+		 * reachable from an ioctl, so fail the call rather than BUG.
+		 */
+		err = -EINVAL;
+		goto out;
+	}
+
+	ext4_get_group_no_and_offset(sb, goal, &group_no,
+					&grp_target_blk);
+
+	bitmap_bh = ext4_read_block_bitmap(sb, group_no);
+	if (!bitmap_bh) {
+		err = -ENOSPC;
+		goto out;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	err = ext4_journal_get_undo_access(handle, bitmap_bh);
+	if (err)
+		goto out;
+	/* From here on the buffer must be released back to the journal */
+	undo_access = 1;
+
+	err = alloc_new_reservation(my_rsv, grp_target_blk, sb,
+					group_no, bitmap_bh);
+	if (err < 0) {
+		printk(KERN_ERR "ext4 defrag: Block reservation failed. "
+			"offset [%d], bg[%lu]\n", grp_target_blk, group_no);
+		ext4_discard_reservation(org_inode);
+		goto out;
+	} else if (len > EXT4_DEFAULT_RESERVE_BLOCKS) {
+		try_to_extend_reservation(my_rsv, sb,
+					len - EXT4_DEFAULT_RESERVE_BLOCKS);
+	}
+
+out:
+	up_write(&EXT4_I(org_inode)->i_data_sem);
+	/* Only hand the buffer back if undo access was actually granted */
+	if (undo_access)
+		ext4_journal_release_buffer(handle, bitmap_bh);
+	brelse(bitmap_bh);
+
+	if (handle)
+		ext4_journal_stop(handle);
+
+	return err;
+}
+
+/**
+ * ext4_defrag_block_within_rsv - Is target extent reserved ?
+ *
+ * @org_inode:	original inode whose reservation window is checked
+ * @ex_start:	first filesystem block of the extent
+ * @ex_len:	block length of the extent
+ *
+ * The first and the last block of the extent are both tested against the
+ * reservation window attached to @org_inode.
+ *
+ * This function returns 0 if both blocks are inside the window, and
+ * -ENOSPC if the window is empty or either block is outside of it.
+ */
+static int
+ext4_defrag_block_within_rsv(struct inode *org_inode, ext4_fsblk_t ex_start,
+				int ex_len)
+{
+	struct super_block *sb = org_inode->i_sb;
+	struct ext4_block_alloc_info *alloc_info =
+				EXT4_I(org_inode)->i_block_alloc_info;
+	struct ext4_reserve_window *window;
+	ext4_group_t bg;
+	ext4_grpblk_t first_blk;
+
+	/* The caller must have set up block reservation for this inode */
+	BUG_ON(!alloc_info);
+	/* ... and the reservation must carry a goal size */
+	BUG_ON(!alloc_info->rsv_window_node.rsv_goal_size);
+
+	window = &alloc_info->rsv_window_node.rsv_window;
+	if (rsv_is_empty(window)) {
+		printk(KERN_ERR "ext4 defrag: Reservation window is empty\n");
+		return -ENOSPC;
+	}
+
+	/* Translate the extent start into (group, offset within group) */
+	ext4_get_group_no_and_offset(sb, ex_start, &bg, &first_blk);
+
+	if (goal_in_my_reservation(window, first_blk, bg, sb) &&
+	    goal_in_my_reservation(window, first_blk + ex_len - 1, bg, sb))
+		return 0;
+
+	/* At least one end of the extent falls outside the window */
+	printk(KERN_ERR "ext4 defrag: %d or %d in bg %lu is "
+		"not in rsv_window\n", first_blk,
+		first_blk + ex_len - 1, bg);
+	return -ENOSPC;
+}
+
+/**
+ * ext4_defrag_reserve_fblocks -
+ *	Reserve free blocks with ext4_defrag_reserve_blocks
+ *
+ * @org_inode:	original inode the reservation is attached to
+ * @ext_info:	free blocks distribution stored in extent-like style;
+ *		@ext_info->ext[] holds @ext_info->entries {start, len} pairs
+ *
+ * Each entry is reserved with ext4_defrag_reserve_blocks() and then checked
+ * with ext4_defrag_block_within_rsv().  On the first failure the inode's
+ * reservation is discarded under i_data_sem and the error is returned.
+ *
+ * @ext_info is copied in from user space by the ioctl handler, so the entry
+ * count and the extent lengths are validated here before use.
+ *
+ * This function returns 0 if succeed (including when there are no entries),
+ * otherwise returns error value.
+ */
+static int
+ext4_defrag_reserve_fblocks(struct inode *org_inode,
+				struct ext4_extents_info *ext_info)
+{
+	const size_t max_entries =
+		sizeof(ext_info->ext) / sizeof(ext_info->ext[0]);
+	ext4_fsblk_t ex_start = 0;
+	int i, len;
+	int ret = 0;
+
+	/* Never index past the end of ext[] on a bogus user-supplied count */
+	if (ext_info->entries < 0 ||
+	    (size_t)ext_info->entries > max_entries)
+		return -EINVAL;
+
+	for (i = 0; i < ext_info->entries; i++) {
+		ex_start = ext_info->ext[i].start;
+		len = ext_info->ext[i].len;
+
+		/* An extent must cover at least one block */
+		if (len <= 0) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		ret = ext4_defrag_reserve_blocks(org_inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Block reservation failed. offset [%llu], "
+				"length [%d]\n", ex_start, len);
+			goto err;
+		}
+
+		/* Confirm that blocks are in the reservation window */
+		ret = ext4_defrag_block_within_rsv(org_inode, ex_start, len);
+		if (ret < 0) {
+			printk(KERN_ERR "ext4 defrag: "
+				"Reservation window is not set. "
+				"offset [%llu], length [%d]\n", ex_start, len);
+			goto err;
+		}
+	}
+	return ret;
+
+err:
+	/* Drop whatever was reserved so far; the request failed as a whole */
+	down_write(&EXT4_I(org_inode)->i_data_sem);
+	ext4_discard_reservation(org_inode);
+	up_write(&EXT4_I(org_inode)->i_data_sem);
+	return ret;
+}
+
+/**
* ext4_defrag_fblocks_distribution - Search free blocks distribution
*
* @org_inode: original inode
@@ -342,6 +529,15 @@ int ext4_defrag_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
} else if (cmd == EXT4_IOC_FIEMAP_INO) {

err = ext4_defrag_fiemap_ino(filp, arg);
+ } else if (cmd == EXT4_IOC_RESERVE_BLOCK) {
+ struct ext4_extents_info ext_info;
+
+ if (copy_from_user(&ext_info,
+ (struct ext4_extents_info __user *)arg,
+ sizeof(ext_info)))
+ return -EFAULT;
+
+ err = ext4_defrag_reserve_fblocks(inode, &ext_info);
} else if (cmd == EXT4_IOC_DEFRAG) {
struct ext4_ext_defrag_data defrag;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8d008c8..eef7885 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -307,6 +307,7 @@ struct ext4_new_group_data {
#define EXT4_IOC_GROUP_INFO _IOW('f', 17, struct ext4_group_data_info)
#define EXT4_IOC_FREE_BLOCKS_INFO _IOW('f', 18, struct ext4_extents_info)
#define EXT4_IOC_FIEMAP_INO _IOW('f', 19, struct fiemap_ino)
+#define EXT4_IOC_RESERVE_BLOCK _IOW('f', 20, struct ext4_extents_info)

/*
* ioctl commands in 32 bit emulation
@@ -1046,8 +1047,17 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern void ext4_init_block_alloc_info(struct inode *);
extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+extern void try_to_extend_reservation(struct ext4_reserve_window_node *,
+ struct super_block *, int);
+extern int alloc_new_reservation(struct ext4_reserve_window_node *,
+ ext4_grpblk_t, struct super_block *,
+ ext4_group_t, struct buffer_head *);
extern ext4_grpblk_t bitmap_search_next_usable_block(ext4_grpblk_t,
struct buffer_head *, ext4_grpblk_t);
+extern int rsv_is_empty(struct ext4_reserve_window *rsv);
+extern int goal_in_my_reservation(struct ext4_reserve_window *rsv,
+ ext4_grpblk_t grp_goal, ext4_group_t group,
+ struct super_block *sb);

/* dir.c */
extern int ext4_check_dir_entry(const char *, struct inode *,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a0e4915..9c992d8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -260,7 +260,8 @@ setversion_out:
case EXT4_IOC_DEFRAG:
case EXT4_IOC_GROUP_INFO:
case EXT4_IOC_FREE_BLOCKS_INFO:
- case EXT4_IOC_FIEMAP_INO: {
+ case EXT4_IOC_FIEMAP_INO:
+ case EXT4_IOC_RESERVE_BLOCK: {
return ext4_defrag_ioctl(inode, filp, cmd, arg);
}
case EXT4_IOC_GROUP_ADD: {