From: Jan Kara Subject: [PATCH 4/5] ext4: Use new framework for data=ordered mode in JBD2 Date: Wed, 4 Jun 2008 17:59:55 +0200 Message-ID: <20080604155955.GJ16572@duck.suse.cz> References: <20080604155431.GG16572@duck.suse.cz> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii To: linux-ext4@vger.kernel.org Return-path: Received: from styx.suse.cz ([82.119.242.94]:46611 "EHLO mail.suse.cz" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1751551AbYFDP75 (ORCPT ); Wed, 4 Jun 2008 11:59:57 -0400 Received: from duck.suse.cz (duck.suse.cz [10.20.1.74]) by mail.suse.cz (Postfix) with ESMTP id C8ED16280E0 for ; Wed, 4 Jun 2008 17:59:55 +0200 (CEST) Content-Disposition: inline In-Reply-To: <20080604155431.GG16572@duck.suse.cz> Sender: linux-ext4-owner@vger.kernel.org List-ID: From: Jan Kara Date: Thu, 22 May 2008 00:51:17 +0200 Subject: [PATCH] ext4: Use new framework for data=ordered mode in JBD2 Signed-off-by: Jan Kara --- fs/ext4/ext4_i.h | 1 + fs/ext4/ext4_jbd2.h | 7 ++- fs/ext4/ialloc.c | 1 + fs/ext4/inode.c | 160 ++++++++++++++++++--------------------------------- fs/ext4/super.c | 4 +- 5 files changed, 67 insertions(+), 106 deletions(-) Index: linux-2.6-linus/fs/ext4/ext4_i.h =================================================================== --- linux-2.6-linus.orig/fs/ext4/ext4_i.h +++ linux-2.6-linus/fs/ext4/ext4_i.h @@ -150,6 +150,7 @@ struct ext4_inode_info { */ struct rw_semaphore i_data_sem; struct inode vfs_inode; + struct jbd2_inode jinode; unsigned long i_ext_generation; struct ext4_ext_cache i_cached_extent; Index: linux-2.6-linus/fs/ext4/ext4_jbd2.h =================================================================== --- linux-2.6-linus.orig/fs/ext4/ext4_jbd2.h +++ linux-2.6-linus/fs/ext4/ext4_jbd2.h @@ -154,8 +154,6 @@ int __ext4_journal_dirty_metadata(const #define ext4_journal_forget(handle, bh) \ __ext4_journal_forget(__FUNCTION__, (handle), (bh)) -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); - handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); int __ext4_journal_stop(const char *where, handle_t *handle); @@ -192,6 +190,11 @@ static inline int ext4_journal_force_com return jbd2_journal_force_commit(journal); } +static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) +{ + return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); +} + /* super.c */ int ext4_force_commit(struct super_block *sb); Index: linux-2.6-linus/fs/ext4/ialloc.c =================================================================== --- linux-2.6-linus.orig/fs/ext4/ialloc.c +++ linux-2.6-linus/fs/ext4/ialloc.c @@ -820,6 +820,7 @@ got: ei->i_state = EXT4_STATE_NEW; ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; + jbd2_journal_init_jbd_inode(&ei->jinode, inode); ret = inode; if(DQUOT_ALLOC_INODE(inode)) { Index: linux-2.6-linus/fs/ext4/inode.c =================================================================== --- linux-2.6-linus.orig/fs/ext4/inode.c +++ linux-2.6-linus/fs/ext4/inode.c @@ -39,6 +39,13 @@ #include "xattr.h" #include "acl.h" +static inline int ext4_begin_ordered_truncate(struct inode *inode, + loff_t new_size) +{ + return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode, + new_size); +} + /* * Test whether an inode is a fast symlink. */ @@ -181,6 +188,8 @@ void ext4_delete_inode (struct inode * i { handle_t *handle; + if (ext4_should_order_data(inode)) + ext4_begin_ordered_truncate(inode, 0); truncate_inode_pages(&inode->i_data, 0); if (is_bad_inode(inode)) @@ -1273,15 +1282,6 @@ out: return ret; } -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) -{ - int err = jbd2_journal_dirty_data(handle, bh); - if (err) - ext4_journal_abort_handle(__func__, __func__, - bh, handle, err); - return err; -} - /* For write_end() in data=journal mode */ static int write_end_fn(handle_t *handle, struct buffer_head *bh) { @@ -1311,8 +1311,7 @@ static int ext4_ordered_write_end(struct from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; - ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext4_journal_dirty_data); + ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { /* @@ -1472,25 +1471,22 @@ static int bput_one(handle_t *handle, st return 0; } -static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -{ - if (buffer_mapped(bh)) - return ext4_journal_dirty_data(handle, bh); - return 0; -} - static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) { return !buffer_mapped(bh) || buffer_delay(bh); } /* - * Note that we don't need to start a transaction unless we're journaling - * data because we should have holes filled from ext4_page_mkwrite(). If - * we are journaling data, we cannot start transaction directly because - * transaction start ranks above page lock so we have to do some magic... + * Note that we don't need to start a transaction unless we're journaling data + * because we should have holes filled from ext4_page_mkwrite(). We even don't + * need to file the inode to the transaction's list in ordered mode because if + * we are writing back data added by write(), the inode is already there and if + * we are writing back data modified via mmap(), noone guarantees in which + * transaction the data will hit the disk. In case we are journaling data, we + * cannot start transaction directly because transaction start ranks above page + * lock so we have to do some magic. * - * In all journalling modes block_write_full_page() will start the I/O. + * In all journaling modes block_write_full_page() will start the I/O. * * Problem: * @@ -1533,86 +1529,7 @@ static int ext4_bh_unmapped_or_delay(han * us. * */ -static int __ext4_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *page_bufs; - handle_t *handle = NULL; - int ret = 0; - int err; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - page_bufs = page_buffers(page); - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bget_one); - - ret = block_write_full_page(page, ext4_get_block, wbc); - - /* - * The page can become unlocked at any point now, and - * truncate can then come in and change things. So we - * can't touch *page from now on. But *page_bufs is - * safe due to elevated refcount. - */ - - /* - * And attach them to the current transaction. But only if - * block_write_full_page() succeeded. Otherwise they are unmapped, - * and generally junk. - */ - if (ret == 0) { - handle = ext4_journal_start(inode, - ext4_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_put; - } - - ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, - NULL, jbd2_journal_dirty_data_fn); - err = ext4_journal_stop(handle); - if (!ret) - ret = err; - } -out_put: - walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, - bput_one); - return ret; -} - -static int ext4_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - loff_t size = i_size_read(inode); - loff_t len; - - J_ASSERT(PageLocked(page)); - J_ASSERT(page_has_buffers(page)); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, - ext4_bh_unmapped_or_delay)); - - /* - * We give up here if we're reentered, because it might be for a - * different filesystem. - */ - if (!ext4_journal_current_handle()) - return __ext4_ordered_writepage(page, wbc); - - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - -static int __ext4_writeback_writepage(struct page *page, +static int __ext4_normal_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1624,7 +1541,7 @@ static int __ext4_writeback_writepage(st } -static int ext4_writeback_writepage(struct page *page, +static int ext4_normal_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1641,7 +1558,7 @@ static int ext4_writeback_writepage(stru ext4_bh_unmapped_or_delay)); if (!ext4_journal_current_handle()) - return __ext4_writeback_writepage(page, wbc); + return __ext4_normal_writepage(page, wbc); redirty_page_for_writepage(wbc, page); unlock_page(page); @@ -1877,7 +1794,7 @@ static int ext4_journalled_set_page_dirt static const struct address_space_operations ext4_ordered_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_ordered_writepage, + .writepage = ext4_normal_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_ordered_write_end, @@ -1891,7 +1808,7 @@ static const struct address_space_operat static const struct address_space_operations ext4_writeback_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_writeback_writepage, + .writepage = ext4_normal_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_writeback_write_end, @@ -2019,7 +1936,7 @@ int ext4_block_truncate_page(handle_t *h err = ext4_journal_dirty_metadata(handle, bh); } else { if (ext4_should_order_data(inode)) - err = ext4_journal_dirty_data(handle, bh); + err = ext4_jbd2_file_inode(handle, inode); mark_buffer_dirty(bh); } @@ -2787,6 +2704,7 @@ struct inode *ext4_iget(struct super_blo ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; + jbd2_journal_init_jbd_inode(&ei->jinode, inode); ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) @@ -3149,7 +3067,14 @@ int ext4_write_inode(struct inode *inode * be freed, so we have a strong guarantee that no future commit will * leave these blocks visible to the user.) * - * Called with inode->sem down. + * Another thing we have to assure is that if we are in ordered mode + * and inode is still attached to the committing transaction, we must + * we start writeout of all the dirty pages which are being truncated. + * This way we are sure that all the data written in the previous + * transaction are already on disk (truncate waits for pages under + * writeback). + * + * Called with inode->i_mutex down. */ int ext4_setattr(struct dentry *dentry, struct iattr *attr) { @@ -3215,6 +3140,22 @@ int ext4_setattr(struct dentry *dentry, if (!error) error = rc; ext4_journal_stop(handle); + + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, + attr->ia_size); + if (error) { + /* Do as much error cleanup as possible */ + handle = ext4_journal_start(inode, 3); + if (IS_ERR(handle)) { + ext4_orphan_del(NULL, inode); + goto err_out; + } + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + goto err_out; + } + } } rc = inode_setattr(inode, attr); @@ -3624,12 +3565,13 @@ int ext4_page_mkwrite(struct vm_area_str lock_page(page); wbc.range_start = page_offset(page); wbc.range_end = page_offset(page) + len; - if (ext4_should_writeback_data(inode)) - ret = __ext4_writeback_writepage(page, &wbc); - else if (ext4_should_order_data(inode)) - ret = __ext4_ordered_writepage(page, &wbc); - else + if (!ext4_should_journal_data(inode)) { + ret = __ext4_normal_writepage(page, &wbc); + if (!ret && ext4_should_order_data(inode)) + ret = ext4_jbd2_file_inode(handle, inode); + } else { ret = __ext4_journalled_writepage(page, &wbc); + } /* Page got unlocked in writepage */ err = ext4_journal_stop(handle); if (!ret) Index: linux-2.6-linus/fs/ext4/super.c =================================================================== --- linux-2.6-linus.orig/fs/ext4/super.c +++ linux-2.6-linus/fs/ext4/super.c @@ -637,6 +637,8 @@ static void ext4_clear_inode(struct inod EXT4_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) kfree(rsv); + jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, + &EXT4_I(inode)->jinode); } static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) @@ -3353,7 +3355,7 @@ static ssize_t ext4_quota_write(struct s err = ext4_journal_dirty_metadata(handle, bh); else { /* Always do at least ordered writes for quotas */ - err = ext4_journal_dirty_data(handle, bh); + err = ext4_jbd2_file_inode(handle, inode); mark_buffer_dirty(bh); } brelse(bh); Index: linux-2.6-linus/fs/ext4/mballoc.c =================================================================== --- linux-2.6-linus.orig/fs/ext4/mballoc.c +++ linux-2.6-linus/fs/ext4/mballoc.c @@ -2255,6 +2255,8 @@ static int ext4_mb_init_backend(struct s printk(KERN_ERR "EXT4-fs: can't get new inode\n"); goto err_freesgi; } + jbd2_journal_init_jbd_inode(&EXT4_I(sbi->s_buddy_cache)->jinode, + sbi->s_buddy_cache); EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);