From: Theodore Tso Subject: Re: [PATCH] Allow ext4 to run without a journal. Date: Mon, 5 Jan 2009 00:31:35 -0500 Message-ID: <20090105053135.GA10113@mit.edu> References: <1228953270.14314.15.camel@bobble.smo.corp.google.com> <20081217031210.GA17588@mit.edu> <20090103155221.GH4758@mit.edu> <1231013940.22046.4.camel@bobble.smo.corp.google.com> <20090104011747.GI4758@mit.edu> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: linux-ext4@vger.kernel.org To: Frank Mayhar Return-path: Received: from thunk.org ([69.25.196.29]:44153 "EHLO thunker.thunk.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750774AbZAEFbj (ORCPT ); Mon, 5 Jan 2009 00:31:39 -0500 Content-Disposition: inline In-Reply-To: <20090104011747.GI4758@mit.edu> Sender: linux-ext4-owner@vger.kernel.org List-ID: Here is an updated patch that adds a check to make sure the right thing happens if an ext3 filesystem that needs recovery is mounted -o noload. This also fixes a potential oops in ext4_mark_recovery_complete() journal doesn't exist, and we avoid adding and remove orphans from the orphan list when the journal doesn't exist. - Ted ext4: Allow ext4 to run without a journal From: Frank Mayhar A few weeks ago I posted a patch for discussion that allowed ext4 to run without a journal. Since that time I've integrated the excellent comments from Andreas and fixed several serious bugs. We're currently running with this patch and generating some performance numbers against both ext2 (with backported reservations code) and ext4 with and without a journal. It just so happens that running without a journal is slightly faster for most everything. We did iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2 which creates 4 threads, each of which create and do reads and writes on a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens to bypass the page cache. Results: ext2 ext4, default ext4, no journal initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s reads 15.2 MB/s 16.9 MB/s 17.2 MB/s re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s So it seems that, so far, this was a useful exercise. Signed-off-by: Frank Mayhar Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 35f5f9a..31ebeb5 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -531,11 +531,11 @@ do_more: /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); /* And the group descriptor block */ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_journal_dirty_metadata(handle, gd_bh); + ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); if (!err) err = ret; *pdquot_freed_blocks += group_freed; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index c75384b..ad13a84 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -7,53 +7,96 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = jbd2_journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_get_undo_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } return err; } int __ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = jbd2_journal_get_write_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_get_write_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } return err; } int __ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = jbd2_journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_forget(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } return err; } int __ext4_journal_revoke(const char *where, handle_t *handle, ext4_fsblk_t blocknr, struct buffer_head *bh) { - int err = jbd2_journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_revoke(handle, blocknr, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } return err; } int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = jbd2_journal_get_create_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_get_create_access(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } return err; } -int __ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) +int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, + struct inode *inode, struct buffer_head *bh) { - int err = jbd2_journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, __func__, bh, handle, err); + int err = 0; + + if (ext4_handle_valid(handle)) { + err = jbd2_journal_dirty_metadata(handle, bh); + if (err) + ext4_journal_abort_handle(where, __func__, bh, + handle, err); + } else { + mark_buffer_dirty(bh); + if (inode && inode_needs_sync(inode)) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + ext4_error(inode->i_sb, __func__, + "IO error syncing inode, " + "inode=%lu, block=%llu", + inode->i_ino, + (unsigned long long) bh->b_blocknr); + err = -EIO; + } + } + } return err; } diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b455c68..663197a 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -122,12 +122,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); * been done yet. */ -static inline void ext4_journal_release_buffer(handle_t *handle, - struct buffer_head *bh) -{ - jbd2_journal_release_buffer(handle, bh); -} - void ext4_journal_abort_handle(const char *caller, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err); @@ -146,8 +140,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle, int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh); -int __ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh); +int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, + struct inode *inode, struct buffer_head *bh); #define ext4_journal_get_undo_access(handle, bh) \ __ext4_journal_get_undo_access(__func__, (handle), (bh)) @@ -157,14 +151,57 @@ int __ext4_journal_dirty_metadata(const char *where, __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) #define ext4_journal_get_create_access(handle, bh) \ __ext4_journal_get_create_access(__func__, (handle), (bh)) -#define ext4_journal_dirty_metadata(handle, bh) \ - __ext4_journal_dirty_metadata(__func__, (handle), (bh)) #define ext4_journal_forget(handle, bh) \ __ext4_journal_forget(__func__, (handle), (bh)) +#define ext4_handle_dirty_metadata(handle, inode, bh) \ + __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); int __ext4_journal_stop(const char *where, handle_t *handle); +#define EXT4_NOJOURNAL_HANDLE ((handle_t *) 0x1) + +static inline int ext4_handle_valid(handle_t *handle) +{ + if (handle == EXT4_NOJOURNAL_HANDLE) + return 0; + return 1; +} + +static inline void ext4_handle_sync(handle_t *handle) +{ + if (ext4_handle_valid(handle)) + handle->h_sync = 1; +} + +static inline void ext4_handle_release_buffer(handle_t *handle, + struct buffer_head *bh) +{ + if (ext4_handle_valid(handle)) + jbd2_journal_release_buffer(handle, bh); +} + +static inline int ext4_handle_is_aborted(handle_t *handle) +{ + if (ext4_handle_valid(handle)) + return is_handle_aborted(handle); + return 0; +} + +static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed) +{ + if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed) + return 0; + return 1; +} + +static inline void ext4_journal_release_buffer(handle_t *handle, + struct buffer_head *bh) +{ + if (ext4_handle_valid(handle)) + jbd2_journal_release_buffer(handle, bh); +} + static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) { return ext4_journal_start_sb(inode->i_sb, nblocks); @@ -180,27 +217,37 @@ static inline handle_t *ext4_journal_current_handle(void) static inline int ext4_journal_extend(handle_t *handle, int nblocks) { - return jbd2_journal_extend(handle, nblocks); + if (ext4_handle_valid(handle)) + return jbd2_journal_extend(handle, nblocks); + return 0; } static inline int ext4_journal_restart(handle_t *handle, int nblocks) { - return jbd2_journal_restart(handle, nblocks); + if (ext4_handle_valid(handle)) + return jbd2_journal_restart(handle, nblocks); + return 0; } static inline int ext4_journal_blocks_per_page(struct inode *inode) { - return jbd2_journal_blocks_per_page(inode); + if (EXT4_JOURNAL(inode) != NULL) + return jbd2_journal_blocks_per_page(inode); + return 0; } static inline int ext4_journal_force_commit(journal_t *journal) { - return jbd2_journal_force_commit(journal); + if (journal) + return jbd2_journal_force_commit(journal); + return 0; } static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode) { - return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); + if (ext4_handle_valid(handle)) + return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode); + return 0; } /* super.c */ @@ -208,6 +255,8 @@ int ext4_force_commit(struct super_block *sb); static inline int ext4_should_journal_data(struct inode *inode) { + if (EXT4_JOURNAL(inode) == NULL) + return 0; if (!S_ISREG(inode->i_mode)) return 1; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) @@ -219,6 +268,8 @@ static inline int ext4_should_journal_data(struct inode *inode) static inline int ext4_should_order_data(struct inode *inode) { + if (EXT4_JOURNAL(inode) == NULL) + return 0; if (!S_ISREG(inode->i_mode)) return 0; if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) @@ -230,6 +281,8 @@ static inline int ext4_should_order_data(struct inode *inode) static inline int ext4_should_writeback_data(struct inode *inode) { + if (EXT4_JOURNAL(inode) == NULL) + return 0; if (!S_ISREG(inode->i_mode)) return 0; if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0917be5..743e3fe 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) { int err; + if (!ext4_handle_valid(handle)) + return 0; if (handle->h_buffer_credits > needed) return 0; err = ext4_journal_extend(handle, needed); @@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode, int err; if (path->p_bh) { /* path points to block */ - err = ext4_journal_dirty_metadata(handle, path->p_bh); + err = ext4_handle_dirty_metadata(handle, inode, path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); @@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - err = ext4_journal_dirty_metadata(handle, bh); + err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto cleanup; brelse(bh); @@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - err = ext4_journal_dirty_metadata(handle, bh); + err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto cleanup; brelse(bh); @@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - err = ext4_journal_dirty_metadata(handle, bh); + err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto out; @@ -2947,7 +2949,7 @@ void ext4_ext_truncate(struct inode *inode) * transaction synchronous. */ if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); out_stop: up_write(&EXT4_I(inode)->i_data_sem); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 08cac9f..42eea1c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) spin_unlock(sb_bgl_lock(sbi, flex_group)); } } - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bh2); if (!fatal) fatal = err; } - BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!fatal) fatal = err; sb->s_dirt = 1; @@ -656,15 +656,16 @@ repeat_in_this_group: ino, bitmap_bh->b_data)) { /* we won it */ BUFFER_TRACE(bitmap_bh, - "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, + "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, + inode, bitmap_bh); if (err) goto fail; goto got; } /* we lost it */ - jbd2_journal_release_buffer(handle, bitmap_bh); + ext4_handle_release_buffer(handle, bitmap_bh); if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; @@ -726,7 +727,8 @@ got: /* Don't need to dirty bitmap block if we didn't change it */ if (free) { BUFFER_TRACE(block_bh, "dirty block bitmap"); - err = ext4_journal_dirty_metadata(handle, block_bh); + err = ext4_handle_dirty_metadata(handle, + NULL, block_bh); } brelse(block_bh); @@ -771,8 +773,8 @@ got: } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, NULL, bh2); if (err) goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); @@ -825,7 +827,7 @@ got: ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); insert_inode_hash(inode); spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; @@ -1024,4 +1026,3 @@ unsigned long ext4_count_dirs(struct super_block * sb) } return count; } - diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5840576..5c8d71c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -71,12 +71,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode) * "bh" may be NULL: a metadata block may have been freed from memory * but there may still be a record of it in the journal, and that record * still needs to be revoked. + * + * If the handle isn't valid we're not journaling so there's nothing to do. */ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; + if (!ext4_handle_valid(handle)) + return 0; + might_sleep(); BUFFER_TRACE(bh, "enter"); @@ -169,7 +174,9 @@ static handle_t *start_transaction(struct inode *inode) */ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) { - if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) + if (!ext4_handle_valid(handle)) + return 0; + if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) return 0; if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) return 0; @@ -183,6 +190,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) */ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) { + BUG_ON(EXT4_JOURNAL(inode) == NULL); jbd_debug(2, "restarting handle %p\n", handle); return ext4_journal_restart(handle, blocks_for_truncate(inode)); } @@ -215,7 +223,7 @@ void ext4_delete_inode(struct inode *inode) } if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode->i_size = 0; err = ext4_mark_inode_dirty(handle, inode); if (err) { @@ -232,7 +240,7 @@ void ext4_delete_inode(struct inode *inode) * enough credits left in the handle to remove the inode from * the orphan list and set the dtime field. */ - if (handle->h_buffer_credits < 3) { + if (!ext4_handle_has_enough_credits(handle, 3)) { err = ext4_journal_extend(handle, 3); if (err > 0) err = ext4_journal_restart(handle, 3); @@ -716,8 +724,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); if (err) goto failed; } @@ -799,8 +807,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. */ jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, where->bh); + BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, where->bh); if (err) goto err_out; } else { @@ -1228,8 +1236,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); } unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, inode, bh); if (!fatal) fatal = err; } else { @@ -1394,7 +1402,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; set_buffer_uptodate(bh); - return ext4_journal_dirty_metadata(handle, bh); + return ext4_handle_dirty_metadata(handle, NULL, bh); } /* @@ -2761,7 +2769,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) filemap_write_and_wait(mapping); } - if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { + BUG_ON(!EXT4_JOURNAL(inode) && + EXT4_I(inode)->i_state & EXT4_STATE_JDATA); + + if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -3032,7 +3043,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) if (offset == 0) ClearPageChecked(page); - jbd2_journal_invalidatepage(journal, page, offset); + if (journal) + jbd2_journal_invalidatepage(journal, page, offset); + else + block_invalidatepage(page, offset); } static int ext4_releasepage(struct page *page, gfp_t wait) @@ -3042,7 +3056,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait) WARN_ON(PageChecked(page)); if (!page_has_buffers(page)) return 0; - return jbd2_journal_try_to_free_buffers(journal, page, wait); + if (journal) + return jbd2_journal_try_to_free_buffers(journal, page, wait); + else + return try_to_free_buffers(page); } /* @@ -3314,7 +3331,7 @@ int ext4_block_truncate_page(handle_t *handle, err = 0; if (ext4_should_journal_data(inode)) { - err = ext4_journal_dirty_metadata(handle, bh); + err = ext4_handle_dirty_metadata(handle, inode, bh); } else { if (ext4_should_order_data(inode)) err = ext4_jbd2_file_inode(handle, inode); @@ -3438,8 +3455,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, __le32 *p; if (try_to_extend_transaction(handle, inode)) { if (bh) { - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, inode, bh); } ext4_mark_inode_dirty(handle, inode); ext4_journal_test_restart(handle, inode); @@ -3539,7 +3556,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, count, block_to_free_p, p); if (this_bh) { - BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); + BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); /* * The buffer head should have an attached journal head at this @@ -3548,7 +3565,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, * the block was cleared. Check for this instead of OOPSing. */ if (bh2jh(this_bh)) - ext4_journal_dirty_metadata(handle, this_bh); + ext4_handle_dirty_metadata(handle, inode, this_bh); else ext4_error(inode->i_sb, __func__, "circular indirect block detected, " @@ -3578,7 +3595,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, ext4_fsblk_t nr; __le32 *p; - if (is_handle_aborted(handle)) + if (ext4_handle_is_aborted(handle)) return; if (depth--) { @@ -3648,7 +3665,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * will merely complain about releasing a free block, * rather than leaking blocks. */ - if (is_handle_aborted(handle)) + if (ext4_handle_is_aborted(handle)) return; if (try_to_extend_transaction(handle, inode)) { ext4_mark_inode_dirty(handle, inode); @@ -3667,9 +3684,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, parent_bh)){ *p = 0; BUFFER_TRACE(parent_bh, - "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, - parent_bh); + "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, + inode, + parent_bh); } } } @@ -3857,7 +3875,7 @@ do_indirects: * synchronous */ if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); out_stop: /* * If this was a simple ftruncate(), and the file will remain alive @@ -4354,8 +4372,8 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_SET_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_LARGE_FILE); sb->s_dirt = 1; - handle->h_sync = 1; - err = ext4_journal_dirty_metadata(handle, + ext4_handle_sync(handle); + err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); } } @@ -4382,9 +4400,8 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } - - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - rc = ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + rc = ext4_handle_dirty_metadata(handle, inode, bh); if (!err) err = rc; ei->i_state &= ~EXT4_STATE_NEW; @@ -4447,6 +4464,25 @@ int ext4_write_inode(struct inode *inode, int wait) return ext4_force_commit(inode->i_sb); } +int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) +{ + int err = 0; + + mark_buffer_dirty(bh); + if (inode && inode_needs_sync(inode)) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) { + ext4_error(inode->i_sb, __func__, + "IO error syncing inode, " + "inode=%lu, block=%llu", + inode->i_ino, + (unsigned long long)bh->b_blocknr); + err = -EIO; + } + } + return err; +} + /* * ext4_setattr() * @@ -4751,16 +4787,15 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc) { - int err = 0; - if (handle) { - err = ext4_get_inode_loc(inode, iloc); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, iloc->bh); - if (err) { - brelse(iloc->bh); - iloc->bh = NULL; - } + int err; + + err = ext4_get_inode_loc(inode, iloc); + if (!err) { + BUFFER_TRACE(iloc->bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, iloc->bh); + if (err) { + brelse(iloc->bh); + iloc->bh = NULL; } } ext4_std_error(inode->i_sb, err); @@ -4832,7 +4867,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) might_sleep(); err = ext4_reserve_inode_write(handle, inode, &iloc); - if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && + if (ext4_handle_valid(handle) && + EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { /* * We need extra buffer credits since we may write into EA block @@ -4884,6 +4920,11 @@ void ext4_dirty_inode(struct inode *inode) handle_t *current_handle = ext4_journal_current_handle(); handle_t *handle; + if (!ext4_handle_valid(current_handle)) { + ext4_mark_inode_dirty(current_handle, inode); + return; + } + handle = ext4_journal_start(inode, 2); if (IS_ERR(handle)) goto out; @@ -4921,8 +4962,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode) BUFFER_TRACE(iloc.bh, "get_write_access"); err = jbd2_journal_get_write_access(handle, iloc.bh); if (!err) - err = ext4_journal_dirty_metadata(handle, - iloc.bh); + err = ext4_handle_dirty_metadata(handle, + inode, + iloc.bh); brelse(iloc.bh); } } @@ -4948,6 +4990,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) */ journal = EXT4_JOURNAL(inode); + if (!journal) + return 0; if (is_journal_aborted(journal)) return -EROFS; @@ -4977,7 +5021,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return PTR_ERR(handle); err = ext4_mark_inode_dirty(handle, inode); - handle->h_sync = 1; + ext4_handle_sync(handle); ext4_journal_stop(handle); ext4_std_error(inode->i_sb, err); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index dc99b47..42dc83f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -99,7 +99,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto flags_out; } if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto flags_err; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7beab71..edb512b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2553,7 +2553,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ext4_mb_init_per_dev_proc(sb); ext4_mb_history_init(sb); - sbi->s_journal->j_commit_callback = release_blocks_on_commit; + if (sbi->s_journal) + sbi->s_journal->j_commit_callback = release_blocks_on_commit; printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); return 0; @@ -2854,7 +2855,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) err = -EAGAIN; goto out_err; @@ -2901,10 +2902,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, spin_unlock(sb_bgl_lock(sbi, flex_group)); } - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (err) goto out_err; - err = ext4_journal_dirty_metadata(handle, gdp_bh); + err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); out_err: sb->s_dirt = 1; @@ -4414,7 +4415,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, struct rb_node **n = &db->bb_free_root.rb_node, *node; struct rb_node *parent = NULL, *new_node; - + BUG_ON(!ext4_handle_valid(handle)); BUG_ON(e4b->bd_bitmap_page == NULL); BUG_ON(e4b->bd_buddy_page == NULL); @@ -4600,7 +4601,7 @@ do_more: /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext4_journal_dirty_metadata(handle, bitmap_bh); + err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (ac) { ac->ac_b_ex.fe_group = block_group; @@ -4609,7 +4610,7 @@ do_more: ext4_mb_store_history(ac); } - if (metadata) { + if (metadata && ext4_handle_valid(handle)) { /* blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ ext4_mb_free_metadata(handle, &e4b, block_group, bit, count); @@ -4639,7 +4640,7 @@ do_more: /* And the group descriptor block */ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext4_journal_dirty_metadata(handle, gd_bh); + ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); if (!err) err = ret; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index f2a9cf4..e7cd488 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -59,7 +59,8 @@ static int finish_range(handle_t *handle, struct inode *inode, /* * Make sure the credit we accumalated is not really high */ - if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) { + if (needed && ext4_handle_has_enough_credits(handle, + EXT4_RESERVE_TRANS_BLOCKS)) { retval = ext4_journal_restart(handle, needed); if (retval) goto err_out; @@ -229,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode) { int retval = 0, needed; - if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) + if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1)) return 0; /* * We are freeing a blocks. During this we touch diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 39c0065..d92944e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1233,10 +1233,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block(frame, hash2 + continued, newblock); - err = ext4_journal_dirty_metadata(handle, bh2); + err = ext4_handle_dirty_metadata(handle, dir, bh2); if (err) goto journal_error; - err = ext4_journal_dirty_metadata(handle, frame->bh); + err = ext4_handle_dirty_metadata(handle, dir, frame->bh); if (err) goto journal_error; brelse(bh2); @@ -1340,8 +1340,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, ext4_update_dx_flag(dir); dir->i_version++; ext4_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - err = ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(handle, dir, bh); if (err) ext4_std_error(dir->i_sb, err); brelse(bh); @@ -1581,7 +1581,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(dx_show_index("node", frames[1].entries)); dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); - err = ext4_journal_dirty_metadata(handle, bh2); + err = ext4_handle_dirty_metadata(handle, inode, bh2); if (err) goto journal_error; brelse (bh2); @@ -1607,7 +1607,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; } - ext4_journal_dirty_metadata(handle, frames[0].bh); + ext4_handle_dirty_metadata(handle, inode, frames[0].bh); } de = do_split(handle, dir, &bh, frame, &hinfo, &err); if (!de) @@ -1653,8 +1653,8 @@ static int ext4_delete_entry(handle_t *handle, else de->inode = 0; dir->i_version++; - BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, dir, bh); return 0; } i += ext4_rec_len_from_disk(de->rec_len); @@ -1730,7 +1730,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode (handle, dir, mode); err = PTR_ERR(inode); @@ -1764,7 +1764,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, mode); err = PTR_ERR(inode); @@ -1800,7 +1800,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -1829,8 +1829,8 @@ retry: strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; - BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_block); + BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, dir, dir_block); brelse(dir_block); ext4_mark_inode_dirty(handle, inode); err = ext4_add_entry(handle, dentry, inode); @@ -1940,6 +1940,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0, rc; + if (!ext4_handle_valid(handle)) + return 0; + lock_super(sb); if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; @@ -1968,7 +1971,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* Insert this inode at the head of the on-disk orphan list... */ NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh); rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -2006,6 +2009,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; + if (!ext4_handle_valid(handle)) + return 0; + lock_super(inode->i_sb); if (list_empty(&ei->i_orphan)) { unlock_super(inode->i_sb); @@ -2024,7 +2030,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (!handle) + if (sbi->s_journal && !handle) goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); @@ -2038,7 +2044,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); + err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh); } else { struct ext4_iloc iloc2; struct inode *i_prev = @@ -2089,7 +2095,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) goto end_rmdir; if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = dentry->d_inode; @@ -2143,7 +2149,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de); @@ -2200,7 +2206,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); @@ -2263,7 +2269,7 @@ retry: return PTR_ERR(handle); if (IS_DIRSYNC(dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); inode->i_ctime = ext4_current_time(inode); ext4_inc_count(handle, inode); @@ -2305,7 +2311,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return PTR_ERR(handle); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) - handle->h_sync = 1; + ext4_handle_sync(handle); old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); /* @@ -2359,8 +2365,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, new_dir->i_ctime = new_dir->i_mtime = ext4_current_time(new_dir); ext4_mark_inode_dirty(handle, new_dir); - BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, new_bh); + BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, new_dir, new_bh); brelse(new_bh); new_bh = NULL; } @@ -2410,8 +2416,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, BUFFER_TRACE(dir_bh, "get_write_access"); ext4_journal_get_write_access(handle, dir_bh); PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); - ext4_journal_dirty_metadata(handle, dir_bh); + BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); + ext4_handle_dirty_metadata(handle, old_dir, dir_bh); ext4_dec_count(handle, old_dir); if (new_inode) { /* checked empty_dir above, can't have another parent, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index d448eb1..1665aa1 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, { int err; - if (handle->h_buffer_credits >= thresh) + if (ext4_handle_has_enough_credits(handle, thresh)) return 0; err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); @@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb, memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); set_buffer_uptodate(gdb); unlock_buffer(gdb); - ext4_journal_dirty_metadata(handle, gdb); + ext4_handle_dirty_metadata(handle, NULL, gdb); ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(bh); goto exit_bh; } - ext4_journal_dirty_metadata(handle, gdb); + ext4_handle_dirty_metadata(handle, NULL, gdb); ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb, err = PTR_ERR(it); goto exit_bh; } - ext4_journal_dirty_metadata(handle, it); + ext4_handle_dirty_metadata(handle, NULL, it); brelse(it); ext4_set_bit(bit, bh->b_data); } @@ -286,7 +286,7 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); /* Mark unused entries in inode bitmap used */ @@ -299,7 +299,7 @@ static int setup_new_group_blocks(struct super_block *sb, mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); exit_bh: brelse(bh); @@ -486,12 +486,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * reserved inode, and will become GDT blocks (primary and backup). */ data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; - ext4_journal_dirty_metadata(handle, dind); + ext4_handle_dirty_metadata(handle, NULL, dind); brelse(dind); inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; ext4_mark_iloc_dirty(handle, inode, &iloc); memset((*primary)->b_data, 0, sb->s_blocksize); - ext4_journal_dirty_metadata(handle, *primary); + ext4_handle_dirty_metadata(handle, NULL, *primary); o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, @@ -502,7 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, kfree(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); return 0; @@ -618,7 +618,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, primary[i]->b_blocknr, gdbackups, blk + primary[i]->b_blocknr); */ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); - err2 = ext4_journal_dirty_metadata(handle, primary[i]); + err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); if (!err) err = err2; } @@ -676,7 +676,8 @@ static void update_backups(struct super_block *sb, struct buffer_head *bh; /* Out of journal space, and can't get more - abort - so sad */ - if (handle->h_buffer_credits == 0 && + if (ext4_handle_valid(handle) && + handle->h_buffer_credits == 0 && ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; @@ -696,7 +697,7 @@ static void update_backups(struct super_block *sb, memset(bh->b_data + size, 0, rest); set_buffer_uptodate(bh); unlock_buffer(bh); - ext4_journal_dirty_metadata(handle, bh); + ext4_handle_dirty_metadata(handle, NULL, bh); brelse(bh); } if ((err2 = ext4_journal_stop(handle)) && !err) @@ -916,7 +917,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update the global fs size fields */ sbi->s_groups_count++; - ext4_journal_dirty_metadata(handle, primary); + ext4_handle_dirty_metadata(handle, NULL, primary); /* Update the reserved block counts only once the new group is * active. */ @@ -938,7 +939,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) EXT4_INODES_PER_GROUP(sb); } - ext4_journal_dirty_metadata(handle, sbi->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); sb->s_dirt = 1; exit_journal: @@ -1072,7 +1073,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ba23b48..bc5a5e2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -136,13 +136,20 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) * backs (eg. EIO in the commit thread), then we still need to * take the FS itself readonly cleanly. */ journal = EXT4_SB(sb)->s_journal; - if (is_journal_aborted(journal)) { - ext4_abort(sb, __func__, - "Detected aborted journal"); - return ERR_PTR(-EROFS); + if (journal) { + if (is_journal_aborted(journal)) { + ext4_abort(sb, __func__, + "Detected aborted journal"); + return ERR_PTR(-EROFS); + } + return jbd2_journal_start(journal, nblocks); } - - return jbd2_journal_start(journal, nblocks); + /* + * We're not journaling. Return a pointer to our flag that + * indicates that fact. + */ + current->journal_info = EXT4_NOJOURNAL_HANDLE; + return current->journal_info; } /* @@ -157,6 +164,14 @@ int __ext4_journal_stop(const char *where, handle_t *handle) int err; int rc; + if (!ext4_handle_valid(handle)) { + /* + * Do this here since we don't call jbd2_journal_stop() in + * no-journal mode. + */ + current->journal_info = NULL; + return 0; + } sb = handle->h_transaction->t_journal->j_private; err = handle->h_err; rc = jbd2_journal_stop(handle); @@ -174,6 +189,8 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn, char nbuf[16]; const char *errstr = ext4_decode_error(NULL, err, nbuf); + BUG_ON(!ext4_handle_valid(handle)); + if (bh) BUFFER_TRACE(bh, "abort"); @@ -448,11 +465,13 @@ static void ext4_put_super(struct super_block *sb) ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); - err = jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; - if (err < 0) - ext4_abort(sb, __func__, "Couldn't clean up the journal"); - + if (sbi->s_journal) { + err = jbd2_journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + if (err < 0) + ext4_abort(sb, __func__, + "Couldn't clean up the journal"); + } if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -522,6 +541,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); + /* + * Note: We can be called before EXT4_SB(sb)->s_journal is set, + * therefore it can be null here. Don't check it, just initialize + * jinode. + */ jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; @@ -588,7 +612,8 @@ static void ext4_clear_inode(struct inode *inode) } #endif ext4_discard_preallocations(inode); - jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, + if (EXT4_JOURNAL(inode)) + jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, &EXT4_I(inode)->jinode); } @@ -1406,20 +1431,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, printk(KERN_WARNING "EXT4-fs warning: checktime reached, " "running e2fsck is recommended\n"); -#if 0 - /* @@@ We _will_ want to clear the valid bit if we find - * inconsistencies, to force a fsck at reboot. But for - * a plain journaled filesystem we can keep it set as - * valid forever! :) - */ - es->s_state &= cpu_to_le16(~EXT4_VALID_FS); -#endif + if (!sbi->s_journal) + es->s_state &= cpu_to_le16(~EXT4_VALID_FS); if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); le16_add_cpu(&es->s_mnt_count, 1); es->s_mtime = cpu_to_le32(get_seconds()); ext4_update_dynamic_rev(sb); - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + if (sbi->s_journal) + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); ext4_commit_super(sb, es, 1); if (test_opt(sb, DEBUG)) @@ -1431,9 +1451,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt); - printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", - sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : - "external", EXT4_SB(sb)->s_journal->j_devname); + if (EXT4_SB(sb)->s_journal) { + printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", + sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : + "external", EXT4_SB(sb)->s_journal->j_devname); + } else { + printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); + } return res; } @@ -1867,6 +1891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned long def_mount_opts; struct inode *root; char *cp; + const char *descr; int ret = -EINVAL; int blocksize; int db_count; @@ -2278,21 +2303,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); ext4_commit_super(sb, es, 1); - printk(KERN_CRIT - "EXT4-fs (device %s): mount failed\n", - sb->s_id); - goto failed_mount4; } } } else if (journal_inum) { if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; + } if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && + EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { + printk(KERN_ERR "EXT4-fs: required journal recovery " + "suppressed and not mounted read-only\n"); + goto failed_mount4; } else { - if (!silent) - printk(KERN_ERR - "ext4: No journal on filesystem on %s\n", - sb->s_id); - goto failed_mount3; + clear_opt(sbi->s_mount_opt, DATA_FLAGS); + set_opt(sbi->s_mount_opt, WRITEBACK_DATA); + sbi->s_journal = NULL; + needs_recovery = 0; + goto no_journal; } if (ext4_blocks_count(es) > 0xffffffffULL && @@ -2344,6 +2370,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) break; } +no_journal: + if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " @@ -2428,13 +2456,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; - if (needs_recovery) + if (needs_recovery) { printk(KERN_INFO "EXT4-fs: recovery complete.\n"); - ext4_mark_recovery_complete(sb, es); - printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", - test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); + ext4_mark_recovery_complete(sb, es); + } + if (EXT4_SB(sb)->s_journal) { + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) + descr = " journalled data mode"; + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) + descr = " ordered data mode"; + else + descr = " writeback data mode"; + } else + descr = "out journal"; + + printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", + sb->s_id, descr); lock_kernel(); return 0; @@ -2446,8 +2483,11 @@ cantfind_ext4: goto failed_mount; failed_mount4: - jbd2_journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; + printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); + if (sbi->s_journal) { + jbd2_journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + } failed_mount3: percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -2508,6 +2548,8 @@ static journal_t *ext4_get_journal(struct super_block *sb, struct inode *journal_inode; journal_t *journal; + BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + /* First, test for the existence of a valid inode on disk. Bad * things happen if we iget() an unused inode, as the subsequent * iput() will try to delete it. */ @@ -2556,6 +2598,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, struct ext4_super_block *es; struct block_device *bdev; + BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + bdev = ext4_blkdev_get(j_dev); if (bdev == NULL) return NULL; @@ -2643,6 +2687,8 @@ static int ext4_load_journal(struct super_block *sb, int err = 0; int really_read_only; + BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { printk(KERN_INFO "EXT4-fs: external journal device major/minor " @@ -2817,6 +2863,10 @@ static void ext4_mark_recovery_complete(struct super_block *sb, { journal_t *journal = EXT4_SB(sb)->s_journal; + if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + BUG_ON(journal != NULL); + return; + } jbd2_journal_lock_updates(journal); if (jbd2_journal_flush(journal) < 0) goto out; @@ -2846,6 +2896,8 @@ static void ext4_clear_journal_err(struct super_block *sb, int j_errno; const char *errstr; + BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); + journal = EXT4_SB(sb)->s_journal; /* @@ -2878,14 +2930,17 @@ static void ext4_clear_journal_err(struct super_block *sb, int ext4_force_commit(struct super_block *sb) { journal_t *journal; - int ret; + int ret = 0; if (sb->s_flags & MS_RDONLY) return 0; journal = EXT4_SB(sb)->s_journal; - sb->s_dirt = 0; - ret = ext4_journal_force_commit(journal); + if (journal) { + sb->s_dirt = 0; + ret = ext4_journal_force_commit(journal); + } + return ret; } @@ -2897,9 +2952,13 @@ int ext4_force_commit(struct super_block *sb) */ static void ext4_write_super(struct super_block *sb) { - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; + if (EXT4_SB(sb)->s_journal) { + if (mutex_trylock(&sb->s_lock) != 0) + BUG(); + sb->s_dirt = 0; + } else { + ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + } } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -2911,7 +2970,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait) if (wait) ret = ext4_force_commit(sb); else - jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); + if (EXT4_SB(sb)->s_journal) + jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL); return ret; } @@ -2926,15 +2986,17 @@ static void ext4_write_super_lockfs(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { journal_t *journal = EXT4_SB(sb)->s_journal; - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + if (journal) { + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* - * We don't want to clear needs_recovery flag when we failed - * to flush the journal. - */ - if (jbd2_journal_flush(journal) < 0) - return; + /* + * We don't want to clear needs_recovery flag when we + * failed to flush the journal. + */ + if (jbd2_journal_flush(journal) < 0) + return; + } /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -2948,7 +3010,7 @@ static void ext4_write_super_lockfs(struct super_block *sb) */ static void ext4_unlockfs(struct super_block *sb) { - if (!(sb->s_flags & MS_RDONLY)) { + if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { lock_super(sb); /* Reser the needs_recovery flag before the fs is unlocked. */ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); @@ -2999,7 +3061,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) es = sbi->s_es; - ext4_init_journal_params(sb, sbi->s_journal); + if (sbi->s_journal) + ext4_init_journal_params(sb, sbi->s_journal); if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > ext4_blocks_count(es)) { @@ -3028,9 +3091,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * We have to unlock super so that we can wait for * transactions. */ - unlock_super(sb); - ext4_mark_recovery_complete(sb, es); - lock_super(sb); + if (sbi->s_journal) { + unlock_super(sb); + ext4_mark_recovery_complete(sb, es); + lock_super(sb); + } } else { __le32 ret; if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, @@ -3084,7 +3149,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * been changed by e2fsck since we originally mounted * the partition.) */ - ext4_clear_journal_err(sb, es); + if (sbi->s_journal) + ext4_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); if ((err = ext4_group_extend(sb, es, n_blocks_count))) goto restore_opts; @@ -3092,6 +3158,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; } } + if (sbi->s_journal == NULL) + ext4_commit_super(sb, es, 1); + #ifdef CONFIG_QUOTA /* Release old quota file names */ for (i = 0; i < MAXQUOTAS; i++) @@ -3368,7 +3437,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, * When we journal data on quota file, we have to flush journal to see * all updates to the file when we bypass pagecache... */ - if (ext4_should_journal_data(path.dentry->d_inode)) { + if (EXT4_SB(sb)->s_journal && + ext4_should_journal_data(path.dentry->d_inode)) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... @@ -3442,7 +3512,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (!handle) { + if (EXT4_SB(sb)->s_journal && !handle) { printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started.\n", (unsigned long long)off, (unsigned long long)len); @@ -3467,7 +3537,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, flush_dcache_page(bh->b_page); unlock_buffer(bh); if (journal_quota) - err = ext4_journal_dirty_metadata(handle, bh); + err = ext4_handle_dirty_metadata(handle, NULL, bh); else { /* Always do at least ordered writes for quotas */ err = ext4_jbd2_file_inode(handle, inode); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 9b4a368..157ce65 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -457,7 +457,7 @@ static void ext4_xattr_update_super_block(handle_t *handle, if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); sb->s_dirt = 1; - ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); } } @@ -487,9 +487,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { le32_add_cpu(&BHDR(bh)->h_refcount, -1); - error = ext4_journal_dirty_metadata(handle, bh); + error = ext4_handle_dirty_metadata(handle, inode, bh); if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); DQUOT_FREE_BLOCK(inode, 1); ea_bdebug(bh, "refcount now=%d; releasing", le32_to_cpu(BHDR(bh)->h_refcount)); @@ -724,8 +724,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error == -EIO) goto bad_block; if (!error) - error = ext4_journal_dirty_metadata(handle, - bs->bh); + error = ext4_handle_dirty_metadata(handle, + inode, + bs->bh); if (error) goto cleanup; goto inserted; @@ -794,8 +795,9 @@ inserted: ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); - error = ext4_journal_dirty_metadata(handle, - new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, + new_bh); if (error) goto cleanup_dquot; } @@ -833,7 +835,8 @@ getblk_failed: set_buffer_uptodate(new_bh); unlock_buffer(new_bh); ext4_xattr_cache_insert(new_bh); - error = ext4_journal_dirty_metadata(handle, new_bh); + error = ext4_handle_dirty_metadata(handle, + inode, new_bh); if (error) goto cleanup; } @@ -1040,7 +1043,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, */ is.iloc.bh = NULL; if (IS_SYNC(inode)) - handle->h_sync = 1; + ext4_handle_sync(handle); } cleanup: