2008-12-10 23:54:33

by Frank Mayhar

[permalink] [raw]
Subject: [PATCH 1/1] Allow ext4 to run without a journal.

A few weeks ago I posted a patch for discussion that allowed ext4 to run
without a journal. Since that time I've integrated the excellent
comments from Andreas and fixed several serious bugs. We're currently
running with this patch and generating some performance numbers against
both ext2 (with backported reservations code) and ext4 with and without
a journal. It just so happens that running without a journal is
slightly faster for most everything.

We did
iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2

which creates 4 threads, each of which create and do reads and writes on
a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens
to bypass the page cache. Results:

ext2 ext4, default ext4, no journal
initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s
rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s
reads 15.2 MB/s 16.9 MB/s 17.2 MB/s
re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s
random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s
random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s

So it seems that, so far, this was a useful exercise.

The patch follows. It was

Signed-off-by: Frank Mayhar <[email protected]>

---

fs/ext4/balloc.c | 4 +-
fs/ext4/ext4_jbd2.c | 60 +++++++++++++------
fs/ext4/ext4_jbd2.h | 95 ++++++++++++++++++++++++++----
fs/ext4/ext4_sb.h | 1 +
fs/ext4/extents.c | 12 ++--
fs/ext4/ialloc.c | 25 ++++----
fs/ext4/inode.c | 130 +++++++++++++++++++++++++++-------------
fs/ext4/ioctl.c | 2 +-
fs/ext4/mballoc.c | 17 +++---
fs/ext4/migrate.c | 5 +-
fs/ext4/namei.c | 50 ++++++++--------
fs/ext4/resize.c | 31 +++++-----
fs/ext4/super.c | 163 +++++++++++++++++++++++++++++++++++++--------------
fs/ext4/xattr.c | 21 ++++---
14 files changed, 419 insertions(+), 197 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2003cd..466c8b2 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -531,11 +531,11 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err) err = ret;
*pdquot_freed_blocks += group_freed;

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c75384b..2ad4633 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -7,53 +7,77 @@
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_undo_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_undo_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_write_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_write_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_forget(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_forget(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh)
{
- int err = jbd2_journal_revoke(handle, blocknr, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_revoke(handle, blocknr, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh)
{
- int err = jbd2_journal_get_create_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_create_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_dirty_metadata(const char *where,
handle_t *handle, struct buffer_head *bh)
{
- int err = jbd2_journal_dirty_metadata(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_dirty_metadata(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b455c68..3a05ae7 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -19,6 +19,8 @@
#include <linux/jbd2.h>
#include "ext4.h"

+#define EXT4_NOJOURNAL_MAGIC (void *)0x457874344e6a6e6c /* "Ext4Njnl" */
+
#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)

/* Define the number of blocks we need to account to a transaction to
@@ -122,12 +124,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
* been done yet.
*/

-static inline void ext4_journal_release_buffer(handle_t *handle,
- struct buffer_head *bh)
-{
- jbd2_journal_release_buffer(handle, bh);
-}
-
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);

@@ -149,6 +145,8 @@ int __ext4_journal_get_create_access(const char *where,
int __ext4_journal_dirty_metadata(const char *where,
handle_t *handle, struct buffer_head *bh);

+int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh);
+
#define ext4_journal_get_undo_access(handle, bh) \
__ext4_journal_get_undo_access(__func__, (handle), (bh))
#define ext4_journal_get_write_access(handle, bh) \
@@ -157,14 +155,68 @@ int __ext4_journal_dirty_metadata(const char *where,
__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
-#define ext4_journal_dirty_metadata(handle, bh) \
- __ext4_journal_dirty_metadata(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
__ext4_journal_forget(__func__, (handle), (bh))

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle);

+static inline int ext4_handle_valid(handle_t *handle)
+{
+ if (handle == NULL)
+ return 1;
+ if (handle->h_transaction == EXT4_NOJOURNAL_MAGIC)
+ return 0;
+ return 1;
+}
+
+static inline int ext4_handle_dirty_metadata(handle_t *handle,
+ struct inode *inode, struct buffer_head *bh)
+{
+ int err;
+
+ if (ext4_handle_valid(handle)) {
+ err = __ext4_journal_dirty_metadata(__func__, handle, bh);
+ } else {
+ err = __ext4_write_dirty_metadata(inode, bh);
+ }
+ return err;
+}
+
+static inline void ext4_handle_sync(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ handle->h_sync = 1;
+}
+
+static inline void ext4_handle_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
+static inline int ext4_handle_is_aborted(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ return is_handle_aborted(handle);
+ return 0;
+}
+
+static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
+{
+ if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
+ return 0;
+ return 1;
+}
+
+static inline void ext4_journal_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
{
return ext4_journal_start_sb(inode->i_sb, nblocks);
@@ -180,27 +232,37 @@ static inline handle_t *ext4_journal_current_handle(void)

static inline int ext4_journal_extend(handle_t *handle, int nblocks)
{
- return jbd2_journal_extend(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_extend(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
- return jbd2_journal_restart(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_restart(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
- return jbd2_journal_blocks_per_page(inode);
+ if (EXT4_JOURNAL(inode) != NULL)
+ return jbd2_journal_blocks_per_page(inode);
+ return 0;
}

static inline int ext4_journal_force_commit(journal_t *journal)
{
- return jbd2_journal_force_commit(journal);
+ if (journal)
+ return jbd2_journal_force_commit(journal);
+ return 0;
}

static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
- return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ return 0;
}

/* super.c */
@@ -208,6 +270,9 @@ int ext4_force_commit(struct super_block *sb);

static inline int ext4_should_journal_data(struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL &&
+ (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+ EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL));
if (!S_ISREG(inode->i_mode))
return 1;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -219,6 +284,8 @@ static inline int ext4_should_journal_data(struct inode *inode)

static inline int ext4_should_order_data(struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL &&
+ EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL);
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
@@ -230,6 +297,8 @@ static inline int ext4_should_order_data(struct inode *inode)

static inline int ext4_should_writeback_data(struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL &&
+ EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL);
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 445fde6..c39e675 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -28,6 +28,7 @@
* fourth extended-fs super-block data in memory
*/
struct ext4_sb_info {
+ int *s_nojournal_flag; /* Null to indicate "not a handle" */
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
unsigned long s_inodes_per_block;/* Number of inodes per block */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ea2ce3c..783be01 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
@@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
int err;
if (path->p_bh) {
/* path points to block */
- err = ext4_journal_dirty_metadata(handle, path->p_bh);
+ err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
} else {
/* path points to leaf/index in inode body */
err = ext4_mark_inode_dirty(handle, inode);
@@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto out;

@@ -2950,7 +2952,7 @@ void ext4_ext_truncate(struct inode *inode)
* transaction synchronous.
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

out_stop:
up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2a117e2..553a10b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
spin_unlock(sb_bgl_lock(sbi, flex_group));
}
}
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (!fatal) fatal = err;
}
- BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
sb->s_dirt = 1;
@@ -656,15 +656,16 @@ repeat_in_this_group:
ino, bitmap_bh->b_data)) {
/* we won it */
BUFFER_TRACE(bitmap_bh,
- "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle,
+ "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
- jbd2_journal_release_buffer(handle, bitmap_bh);
+ ext4_handle_release_buffer(handle, bitmap_bh);

if (++ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
@@ -726,7 +727,8 @@ got:
/* Don't need to dirty bitmap block if we didn't change it */
if (free) {
BUFFER_TRACE(block_bh, "dirty block bitmap");
- err = ext4_journal_dirty_metadata(handle, block_bh);
+ err = ext4_handle_dirty_metadata(handle,
+ NULL, block_bh);
}

brelse(block_bh);
@@ -771,8 +773,8 @@ got:
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (err) goto fail;

percpu_counter_dec(&sbi->s_freeinodes_counter);
@@ -825,7 +827,7 @@ got:

ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
insert_inode_hash(inode);
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
@@ -1024,4 +1026,3 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
-
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..678ec07 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,12 +71,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
+ *
+ * If the handle isn't valid we're not journaling so there's nothing to do.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
+
might_sleep();

BUFFER_TRACE(bh, "enter");
@@ -169,7 +174,9 @@ static handle_t *start_transaction(struct inode *inode)
*/
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (!ext4_handle_valid(handle))
+ return 0;
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
return 0;
@@ -183,6 +190,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
*/
static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle);
return ext4_journal_restart(handle, blocks_for_truncate(inode));
}
@@ -215,7 +223,7 @@ void ext4_delete_inode(struct inode *inode)
}

if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -232,7 +240,7 @@ void ext4_delete_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
- if (handle->h_buffer_credits < 3) {
+ if (!ext4_handle_has_enough_credits(handle, 3)) {
err = ext4_journal_extend(handle, 3);
if (err > 0)
err = ext4_journal_restart(handle, 3);
@@ -708,8 +716,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto failed;
}
@@ -791,8 +799,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
*/
jbd_debug(5, "splicing indirect only\n");
- BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, where->bh);
+ BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, where->bh);
if (err)
goto err_out;
} else {
@@ -1220,8 +1228,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (!fatal)
fatal = err;
} else {
@@ -1386,7 +1394,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
set_buffer_uptodate(bh);
- return ext4_journal_dirty_metadata(handle, bh);
+ return ext4_handle_dirty_metadata(handle, NULL, bh);
}

/*
@@ -2717,7 +2725,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
filemap_write_and_wait(mapping);
}

- if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+ BUG_ON(!EXT4_JOURNAL(inode) &&
+ EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
+
+ if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
/*
* This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare:
@@ -2988,7 +2999,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
if (offset == 0)
ClearPageChecked(page);

- jbd2_journal_invalidatepage(journal, page, offset);
+ if (journal)
+ jbd2_journal_invalidatepage(journal, page, offset);
+ else
+ block_invalidatepage(page, offset);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -2998,7 +3012,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ if (journal)
+ return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ else
+ return try_to_free_buffers(page);
}

/*
@@ -3270,7 +3287,7 @@ int ext4_block_truncate_page(handle_t *handle,

err = 0;
if (ext4_should_journal_data(inode)) {
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
if (ext4_should_order_data(inode))
err = ext4_jbd2_file_inode(handle, inode);
@@ -3394,8 +3411,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
__le32 *p;
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, inode, bh);
}
ext4_mark_inode_dirty(handle, inode);
ext4_journal_test_restart(handle, inode);
@@ -3495,7 +3512,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
count, block_to_free_p, p);

if (this_bh) {
- BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+ BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

/*
* The buffer head should have an attached journal head at this
@@ -3504,7 +3521,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
* the block was cleared. Check for this instead of OOPSing.
*/
if (bh2jh(this_bh))
- ext4_journal_dirty_metadata(handle, this_bh);
+ ext4_handle_dirty_metadata(handle, inode, this_bh);
else
ext4_error(inode->i_sb, __func__,
"circular indirect block detected, "
@@ -3534,7 +3551,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
ext4_fsblk_t nr;
__le32 *p;

- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;

if (depth--) {
@@ -3604,7 +3621,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* will merely complain about releasing a free block,
* rather than leaking blocks.
*/
- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;
if (try_to_extend_transaction(handle, inode)) {
ext4_mark_inode_dirty(handle, inode);
@@ -3623,9 +3640,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
parent_bh)){
*p = 0;
BUFFER_TRACE(parent_bh,
- "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle,
- parent_bh);
+ "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle,
+ inode,
+ parent_bh);
}
}
}
@@ -3813,7 +3831,7 @@ do_indirects:
* synchronous
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
out_stop:
/*
* If this was a simple ftruncate(), and the file will remain alive
@@ -4310,8 +4328,8 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1;
- handle->h_sync = 1;
- err = ext4_journal_dirty_metadata(handle,
+ ext4_handle_sync(handle);
+ err = ext4_handle_dirty_metadata(handle, inode,
EXT4_SB(sb)->s_sbh);
}
}
@@ -4338,9 +4356,8 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
}

-
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- rc = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ rc = ext4_handle_dirty_metadata(handle, inode, bh);
if (!err)
err = rc;
ei->i_state &= ~EXT4_STATE_NEW;
@@ -4403,6 +4420,25 @@ int ext4_write_inode(struct inode *inode, int wait)
return ext4_force_commit(inode->i_sb);
}

+int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
+{
+ int err = 0;
+
+ mark_buffer_dirty(bh);
+ if (inode && inode_needs_sync(inode)) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ ext4_error(inode->i_sb, __func__,
+ "IO error syncing inode, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)bh->b_blocknr);
+ err = -EIO;
+ }
+ }
+ return err;
+}
+
/*
* ext4_setattr()
*
@@ -4707,16 +4743,15 @@ int
ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext4_iloc *iloc)
{
- int err = 0;
- if (handle) {
- err = ext4_get_inode_loc(inode, iloc);
- if (!err) {
- BUFFER_TRACE(iloc->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err) {
- brelse(iloc->bh);
- iloc->bh = NULL;
- }
+ int err;
+
+ err = ext4_get_inode_loc(inode, iloc);
+ if (!err) {
+ BUFFER_TRACE(iloc->bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, iloc->bh);
+ if (err) {
+ brelse(iloc->bh);
+ iloc->bh = NULL;
}
}
ext4_std_error(inode->i_sb, err);
@@ -4788,7 +4823,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)

might_sleep();
err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+ if (ext4_handle_valid(handle) &&
+ EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
/*
* We need extra buffer credits since we may write into EA block
@@ -4840,6 +4876,11 @@ void ext4_dirty_inode(struct inode *inode)
handle_t *current_handle = ext4_journal_current_handle();
handle_t *handle;

+ if (!ext4_handle_valid(current_handle)) {
+ ext4_mark_inode_dirty(current_handle, inode);
+ return;
+ }
+
handle = ext4_journal_start(inode, 2);
if (IS_ERR(handle))
goto out;
@@ -4877,8 +4918,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
BUFFER_TRACE(iloc.bh, "get_write_access");
err = jbd2_journal_get_write_access(handle, iloc.bh);
if (!err)
- err = ext4_journal_dirty_metadata(handle,
- iloc.bh);
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
+ iloc.bh);
brelse(iloc.bh);
}
}
@@ -4904,6 +4946,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/

journal = EXT4_JOURNAL(inode);
+ if (!journal)
+ return 0;
if (is_journal_aborted(journal))
return -EROFS;

@@ -4933,7 +4977,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);

err = ext4_mark_inode_dirty(handle, inode);
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
ext4_journal_stop(handle);
ext4_std_error(inode->i_sb, err);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index dc99b47..42dc83f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -99,7 +99,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
goto flags_err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 444ad99..89677ca 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2551,7 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
ext4_mb_init_per_dev_proc(sb);
ext4_mb_history_init(sb);

- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback = release_blocks_on_commit;

printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
return 0;
@@ -2852,7 +2853,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
err = -EAGAIN;
goto out_err;
@@ -2899,10 +2900,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
spin_unlock(sb_bgl_lock(sbi, flex_group));
}

- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (err)
goto out_err;
- err = ext4_journal_dirty_metadata(handle, gdp_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
sb->s_dirt = 1;
@@ -4412,7 +4413,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct rb_node **n = &db->bb_free_root.rb_node, *node;
struct rb_node *parent = NULL, *new_node;

-
+ BUG_ON(!ext4_handle_valid(handle));
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);

@@ -4598,7 +4599,7 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

if (ac) {
ac->ac_b_ex.fe_group = block_group;
@@ -4607,7 +4608,7 @@ do_more:
ext4_mb_store_history(ac);
}

- if (metadata) {
+ if (metadata && ext4_handle_valid(handle)) {
/* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed */
ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
@@ -4637,7 +4638,7 @@ do_more:

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err)
err = ret;

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f2a9cf4..e7cd488 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -59,7 +59,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
/*
* Make sure the credit we accumalated is not really high
*/
- if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
+ if (needed && ext4_handle_has_enough_credits(handle,
+ EXT4_RESERVE_TRANS_BLOCKS)) {
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
@@ -229,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
int retval = 0, needed;

- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
/*
* We are freeing a blocks. During this we touch
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 63adcb7..abe0d7c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1228,10 +1228,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, dir, bh2);
if (err)
goto journal_error;
- err = ext4_journal_dirty_metadata(handle, frame->bh);
+ err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
if (err)
goto journal_error;
brelse(bh2);
@@ -1335,8 +1335,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
brelse(bh);
@@ -1574,7 +1574,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dxtrace(dx_show_index("node", frames[1].entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, inode, bh2);
if (err)
goto journal_error;
brelse (bh2);
@@ -1600,7 +1600,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext4_journal_dirty_metadata(handle, frames[0].bh);
+ ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1646,8 +1646,8 @@ static int ext4_delete_entry(handle_t *handle,
else
de->inode = 0;
dir->i_version++;
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, bh);
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len);
@@ -1723,7 +1723,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode (handle, dir, mode);
err = PTR_ERR(inode);
@@ -1757,7 +1757,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, mode);
err = PTR_ERR(inode);
@@ -1793,7 +1793,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
err = PTR_ERR(inode);
@@ -1822,8 +1822,8 @@ retry:
strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
- BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_block);
+ BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, dir_block);
brelse(dir_block);
ext4_mark_inode_dirty(handle, inode);
err = ext4_add_entry(handle, dentry, inode);
@@ -1961,7 +1961,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/* Insert this inode at the head of the on-disk orphan list... */
NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
err = rc;
@@ -2017,7 +2017,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
* transaction handle with which to update the orphan list on
* disk, but we still need to remove the inode from the linked
* list in memory. */
- if (!handle)
+ if (sbi->s_journal && !handle)
goto out;

err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2031,7 +2031,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
if (err)
goto out_brelse;
sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
} else {
struct ext4_iloc iloc2;
struct inode *i_prev =
@@ -2082,7 +2082,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = dentry->d_inode;

@@ -2136,7 +2136,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@ -2193,7 +2193,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
@@ -2256,7 +2256,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
@@ -2298,7 +2298,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return PTR_ERR(handle);

if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
/*
@@ -2352,8 +2352,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dir->i_ctime = new_dir->i_mtime =
ext4_current_time(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
- BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, new_bh);
+ BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, new_dir, new_bh);
brelse(new_bh);
new_bh = NULL;
}
@@ -2403,8 +2403,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
BUFFER_TRACE(dir_bh, "get_write_access");
ext4_journal_get_write_access(handle, dir_bh);
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
- BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_bh);
+ BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b6ec184..ef2332c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
{
int err;

- if (handle->h_buffer_credits >= thresh)
+ if (ext4_handle_has_enough_credits(handle, thresh))
return 0;

err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(bh);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(it);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, it);
+ ext4_handle_dirty_metadata(handle, NULL, it);
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
@@ -286,7 +286,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);

/* Mark unused entries in inode bitmap used */
@@ -299,7 +299,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);

@@ -486,12 +486,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- ext4_journal_dirty_metadata(handle, dind);
+ ext4_handle_dirty_metadata(handle, NULL, dind);
brelse(dind);
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext4_journal_dirty_metadata(handle, *primary);
+ ext4_handle_dirty_metadata(handle, NULL, *primary);

o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -502,7 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);

le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);

return 0;

@@ -618,7 +618,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
primary[i]->b_blocknr, gdbackups,
blk + primary[i]->b_blocknr); */
data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
- err2 = ext4_journal_dirty_metadata(handle, primary[i]);
+ err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
if (!err)
err = err2;
}
@@ -676,7 +676,8 @@ static void update_backups(struct super_block *sb,
struct buffer_head *bh;

/* Out of journal space, and can't get more - abort - so sad */
- if (handle->h_buffer_credits == 0 &&
+ if (ext4_handle_valid(handle) &&
+ handle->h_buffer_credits == 0 &&
ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
break;
@@ -696,7 +697,7 @@ static void update_backups(struct super_block *sb,
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
}
if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -915,7 +916,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;

- ext4_journal_dirty_metadata(handle, primary);
+ ext4_handle_dirty_metadata(handle, NULL, primary);

/* Update the reserved block counts only once the new group is
* active. */
@@ -937,7 +938,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
EXT4_INODES_PER_GROUP(sb);
}

- ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
sb->s_dirt = 1;

exit_journal:
@@ -1071,7 +1072,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put;
}
ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c..e0f433c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -136,13 +136,20 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
- if (is_journal_aborted(journal)) {
- ext4_abort(sb, __func__,
- "Detected aborted journal");
- return ERR_PTR(-EROFS);
+ if (journal) {
+ if (is_journal_aborted(journal)) {
+ ext4_abort(sb, __func__,
+ "Detected aborted journal");
+ return ERR_PTR(-EROFS);
+ }
+ return jbd2_journal_start(journal, nblocks);
}
-
- return jbd2_journal_start(journal, nblocks);
+ /*
+ * We're not journaling. Return a pointer to our flag that
+ * indicates that fact.
+ */
+ current->journal_info = (handle_t *)&EXT4_SB(sb)->s_nojournal_flag;
+ return current->journal_info;
}

/*
@@ -157,6 +164,14 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
int err;
int rc;

+ if (!ext4_handle_valid(handle)) {
+ /*
+ * Do this here since we don't call jbd2_journal_stop() in
+ * no-journal mode.
+ */
+ current->journal_info = NULL;
+ return 0;
+ }
sb = handle->h_transaction->t_journal->j_private;
err = handle->h_err;
rc = jbd2_journal_stop(handle);
@@ -174,6 +189,8 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);

+ BUG_ON(!ext4_handle_valid(handle));
+
if (bh)
BUFFER_TRACE(bh, "abort");

@@ -448,11 +465,12 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if (err < 0)
- ext4_abort(sb, __func__, "Couldn't clean up the journal");
-
+ if (sbi->s_journal) {
+ err = jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ if (err < 0)
+ ext4_abort(sb, __func__, "Couldn't clean up the journal");
+ }
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -522,6 +540,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
+ /*
+ * Note: We can be called before EXT4_SB(sb)->s_journal is set,
+ * therefore it can be null here. Don't check it, just initialize
+ * jinode.
+ */
jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
@@ -588,7 +611,8 @@ static void ext4_clear_inode(struct inode *inode)
}
#endif
ext4_discard_preallocations(inode);
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+ if (EXT4_JOURNAL(inode))
+ jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode);
}

@@ -1419,7 +1443,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
le16_add_cpu(&es->s_mnt_count, 1);
es->s_mtime = cpu_to_le32(get_seconds());
ext4_update_dynamic_rev(sb);
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ if (sbi->s_journal)
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);

ext4_commit_super(sb, es, 1);
if (test_opt(sb, DEBUG))
@@ -1431,9 +1456,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt);

- printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
- sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
- "external", EXT4_SB(sb)->s_journal->j_devname);
+ if (EXT4_SB(sb)->s_journal) {
+ printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+ sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+ "external", EXT4_SB(sb)->s_journal->j_devname);
+ } else {
+ printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+ }
return res;
}

@@ -1884,6 +1913,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
+ sbi->s_nojournal_flag = EXT4_NOJOURNAL_MAGIC;
sbi->s_mount_opt = 0;
sbi->s_resuid = EXT4_DEF_RESUID;
sbi->s_resgid = EXT4_DEF_RESGID;
@@ -2284,7 +2314,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
printk(KERN_ERR
"ext4: No journal on filesystem on %s\n",
sb->s_id);
- goto failed_mount3;
+ clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ sbi->s_journal = NULL;
+ es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
+ needs_recovery = 0;
+ goto no_journal;
}

if (ext4_blocks_count(es) > 0xffffffffULL &&
@@ -2336,6 +2371,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
break;
}

+no_journal:
+
if (test_opt(sb, NOBH)) {
if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
@@ -2423,10 +2460,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (needs_recovery)
printk(KERN_INFO "EXT4-fs: recovery complete.\n");
ext4_mark_recovery_complete(sb, es);
- printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
+ if (EXT4_SB(sb)->s_journal) {
+ printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+ "writeback");
+ }
+
+ printk(KERN_INFO "EXT4-fs: %s mounted. Flags: 0x%lx Opts: %s\n",
+ sb->s_id, sb->s_flags, (char *)data);

lock_kernel();
return 0;
@@ -2438,8 +2480,10 @@ cantfind_ext4:
goto failed_mount;

failed_mount4:
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
+ if (sbi->s_journal) {
+ jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ }
failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -2500,6 +2544,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
/* First, test for the existence of a valid inode on disk. Bad
* things happen if we iget() an unused inode, as the subsequent
* iput() will try to delete it. */
@@ -2548,6 +2594,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
bdev = ext4_blkdev_get(j_dev);
if (bdev == NULL)
return NULL;
@@ -2635,6 +2683,8 @@ static int ext4_load_journal(struct super_block *sb,
int err = 0;
int really_read_only;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
printk(KERN_INFO "EXT4-fs: external journal device major/minor "
@@ -2748,6 +2798,9 @@ static int ext4_create_journal(struct super_block *sb,

EXT4_SB(sb)->s_journal = journal;

+ if (journal_inum == 0)
+ return 0;
+
ext4_update_dynamic_rev(sb);
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);
@@ -2809,6 +2862,10 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
{
journal_t *journal = EXT4_SB(sb)->s_journal;

+ if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+ BUG_ON(journal != NULL);
+ return;
+ }
jbd2_journal_lock_updates(journal);
if (jbd2_journal_flush(journal) < 0)
goto out;
@@ -2838,6 +2895,8 @@ static void ext4_clear_journal_err(struct super_block *sb,
int j_errno;
const char *errstr;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
journal = EXT4_SB(sb)->s_journal;

/*
@@ -2870,14 +2929,17 @@ static void ext4_clear_journal_err(struct super_block *sb,
int ext4_force_commit(struct super_block *sb)
{
journal_t *journal;
- int ret;
+ int ret = 0;

if (sb->s_flags & MS_RDONLY)
return 0;

journal = EXT4_SB(sb)->s_journal;
- sb->s_dirt = 0;
- ret = ext4_journal_force_commit(journal);
+ if (journal) {
+ sb->s_dirt = 0;
+ ret = ext4_journal_force_commit(journal);
+ }
+
return ret;
}

@@ -2889,9 +2951,13 @@ int ext4_force_commit(struct super_block *sb)
*/
static void ext4_write_super(struct super_block *sb)
{
- if (mutex_trylock(&sb->s_lock) != 0)
- BUG();
- sb->s_dirt = 0;
+ if (EXT4_SB(sb)->s_journal) {
+ if (mutex_trylock(&sb->s_lock) != 0)
+ BUG();
+ sb->s_dirt = 0;
+ } else {
+ ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+ }
}

static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -2903,7 +2969,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
if (wait)
ret = ext4_force_commit(sb);
else
- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
return ret;
}

@@ -2918,15 +2985,17 @@ static void ext4_write_super_lockfs(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY)) {
journal_t *journal = EXT4_SB(sb)->s_journal;

- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
+ if (journal) {
+ /* Now we set up the journal barrier. */
+ jbd2_journal_lock_updates(journal);

- /*
- * We don't want to clear needs_recovery flag when we failed
- * to flush the journal.
- */
- if (jbd2_journal_flush(journal) < 0)
- return;
+ /*
+ * We don't want to clear needs_recovery flag when we
+ * failed to flush the journal.
+ */
+ if (jbd2_journal_flush(journal) < 0)
+ return;
+ }

/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2940,7 +3009,7 @@ static void ext4_write_super_lockfs(struct super_block *sb)
*/
static void ext4_unlockfs(struct super_block *sb)
{
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
lock_super(sb);
/* Reser the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2991,7 +3060,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)

es = sbi->s_es;

- ext4_init_journal_params(sb, sbi->s_journal);
+ if (sbi->s_journal)
+ ext4_init_journal_params(sb, sbi->s_journal);

if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) {
@@ -3076,7 +3146,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal)
+ ext4_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((err = ext4_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
@@ -3084,6 +3155,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~MS_RDONLY;
}
}
+ if (sbi->s_journal == NULL)
+ ext4_commit_super(sb, es, 1);
+
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
@@ -3360,7 +3434,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
- if (ext4_should_journal_data(path.dentry->d_inode)) {
+ if (EXT4_SB(sb)->s_journal &&
+ ext4_should_journal_data(path.dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -3434,7 +3509,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();

- if (!handle) {
+ if (EXT4_SB(sb)->s_journal && !handle) {
printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
@@ -3459,7 +3534,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
unlock_buffer(bh);
if (journal_quota)
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
else {
/* Always do at least ordered writes for quotas */
err = ext4_jbd2_file_inode(handle, inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 80626d5..7e8e1e7 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -457,7 +457,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
sb->s_dirt = 1;
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
}
}

@@ -487,9 +487,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- error = ext4_journal_dirty_metadata(handle, bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
DQUOT_FREE_BLOCK(inode, 1);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
@@ -724,8 +724,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error == -EIO)
goto bad_block;
if (!error)
- error = ext4_journal_dirty_metadata(handle,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -794,8 +795,9 @@ inserted:
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
- error = ext4_journal_dirty_metadata(handle,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -833,7 +835,8 @@ getblk_failed:
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(new_bh);
- error = ext4_journal_dirty_metadata(handle, new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode, new_bh);
if (error)
goto cleanup;
}
@@ -1040,7 +1043,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
*/
is.iloc.bh = NULL;
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
}

cleanup:




2008-12-15 17:35:59

by Frank Mayhar

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Wed, 2008-12-10 at 15:54 -0800, Frank Mayhar wrote:
> A few weeks ago I posted a patch for discussion that allowed ext4 to run
> without a journal. Since that time I've integrated the excellent
> comments from Andreas and fixed several serious bugs. We're currently
> running with this patch and generating some performance numbers against
> both ext2 (with backported reservations code) and ext4 with and without
> a journal. It just so happens that running without a journal is
> slightly faster for most everything.

Has anyone had a chance to take a look at this? Andreas? Ted? Anyone?
--
Frank Mayhar <[email protected]>
Google, Inc.


2008-12-16 04:00:14

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Mon, Dec 15, 2008 at 09:35:55AM -0800, Frank Mayhar wrote:
> On Wed, 2008-12-10 at 15:54 -0800, Frank Mayhar wrote:
> > A few weeks ago I posted a patch for discussion that allowed ext4 to run
> > without a journal. Since that time I've integrated the excellent
> > comments from Andreas and fixed several serious bugs. We're currently
> > running with this patch and generating some performance numbers against
> > both ext2 (with backported reservations code) and ext4 with and without
> > a journal. It just so happens that running without a journal is
> > slightly faster for most everything.
>
> Has anyone had a chance to take a look at this? Andreas? Ted? Anyone?

For some reason it didn't show up in my mailbox; maybe an errant SPAM
checker grabbed it? (For some reason Herbert Xu's mail server
recently blacklisted my mail server, so he can't get any e-mails from
me, whether it's a reply to LKML or an invite to the kernel summit.
More recently Baracuda Networks, which handles mit.edu's spam
filtering, blacklisted the mail server for the vendor-sec mailing
list. Go figure.)

Fortunately your patch did show up in patchwork:

http://patchwork.ozlabs.org/project/linux-ext4/

I've been travelling, so I haven't had a chance to look at it,
although I did note that it had showed up over the weekend when I was
going over the patch queue and reconciling it with Patchwork, and
wondered why I hadn't seen earlier...

I will try to get it shortly.

Regards,

- Ted

2008-12-17 03:12:13

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Wed, Dec 10, 2008 at 03:54:30PM -0800, Frank Mayhar wrote:
> diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
> index b455c68..3a05ae7 100644
> --- a/fs/ext4/ext4_jbd2.h
> +++ b/fs/ext4/ext4_jbd2.h
> @@ -19,6 +19,8 @@
> #include <linux/jbd2.h>
> #include "ext4.h"
>
> +#define EXT4_NOJOURNAL_MAGIC (void *)0x457874344e6a6e6c /* "Ext4Njnl" */
> +

This will cause warnings on systems where pointers are 32-bits. I'd
suggest using a 32-bit magic number. Also, the number you have picked
is divisble by 4, which means at least in theory it could be a valid
pointer. If we make EXT4_NOJOURNAL_MAGIC a 32-bit number, then a
number which is divisible by 4 definitely could be a valid pointer on
a 64-bit system. I'd suggest keeping it simple, and simply using 0x1.
This is known not be a valid pointer, since it's not 32-bit word
aligned, and there is precedent for using such a number --- for
example, SIG_IGN is ((void *) 1) on many architectures, and page 0 is
generally unmapped to catch null pointer dereferences. So I'd suggest
keeping things simple instead of using a fancy char string.

Also, why not use EXT4_NOJOURNAL_MAGIC constant directly, instead of
assigning it as a constant value to s_nojournal_flag in struct
ext4_sb_info? That bloats the data structure for no good reason that
I can tell.

> +static inline int ext4_handle_dirty_metadata(handle_t *handle,
> + struct inode *inode, struct buffer_head *bh)
> +{
> + int err;
> +
> + if (ext4_handle_valid(handle)) {
> + err = __ext4_journal_dirty_metadata(__func__, handle, bh);
> + } else {
> + err = __ext4_write_dirty_metadata(inode, bh);
> + }
> + return err;
> +}

I don't see the point in doing this as an inline function; either way,
each time the inline function is executed, it will result a function
call, either to __ext4_journal_dirty_metadata() or
__ext4_write_dirty_metadata(), both of which are called exactly once
by this inline function. So why not move the conditional and the
contents of _ext4_write_dirty_metadata() and
__ext4_journal_dirty_metadata() into __ext4_handle_dirty_metadata(),
and then do this:

#define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, handle, inode, bh)

Ext4_handle_dirty_metadata() gets called a *lot*, so this will shrink
the icache footprint of ext4 filesystem, and for CPU's that have
decent branch prediction, centralizing the condition into a single
location instead of an inline will also be a win, once again
demonstrating that with modern CPU's, optimizations that minimize code
size often also faster.

> /* super.c */
> @@ -208,6 +270,9 @@ int ext4_force_commit(struct super_block *sb);
>
> static inline int ext4_should_journal_data(struct inode *inode)
> {
> + BUG_ON(EXT4_JOURNAL(inode) == NULL &&
> + (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
> + EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL));
> if (!S_ISREG(inode->i_mode))
> return 1;
> if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)

This shouldn't be a BUG_ON; this gets triggered if there is an inode
which is marked as needing data journalling. So it is not a *bug*,
but at worst either a filesystem corruption, or more charitably a
conflict between a mount option and a setting in an inode flag. In
any case, these should not cause a BUG; at worst an ext4_error(), or
ext4_warning(). However, today, if a filesystem with inodes
requesting data journalling are mounted on ext2, the request is simply
ignored. So I'd argue that a similar treatmeant should be used for
ext4 filesystem running w/o a journal. At worst perhaps there should
be a one-time ext4_warning(), but I'm not convinced even this is
warranted. If the system administrator mounts a filesystem without a
journal, it's pretty clear what was meant.

> @@ -219,6 +284,8 @@ static inline int ext4_should_journal_data(struct inode *inode)
>
> static inline int ext4_should_order_data(struct inode *inode)
> {
> + BUG_ON(EXT4_JOURNAL(inode) == NULL &&
> + EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL);

See above discussion.

> static inline int ext4_should_writeback_data(struct inode *inode)
> {
> + BUG_ON(EXT4_JOURNAL(inode) == NULL &&
> + EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL);

Again, see above.

> --- a/fs/ext4/ext4_sb.h
> +++ b/fs/ext4/ext4_sb.h
> @@ -28,6 +28,7 @@
> * fourth extended-fs super-block data in memory
> */
> struct ext4_sb_info {
> + int *s_nojournal_flag; /* Null to indicate "not a handle" */
> unsigned long s_desc_size; /* Size of a group descriptor in bytes */

See above discussion; I don't understand why this is necessary, and
why it is an int * here, especially given that it is assigned a
constant value which is a (void *), and it is eventually cast into a
(handle_t *).

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c..e0f433c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2284,7 +2314,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
printk(KERN_ERR
"ext4: No journal on filesystem on %s\n",
sb->s_id);
- goto failed_mount3;
+ clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ sbi->s_journal = NULL;
+ es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
+ needs_recovery = 0;
+ goto no_journal;
}

In the case where there is no journal, please remove the KERN_ERR
printk; that's just excess noise, and some day, Google System
Administrators will thank you for getting rid of it. :-)

@@ -2748,6 +2798,9 @@ static int ext4_create_journal(struct super_block *sb,

EXT4_SB(sb)->s_journal = journal;

+ if (journal_inum == 0)
+ return 0;
+
ext4_update_dynamic_rev(sb);
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL);

If journal_inum == 0, ext4_create_journal will never be called (see
line 2429 in ext4fill_super), so this check is not really necessary.

Actually, what we should probably do (as a separate patch before this
one), is to remove ext4_create_journal() and the journal_inum mount
option altogether. It's pointless code that was originally added to
ext3 by Stephen Tweedie back when he wanted to convert ext2
filesystems to ext3 before e2fsprogs supported tune2fs -j. It's been
obsolete for over eight years, in both ext3 and ext4.


In any case, these are all pretty tiny nits; on the whole I think this
patch is quite clean, so I'll add it to the ext4 patch queue for
testing. I would appreciate getting an updated patch which addresses
these suggested cleanups, though. Also, although I haven't tested it,
I suspect we need to add a check so that if there is no journal, and
the EXT4_FEATURE_INCOMPAT_RECOVERY feature is set, the mount should be
aborted with an error, instead of just clearning the recovery flag and
moving on. In actual practice, such a combination should never
happen, but if it does, failing the mount is probably a safer thing
to do.

Regards,

- Ted

2008-12-17 06:00:53

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

Note, the patch allow-ext4-to-run-without-a-journal conflicted with a
large number of other patches in the patch queue. I fixed it up as
best I could, and reviewed all of the changed patches, and I think
it's OK. Please be cautious when first testing this, though.

- Ted

2008-12-17 17:51:24

by Frank Mayhar

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

This is just a quick response to one issue; I need to look at the rest
in more detail but they seem valid from a quick skim.

On Tue, 2008-12-16 at 22:12 -0500, Theodore Tso wrote:
> Also, why not use EXT4_NOJOURNAL_MAGIC constant directly, instead of
> assigning it as a constant value to s_nojournal_flag in struct
> ext4_sb_info? That bloats the data structure for no good reason that
> I can tell.

The way this works (which I did at Andreas' suggestion) is that when
there is no journal the "current handle" is really a pointer to
ext4_sb_info. I need to distinguish that structure from a valid handle,
so I added the magic field to do so; the first field of a handle_t is a
pointer (to a transaction) so I need something I can distinguish from
that. (And your comments regarding the magic value are very helpful,
thanks.)

If you look at ext4_handle_valid you'll see where it's actually used. I
admit this is very non-obvious (so far it has confused at least two
different reviewers); if you have a more clear way to handle it I'm all
ears.

> > static inline int ext4_should_journal_data(struct inode *inode)
> > {
> > + BUG_ON(EXT4_JOURNAL(inode) == NULL &&
> > + (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
> > + EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL));
> > if (!S_ISREG(inode->i_mode))
> > return 1;
> > if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
>
> This shouldn't be a BUG_ON; this gets triggered if there is an inode
> which is marked as needing data journalling.

Yeah, I have an open issue to remove all of these BUG_ONs after we've
had a chance to test this fairly thoroughly. In the meantime I was
leaving them in as a sanity check. I'll remove them from the next
version of the patch I send to you.

> In the case where there is no journal, please remove the KERN_ERR
> printk; that's just excess noise, and some day, Google System
> Administrators will thank you for getting rid of it. :-)

Heh. You're probably right.

> Actually, what we should probably do (as a separate patch before this
> one), is to remove ext4_create_journal() and the journal_inum mount
> option altogether. It's pointless code that was originally added to
> ext3 by Stephen Tweedie back when he wanted to convert ext2
> filesystems to ext3 before e2fsprogs supported tune2fs -j. It's been
> obsolete for over eight years, in both ext3 and ext4.

If by "we" you mean you, I'll just disregard those bits entirely. If
you mean me, I'll chat with my manager but I think it's doable since
it's pretty much just ripping out a bit of code.

> In any case, these are all pretty tiny nits; on the whole I think this
> patch is quite clean, so I'll add it to the ext4 patch queue for
> testing.

Thanks! And thanks for the comments, I will address them and generate a
new patch. Please think about the magic-number/ext4_sb_info thing,
though, and get back to me.

> I would appreciate getting an updated patch which addresses
> these suggested cleanups, though. Also, although I haven't tested it,
> I suspect we need to add a check so that if there is no journal, and
> the EXT4_FEATURE_INCOMPAT_RECOVERY feature is set, the mount should be
> aborted with an error, instead of just clearning the recovery flag and
> moving on. In actual practice, such a combination should never
> happen, but if it does, failing the mount is probably a safer thing
> to do.

Ah, good point. If we want to recover the journal and there isn't one
that's probably bad, yes.
--
Frank Mayhar <[email protected]>
Google, Inc.


2008-12-17 17:53:02

by Frank Mayhar

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Wed, 2008-12-17 at 01:00 -0500, Theodore Tso wrote:
> Note, the patch allow-ext4-to-run-without-a-journal conflicted with a
> large number of other patches in the patch queue. I fixed it up as
> best I could, and reviewed all of the changed patches, and I think
> it's OK. Please be cautious when first testing this, though.

Yeah, sorry, for reasons that are probably obvious this patch was
relative to the ext4 stable tree, not to the latest bits.
--
Frank Mayhar <[email protected]>
Google, Inc.


2008-12-18 06:51:14

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Dec 10, 2008 15:54 -0800, Frank Mayhar wrote:
> A few weeks ago I posted a patch for discussion that allowed ext4 to run
> without a journal. Since that time I've integrated the excellent
> comments from Andreas and fixed several serious bugs. We're currently
> running with this patch and generating some performance numbers against
> both ext2 (with backported reservations code) and ext4 with and without
> a journal. It just so happens that running without a journal is
> slightly faster for most everything.
>
> We did
> iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2
>
> which creates 4 threads, each of which create and do reads and writes on
> a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens
> to bypass the page cache. Results:
>
> ext2 ext4, default ext4, no journal
> initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s
> rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s
> reads 15.2 MB/s 16.9 MB/s 17.2 MB/s
> re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s
> random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s
> random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s
>
> So it seems that, so far, this was a useful exercise.

This would imply that there is very little value in keeping ext2 or
ext3 around for the long term anymore (though of course I'm not going
to suggest that we get rid of them today). It is now possible to mount
ext2 filesystems directly with ext4, and it seems clear that ext4 is
noticably faster, even with a journal.

> +#define EXT4_NOJOURNAL_MAGIC (void *)0x457874344e6a6e6c /* "Ext4Njnl" */

While I wouldn't go so far as Ted suggests to use "1" as the magic value,
it is definitely reasonable to not use a valid pointer value (so it should
be an odd number) and keeping it out of the kernel address space is also
useful. Ensuring that it is not a valid pointer is sufficient, because
the first element of struct handle_s is a pointer and it can never be an
odd number due to alignment requirements.

>> If you look at ext4_handle_valid you'll see where it's actually used. I
>> admit this is very non-obvious (so far it has confused at least two
>> different reviewers); if you have a more clear way to handle it I'm all
>> ears.

Adding a clear comments explaining this (not just a one-liner) at
ext4_sb_info so that no others are so confused about this.

> + /*
> + * We're not journaling. Return a pointer to our flag that
> + * indicates that fact.
> + */
> + current->journal_info = (handle_t *)&EXT4_SB(sb)->s_nojournal_flag;
> + return current->journal_info;


Since current->journal_info is a void * you shouldn't need to cast here.


Also, I thought there was somewhere that the "handle" was turned back into
ext4_sb_info again? I can't see this after looking through the code.
The EXT4_NOJOURNAL_MAGIC and s_nojournal_flag is only used to determine if
a handle is not valid, and I can't see anywhere that a handle is turned
into ext4_sb_info.

If there is nowhere that you need to translate a handle into a superblock
anymore then having a pointer to a static variable would be sufficient.
There is no need for a magic value, and dereferencing it will give an oops:

static int dummy_handle = 0x0bad;
void *ext4_no_journal_in_use = &dummy_handle;

static inline int ext4_handle_valid(handle_t *handle)
{
if (handle == ext4_no_journal_in_use)
return 0;
return 1;
}

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{

/*
* We're not journaling. Return a pointer to our flag that
* indicates that fact.
*/
current->journal_info = ext4_no_journal_in_use;
return current->journal_info;
}



Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2008-12-18 18:00:05

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

> A few weeks ago I posted a patch for discussion that allowed ext4 to run
> without a journal. Since that time I've integrated the excellent
> comments from Andreas and fixed several serious bugs. We're currently
> running with this patch and generating some performance numbers against
> both ext2 (with backported reservations code) and ext4 with and without
> a journal. It just so happens that running without a journal is
> slightly faster for most everything.
>
> We did
> iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2
>
> which creates 4 threads, each of which create and do reads and writes on
> a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens
> to bypass the page cache. Results:
>
> ext2 ext4, default ext4, no journal
> initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s
> rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s
> reads 15.2 MB/s 16.9 MB/s 17.2 MB/s
> re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s
> random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s
> random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s
>
> So it seems that, so far, this was a useful exercise.
Interesting although I'm not that surprised because those tests seem
to do a lot of data changes (which are never journaled in fact) and tiny
amount of metadata changes. If you run some benchmark doing lots of
directory operations, I guess the numbers would be considerably
different. Maybe trying dbench (I know it's kind of stupid ;) or
postmark will show the differences better.

Honza
--
Jan Kara <[email protected]>
SuSE CR Labs

2008-12-18 18:13:18

by Michael Rubin

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

On Thu, Dec 18, 2008 at 10:00 AM, Jan Kara <[email protected]> wrote:
> Interesting although I'm not that surprised because those tests seem
> to do a lot of data changes (which are never journaled in fact) and tiny
> amount of metadata changes. If you run some benchmark doing lots of
> directory operations, I guess the numbers would be considerably
> different.

Actually we have some compile bench numbers (coming to this list soon)
that also surprised us.
The stages of compile bench that I believe are dominated by directory
operations are also
showing improvements without the journal.

> Maybe trying dbench (I know it's kind of stupid ;) or
> postmark will show the differences better.

I admit also we see huge variance using dbench on subsequent runs. To
the point where I don't know how much I trust it's numbers.
Is this a tool that people on this list have a lot of faith in?

mrubin

2008-12-18 18:27:22

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

> On Thu, Dec 18, 2008 at 10:00 AM, Jan Kara <[email protected]> wrote:
> > Interesting although I'm not that surprised because those tests seem
> > to do a lot of data changes (which are never journaled in fact) and tiny
> > amount of metadata changes. If you run some benchmark doing lots of
> > directory operations, I guess the numbers would be considerably
> > different.
>
> Actually we have some compile bench numbers (coming to this list soon)
> that also surprised us.
> The stages of compile bench that I believe are dominated by directory
> operations are also showing improvements without the journal.
Yes, compilebench excercises directory operations quite heavily so
that is the kind of benchmark I'd be interested in :).

> > Maybe trying dbench (I know it's kind of stupid ;) or
> > postmark will show the differences better.
>
> I admit also we see huge variance using dbench on subsequent runs. To
> the point where I don't know how much I trust it's numbers.
> Is this a tool that people on this list have a lot of faith in?
Not really (at least as far as I'm concerned :). I had to deal with
dbench recently tracking some reported performance regression and lost
a lot of my faith in it ;). It measures something but the numbers
vary greatly and also the load it puts on the filesystem is not much
realistic (lots of rewrites of the same pages and file deletes just
after creation). I think compilebench is a better choice, I just didn't
remember its name when I was writing the email.

Honza
--
Jan Kara <[email protected]>
SuSE CR Labs

2008-12-18 18:29:24

by Curt Wohlgemuth

[permalink] [raw]
Subject: Re: [PATCH 1/1] Allow ext4 to run without a journal.

I should be able to post some numbers we're seeing for compilebench on
various filesystems next week, including ext4 without a journal
(Frank's patch).

Curt

On Thu, Dec 18, 2008 at 10:13 AM, Michael Rubin <[email protected]> wrote:
> On Thu, Dec 18, 2008 at 10:00 AM, Jan Kara <[email protected]> wrote:
>> Interesting although I'm not that surprised because those tests seem
>> to do a lot of data changes (which are never journaled in fact) and tiny
>> amount of metadata changes. If you run some benchmark doing lots of
>> directory operations, I guess the numbers would be considerably
>> different.
>
> Actually we have some compile bench numbers (coming to this list soon)
> that also surprised us.
> The stages of compile bench that I believe are dominated by directory
> operations are also
> showing improvements without the journal.
>
>> Maybe trying dbench (I know it's kind of stupid ;) or
>> postmark will show the differences better.
>
> I admit also we see huge variance using dbench on subsequent runs. To
> the point where I don't know how much I trust it's numbers.
> Is this a tool that people on this list have a lot of faith in?
>
> mrubin
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2009-01-03 15:52:25

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] Allow ext4 to run without a journal.

I've modified your patch to reflect the comments I made two weeks ago.
Were there any other changes you have planned? I'd like to get this
pushed to Linus soon....

- Ted

ext4: Allow ext4 to run without a journal

From: Frank Mayhar <[email protected]>

A few weeks ago I posted a patch for discussion that allowed ext4 to run
without a journal. Since that time I've integrated the excellent
comments from Andreas and fixed several serious bugs. We're currently
running with this patch and generating some performance numbers against
both ext2 (with backported reservations code) and ext4 with and without
a journal. It just so happens that running without a journal is
slightly faster for most everything.

We did
iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2

which creates 4 threads, each of which create and do reads and writes on
a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens
to bypass the page cache. Results:

ext2 ext4, default ext4, no journal
initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s
rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s
reads 15.2 MB/s 16.9 MB/s 17.2 MB/s
re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s
random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s
random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s

So it seems that, so far, this was a useful exercise.

Signed-off-by: Frank Mayhar <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
---
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 35f5f9a..31ebeb5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -531,11 +531,11 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err) err = ret;
*pdquot_freed_blocks += group_freed;

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c75384b..fc4a46f 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -7,53 +7,91 @@
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_undo_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_undo_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_write_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_write_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_forget(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_forget(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh)
{
- int err = jbd2_journal_revoke(handle, blocknr, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_revoke(handle, blocknr, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh)
{
- int err = jbd2_journal_get_create_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_create_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ }
return err;
}

-int __ext4_journal_dirty_metadata(const char *where,
- handle_t *handle, struct buffer_head *bh)
+int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
+ struct inode *inode, struct buffer_head *bh)
{
- int err = jbd2_journal_dirty_metadata(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_dirty_metadata(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ } else {
+ mark_buffer_dirty(bh);
+ if (inode && inode_needs_sync(inode)) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ ext4_error(inode->i_sb, __func__,
+ "IO error syncing inode, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long) bh->b_blocknr);
+ err = -EIO;
+ }
+ }
+ }
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b455c68..663197a 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,12 +122,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
* been done yet.
*/

-static inline void ext4_journal_release_buffer(handle_t *handle,
- struct buffer_head *bh)
-{
- jbd2_journal_release_buffer(handle, bh);
-}
-
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);

@@ -146,8 +140,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh);

-int __ext4_journal_dirty_metadata(const char *where,
- handle_t *handle, struct buffer_head *bh);
+int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
+ struct inode *inode, struct buffer_head *bh);

#define ext4_journal_get_undo_access(handle, bh) \
__ext4_journal_get_undo_access(__func__, (handle), (bh))
@@ -157,14 +151,57 @@ int __ext4_journal_dirty_metadata(const char *where,
__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
-#define ext4_journal_dirty_metadata(handle, bh) \
- __ext4_journal_dirty_metadata(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
__ext4_journal_forget(__func__, (handle), (bh))
+#define ext4_handle_dirty_metadata(handle, inode, bh) \
+ __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle);

+#define EXT4_NOJOURNAL_HANDLE ((handle_t *) 0x1)
+
+static inline int ext4_handle_valid(handle_t *handle)
+{
+ if (handle == EXT4_NOJOURNAL_HANDLE)
+ return 0;
+ return 1;
+}
+
+static inline void ext4_handle_sync(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ handle->h_sync = 1;
+}
+
+static inline void ext4_handle_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
+static inline int ext4_handle_is_aborted(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ return is_handle_aborted(handle);
+ return 0;
+}
+
+static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
+{
+ if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
+ return 0;
+ return 1;
+}
+
+static inline void ext4_journal_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
{
return ext4_journal_start_sb(inode->i_sb, nblocks);
@@ -180,27 +217,37 @@ static inline handle_t *ext4_journal_current_handle(void)

static inline int ext4_journal_extend(handle_t *handle, int nblocks)
{
- return jbd2_journal_extend(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_extend(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
- return jbd2_journal_restart(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_restart(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
- return jbd2_journal_blocks_per_page(inode);
+ if (EXT4_JOURNAL(inode) != NULL)
+ return jbd2_journal_blocks_per_page(inode);
+ return 0;
}

static inline int ext4_journal_force_commit(journal_t *journal)
{
- return jbd2_journal_force_commit(journal);
+ if (journal)
+ return jbd2_journal_force_commit(journal);
+ return 0;
}

static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
- return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ return 0;
}

/* super.c */
@@ -208,6 +255,8 @@ int ext4_force_commit(struct super_block *sb);

static inline int ext4_should_journal_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 1;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -219,6 +268,8 @@ static inline int ext4_should_journal_data(struct inode *inode)

static inline int ext4_should_order_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
@@ -230,6 +281,8 @@ static inline int ext4_should_order_data(struct inode *inode)

static inline int ext4_should_writeback_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0917be5..743e3fe 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
@@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
int err;
if (path->p_bh) {
/* path points to block */
- err = ext4_journal_dirty_metadata(handle, path->p_bh);
+ err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
} else {
/* path points to leaf/index in inode body */
err = ext4_mark_inode_dirty(handle, inode);
@@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto out;

@@ -2947,7 +2949,7 @@ void ext4_ext_truncate(struct inode *inode)
* transaction synchronous.
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

out_stop:
up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 08cac9f..42eea1c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
spin_unlock(sb_bgl_lock(sbi, flex_group));
}
}
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (!fatal) fatal = err;
}
- BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
sb->s_dirt = 1;
@@ -656,15 +656,16 @@ repeat_in_this_group:
ino, bitmap_bh->b_data)) {
/* we won it */
BUFFER_TRACE(bitmap_bh,
- "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle,
+ "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
- jbd2_journal_release_buffer(handle, bitmap_bh);
+ ext4_handle_release_buffer(handle, bitmap_bh);

if (++ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
@@ -726,7 +727,8 @@ got:
/* Don't need to dirty bitmap block if we didn't change it */
if (free) {
BUFFER_TRACE(block_bh, "dirty block bitmap");
- err = ext4_journal_dirty_metadata(handle, block_bh);
+ err = ext4_handle_dirty_metadata(handle,
+ NULL, block_bh);
}

brelse(block_bh);
@@ -771,8 +773,8 @@ got:
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (err) goto fail;

percpu_counter_dec(&sbi->s_freeinodes_counter);
@@ -825,7 +827,7 @@ got:

ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
insert_inode_hash(inode);
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
@@ -1024,4 +1026,3 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
-
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5840576..5c8d71c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,12 +71,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
+ *
+ * If the handle isn't valid we're not journaling so there's nothing to do.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
+
might_sleep();

BUFFER_TRACE(bh, "enter");
@@ -169,7 +174,9 @@ static handle_t *start_transaction(struct inode *inode)
*/
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (!ext4_handle_valid(handle))
+ return 0;
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
return 0;
@@ -183,6 +190,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
*/
static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle);
return ext4_journal_restart(handle, blocks_for_truncate(inode));
}
@@ -215,7 +223,7 @@ void ext4_delete_inode(struct inode *inode)
}

if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -232,7 +240,7 @@ void ext4_delete_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
- if (handle->h_buffer_credits < 3) {
+ if (!ext4_handle_has_enough_credits(handle, 3)) {
err = ext4_journal_extend(handle, 3);
if (err > 0)
err = ext4_journal_restart(handle, 3);
@@ -716,8 +724,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto failed;
}
@@ -799,8 +807,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
*/
jbd_debug(5, "splicing indirect only\n");
- BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, where->bh);
+ BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, where->bh);
if (err)
goto err_out;
} else {
@@ -1228,8 +1236,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (!fatal)
fatal = err;
} else {
@@ -1394,7 +1402,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
set_buffer_uptodate(bh);
- return ext4_journal_dirty_metadata(handle, bh);
+ return ext4_handle_dirty_metadata(handle, NULL, bh);
}

/*
@@ -2761,7 +2769,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
filemap_write_and_wait(mapping);
}

- if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+ BUG_ON(!EXT4_JOURNAL(inode) &&
+ EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
+
+ if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
/*
* This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare:
@@ -3032,7 +3043,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
if (offset == 0)
ClearPageChecked(page);

- jbd2_journal_invalidatepage(journal, page, offset);
+ if (journal)
+ jbd2_journal_invalidatepage(journal, page, offset);
+ else
+ block_invalidatepage(page, offset);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3042,7 +3056,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ if (journal)
+ return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ else
+ return try_to_free_buffers(page);
}

/*
@@ -3314,7 +3331,7 @@ int ext4_block_truncate_page(handle_t *handle,

err = 0;
if (ext4_should_journal_data(inode)) {
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
if (ext4_should_order_data(inode))
err = ext4_jbd2_file_inode(handle, inode);
@@ -3438,8 +3455,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
__le32 *p;
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, inode, bh);
}
ext4_mark_inode_dirty(handle, inode);
ext4_journal_test_restart(handle, inode);
@@ -3539,7 +3556,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
count, block_to_free_p, p);

if (this_bh) {
- BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+ BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

/*
* The buffer head should have an attached journal head at this
@@ -3548,7 +3565,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
* the block was cleared. Check for this instead of OOPSing.
*/
if (bh2jh(this_bh))
- ext4_journal_dirty_metadata(handle, this_bh);
+ ext4_handle_dirty_metadata(handle, inode, this_bh);
else
ext4_error(inode->i_sb, __func__,
"circular indirect block detected, "
@@ -3578,7 +3595,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
ext4_fsblk_t nr;
__le32 *p;

- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;

if (depth--) {
@@ -3648,7 +3665,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* will merely complain about releasing a free block,
* rather than leaking blocks.
*/
- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;
if (try_to_extend_transaction(handle, inode)) {
ext4_mark_inode_dirty(handle, inode);
@@ -3667,9 +3684,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
parent_bh)){
*p = 0;
BUFFER_TRACE(parent_bh,
- "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle,
- parent_bh);
+ "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle,
+ inode,
+ parent_bh);
}
}
}
@@ -3857,7 +3875,7 @@ do_indirects:
* synchronous
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
out_stop:
/*
* If this was a simple ftruncate(), and the file will remain alive
@@ -4354,8 +4372,8 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1;
- handle->h_sync = 1;
- err = ext4_journal_dirty_metadata(handle,
+ ext4_handle_sync(handle);
+ err = ext4_handle_dirty_metadata(handle, inode,
EXT4_SB(sb)->s_sbh);
}
}
@@ -4382,9 +4400,8 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
}

-
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- rc = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ rc = ext4_handle_dirty_metadata(handle, inode, bh);
if (!err)
err = rc;
ei->i_state &= ~EXT4_STATE_NEW;
@@ -4447,6 +4464,25 @@ int ext4_write_inode(struct inode *inode, int wait)
return ext4_force_commit(inode->i_sb);
}

+int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
+{
+ int err = 0;
+
+ mark_buffer_dirty(bh);
+ if (inode && inode_needs_sync(inode)) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ ext4_error(inode->i_sb, __func__,
+ "IO error syncing inode, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)bh->b_blocknr);
+ err = -EIO;
+ }
+ }
+ return err;
+}
+
/*
* ext4_setattr()
*
@@ -4751,16 +4787,15 @@ int
ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext4_iloc *iloc)
{
- int err = 0;
- if (handle) {
- err = ext4_get_inode_loc(inode, iloc);
- if (!err) {
- BUFFER_TRACE(iloc->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err) {
- brelse(iloc->bh);
- iloc->bh = NULL;
- }
+ int err;
+
+ err = ext4_get_inode_loc(inode, iloc);
+ if (!err) {
+ BUFFER_TRACE(iloc->bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, iloc->bh);
+ if (err) {
+ brelse(iloc->bh);
+ iloc->bh = NULL;
}
}
ext4_std_error(inode->i_sb, err);
@@ -4832,7 +4867,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)

might_sleep();
err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+ if (ext4_handle_valid(handle) &&
+ EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
/*
* We need extra buffer credits since we may write into EA block
@@ -4884,6 +4920,11 @@ void ext4_dirty_inode(struct inode *inode)
handle_t *current_handle = ext4_journal_current_handle();
handle_t *handle;

+ if (!ext4_handle_valid(current_handle)) {
+ ext4_mark_inode_dirty(current_handle, inode);
+ return;
+ }
+
handle = ext4_journal_start(inode, 2);
if (IS_ERR(handle))
goto out;
@@ -4921,8 +4962,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
BUFFER_TRACE(iloc.bh, "get_write_access");
err = jbd2_journal_get_write_access(handle, iloc.bh);
if (!err)
- err = ext4_journal_dirty_metadata(handle,
- iloc.bh);
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
+ iloc.bh);
brelse(iloc.bh);
}
}
@@ -4948,6 +4990,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/

journal = EXT4_JOURNAL(inode);
+ if (!journal)
+ return 0;
if (is_journal_aborted(journal))
return -EROFS;

@@ -4977,7 +5021,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);

err = ext4_mark_inode_dirty(handle, inode);
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
ext4_journal_stop(handle);
ext4_std_error(inode->i_sb, err);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index dc99b47..42dc83f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -99,7 +99,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
goto flags_err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7beab71..edb512b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2553,7 +2553,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
ext4_mb_init_per_dev_proc(sb);
ext4_mb_history_init(sb);

- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback = release_blocks_on_commit;

printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
return 0;
@@ -2854,7 +2855,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
err = -EAGAIN;
goto out_err;
@@ -2901,10 +2902,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
spin_unlock(sb_bgl_lock(sbi, flex_group));
}

- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (err)
goto out_err;
- err = ext4_journal_dirty_metadata(handle, gdp_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
sb->s_dirt = 1;
@@ -4414,7 +4415,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct rb_node **n = &db->bb_free_root.rb_node, *node;
struct rb_node *parent = NULL, *new_node;

-
+ BUG_ON(!ext4_handle_valid(handle));
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);

@@ -4600,7 +4601,7 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

if (ac) {
ac->ac_b_ex.fe_group = block_group;
@@ -4609,7 +4610,7 @@ do_more:
ext4_mb_store_history(ac);
}

- if (metadata) {
+ if (metadata && ext4_handle_valid(handle)) {
/* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed */
ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
@@ -4639,7 +4640,7 @@ do_more:

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err)
err = ret;

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f2a9cf4..e7cd488 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -59,7 +59,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
/*
* Make sure the credit we accumalated is not really high
*/
- if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
+ if (needed && ext4_handle_has_enough_credits(handle,
+ EXT4_RESERVE_TRANS_BLOCKS)) {
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
@@ -229,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
int retval = 0, needed;

- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
/*
* We are freeing a blocks. During this we touch
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 39c0065..36bfb7f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1233,10 +1233,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, dir, bh2);
if (err)
goto journal_error;
- err = ext4_journal_dirty_metadata(handle, frame->bh);
+ err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
if (err)
goto journal_error;
brelse(bh2);
@@ -1340,8 +1340,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
brelse(bh);
@@ -1581,7 +1581,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dxtrace(dx_show_index("node", frames[1].entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, inode, bh2);
if (err)
goto journal_error;
brelse (bh2);
@@ -1607,7 +1607,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext4_journal_dirty_metadata(handle, frames[0].bh);
+ ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1653,8 +1653,8 @@ static int ext4_delete_entry(handle_t *handle,
else
de->inode = 0;
dir->i_version++;
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, bh);
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len);
@@ -1730,7 +1730,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode (handle, dir, mode);
err = PTR_ERR(inode);
@@ -1764,7 +1764,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, mode);
err = PTR_ERR(inode);
@@ -1800,7 +1800,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
err = PTR_ERR(inode);
@@ -1829,8 +1829,8 @@ retry:
strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
- BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_block);
+ BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, dir_block);
brelse(dir_block);
ext4_mark_inode_dirty(handle, inode);
err = ext4_add_entry(handle, dentry, inode);
@@ -1968,7 +1968,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/* Insert this inode at the head of the on-disk orphan list... */
NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
err = rc;
@@ -2024,7 +2024,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
* transaction handle with which to update the orphan list on
* disk, but we still need to remove the inode from the linked
* list in memory. */
- if (!handle)
+ if (sbi->s_journal && !handle)
goto out;

err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2038,7 +2038,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
if (err)
goto out_brelse;
sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
} else {
struct ext4_iloc iloc2;
struct inode *i_prev =
@@ -2089,7 +2089,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = dentry->d_inode;

@@ -2143,7 +2143,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@ -2200,7 +2200,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
@@ -2263,7 +2263,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
@@ -2305,7 +2305,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return PTR_ERR(handle);

if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
/*
@@ -2359,8 +2359,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dir->i_ctime = new_dir->i_mtime =
ext4_current_time(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
- BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, new_bh);
+ BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, new_dir, new_bh);
brelse(new_bh);
new_bh = NULL;
}
@@ -2410,8 +2410,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
BUFFER_TRACE(dir_bh, "get_write_access");
ext4_journal_get_write_access(handle, dir_bh);
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
- BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_bh);
+ BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d448eb1..1665aa1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
{
int err;

- if (handle->h_buffer_credits >= thresh)
+ if (ext4_handle_has_enough_credits(handle, thresh))
return 0;

err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(bh);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(it);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, it);
+ ext4_handle_dirty_metadata(handle, NULL, it);
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
@@ -286,7 +286,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);

/* Mark unused entries in inode bitmap used */
@@ -299,7 +299,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);

@@ -486,12 +486,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- ext4_journal_dirty_metadata(handle, dind);
+ ext4_handle_dirty_metadata(handle, NULL, dind);
brelse(dind);
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext4_journal_dirty_metadata(handle, *primary);
+ ext4_handle_dirty_metadata(handle, NULL, *primary);

o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -502,7 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);

le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);

return 0;

@@ -618,7 +618,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
primary[i]->b_blocknr, gdbackups,
blk + primary[i]->b_blocknr); */
data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
- err2 = ext4_journal_dirty_metadata(handle, primary[i]);
+ err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
if (!err)
err = err2;
}
@@ -676,7 +676,8 @@ static void update_backups(struct super_block *sb,
struct buffer_head *bh;

/* Out of journal space, and can't get more - abort - so sad */
- if (handle->h_buffer_credits == 0 &&
+ if (ext4_handle_valid(handle) &&
+ handle->h_buffer_credits == 0 &&
ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
break;
@@ -696,7 +697,7 @@ static void update_backups(struct super_block *sb,
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
}
if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -916,7 +917,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;

- ext4_journal_dirty_metadata(handle, primary);
+ ext4_handle_dirty_metadata(handle, NULL, primary);

/* Update the reserved block counts only once the new group is
* active. */
@@ -938,7 +939,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
EXT4_INODES_PER_GROUP(sb);
}

- ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
sb->s_dirt = 1;

exit_journal:
@@ -1072,7 +1073,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put;
}
ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba23b48..fae1a20 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -136,13 +136,20 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
- if (is_journal_aborted(journal)) {
- ext4_abort(sb, __func__,
- "Detected aborted journal");
- return ERR_PTR(-EROFS);
+ if (journal) {
+ if (is_journal_aborted(journal)) {
+ ext4_abort(sb, __func__,
+ "Detected aborted journal");
+ return ERR_PTR(-EROFS);
+ }
+ return jbd2_journal_start(journal, nblocks);
}
-
- return jbd2_journal_start(journal, nblocks);
+ /*
+ * We're not journaling. Return a pointer to our flag that
+ * indicates that fact.
+ */
+ current->journal_info = EXT4_NOJOURNAL_HANDLE;
+ return current->journal_info;
}

/*
@@ -157,6 +164,14 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
int err;
int rc;

+ if (!ext4_handle_valid(handle)) {
+ /*
+ * Do this here since we don't call jbd2_journal_stop() in
+ * no-journal mode.
+ */
+ current->journal_info = NULL;
+ return 0;
+ }
sb = handle->h_transaction->t_journal->j_private;
err = handle->h_err;
rc = jbd2_journal_stop(handle);
@@ -174,6 +189,8 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);

+ BUG_ON(!ext4_handle_valid(handle));
+
if (bh)
BUFFER_TRACE(bh, "abort");

@@ -448,11 +465,12 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if (err < 0)
- ext4_abort(sb, __func__, "Couldn't clean up the journal");
-
+ if (sbi->s_journal) {
+ err = jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ if (err < 0)
+ ext4_abort(sb, __func__, "Couldn't clean up the journal");
+ }
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -522,6 +540,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
+ /*
+ * Note: We can be called before EXT4_SB(sb)->s_journal is set,
+ * therefore it can be null here. Don't check it, just initialize
+ * jinode.
+ */
jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
@@ -588,7 +611,8 @@ static void ext4_clear_inode(struct inode *inode)
}
#endif
ext4_discard_preallocations(inode);
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+ if (EXT4_JOURNAL(inode))
+ jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode);
}

@@ -1419,7 +1443,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
le16_add_cpu(&es->s_mnt_count, 1);
es->s_mtime = cpu_to_le32(get_seconds());
ext4_update_dynamic_rev(sb);
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ if (sbi->s_journal)
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);

ext4_commit_super(sb, es, 1);
if (test_opt(sb, DEBUG))
@@ -1431,9 +1456,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt);

- printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
- sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
- "external", EXT4_SB(sb)->s_journal->j_devname);
+ if (EXT4_SB(sb)->s_journal) {
+ printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+ sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+ "external", EXT4_SB(sb)->s_journal->j_devname);
+ } else {
+ printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+ }
return res;
}

@@ -2288,11 +2317,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_create_journal(sb, es, journal_inum))
goto failed_mount3;
} else {
- if (!silent)
- printk(KERN_ERR
- "ext4: No journal on filesystem on %s\n",
- sb->s_id);
- goto failed_mount3;
+ printk("ext4: Mounting filesystem %s without journal\n",
+ sb->s_id);
+ clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ sbi->s_journal = NULL;
+ es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
+ needs_recovery = 0;
+ goto no_journal;
}

if (ext4_blocks_count(es) > 0xffffffffULL &&
@@ -2344,6 +2376,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
break;
}

+no_journal:
+
if (test_opt(sb, NOBH)) {
if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
@@ -2431,10 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (needs_recovery)
printk(KERN_INFO "EXT4-fs: recovery complete.\n");
ext4_mark_recovery_complete(sb, es);
- printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
+ if (EXT4_SB(sb)->s_journal) {
+ printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
+ test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
+ "writeback");
+ }
+
+ printk(KERN_INFO "EXT4-fs: %s mounted. Flags: 0x%lx Opts: %s\n",
+ sb->s_id, sb->s_flags, (char *)data);

lock_kernel();
return 0;
@@ -2446,8 +2485,10 @@ cantfind_ext4:
goto failed_mount;

failed_mount4:
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
+ if (sbi->s_journal) {
+ jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ }
failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -2508,6 +2549,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
/* First, test for the existence of a valid inode on disk. Bad
* things happen if we iget() an unused inode, as the subsequent
* iput() will try to delete it. */
@@ -2556,6 +2599,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
bdev = ext4_blkdev_get(j_dev);
if (bdev == NULL)
return NULL;
@@ -2643,6 +2688,8 @@ static int ext4_load_journal(struct super_block *sb,
int err = 0;
int really_read_only;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
printk(KERN_INFO "EXT4-fs: external journal device major/minor "
@@ -2817,6 +2864,10 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
{
journal_t *journal = EXT4_SB(sb)->s_journal;

+ if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+ BUG_ON(journal != NULL);
+ return;
+ }
jbd2_journal_lock_updates(journal);
if (jbd2_journal_flush(journal) < 0)
goto out;
@@ -2846,6 +2897,8 @@ static void ext4_clear_journal_err(struct super_block *sb,
int j_errno;
const char *errstr;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
journal = EXT4_SB(sb)->s_journal;

/*
@@ -2878,14 +2931,17 @@ static void ext4_clear_journal_err(struct super_block *sb,
int ext4_force_commit(struct super_block *sb)
{
journal_t *journal;
- int ret;
+ int ret = 0;

if (sb->s_flags & MS_RDONLY)
return 0;

journal = EXT4_SB(sb)->s_journal;
- sb->s_dirt = 0;
- ret = ext4_journal_force_commit(journal);
+ if (journal) {
+ sb->s_dirt = 0;
+ ret = ext4_journal_force_commit(journal);
+ }
+
return ret;
}

@@ -2897,9 +2953,13 @@ int ext4_force_commit(struct super_block *sb)
*/
static void ext4_write_super(struct super_block *sb)
{
- if (mutex_trylock(&sb->s_lock) != 0)
- BUG();
- sb->s_dirt = 0;
+ if (EXT4_SB(sb)->s_journal) {
+ if (mutex_trylock(&sb->s_lock) != 0)
+ BUG();
+ sb->s_dirt = 0;
+ } else {
+ ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+ }
}

static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -2911,7 +2971,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
if (wait)
ret = ext4_force_commit(sb);
else
- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
return ret;
}

@@ -2926,15 +2987,17 @@ static void ext4_write_super_lockfs(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY)) {
journal_t *journal = EXT4_SB(sb)->s_journal;

- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
+ if (journal) {
+ /* Now we set up the journal barrier. */
+ jbd2_journal_lock_updates(journal);

- /*
- * We don't want to clear needs_recovery flag when we failed
- * to flush the journal.
- */
- if (jbd2_journal_flush(journal) < 0)
- return;
+ /*
+ * We don't want to clear needs_recovery flag when we
+ * failed to flush the journal.
+ */
+ if (jbd2_journal_flush(journal) < 0)
+ return;
+ }

/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2948,7 +3011,7 @@ static void ext4_write_super_lockfs(struct super_block *sb)
*/
static void ext4_unlockfs(struct super_block *sb)
{
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
lock_super(sb);
/* Reser the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2999,7 +3062,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)

es = sbi->s_es;

- ext4_init_journal_params(sb, sbi->s_journal);
+ if (sbi->s_journal)
+ ext4_init_journal_params(sb, sbi->s_journal);

if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) {
@@ -3084,7 +3148,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal)
+ ext4_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((err = ext4_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
@@ -3092,6 +3157,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~MS_RDONLY;
}
}
+ if (sbi->s_journal == NULL)
+ ext4_commit_super(sb, es, 1);
+
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
@@ -3368,7 +3436,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
- if (ext4_should_journal_data(path.dentry->d_inode)) {
+ if (EXT4_SB(sb)->s_journal &&
+ ext4_should_journal_data(path.dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -3442,7 +3511,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();

- if (!handle) {
+ if (EXT4_SB(sb)->s_journal && !handle) {
printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
@@ -3467,7 +3536,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
unlock_buffer(bh);
if (journal_quota)
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
else {
/* Always do at least ordered writes for quotas */
err = ext4_jbd2_file_inode(handle, inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 9b4a368..157ce65 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -457,7 +457,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
sb->s_dirt = 1;
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
}
}

@@ -487,9 +487,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- error = ext4_journal_dirty_metadata(handle, bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
DQUOT_FREE_BLOCK(inode, 1);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
@@ -724,8 +724,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error == -EIO)
goto bad_block;
if (!error)
- error = ext4_journal_dirty_metadata(handle,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -794,8 +795,9 @@ inserted:
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
- error = ext4_journal_dirty_metadata(handle,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -833,7 +835,8 @@ getblk_failed:
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(new_bh);
- error = ext4_journal_dirty_metadata(handle, new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode, new_bh);
if (error)
goto cleanup;
}
@@ -1040,7 +1043,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
*/
is.iloc.bh = NULL;
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
}

cleanup:

2009-01-03 20:19:45

by Frank Mayhar

[permalink] [raw]
Subject: Re: [PATCH] Allow ext4 to run without a journal.

On Sat, 2009-01-03 at 10:52 -0500, Theodore Tso wrote:
> I've modified your patch to reflect the comments I made two weeks ago.
> Were there any other changes you have planned? I'd like to get this
> pushed to Linus soon....

My apologies, I have a new patch that has been tested quite a bit more
(although I still wouldn't say quite thoroughly yet, but close), all I
need to do is set it up correctly for you guys and send it to the list.
Unfortunately other stuff (including the holidays) has gotten in the
way. Expect it on Monday or Tuesday. Note that this version will have
a handful of small but important bugs fixed, as well.

One question: Shold I make the patch relative to the stable tree or to
the latest bits?
--
Frank Mayhar <[email protected]>
Google, Inc.


2009-01-04 01:17:53

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] Allow ext4 to run without a journal.

On Sat, Jan 03, 2009 at 12:19:00PM -0800, Frank Mayhar wrote:
> On Sat, 2009-01-03 at 10:52 -0500, Theodore Tso wrote:
> > I've modified your patch to reflect the comments I made two weeks ago.
> > Were there any other changes you have planned? I'd like to get this
> > pushed to Linus soon....
>
> My apologies, I have a new patch that has been tested quite a bit more
> (although I still wouldn't say quite thoroughly yet, but close), all I
> need to do is set it up correctly for you guys and send it to the list.
> Unfortunately other stuff (including the holidays) has gotten in the
> way. Expect it on Monday or Tuesday. Note that this version will have
> a handful of small but important bugs fixed, as well.
>
> One question: Shold I make the patch relative to the stable tree or to
> the latest bits?

At this point can you make your patch relative to whatever you used
last time (and remind me what it is)? Or, can you give me a delta
between what you did last time and what you have now that has the
fixes? The problem is that the patch touches so much of ext4 that
it's quite painful to maintain it in the ext4 patch queue --- it
either invalidates a large number of patches after the fact, or the
patch needs a huge amount of work to fix up patch conflicts if it is
applied afterwards --- and if it is placed in the middle of the patch
queue, both conditions apply.

So I will have to do a lot of fix ups either way. So unless you're
willing to pull down the ext4 patch queue, at:

git://repo.or.cz/ext4-patch-queue.git

... and worry about making it apply in place, it's probably easier for
you to give me delta's versus the last patch, and I'll integrate in
the changes by hand.

- Ted

2009-01-05 05:31:39

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] Allow ext4 to run without a journal.


Here is an updated patch that adds a check to make sure the right
thing happens if an ext3 filesystem that needs recovery is mounted -o
noload. This also fixes a potential oops in
ext4_mark_recovery_complete() journal doesn't exist, and we avoid
adding and remove orphans from the orphan list when the journal
doesn't exist.

- Ted

ext4: Allow ext4 to run without a journal

From: Frank Mayhar <[email protected]>

A few weeks ago I posted a patch for discussion that allowed ext4 to run
without a journal. Since that time I've integrated the excellent
comments from Andreas and fixed several serious bugs. We're currently
running with this patch and generating some performance numbers against
both ext2 (with backported reservations code) and ext4 with and without
a journal. It just so happens that running without a journal is
slightly faster for most everything.

We did
iozone -T -t 4 s 2g -r 256k -T -I -i0 -i1 -i2

which creates 4 threads, each of which create and do reads and writes on
a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens
to bypass the page cache. Results:

ext2 ext4, default ext4, no journal
initial writes 13.0 MB/s 15.4 MB/s 15.7 MB/s
rewrites 13.1 MB/s 15.6 MB/s 15.9 MB/s
reads 15.2 MB/s 16.9 MB/s 17.2 MB/s
re-reads 15.3 MB/s 16.9 MB/s 17.2 MB/s
random readers 5.6 MB/s 5.6 MB/s 5.7 MB/s
random writers 5.1 MB/s 5.3 MB/s 5.4 MB/s

So it seems that, so far, this was a useful exercise.

Signed-off-by: Frank Mayhar <[email protected]>
Signed-off-by: "Theodore Ts'o" <[email protected]>
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 35f5f9a..31ebeb5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -531,11 +531,11 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err) err = ret;
*pdquot_freed_blocks += group_freed;

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c75384b..ad13a84 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -7,53 +7,96 @@
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_undo_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_undo_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ }
return err;
}

int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_get_write_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_write_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ }
return err;
}

int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh)
{
- int err = jbd2_journal_forget(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_forget(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ }
return err;
}

int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh)
{
- int err = jbd2_journal_revoke(handle, blocknr, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_revoke(handle, blocknr, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ }
return err;
}

int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh)
{
- int err = jbd2_journal_get_create_access(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_get_create_access(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ }
return err;
}

-int __ext4_journal_dirty_metadata(const char *where,
- handle_t *handle, struct buffer_head *bh)
+int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
+ struct inode *inode, struct buffer_head *bh)
{
- int err = jbd2_journal_dirty_metadata(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh, handle, err);
+ int err = 0;
+
+ if (ext4_handle_valid(handle)) {
+ err = jbd2_journal_dirty_metadata(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ } else {
+ mark_buffer_dirty(bh);
+ if (inode && inode_needs_sync(inode)) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ ext4_error(inode->i_sb, __func__,
+ "IO error syncing inode, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long) bh->b_blocknr);
+ err = -EIO;
+ }
+ }
+ }
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index b455c68..663197a 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -122,12 +122,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
* been done yet.
*/

-static inline void ext4_journal_release_buffer(handle_t *handle,
- struct buffer_head *bh)
-{
- jbd2_journal_release_buffer(handle, bh);
-}
-
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);

@@ -146,8 +140,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh);

-int __ext4_journal_dirty_metadata(const char *where,
- handle_t *handle, struct buffer_head *bh);
+int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
+ struct inode *inode, struct buffer_head *bh);

#define ext4_journal_get_undo_access(handle, bh) \
__ext4_journal_get_undo_access(__func__, (handle), (bh))
@@ -157,14 +151,57 @@ int __ext4_journal_dirty_metadata(const char *where,
__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
-#define ext4_journal_dirty_metadata(handle, bh) \
- __ext4_journal_dirty_metadata(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
__ext4_journal_forget(__func__, (handle), (bh))
+#define ext4_handle_dirty_metadata(handle, inode, bh) \
+ __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle);

+#define EXT4_NOJOURNAL_HANDLE ((handle_t *) 0x1)
+
+static inline int ext4_handle_valid(handle_t *handle)
+{
+ if (handle == EXT4_NOJOURNAL_HANDLE)
+ return 0;
+ return 1;
+}
+
+static inline void ext4_handle_sync(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ handle->h_sync = 1;
+}
+
+static inline void ext4_handle_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
+static inline int ext4_handle_is_aborted(handle_t *handle)
+{
+ if (ext4_handle_valid(handle))
+ return is_handle_aborted(handle);
+ return 0;
+}
+
+static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
+{
+ if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
+ return 0;
+ return 1;
+}
+
+static inline void ext4_journal_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ if (ext4_handle_valid(handle))
+ jbd2_journal_release_buffer(handle, bh);
+}
+
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
{
return ext4_journal_start_sb(inode->i_sb, nblocks);
@@ -180,27 +217,37 @@ static inline handle_t *ext4_journal_current_handle(void)

static inline int ext4_journal_extend(handle_t *handle, int nblocks)
{
- return jbd2_journal_extend(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_extend(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
- return jbd2_journal_restart(handle, nblocks);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_restart(handle, nblocks);
+ return 0;
}

static inline int ext4_journal_blocks_per_page(struct inode *inode)
{
- return jbd2_journal_blocks_per_page(inode);
+ if (EXT4_JOURNAL(inode) != NULL)
+ return jbd2_journal_blocks_per_page(inode);
+ return 0;
}

static inline int ext4_journal_force_commit(journal_t *journal)
{
- return jbd2_journal_force_commit(journal);
+ if (journal)
+ return jbd2_journal_force_commit(journal);
+ return 0;
}

static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
{
- return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ if (ext4_handle_valid(handle))
+ return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
+ return 0;
}

/* super.c */
@@ -208,6 +255,8 @@ int ext4_force_commit(struct super_block *sb);

static inline int ext4_should_journal_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 1;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -219,6 +268,8 @@ static inline int ext4_should_journal_data(struct inode *inode)

static inline int ext4_should_order_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
@@ -230,6 +281,8 @@ static inline int ext4_should_order_data(struct inode *inode)

static inline int ext4_should_writeback_data(struct inode *inode)
{
+ if (EXT4_JOURNAL(inode) == NULL)
+ return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0917be5..743e3fe 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -97,6 +97,8 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
if (handle->h_buffer_credits > needed)
return 0;
err = ext4_journal_extend(handle, needed);
@@ -134,7 +136,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
int err;
if (path->p_bh) {
/* path points to block */
- err = ext4_journal_dirty_metadata(handle, path->p_bh);
+ err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
} else {
/* path points to leaf/index in inode body */
err = ext4_mark_inode_dirty(handle, inode);
@@ -780,7 +782,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -859,7 +861,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto cleanup;
brelse(bh);
@@ -955,7 +957,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto out;

@@ -2947,7 +2949,7 @@ void ext4_ext_truncate(struct inode *inode)
* transaction synchronous.
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

out_stop:
up_write(&EXT4_I(inode)->i_data_sem);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 08cac9f..42eea1c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -253,12 +253,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
spin_unlock(sb_bgl_lock(sbi, flex_group));
}
}
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (!fatal) fatal = err;
}
- BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
sb->s_dirt = 1;
@@ -656,15 +656,16 @@ repeat_in_this_group:
ino, bitmap_bh->b_data)) {
/* we won it */
BUFFER_TRACE(bitmap_bh,
- "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle,
+ "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
- jbd2_journal_release_buffer(handle, bitmap_bh);
+ ext4_handle_release_buffer(handle, bitmap_bh);

if (++ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
@@ -726,7 +727,8 @@ got:
/* Don't need to dirty bitmap block if we didn't change it */
if (free) {
BUFFER_TRACE(block_bh, "dirty block bitmap");
- err = ext4_journal_dirty_metadata(handle, block_bh);
+ err = ext4_handle_dirty_metadata(handle,
+ NULL, block_bh);
}

brelse(block_bh);
@@ -771,8 +773,8 @@ got:
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
- BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh2);
+ BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, NULL, bh2);
if (err) goto fail;

percpu_counter_dec(&sbi->s_freeinodes_counter);
@@ -825,7 +827,7 @@ got:

ext4_set_inode_flags(inode);
if (IS_DIRSYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
insert_inode_hash(inode);
spin_lock(&sbi->s_next_gen_lock);
inode->i_generation = sbi->s_next_generation++;
@@ -1024,4 +1026,3 @@ unsigned long ext4_count_dirs(struct super_block * sb)
}
return count;
}
-
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5840576..5c8d71c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,12 +71,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
+ *
+ * If the handle isn't valid we're not journaling so there's nothing to do.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;

+ if (!ext4_handle_valid(handle))
+ return 0;
+
might_sleep();

BUFFER_TRACE(bh, "enter");
@@ -169,7 +174,9 @@ static handle_t *start_transaction(struct inode *inode)
*/
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (!ext4_handle_valid(handle))
+ return 0;
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
return 0;
@@ -183,6 +190,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
*/
static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
{
+ BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle);
return ext4_journal_restart(handle, blocks_for_truncate(inode));
}
@@ -215,7 +223,7 @@ void ext4_delete_inode(struct inode *inode)
}

if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
@@ -232,7 +240,7 @@ void ext4_delete_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
- if (handle->h_buffer_credits < 3) {
+ if (!ext4_handle_has_enough_credits(handle, 3)) {
err = ext4_journal_extend(handle, 3);
if (err > 0)
err = ext4_journal_restart(handle, 3);
@@ -716,8 +724,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
unlock_buffer(bh);

- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (err)
goto failed;
}
@@ -799,8 +807,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
* generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
*/
jbd_debug(5, "splicing indirect only\n");
- BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, where->bh);
+ BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, where->bh);
if (err)
goto err_out;
} else {
@@ -1228,8 +1236,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
if (!fatal)
fatal = err;
} else {
@@ -1394,7 +1402,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
set_buffer_uptodate(bh);
- return ext4_journal_dirty_metadata(handle, bh);
+ return ext4_handle_dirty_metadata(handle, NULL, bh);
}

/*
@@ -2761,7 +2769,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
filemap_write_and_wait(mapping);
}

- if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
+ BUG_ON(!EXT4_JOURNAL(inode) &&
+ EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
+
+ if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
/*
* This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare:
@@ -3032,7 +3043,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
if (offset == 0)
ClearPageChecked(page);

- jbd2_journal_invalidatepage(journal, page, offset);
+ if (journal)
+ jbd2_journal_invalidatepage(journal, page, offset);
+ else
+ block_invalidatepage(page, offset);
}

static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3042,7 +3056,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
- return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ if (journal)
+ return jbd2_journal_try_to_free_buffers(journal, page, wait);
+ else
+ return try_to_free_buffers(page);
}

/*
@@ -3314,7 +3331,7 @@ int ext4_block_truncate_page(handle_t *handle,

err = 0;
if (ext4_should_journal_data(inode)) {
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, inode, bh);
} else {
if (ext4_should_order_data(inode))
err = ext4_jbd2_file_inode(handle, inode);
@@ -3438,8 +3455,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
__le32 *p;
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, inode, bh);
}
ext4_mark_inode_dirty(handle, inode);
ext4_journal_test_restart(handle, inode);
@@ -3539,7 +3556,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
count, block_to_free_p, p);

if (this_bh) {
- BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata");
+ BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");

/*
* The buffer head should have an attached journal head at this
@@ -3548,7 +3565,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
* the block was cleared. Check for this instead of OOPSing.
*/
if (bh2jh(this_bh))
- ext4_journal_dirty_metadata(handle, this_bh);
+ ext4_handle_dirty_metadata(handle, inode, this_bh);
else
ext4_error(inode->i_sb, __func__,
"circular indirect block detected, "
@@ -3578,7 +3595,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
ext4_fsblk_t nr;
__le32 *p;

- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;

if (depth--) {
@@ -3648,7 +3665,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* will merely complain about releasing a free block,
* rather than leaking blocks.
*/
- if (is_handle_aborted(handle))
+ if (ext4_handle_is_aborted(handle))
return;
if (try_to_extend_transaction(handle, inode)) {
ext4_mark_inode_dirty(handle, inode);
@@ -3667,9 +3684,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
parent_bh)){
*p = 0;
BUFFER_TRACE(parent_bh,
- "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle,
- parent_bh);
+ "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle,
+ inode,
+ parent_bh);
}
}
}
@@ -3857,7 +3875,7 @@ do_indirects:
* synchronous
*/
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
out_stop:
/*
* If this was a simple ftruncate(), and the file will remain alive
@@ -4354,8 +4372,8 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1;
- handle->h_sync = 1;
- err = ext4_journal_dirty_metadata(handle,
+ ext4_handle_sync(handle);
+ err = ext4_handle_dirty_metadata(handle, inode,
EXT4_SB(sb)->s_sbh);
}
}
@@ -4382,9 +4400,8 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
}

-
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- rc = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ rc = ext4_handle_dirty_metadata(handle, inode, bh);
if (!err)
err = rc;
ei->i_state &= ~EXT4_STATE_NEW;
@@ -4447,6 +4464,25 @@ int ext4_write_inode(struct inode *inode, int wait)
return ext4_force_commit(inode->i_sb);
}

+int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
+{
+ int err = 0;
+
+ mark_buffer_dirty(bh);
+ if (inode && inode_needs_sync(inode)) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ ext4_error(inode->i_sb, __func__,
+ "IO error syncing inode, "
+ "inode=%lu, block=%llu",
+ inode->i_ino,
+ (unsigned long long)bh->b_blocknr);
+ err = -EIO;
+ }
+ }
+ return err;
+}
+
/*
* ext4_setattr()
*
@@ -4751,16 +4787,15 @@ int
ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext4_iloc *iloc)
{
- int err = 0;
- if (handle) {
- err = ext4_get_inode_loc(inode, iloc);
- if (!err) {
- BUFFER_TRACE(iloc->bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, iloc->bh);
- if (err) {
- brelse(iloc->bh);
- iloc->bh = NULL;
- }
+ int err;
+
+ err = ext4_get_inode_loc(inode, iloc);
+ if (!err) {
+ BUFFER_TRACE(iloc->bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, iloc->bh);
+ if (err) {
+ brelse(iloc->bh);
+ iloc->bh = NULL;
}
}
ext4_std_error(inode->i_sb, err);
@@ -4832,7 +4867,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)

might_sleep();
err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
+ if (ext4_handle_valid(handle) &&
+ EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
/*
* We need extra buffer credits since we may write into EA block
@@ -4884,6 +4920,11 @@ void ext4_dirty_inode(struct inode *inode)
handle_t *current_handle = ext4_journal_current_handle();
handle_t *handle;

+ if (!ext4_handle_valid(current_handle)) {
+ ext4_mark_inode_dirty(current_handle, inode);
+ return;
+ }
+
handle = ext4_journal_start(inode, 2);
if (IS_ERR(handle))
goto out;
@@ -4921,8 +4962,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
BUFFER_TRACE(iloc.bh, "get_write_access");
err = jbd2_journal_get_write_access(handle, iloc.bh);
if (!err)
- err = ext4_journal_dirty_metadata(handle,
- iloc.bh);
+ err = ext4_handle_dirty_metadata(handle,
+ inode,
+ iloc.bh);
brelse(iloc.bh);
}
}
@@ -4948,6 +4990,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
*/

journal = EXT4_JOURNAL(inode);
+ if (!journal)
+ return 0;
if (is_journal_aborted(journal))
return -EROFS;

@@ -4977,7 +5021,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle);

err = ext4_mark_inode_dirty(handle, inode);
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
ext4_journal_stop(handle);
ext4_std_error(inode->i_sb, err);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index dc99b47..42dc83f 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -99,7 +99,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
goto flags_err;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7beab71..edb512b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2553,7 +2553,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
ext4_mb_init_per_dev_proc(sb);
ext4_mb_history_init(sb);

- sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+ if (sbi->s_journal)
+ sbi->s_journal->j_commit_callback = release_blocks_on_commit;

printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
return 0;
@@ -2854,7 +2855,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
bitmap_bh->b_data, ac->ac_b_ex.fe_start,
ac->ac_b_ex.fe_len);
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
err = -EAGAIN;
goto out_err;
@@ -2901,10 +2902,10 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
spin_unlock(sb_bgl_lock(sbi, flex_group));
}

- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (err)
goto out_err;
- err = ext4_journal_dirty_metadata(handle, gdp_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);

out_err:
sb->s_dirt = 1;
@@ -4414,7 +4415,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct rb_node **n = &db->bb_free_root.rb_node, *node;
struct rb_node *parent = NULL, *new_node;

-
+ BUG_ON(!ext4_handle_valid(handle));
BUG_ON(e4b->bd_bitmap_page == NULL);
BUG_ON(e4b->bd_buddy_page == NULL);

@@ -4600,7 +4601,7 @@ do_more:

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
- err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);

if (ac) {
ac->ac_b_ex.fe_group = block_group;
@@ -4609,7 +4610,7 @@ do_more:
ext4_mb_store_history(ac);
}

- if (metadata) {
+ if (metadata && ext4_handle_valid(handle)) {
/* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed */
ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
@@ -4639,7 +4640,7 @@ do_more:

/* And the group descriptor block */
BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
- ret = ext4_journal_dirty_metadata(handle, gd_bh);
+ ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err)
err = ret;

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index f2a9cf4..e7cd488 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -59,7 +59,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
/*
* Make sure the credit we accumalated is not really high
*/
- if (needed && handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
+ if (needed && ext4_handle_has_enough_credits(handle,
+ EXT4_RESERVE_TRANS_BLOCKS)) {
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
@@ -229,7 +230,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
int retval = 0, needed;

- if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
+ if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
return 0;
/*
* We are freeing a blocks. During this we touch
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 39c0065..d92944e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1233,10 +1233,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
de = de2;
}
dx_insert_block(frame, hash2 + continued, newblock);
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, dir, bh2);
if (err)
goto journal_error;
- err = ext4_journal_dirty_metadata(handle, frame->bh);
+ err = ext4_handle_dirty_metadata(handle, dir, frame->bh);
if (err)
goto journal_error;
brelse(bh2);
@@ -1340,8 +1340,8 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
ext4_update_dx_flag(dir);
dir->i_version++;
ext4_mark_inode_dirty(handle, dir);
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- err = ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ err = ext4_handle_dirty_metadata(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
brelse(bh);
@@ -1581,7 +1581,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dxtrace(dx_show_index("node", frames[1].entries));
dxtrace(dx_show_index("node",
((struct dx_node *) bh2->b_data)->entries));
- err = ext4_journal_dirty_metadata(handle, bh2);
+ err = ext4_handle_dirty_metadata(handle, inode, bh2);
if (err)
goto journal_error;
brelse (bh2);
@@ -1607,7 +1607,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (err)
goto journal_error;
}
- ext4_journal_dirty_metadata(handle, frames[0].bh);
+ ext4_handle_dirty_metadata(handle, inode, frames[0].bh);
}
de = do_split(handle, dir, &bh, frame, &hinfo, &err);
if (!de)
@@ -1653,8 +1653,8 @@ static int ext4_delete_entry(handle_t *handle,
else
de->inode = 0;
dir->i_version++;
- BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, bh);
+ BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, bh);
return 0;
}
i += ext4_rec_len_from_disk(de->rec_len);
@@ -1730,7 +1730,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode (handle, dir, mode);
err = PTR_ERR(inode);
@@ -1764,7 +1764,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, mode);
err = PTR_ERR(inode);
@@ -1800,7 +1800,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFDIR | mode);
err = PTR_ERR(inode);
@@ -1829,8 +1829,8 @@ retry:
strcpy(de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR);
inode->i_nlink = 2;
- BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_block);
+ BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, dir, dir_block);
brelse(dir_block);
ext4_mark_inode_dirty(handle, inode);
err = ext4_add_entry(handle, dentry, inode);
@@ -1940,6 +1940,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0, rc;

+ if (!ext4_handle_valid(handle))
+ return 0;
+
lock_super(sb);
if (!list_empty(&EXT4_I(inode)->i_orphan))
goto out_unlock;
@@ -1968,7 +1971,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
/* Insert this inode at the head of the on-disk orphan list... */
NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
- err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (!err)
err = rc;
@@ -2006,6 +2009,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0;

+ if (!ext4_handle_valid(handle))
+ return 0;
+
lock_super(inode->i_sb);
if (list_empty(&ei->i_orphan)) {
unlock_super(inode->i_sb);
@@ -2024,7 +2030,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
* transaction handle with which to update the orphan list on
* disk, but we still need to remove the inode from the linked
* list in memory. */
- if (!handle)
+ if (sbi->s_journal && !handle)
goto out;

err = ext4_reserve_inode_write(handle, inode, &iloc);
@@ -2038,7 +2044,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
if (err)
goto out_brelse;
sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
- err = ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
} else {
struct ext4_iloc iloc2;
struct inode *i_prev =
@@ -2089,7 +2095,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
goto end_rmdir;

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = dentry->d_inode;

@@ -2143,7 +2149,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de);
@@ -2200,7 +2206,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO);
err = PTR_ERR(inode);
@@ -2263,7 +2269,7 @@ retry:
return PTR_ERR(handle);

if (IS_DIRSYNC(dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
@@ -2305,7 +2311,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return PTR_ERR(handle);

if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);

old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
/*
@@ -2359,8 +2365,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dir->i_ctime = new_dir->i_mtime =
ext4_current_time(new_dir);
ext4_mark_inode_dirty(handle, new_dir);
- BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, new_bh);
+ BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, new_dir, new_bh);
brelse(new_bh);
new_bh = NULL;
}
@@ -2410,8 +2416,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
BUFFER_TRACE(dir_bh, "get_write_access");
ext4_journal_get_write_access(handle, dir_bh);
PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino);
- BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata");
- ext4_journal_dirty_metadata(handle, dir_bh);
+ BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
+ ext4_handle_dirty_metadata(handle, old_dir, dir_bh);
ext4_dec_count(handle, old_dir);
if (new_inode) {
/* checked empty_dir above, can't have another parent,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index d448eb1..1665aa1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -149,7 +149,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
{
int err;

- if (handle->h_buffer_credits >= thresh)
+ if (ext4_handle_has_enough_credits(handle, thresh))
return 0;

err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
@@ -232,7 +232,7 @@ static int setup_new_group_blocks(struct super_block *sb,
memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
set_buffer_uptodate(gdb);
unlock_buffer(gdb);
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -251,7 +251,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(bh);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, gdb);
+ ext4_handle_dirty_metadata(handle, NULL, gdb);
ext4_set_bit(bit, bh->b_data);
brelse(gdb);
}
@@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct super_block *sb,
err = PTR_ERR(it);
goto exit_bh;
}
- ext4_journal_dirty_metadata(handle, it);
+ ext4_handle_dirty_metadata(handle, NULL, it);
brelse(it);
ext4_set_bit(bit, bh->b_data);
}
@@ -286,7 +286,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);

/* Mark unused entries in inode bitmap used */
@@ -299,7 +299,7 @@ static int setup_new_group_blocks(struct super_block *sb,

mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb),
bh->b_data);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);

@@ -486,12 +486,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
* reserved inode, and will become GDT blocks (primary and backup).
*/
data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0;
- ext4_journal_dirty_metadata(handle, dind);
+ ext4_handle_dirty_metadata(handle, NULL, dind);
brelse(dind);
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
- ext4_journal_dirty_metadata(handle, *primary);
+ ext4_handle_dirty_metadata(handle, NULL, *primary);

o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
@@ -502,7 +502,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
kfree(o_group_desc);

le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);

return 0;

@@ -618,7 +618,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
primary[i]->b_blocknr, gdbackups,
blk + primary[i]->b_blocknr); */
data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
- err2 = ext4_journal_dirty_metadata(handle, primary[i]);
+ err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]);
if (!err)
err = err2;
}
@@ -676,7 +676,8 @@ static void update_backups(struct super_block *sb,
struct buffer_head *bh;

/* Out of journal space, and can't get more - abort - so sad */
- if (handle->h_buffer_credits == 0 &&
+ if (ext4_handle_valid(handle) &&
+ handle->h_buffer_credits == 0 &&
ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
break;
@@ -696,7 +697,7 @@ static void update_backups(struct super_block *sb,
memset(bh->b_data + size, 0, rest);
set_buffer_uptodate(bh);
unlock_buffer(bh);
- ext4_journal_dirty_metadata(handle, bh);
+ ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
}
if ((err2 = ext4_journal_stop(handle)) && !err)
@@ -916,7 +917,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
/* Update the global fs size fields */
sbi->s_groups_count++;

- ext4_journal_dirty_metadata(handle, primary);
+ ext4_handle_dirty_metadata(handle, NULL, primary);

/* Update the reserved block counts only once the new group is
* active. */
@@ -938,7 +939,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
EXT4_INODES_PER_GROUP(sb);
}

- ext4_journal_dirty_metadata(handle, sbi->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
sb->s_dirt = 1;

exit_journal:
@@ -1072,7 +1073,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put;
}
ext4_blocks_count_set(es, o_blocks_count + add);
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba23b48..bc5a5e2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -136,13 +136,20 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
- if (is_journal_aborted(journal)) {
- ext4_abort(sb, __func__,
- "Detected aborted journal");
- return ERR_PTR(-EROFS);
+ if (journal) {
+ if (is_journal_aborted(journal)) {
+ ext4_abort(sb, __func__,
+ "Detected aborted journal");
+ return ERR_PTR(-EROFS);
+ }
+ return jbd2_journal_start(journal, nblocks);
}
-
- return jbd2_journal_start(journal, nblocks);
+ /*
+ * We're not journaling. Return a pointer to our flag that
+ * indicates that fact.
+ */
+ current->journal_info = EXT4_NOJOURNAL_HANDLE;
+ return current->journal_info;
}

/*
@@ -157,6 +164,14 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
int err;
int rc;

+ if (!ext4_handle_valid(handle)) {
+ /*
+ * Do this here since we don't call jbd2_journal_stop() in
+ * no-journal mode.
+ */
+ current->journal_info = NULL;
+ return 0;
+ }
sb = handle->h_transaction->t_journal->j_private;
err = handle->h_err;
rc = jbd2_journal_stop(handle);
@@ -174,6 +189,8 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf);

+ BUG_ON(!ext4_handle_valid(handle));
+
if (bh)
BUFFER_TRACE(bh, "abort");

@@ -448,11 +465,13 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- err = jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
- if (err < 0)
- ext4_abort(sb, __func__, "Couldn't clean up the journal");
-
+ if (sbi->s_journal) {
+ err = jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ if (err < 0)
+ ext4_abort(sb, __func__,
+ "Couldn't clean up the journal");
+ }
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -522,6 +541,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
+ /*
+ * Note: We can be called before EXT4_SB(sb)->s_journal is set,
+ * therefore it can be null here. Don't check it, just initialize
+ * jinode.
+ */
jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
@@ -588,7 +612,8 @@ static void ext4_clear_inode(struct inode *inode)
}
#endif
ext4_discard_preallocations(inode);
- jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
+ if (EXT4_JOURNAL(inode))
+ jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode);
}

@@ -1406,20 +1431,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
printk(KERN_WARNING
"EXT4-fs warning: checktime reached, "
"running e2fsck is recommended\n");
-#if 0
- /* @@@ We _will_ want to clear the valid bit if we find
- * inconsistencies, to force a fsck at reboot. But for
- * a plain journaled filesystem we can keep it set as
- * valid forever! :)
- */
- es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
-#endif
+ if (!sbi->s_journal)
+ es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
es->s_mtime = cpu_to_le32(get_seconds());
ext4_update_dynamic_rev(sb);
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ if (sbi->s_journal)
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);

ext4_commit_super(sb, es, 1);
if (test_opt(sb, DEBUG))
@@ -1431,9 +1451,13 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
EXT4_INODES_PER_GROUP(sb),
sbi->s_mount_opt);

- printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
- sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
- "external", EXT4_SB(sb)->s_journal->j_devname);
+ if (EXT4_SB(sb)->s_journal) {
+ printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
+ sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
+ "external", EXT4_SB(sb)->s_journal->j_devname);
+ } else {
+ printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
+ }
return res;
}

@@ -1867,6 +1891,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned long def_mount_opts;
struct inode *root;
char *cp;
+ const char *descr;
int ret = -EINVAL;
int blocksize;
int db_count;
@@ -2278,21 +2303,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, es, 1);
- printk(KERN_CRIT
- "EXT4-fs (device %s): mount failed\n",
- sb->s_id);
- goto failed_mount4;
}
}
} else if (journal_inum) {
if (ext4_create_journal(sb, es, journal_inum))
goto failed_mount3;
+ } if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
+ EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
+ printk(KERN_ERR "EXT4-fs: required journal recovery "
+ "suppressed and not mounted read-only\n");
+ goto failed_mount4;
} else {
- if (!silent)
- printk(KERN_ERR
- "ext4: No journal on filesystem on %s\n",
- sb->s_id);
- goto failed_mount3;
+ clear_opt(sbi->s_mount_opt, DATA_FLAGS);
+ set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
+ sbi->s_journal = NULL;
+ needs_recovery = 0;
+ goto no_journal;
}

if (ext4_blocks_count(es) > 0xffffffffULL &&
@@ -2344,6 +2370,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
break;
}

+no_journal:
+
if (test_opt(sb, NOBH)) {
if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
@@ -2428,13 +2456,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
- if (needs_recovery)
+ if (needs_recovery) {
printk(KERN_INFO "EXT4-fs: recovery complete.\n");
- ext4_mark_recovery_complete(sb, es);
- printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
- test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
- "writeback");
+ ext4_mark_recovery_complete(sb, es);
+ }
+ if (EXT4_SB(sb)->s_journal) {
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ descr = " journalled data mode";
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ descr = " ordered data mode";
+ else
+ descr = " writeback data mode";
+ } else
+ descr = "out journal";
+
+ printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
+ sb->s_id, descr);

lock_kernel();
return 0;
@@ -2446,8 +2483,11 @@ cantfind_ext4:
goto failed_mount;

failed_mount4:
- jbd2_journal_destroy(sbi->s_journal);
- sbi->s_journal = NULL;
+ printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
+ if (sbi->s_journal) {
+ jbd2_journal_destroy(sbi->s_journal);
+ sbi->s_journal = NULL;
+ }
failed_mount3:
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
@@ -2508,6 +2548,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
/* First, test for the existence of a valid inode on disk. Bad
* things happen if we iget() an unused inode, as the subsequent
* iput() will try to delete it. */
@@ -2556,6 +2598,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
bdev = ext4_blkdev_get(j_dev);
if (bdev == NULL)
return NULL;
@@ -2643,6 +2687,8 @@ static int ext4_load_journal(struct super_block *sb,
int err = 0;
int really_read_only;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
printk(KERN_INFO "EXT4-fs: external journal device major/minor "
@@ -2817,6 +2863,10 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
{
journal_t *journal = EXT4_SB(sb)->s_journal;

+ if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
+ BUG_ON(journal != NULL);
+ return;
+ }
jbd2_journal_lock_updates(journal);
if (jbd2_journal_flush(journal) < 0)
goto out;
@@ -2846,6 +2896,8 @@ static void ext4_clear_journal_err(struct super_block *sb,
int j_errno;
const char *errstr;

+ BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
+
journal = EXT4_SB(sb)->s_journal;

/*
@@ -2878,14 +2930,17 @@ static void ext4_clear_journal_err(struct super_block *sb,
int ext4_force_commit(struct super_block *sb)
{
journal_t *journal;
- int ret;
+ int ret = 0;

if (sb->s_flags & MS_RDONLY)
return 0;

journal = EXT4_SB(sb)->s_journal;
- sb->s_dirt = 0;
- ret = ext4_journal_force_commit(journal);
+ if (journal) {
+ sb->s_dirt = 0;
+ ret = ext4_journal_force_commit(journal);
+ }
+
return ret;
}

@@ -2897,9 +2952,13 @@ int ext4_force_commit(struct super_block *sb)
*/
static void ext4_write_super(struct super_block *sb)
{
- if (mutex_trylock(&sb->s_lock) != 0)
- BUG();
- sb->s_dirt = 0;
+ if (EXT4_SB(sb)->s_journal) {
+ if (mutex_trylock(&sb->s_lock) != 0)
+ BUG();
+ sb->s_dirt = 0;
+ } else {
+ ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
+ }
}

static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -2911,7 +2970,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
if (wait)
ret = ext4_force_commit(sb);
else
- jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
return ret;
}

@@ -2926,15 +2986,17 @@ static void ext4_write_super_lockfs(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY)) {
journal_t *journal = EXT4_SB(sb)->s_journal;

- /* Now we set up the journal barrier. */
- jbd2_journal_lock_updates(journal);
+ if (journal) {
+ /* Now we set up the journal barrier. */
+ jbd2_journal_lock_updates(journal);

- /*
- * We don't want to clear needs_recovery flag when we failed
- * to flush the journal.
- */
- if (jbd2_journal_flush(journal) < 0)
- return;
+ /*
+ * We don't want to clear needs_recovery flag when we
+ * failed to flush the journal.
+ */
+ if (jbd2_journal_flush(journal) < 0)
+ return;
+ }

/* Journal blocked and flushed, clear needs_recovery flag. */
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2948,7 +3010,7 @@ static void ext4_write_super_lockfs(struct super_block *sb)
*/
static void ext4_unlockfs(struct super_block *sb)
{
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
lock_super(sb);
/* Reser the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -2999,7 +3061,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)

es = sbi->s_es;

- ext4_init_journal_params(sb, sbi->s_journal);
+ if (sbi->s_journal)
+ ext4_init_journal_params(sb, sbi->s_journal);

if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > ext4_blocks_count(es)) {
@@ -3028,9 +3091,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* We have to unlock super so that we can wait for
* transactions.
*/
- unlock_super(sb);
- ext4_mark_recovery_complete(sb, es);
- lock_super(sb);
+ if (sbi->s_journal) {
+ unlock_super(sb);
+ ext4_mark_recovery_complete(sb, es);
+ lock_super(sb);
+ }
} else {
__le32 ret;
if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -3084,7 +3149,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal)
+ ext4_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((err = ext4_group_extend(sb, es, n_blocks_count)))
goto restore_opts;
@@ -3092,6 +3158,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~MS_RDONLY;
}
}
+ if (sbi->s_journal == NULL)
+ ext4_commit_super(sb, es, 1);
+
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
@@ -3368,7 +3437,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
* When we journal data on quota file, we have to flush journal to see
* all updates to the file when we bypass pagecache...
*/
- if (ext4_should_journal_data(path.dentry->d_inode)) {
+ if (EXT4_SB(sb)->s_journal &&
+ ext4_should_journal_data(path.dentry->d_inode)) {
/*
* We don't need to lock updates but journal_flush() could
* otherwise be livelocked...
@@ -3442,7 +3512,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
struct buffer_head *bh;
handle_t *handle = journal_current_handle();

- if (!handle) {
+ if (EXT4_SB(sb)->s_journal && !handle) {
printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
" cancelled because transaction is not started.\n",
(unsigned long long)off, (unsigned long long)len);
@@ -3467,7 +3537,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
unlock_buffer(bh);
if (journal_quota)
- err = ext4_journal_dirty_metadata(handle, bh);
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
else {
/* Always do at least ordered writes for quotas */
err = ext4_jbd2_file_inode(handle, inode);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 9b4a368..157ce65 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -457,7 +457,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
sb->s_dirt = 1;
- ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
}
}

@@ -487,9 +487,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- error = ext4_journal_dirty_metadata(handle, bh);
+ error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
DQUOT_FREE_BLOCK(inode, 1);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
@@ -724,8 +724,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error == -EIO)
goto bad_block;
if (!error)
- error = ext4_journal_dirty_metadata(handle,
- bs->bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ bs->bh);
if (error)
goto cleanup;
goto inserted;
@@ -794,8 +795,9 @@ inserted:
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
- error = ext4_journal_dirty_metadata(handle,
- new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode,
+ new_bh);
if (error)
goto cleanup_dquot;
}
@@ -833,7 +835,8 @@ getblk_failed:
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
ext4_xattr_cache_insert(new_bh);
- error = ext4_journal_dirty_metadata(handle, new_bh);
+ error = ext4_handle_dirty_metadata(handle,
+ inode, new_bh);
if (error)
goto cleanup;
}
@@ -1040,7 +1043,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
*/
is.iloc.bh = NULL;
if (IS_SYNC(inode))
- handle->h_sync = 1;
+ ext4_handle_sync(handle);
}

cleanup: