2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 0/8] Clean up ext4's block free code paths

This patch series cleans up functions involved with freeing blocks. It
removes functions that are only called by a single caller and folds them
into their caller, and makes the code a little easier to read. It also
reduces ext4's code foot print slightly (by 136 bytes).

Finally, it fixes a bug in the extents migration function, which was
missing calls to ext4_forget() when it frees the indirect blocks after
converting an inode to using extents. This avoids potential file system
corruption after a crash while converting an entire filesystem to use
extents via "chattr -R +e /mntpt".

- Ted

Theodore Ts'o (8):
ext4: move ext4_forget() to ext4_jbd2.c
ext4: fold ext4_journal_revoke() into ext4_forget()
ext4: fold ext4_journal_forget() into ext4_forget()
ext4: fold ext4_free_blocks() and ext4_mb_free_blocks()
ext4: call ext4_forget() from ext4_free_blocks()
ext4: print i_mode in octal in ext4 tracepoints
ext4: add check for wraparound in ext4_data_block_valid()
ext4: use ext4_data_block_valid() in ext4_free_blocks()

fs/ext4/balloc.c | 38 --------------
fs/ext4/block_validity.c | 1 +
fs/ext4/ext4.h | 15 +++--
fs/ext4/ext4_jbd2.c | 82 ++++++++++++++++++++++---------
fs/ext4/ext4_jbd2.h | 23 +++------
fs/ext4/extents.c | 24 +++------
fs/ext4/inode.c | 116 +++++++++++--------------------------------
fs/ext4/mballoc.c | 61 ++++++++++++++++++-----
fs/ext4/migrate.c | 23 ++++++---
fs/ext4/xattr.c | 8 ++-
include/trace/events/ext4.h | 24 +++++----
11 files changed, 198 insertions(+), 217 deletions(-)



2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 8/8] ext4: use ext4_data_block_valid() in ext4_free_blocks()

The block validity framework does a more comprehensive set of checks,
and it saves object code space to use the ext4_data_block_valid() than
the limited open-coded version that had been in ext4_free_blocks().

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/mballoc.c | 4 +---
1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 78de5d3..ab2dad1 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4463,9 +4463,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,

sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
- if (block < le32_to_cpu(es->s_first_data_block) ||
- block + count < block ||
- block + count > ext4_blocks_count(es)) {
+ if (!ext4_data_block_valid(sbi, block, count)) {
ext4_error(sb, __func__,
"Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
--
1.6.5.216.g5288a.dirty


2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 6/8] ext4: print i_mode in octal in ext4 tracepoints

Inode permissions are much easier to understand if they are printed in
octal.

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
include/trace/events/ext4.h | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 74f628b..287347c 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -38,7 +38,7 @@ TRACE_EVENT(ext4_free_inode,
__entry->blocks = inode->i_blocks;
),

- TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
+ TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->uid, __entry->gid,
(unsigned long long) __entry->blocks)
@@ -61,7 +61,7 @@ TRACE_EVENT(ext4_request_inode,
__entry->mode = mode;
),

- TP_printk("dev %s dir %lu mode %d",
+ TP_printk("dev %s dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
__entry->mode)
);
@@ -85,7 +85,7 @@ TRACE_EVENT(ext4_allocate_inode,
__entry->mode = mode;
),

- TP_printk("dev %s ino %lu dir %lu mode %d",
+ TP_printk("dev %s ino %lu dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
@@ -930,7 +930,7 @@ TRACE_EVENT(ext4_forget,
__entry->block = block;
),

- TP_printk("dev %s ino %lu mode %d is_metadata %d block %llu",
+ TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->is_metadata, __entry->block)
);
--
1.6.5.216.g5288a.dirty


2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 3/8] ext4: fold ext4_journal_forget() into ext4_forget()

Convert the last two callers of ext4_journal_forget() to use
ext4_forget() instead, and then fold ext4_journal_forget() into
ext4_forget(). This reduces are code complexity and shortens our call
stack.

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/ext4_jbd2.c | 22 +++++-----------------
fs/ext4/ext4_jbd2.h | 6 ------
fs/ext4/inode.c | 16 ++++++++++++++--
3 files changed, 19 insertions(+), 25 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 92c88a8..b57e5c7 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -34,22 +34,6 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
return err;
}

-int __ext4_journal_forget(const char *where, handle_t *handle,
- struct buffer_head *bh)
-{
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_forget(handle, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
- }
- else
- bforget(bh);
- return err;
-}
-
/*
* The ext4 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
@@ -93,7 +77,11 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
(!is_metadata && !ext4_should_journal_data(inode))) {
if (bh) {
BUFFER_TRACE(bh, "call jbd2_journal_forget");
- return __ext4_journal_forget(where, handle, bh);
+ err = jbd2_journal_forget(handle, bh);
+ if (err)
+ ext4_journal_abort_handle(where, __func__, bh,
+ handle, err);
+ return err;
}
return 0;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index f9fb4bb..84bc98a 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -127,10 +127,6 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh);

-/* When called with an invalid handle, this will still do a put on the BH */
-int __ext4_journal_forget(const char *where, handle_t *handle,
- struct buffer_head *bh);
-
int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
struct inode *inode, struct buffer_head *bh,
ext4_fsblk_t blocknr);
@@ -150,8 +146,6 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
(block_nr))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
-#define ext4_journal_forget(handle, bh) \
- __ext4_journal_forget(__func__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fa37f95..72c6943 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -767,7 +767,13 @@ failed:
/* Allocation failed, free what we already allocated */
for (i = 1; i <= n ; i++) {
BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
- ext4_journal_forget(handle, branch[i].bh);
+ /*
+ * Note: is_metadata is 0 because branch[i].bh is
+ * newly allocated, so there is no need to revoke the
+ * block. If we do, it's harmless, but not necessary.
+ */
+ ext4_forget(handle, 0, inode, branch[i].bh,
+ branch[i].bh->b_blocknr);
}
for (i = 0; i < indirect_blks; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
@@ -852,7 +858,13 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
err_out:
for (i = 1; i <= num; i++) {
BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
- ext4_journal_forget(handle, where[i].bh);
+ /*
+ * Note: is_metadata is 0 because branch[i].bh is
+ * newly allocated, so there is no need to revoke the
+ * block. If we do, it's harmless, but not necessary.
+ */
+ ext4_forget(handle, 0, inode, where[i].bh,
+ where[i].bh->b_blocknr);
ext4_free_blocks(handle, inode,
le32_to_cpu(where[i-1].key), 1, 0);
}
--
1.6.5.216.g5288a.dirty


2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 7/8] ext4: add check for wraparound in ext4_data_block_valid()

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/block_validity.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index dc79b75..4df8621 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
struct rb_node *n = sbi->system_blks.rb_node;

if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
+ (start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
while (n) {
--
1.6.5.216.g5288a.dirty


2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 2/8] ext4: fold ext4_journal_revoke() into ext4_forget()

The only caller of ext4_journal_revoke() is ext4_forget(), so we can
fold ext4_journal_revoke() into ext4_forget() to simplfy the code and
shorten the call stack.

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/ext4_jbd2.c | 30 +++++++++++-------------------
fs/ext4/ext4_jbd2.h | 12 +-----------
2 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 913f857..92c88a8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -50,22 +50,6 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
return err;
}

-int __ext4_journal_revoke(const char *where, handle_t *handle,
- ext4_fsblk_t blocknr, struct buffer_head *bh)
-{
- int err = 0;
-
- if (ext4_handle_valid(handle)) {
- err = jbd2_journal_revoke(handle, blocknr, bh);
- if (err)
- ext4_journal_abort_handle(where, __func__, bh,
- handle, err);
- }
- else
- bforget(bh);
- return err;
-}
-
/*
* The ext4 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
@@ -94,6 +78,12 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));

+ /* In the no journal case, we can just do a bforget and return */
+ if (!ext4_handle_valid(handle)) {
+ bforget(bh);
+ return 0;
+ }
+
/* Never use the revoke function if we are doing full data
* journaling: there is no need to, and a V1 superblock won't
* support it. Otherwise, only skip the revoke on un-journaled
@@ -111,11 +101,13 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
/*
* data!=journal && (is_metadata || should_journal_data(inode))
*/
- BUFFER_TRACE(bh, "call ext4_journal_revoke");
- err = __ext4_journal_revoke(where, handle, blocknr, bh);
- if (err)
+ BUFFER_TRACE(bh, "call jbd2_journal_revoke");
+ err = jbd2_journal_revoke(handle, blocknr, bh);
+ if (err) {
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
ext4_abort(inode->i_sb, __func__,
"error %d when attempting revoke", err);
+ }
BUFFER_TRACE(bh, "exit");
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index dc0b34a..f9fb4bb 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -116,12 +116,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);

/*
- * Wrapper functions with which ext4 calls into JBD. The intent here is
- * to allow these to be turned into appropriate stubs so ext4 can control
- * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
- * been done yet.
+ * Wrapper functions with which ext4 calls into JBD.
*/
-
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);

@@ -135,10 +131,6 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh);

-/* When called with an invalid handle, this will still do a put on the BH */
-int __ext4_journal_revoke(const char *where, handle_t *handle,
- ext4_fsblk_t blocknr, struct buffer_head *bh);

2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 1/8] ext4: move ext4_forget() to ext4_jbd2.c

The ext4_forget() function better belongs in ext4_jbd2.c. This will
allow us to do some cleanup of the ext4_journal_revoke() and
ext4_journal_forget() functions, as well as giving us better error
reporting since we can report the caller of ext4_forget() when things
go wrong.

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/ext4.h | 2 -
fs/ext4/ext4_jbd2.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/ext4_jbd2.h | 7 ++++++
fs/ext4/inode.c | 53 ------------------------------------------------
4 files changed, 63 insertions(+), 55 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 05ce38b..57c4e03 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1393,8 +1393,6 @@ extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
/* inode.c */
-int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6a94099..913f857 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -4,6 +4,8 @@

#include "ext4_jbd2.h"

+#include <trace/events/ext4.h>
+
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
@@ -64,6 +66,60 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
return err;
}

+/*
+ * The ext4 forget function must perform a revoke if we are freeing data
+ * which has been journaled. Metadata (eg. indirect blocks) must be
+ * revoked in all cases.
+ *
+ * "bh" may be NULL: a metadata block may have been freed from memory
+ * but there may still be a record of it in the journal, and that record
+ * still needs to be revoked.
+ *
+ * If the handle isn't valid we're not journaling, but we still need to
+ * call into ext4_journal_revoke() to put the buffer head.
+ */
+int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
+ struct inode *inode, struct buffer_head *bh,
+ ext4_fsblk_t blocknr)
+{
+ int err;
+
+ might_sleep();
+
+ trace_ext4_forget(inode, is_metadata, blocknr);
+ BUFFER_TRACE(bh, "enter");
+
+ jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+ "data mode %x\n",
+ bh, is_metadata, inode->i_mode,
+ test_opt(inode->i_sb, DATA_FLAGS));
+
+ /* Never use the revoke function if we are doing full data
+ * journaling: there is no need to, and a V1 superblock won't
+ * support it. Otherwise, only skip the revoke on un-journaled
+ * data blocks. */
+
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+ (!is_metadata && !ext4_should_journal_data(inode))) {
+ if (bh) {
+ BUFFER_TRACE(bh, "call jbd2_journal_forget");
+ return __ext4_journal_forget(where, handle, bh);
+ }
+ return 0;
+ }
+
+ /*
+ * data!=journal && (is_metadata || should_journal_data(inode))
+ */
+ BUFFER_TRACE(bh, "call ext4_journal_revoke");
+ err = __ext4_journal_revoke(where, handle, blocknr, bh);
+ if (err)
+ ext4_abort(inode->i_sb, __func__,
+ "error %d when attempting revoke", err);
+ BUFFER_TRACE(bh, "exit");
+ return err;
+}
+
int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh)
{
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a286598..dc0b34a 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -139,6 +139,10 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh);

+int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
+ struct inode *inode, struct buffer_head *bh,
+ ext4_fsblk_t blocknr);
+
int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh);

@@ -151,6 +155,9 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
__ext4_journal_get_write_access(__func__, (handle), (bh))
#define ext4_journal_revoke(handle, blocknr, bh) \
__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
+#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
+ __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
+ (block_nr))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3673ec7..fa37f95 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,59 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
}

/*
- * The ext4 forget function must perform a revoke if we are freeing data
- * which has been journaled. Metadata (eg. indirect blocks) must be
- * revoked in all cases.
- *
- * "bh" may be NULL: a metadata block may have been freed from memory
- * but there may still be a record of it in the journal, and that record
- * still needs to be revoked.
- *
- * If the handle isn't valid we're not journaling, but we still need to
- * call into ext4_journal_revoke() to put the buffer head.
- */
-int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
- struct buffer_head *bh, ext4_fsblk_t blocknr)
-{
- int err;
-
- might_sleep();
-
- trace_ext4_forget(inode, is_metadata, blocknr);
- BUFFER_TRACE(bh, "enter");
-
- jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
- "data mode %x\n",
- bh, is_metadata, inode->i_mode,
- test_opt(inode->i_sb, DATA_FLAGS));
-
- /* Never use the revoke function if we are doing full data
- * journaling: there is no need to, and a V1 superblock won't
- * support it. Otherwise, only skip the revoke on un-journaled
- * data blocks. */
-
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
- (!is_metadata && !ext4_should_journal_data(inode))) {
- if (bh) {
- BUFFER_TRACE(bh, "call jbd2_journal_forget");
- return ext4_journal_forget(handle, bh);
- }
- return 0;
- }
-
- /*
- * data!=journal && (is_metadata || should_journal_data(inode))
- */
- BUFFER_TRACE(bh, "call ext4_journal_revoke");
- err = ext4_journal_revoke(handle, blocknr, bh);
- if (err)
- ext4_abort(inode->i_sb, __func__,
- "error %d when attempting revoke", err);
- BUFFER_TRACE(bh, "exit");
- return err;
-}

2009-11-23 02:18:29

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 4/8] ext4: fold ext4_free_blocks() and ext4_mb_free_blocks()

ext4_mb_free_blocks() is only called by ext4_free_blocks(), and the
latter function doesn't really do much. So merge the two functions
together, such that ext4_free_blocks() is now found in
fs/ext4/mballoc.c. This saves about 200 bytes of compiled text space.

Signed-off-by: "Theodore Ts'o" <[email protected]>
---
fs/ext4/balloc.c | 38 --------------------------------------
fs/ext4/ext4.h | 7 +++----
fs/ext4/mballoc.c | 30 +++++++++++++++++++++++-------
3 files changed, 26 insertions(+), 49 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f3032c9..22bc743 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,44 +499,6 @@ error_return:
}

/**
- * ext4_free_blocks() -- Free given blocks and update quota
- * @handle: handle for this transaction
- * @inode: inode
- * @block: start physical block to free
- * @count: number of blocks to count
- * @metadata: Are these metadata blocks
- */
-void ext4_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count,
- int metadata)
-{
- struct super_block *sb;
- unsigned long dquot_freed_blocks;
-
- /* this isn't the right place to decide whether block is metadata
- * inode.c/extents.c knows better, but for safety ... */
- if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- metadata = 1;
-
- /* We need to make sure we don't reuse
- * block released untill the transaction commit.
- * writeback mode have weak data consistency so
- * don't force data as metadata when freeing block
- * for writeback mode.
- */
- if (metadata == 0 && !ext4_should_writeback_data(inode))
- metadata = 1;
-
- sb = inode->i_sb;
-
- ext4_mb_free_blocks(handle, inode, block, count,
- metadata, &dquot_freed_blocks);
- if (dquot_freed_blocks)
- vfs_dq_free_block(inode, dquot_freed_blocks);
- return;
-}
-
-/**
* ext4_has_free_blocks()
* @sbi: in-core super block structure.
* @nblocks: number of needed blocks
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 57c4e03..210e1b5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1325,8 +1325,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count, int metadata);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1385,8 +1383,9 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
-extern void ext4_mb_free_blocks(handle_t *, struct inode *,
- ext4_fsblk_t, unsigned long, int, unsigned long *);
+extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t block, unsigned long count,
+ int metadata);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 6e5a23a..0dca90b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4427,18 +4427,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
return 0;
}

-/*
- * Main entry point into mballoc to free blocks
+/**
+ * ext4_free_blocks() -- Free given blocks and update quota
+ * @handle: handle for this transaction
+ * @inode: inode
+ * @block: start physical block to free
+ * @count: number of blocks to count
+ * @metadata: Are these metadata blocks
*/
-void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count,
- int metadata, unsigned long *freed)
+void ext4_free_blocks(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t block, unsigned long count,
+ int metadata)
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
+ unsigned long freed = 0;
unsigned int overflow;
ext4_grpblk_t bit;
struct buffer_head *gd_bh;
@@ -4448,7 +4454,15 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
int err = 0;
int ret;

- *freed = 0;
+ /*
+ * We need to make sure we don't reuse the freed block until
+ * after the transaction is committed, which we can do by
+ * treating the block as metadata, below. We make an
+ * exception if the inode is to be written in writeback mode
+ * since writeback mode has weak data consistency guarantees.
+ */
+ if (!ext4_should_writeback_data(inode))
+ metadata = 1;

sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
@@ -4577,7 +4591,7 @@ do_more:

ext4_mb_release_desc(&e4b);

- *freed += count;
+ freed += count;

/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4597,6 +4611,8 @@ do_more:
}
sb->s_dirt = 1;
error_return:
+ if (freed)
+ vfs_dq_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
if (ac)
--
1.6.5.216.g5288a.dirty


2009-11-23 02:18:30

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH 5/8] ext4: call ext4_forget() from ext4_free_blocks()

Add the facility for ext4_forget() to be called form
ext4_free_blocks(). This simplifies the code in a large number of
places, and centralizes most of the work of calling ext4_forget() into
a single place.

Also fix a bug in the extents migration code; it wasn't calling
ext4_forget() when releasing the indirect blocks during the
conversion. As a result, if the system cashed during or shortly after
the extents migration, and the released indirect blocks get reused as
data blocks, the journal replay would corrupt the data blocks. With
this new patch, fixing this bug was as simple as adding the
EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks().

Signed-off-by: "Theodore Ts'o" <[email protected]>
Cc: "Aneesh Kumar K.V" <[email protected]>
---
fs/ext4/ext4.h | 10 +++++-
fs/ext4/extents.c | 24 ++++++---------
fs/ext4/inode.c | 67 ++++++++++++++++--------------------------
fs/ext4/mballoc.c | 49 +++++++++++++++++++++++--------
fs/ext4/migrate.c | 23 ++++++++++----
fs/ext4/xattr.c | 8 +++--
include/trace/events/ext4.h | 16 ++++++----
7 files changed, 109 insertions(+), 88 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 210e1b5..4cfc2f0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -376,6 +376,12 @@ struct ext4_new_group_data {
EXT4_GET_BLOCKS_DIO_CREATE_EXT)

/*
+ * Flags used by ext4_free_blocks
+ */
+#define EXT4_FREE_BLOCKS_METADATA 0x0001
+#define EXT4_FREE_BLOCKS_FORGET 0x0002
+
+/*
* ioctl commands
*/
#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
@@ -1384,8 +1390,8 @@ extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count,
- int metadata);
+ struct buffer_head *bh, ext4_fsblk_t block,
+ unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74dcff8..2c4a932 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1007,7 +1007,8 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
- ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+ ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+ EXT4_FREE_BLOCKS_METADATA);
}
}
kfree(ablocks);
@@ -1957,7 +1958,6 @@ errout:
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
{
- struct buffer_head *bh;
int err;
ext4_fsblk_t leaf;

@@ -1973,9 +1973,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
- bh = sb_find_get_block(inode->i_sb, leaf);
- ext4_forget(handle, 1, inode, bh, leaf);
- ext4_free_blocks(handle, inode, leaf, 1, 1);
+ ext4_free_blocks(handle, inode, 0, leaf, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}

@@ -2042,12 +2041,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
ext4_lblk_t from, ext4_lblk_t to)
{
- struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex);
- int i, metadata = 0;
+ int flags = EXT4_FREE_BLOCKS_FORGET;

if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- metadata = 1;
+ flags |= EXT4_FREE_BLOCKS_METADATA;
#ifdef EXTENTS_STATS
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2070,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
- for (i = 0; i < num; i++) {
- bh = sb_find_get_block(inode->i_sb, start + i);
- ext4_forget(handle, metadata, inode, bh, start + i);
- }
- ext4_free_blocks(handle, inode, start, num, metadata);
+ ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3319,8 +3313,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), 0);
+ ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_ext_get_actual_len(&newex), 0);
goto out2;
}

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 72c6943..3b28e1f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -669,7 +669,7 @@ allocated:
return ret;
failed_out:
for (i = 0; i < index; i++)
- ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+ ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
return ret;
}

@@ -765,20 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return err;
failed:
/* Allocation failed, free what we already allocated */
+ ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
for (i = 1; i <= n ; i++) {
- BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
/*
- * Note: is_metadata is 0 because branch[i].bh is
- * newly allocated, so there is no need to revoke the
- * block. If we do, it's harmless, but not necessary.
+ * branch[i].bh is newly allocated, so there is no
+ * need to revoke the block, which is why we don't
+ * need to set EXT4_FREE_BLOCKS_METADATA.
*/
- ext4_forget(handle, 0, inode, branch[i].bh,
- branch[i].bh->b_blocknr);
+ ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+ EXT4_FREE_BLOCKS_FORGET);
}
- for (i = 0; i < indirect_blks; i++)
- ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
+ for (i = n+1; i < indirect_blks; i++)
+ ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);

- ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
+ ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);

return err;
}
@@ -857,18 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,

err_out:
for (i = 1; i <= num; i++) {
- BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
/*
- * Note: is_metadata is 0 because branch[i].bh is
- * newly allocated, so there is no need to revoke the
- * block. If we do, it's harmless, but not necessary.
+ * branch[i].bh is newly allocated, so there is no
+ * need to revoke the block, which is why we don't
+ * need to set EXT4_FREE_BLOCKS_METADATA.
*/
- ext4_forget(handle, 0, inode, where[i].bh,
- where[i].bh->b_blocknr);
- ext4_free_blocks(handle, inode,
- le32_to_cpu(where[i-1].key), 1, 0);
+ ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+ EXT4_FREE_BLOCKS_FORGET);
}
- ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
+ ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+ blks, 0);

return err;
}
@@ -4080,7 +4078,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
__le32 *last)
{
__le32 *p;
- int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
+ int flags = EXT4_FREE_BLOCKS_FORGET;
+
+ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ flags |= EXT4_FREE_BLOCKS_METADATA;

if (try_to_extend_transaction(handle, inode)) {
if (bh) {
@@ -4096,27 +4097,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
}
}

- /*
- * Any buffers which are on the journal will be in memory. We
- * find them on the hash table so jbd2_journal_revoke() will
- * run jbd2_journal_forget() on them. We've already detached
- * each block from the file, so bforget() in
- * jbd2_journal_forget() should be safe.
- *
- * AKPM: turn on bforget in jbd2_journal_forget()!!!
- */
- for (p = first; p < last; p++) {
- u32 nr = le32_to_cpu(*p);
- if (nr) {
- struct buffer_head *tbh;
-
- *p = 0;
- tbh = sb_find_get_block(inode->i_sb, nr);
- ext4_forget(handle, is_metadata, inode, tbh, nr);
- }
- }
+ for (p = first; p < last; p++)
+ *p = 0;

- ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
+ ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
}

/**
@@ -4304,7 +4288,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
blocks_for_truncate(inode));
}

- ext4_free_blocks(handle, inode, nr, 1, 1);
+ ext4_free_blocks(handle, inode, 0, nr, 1,
+ EXT4_FREE_BLOCKS_METADATA);

if (parent_bh) {
/*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0dca90b..78de5d3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4436,8 +4436,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* @metadata: Are these metadata blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t block, unsigned long count,
- int metadata)
+ struct buffer_head *bh, ext4_fsblk_t block,
+ unsigned long count, int flags)
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
@@ -4454,15 +4454,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
int err = 0;
int ret;

- /*
- * We need to make sure we don't reuse the freed block until
- * after the transaction is committed, which we can do by
- * treating the block as metadata, below. We make an
- * exception if the inode is to be written in writeback mode
- * since writeback mode has weak data consistency guarantees.
- */
- if (!ext4_should_writeback_data(inode))
- metadata = 1;
+ if (bh) {
+ if (block)
+ BUG_ON(block != bh->b_blocknr);
+ else
+ block = bh->b_blocknr;
+ }

sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
@@ -4476,7 +4473,32 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}

ext4_debug("freeing block %llu\n", block);
- trace_ext4_free_blocks(inode, block, count, metadata);
+ trace_ext4_free_blocks(inode, block, count, flags);
+
+ if (flags & EXT4_FREE_BLOCKS_FORGET) {
+ struct buffer_head *tbh = bh;
+ int i;
+
+ BUG_ON(bh && (count > 1));
+
+ for (i = 0; i < count; i++) {
+ if (!bh)
+ tbh = sb_find_get_block(inode->i_sb,
+ block + i);
+ ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
+ inode, tbh, block + i);
+ }
+ }
+
+ /*
+ * We need to make sure we don't reuse the freed block until
+ * after the transaction is committed, which we can do by
+ * treating the block as metadata, below. We make an
+ * exception if the inode is to be written in writeback mode
+ * since writeback mode has weak data consistency guarantees.
+ */
+ if (!ext4_should_writeback_data(inode))
+ flags |= EXT4_FREE_BLOCKS_METADATA;

ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
@@ -4552,7 +4574,8 @@ do_more:
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
goto error_return;
- if (metadata && ext4_handle_valid(handle)) {
+
+ if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
struct ext4_free_data *new_entry;
/*
* blocks being freed are metadata. these blocks shouldn't
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b8..d641e13 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i]) {
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode,
- le32_to_cpu(tmp_idata[i]), 1, 1);
+ ext4_free_blocks(handle, inode, 0,
+ le32_to_cpu(tmp_idata[i]), 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+ ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
return 0;
}

@@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
+ ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
return 0;
}

@@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
/* ei->i_data[EXT4_IND_BLOCK] */
if (i_data[0]) {
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode,
- le32_to_cpu(i_data[0]), 1, 1);
+ ext4_free_blocks(handle, inode, 0,
+ le32_to_cpu(i_data[0]), 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
}

/* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, block, 1, 1);
+ ext4_free_blocks(handle, inode, 0, block, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return retval;
}

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0257019..910bf9a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
mb_cache_entry_free(ce);
- ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
get_bh(bh);
- ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
+ ext4_free_blocks(handle, inode, bh, 0, 1,
+ EXT4_FREE_BLOCKS_METADATA |
+ EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +833,8 @@ inserted:
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
- ext4_free_blocks(handle, inode, block, 1, 1);
+ ext4_free_blocks(handle, inode, 0, block, 1,
+ EXT4_FREE_BLOCKS_METADATA);
error = -EIO;
goto cleanup;
}
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index b390e1f..74f628b 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -650,30 +650,32 @@ TRACE_EVENT(ext4_allocate_blocks,

TRACE_EVENT(ext4_free_blocks,
TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
- int metadata),
+ int flags),

- TP_ARGS(inode, block, count, metadata),
+ TP_ARGS(inode, block, count, flags),

TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
+ __field( umode_t, mode )
__field( __u64, block )
__field( unsigned long, count )
- __field( int, metadata )
-
+ __field( int, flags )
),

TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
+ __entry->mode = inode->i_mode;
__entry->block = block;
__entry->count = count;
- __entry->metadata = metadata;
+ __entry->flags = flags;
),

- TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
+ TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->block, __entry->count, __entry->metadata)
+ __entry->mode, __entry->block, __entry->count,
+ __entry->flags)
);

TRACE_EVENT(ext4_sync_file,
--
1.6.5.216.g5288a.dirty


2009-11-23 03:53:16

by Eric Sandeen

[permalink] [raw]
Subject: Re: [PATCH 0/8] Clean up ext4's block free code paths

Theodore Ts'o wrote:
> This patch series cleans up functions involved with freeing blocks. It
> removes functions that are only called by a single caller and folds them
> into their caller, and makes the code a little easier to read. It also
> reduces ext4's code foot print slightly (by 136 bytes).
>
> Finally, it fixes a bug in the extents migration function, which was
> missing calls to ext4_forget() when it frees the indirect blocks after
> converting an inode to using extents. This avoids potential file system
> corruption after a crash while converting an entire filesystem to use
> extents via "chattr -R +e /mntpt".

Have you double-checked stack usage before & after the series, just
in case all the folding-in increased some stack footprints?

Probably ok, but worth a double-check.

-Eric

> - Ted
>
> Theodore Ts'o (8):
> ext4: move ext4_forget() to ext4_jbd2.c
> ext4: fold ext4_journal_revoke() into ext4_forget()
> ext4: fold ext4_journal_forget() into ext4_forget()
> ext4: fold ext4_free_blocks() and ext4_mb_free_blocks()
> ext4: call ext4_forget() from ext4_free_blocks()
> ext4: print i_mode in octal in ext4 tracepoints
> ext4: add check for wraparound in ext4_data_block_valid()
> ext4: use ext4_data_block_valid() in ext4_free_blocks()
>
> fs/ext4/balloc.c | 38 --------------
> fs/ext4/block_validity.c | 1 +
> fs/ext4/ext4.h | 15 +++--
> fs/ext4/ext4_jbd2.c | 82 ++++++++++++++++++++++---------
> fs/ext4/ext4_jbd2.h | 23 +++------
> fs/ext4/extents.c | 24 +++------
> fs/ext4/inode.c | 116 +++++++++++--------------------------------
> fs/ext4/mballoc.c | 61 ++++++++++++++++++-----
> fs/ext4/migrate.c | 23 ++++++---
> fs/ext4/xattr.c | 8 ++-
> include/trace/events/ext4.h | 24 +++++----
> 11 files changed, 198 insertions(+), 217 deletions(-)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html


2009-11-23 14:46:27

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 0/8] Clean up ext4's block free code paths

On Sun, Nov 22, 2009 at 09:53:16PM -0600, Eric Sandeen wrote:
>
> Have you double-checked stack usage before & after the series, just
> in case all the folding-in increased some stack footprints?

The static stack footprints (on an x86) showed slight increases:

Before:
ext4_mb_free_blocks [vmlinux]: 124
ext4_ext_truncate [vmlinux]: 100

After applying the patch series:

ext4_free_blocks [vmlinux]: 136
ext4_ext_truncate [vmlinux]: 116

I was more concerned about the dynamic stack usage, so I ran xfstests
QA and then re-running test #74 (fstest), which seems to be the one
that uses the most stack. The results are not fully consistent (which
is why I manually re-ran #74 a few times to try to provoke the
smallest possible stack space left), but the worse case stack usage I
was able to find was:

Before:

fstest used greatest stack depth: 1084 bytes left

After applying the patch series:

fstest used greatest stack depth: 1024 bytes left

So it's slightly worse, but hopefully not enough to push us over the
edge. I think I can move some stack variables into inner blocks in
ext4_free_blocks() which should help, if we think this is a major
problem.

- Ted

2009-11-23 19:22:19

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 5/8] ext4: call ext4_forget() from ext4_free_blocks()

On 2009-11-22, at 19:18, Theodore Ts'o wrote:
> Add the facility for ext4_forget() to be called form
> ext4_free_blocks(). This simplifies the code in a large number of
> places, and centralizes most of the work of calling ext4_forget() into
> a single place.
>
> Also fix a bug in the extents migration code; it wasn't calling
> ext4_forget() when releasing the indirect blocks during the
> conversion.

Would this also solve Curt's problem, mentioned in "Bug in extent
zeroout: blocks not marked as new" where bforget was not being called
when the blocks are freed?

> As a result, if the system cashed during or shortly after the
> extents migration, and the released indirect blocks get reused as
> data blocks, the journal replay would corrupt the data blocks.
> With this new patch, fixing this bug was as simple as adding the
> EXT4_FREE_BLOCKS_FORGET flags to the call to ext4_free_blocks().
>
> Signed-off-by: "Theodore Ts'o" <[email protected]>
> Cc: "Aneesh Kumar K.V" <[email protected]>
> ---
> fs/ext4/ext4.h | 10 +++++-
> fs/ext4/extents.c | 24 ++++++---------
> fs/ext4/inode.c | 67 +++++++++++++++
> +--------------------------
> fs/ext4/mballoc.c | 49 +++++++++++++++++++++++--------
> fs/ext4/migrate.c | 23 ++++++++++----
> fs/ext4/xattr.c | 8 +++--
> include/trace/events/ext4.h | 16 ++++++----
> 7 files changed, 109 insertions(+), 88 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 210e1b5..4cfc2f0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -376,6 +376,12 @@ struct ext4_new_group_data {
> EXT4_GET_BLOCKS_DIO_CREATE_EXT)
>
> /*
> + * Flags used by ext4_free_blocks
> + */
> +#define EXT4_FREE_BLOCKS_METADATA 0x0001
> +#define EXT4_FREE_BLOCKS_FORGET 0x0002
> +
> +/*
> * ioctl commands
> */
> #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
> @@ -1384,8 +1390,8 @@ extern void ext4_discard_preallocations(struct
> inode *);
> extern int __init init_ext4_mballoc(void);
> extern void exit_ext4_mballoc(void);
> extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
> - ext4_fsblk_t block, unsigned long count,
> - int metadata);
> + struct buffer_head *bh, ext4_fsblk_t block,
> + unsigned long count, int flags);
> extern int ext4_mb_add_groupinfo(struct super_block *sb,
> ext4_group_t i, struct ext4_group_desc *desc);
> extern int ext4_mb_get_buddy_cache_lock(struct super_block *,
> ext4_group_t);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 74dcff8..2c4a932 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -1007,7 +1007,8 @@ cleanup:
> for (i = 0; i < depth; i++) {
> if (!ablocks[i])
> continue;
> - ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
> + ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
> + EXT4_FREE_BLOCKS_METADATA);
> }
> }
> kfree(ablocks);
> @@ -1957,7 +1958,6 @@ errout:
> static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path)
> {
> - struct buffer_head *bh;
> int err;
> ext4_fsblk_t leaf;
>
> @@ -1973,9 +1973,8 @@ static int ext4_ext_rm_idx(handle_t *handle,
> struct inode *inode,
> if (err)
> return err;
> ext_debug("index is empty, remove it, free block %llu\n", leaf);
> - bh = sb_find_get_block(inode->i_sb, leaf);
> - ext4_forget(handle, 1, inode, bh, leaf);
> - ext4_free_blocks(handle, inode, leaf, 1, 1);
> + ext4_free_blocks(handle, inode, 0, leaf, 1,
> + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> return err;
> }
>
> @@ -2042,12 +2041,11 @@ static int ext4_remove_blocks(handle_t
> *handle, struct inode *inode,
> struct ext4_extent *ex,
> ext4_lblk_t from, ext4_lblk_t to)
> {
> - struct buffer_head *bh;
> unsigned short ee_len = ext4_ext_get_actual_len(ex);
> - int i, metadata = 0;
> + int flags = EXT4_FREE_BLOCKS_FORGET;
>
> if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> - metadata = 1;
> + flags |= EXT4_FREE_BLOCKS_METADATA;
> #ifdef EXTENTS_STATS
> {
> struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> @@ -2072,11 +2070,7 @@ static int ext4_remove_blocks(handle_t
> *handle, struct inode *inode,
> num = le32_to_cpu(ex->ee_block) + ee_len - from;
> start = ext_pblock(ex) + ee_len - num;
> ext_debug("free last %u blocks starting %llu\n", num, start);
> - for (i = 0; i < num; i++) {
> - bh = sb_find_get_block(inode->i_sb, start + i);
> - ext4_forget(handle, metadata, inode, bh, start + i);
> - }
> - ext4_free_blocks(handle, inode, start, num, metadata);
> + ext4_free_blocks(handle, inode, 0, start, num, flags);
> } else if (from == le32_to_cpu(ex->ee_block)
> && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
> printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
> @@ -3319,8 +3313,8 @@ int ext4_ext_get_blocks(handle_t *handle,
> struct inode *inode,
> /* not a good idea to call discard here directly,
> * but otherwise we'd need to call it every free() */
> ext4_discard_preallocations(inode);
> - ext4_free_blocks(handle, inode, ext_pblock(&newex),
> - ext4_ext_get_actual_len(&newex), 0);
> + ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
> + ext4_ext_get_actual_len(&newex), 0);
> goto out2;
> }
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 72c6943..3b28e1f 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -669,7 +669,7 @@ allocated:
> return ret;
> failed_out:
> for (i = 0; i < index; i++)
> - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
> return ret;
> }
>
> @@ -765,20 +765,20 @@ static int ext4_alloc_branch(handle_t *handle,
> struct inode *inode,
> return err;
> failed:
> /* Allocation failed, free what we already allocated */
> + ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
> for (i = 1; i <= n ; i++) {
> - BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
> /*
> - * Note: is_metadata is 0 because branch[i].bh is
> - * newly allocated, so there is no need to revoke the
> - * block. If we do, it's harmless, but not necessary.
> + * branch[i].bh is newly allocated, so there is no
> + * need to revoke the block, which is why we don't
> + * need to set EXT4_FREE_BLOCKS_METADATA.
> */
> - ext4_forget(handle, 0, inode, branch[i].bh,
> - branch[i].bh->b_blocknr);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
> + EXT4_FREE_BLOCKS_FORGET);
> }
> - for (i = 0; i < indirect_blks; i++)
> - ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
> + for (i = n+1; i < indirect_blks; i++)
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
>
> - ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
> + ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
>
> return err;
> }
> @@ -857,18 +857,16 @@ static int ext4_splice_branch(handle_t
> *handle, struct inode *inode,
>
> err_out:
> for (i = 1; i <= num; i++) {
> - BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
> /*
> - * Note: is_metadata is 0 because branch[i].bh is
> - * newly allocated, so there is no need to revoke the
> - * block. If we do, it's harmless, but not necessary.
> + * branch[i].bh is newly allocated, so there is no
> + * need to revoke the block, which is why we don't
> + * need to set EXT4_FREE_BLOCKS_METADATA.
> */
> - ext4_forget(handle, 0, inode, where[i].bh,
> - where[i].bh->b_blocknr);
> - ext4_free_blocks(handle, inode,
> - le32_to_cpu(where[i-1].key), 1, 0);
> + ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
> + EXT4_FREE_BLOCKS_FORGET);
> }
> - ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks,
> 0);
> + ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
> + blks, 0);
>
> return err;
> }
> @@ -4080,7 +4078,10 @@ static void ext4_clear_blocks(handle_t
> *handle, struct inode *inode,
> __le32 *last)
> {
> __le32 *p;
> - int is_metadata = S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode);
> + int flags = EXT4_FREE_BLOCKS_FORGET;
> +
> + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> + flags |= EXT4_FREE_BLOCKS_METADATA;
>
> if (try_to_extend_transaction(handle, inode)) {
> if (bh) {
> @@ -4096,27 +4097,10 @@ static void ext4_clear_blocks(handle_t
> *handle, struct inode *inode,
> }
> }
>
> - /*
> - * Any buffers which are on the journal will be in memory. We
> - * find them on the hash table so jbd2_journal_revoke() will
> - * run jbd2_journal_forget() on them. We've already detached
> - * each block from the file, so bforget() in
> - * jbd2_journal_forget() should be safe.
> - *
> - * AKPM: turn on bforget in jbd2_journal_forget()!!!
> - */
> - for (p = first; p < last; p++) {
> - u32 nr = le32_to_cpu(*p);
> - if (nr) {
> - struct buffer_head *tbh;
> -
> - *p = 0;
> - tbh = sb_find_get_block(inode->i_sb, nr);
> - ext4_forget(handle, is_metadata, inode, tbh, nr);
> - }
> - }
> + for (p = first; p < last; p++)
> + *p = 0;
>
> - ext4_free_blocks(handle, inode, block_to_free, count, is_metadata);
> + ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
> }
>
> /**
> @@ -4304,7 +4288,8 @@ static void ext4_free_branches(handle_t
> *handle, struct inode *inode,
> blocks_for_truncate(inode));
> }
>
> - ext4_free_blocks(handle, inode, nr, 1, 1);
> + ext4_free_blocks(handle, inode, 0, nr, 1,
> + EXT4_FREE_BLOCKS_METADATA);
>
> if (parent_bh) {
> /*
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 0dca90b..78de5d3 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -4436,8 +4436,8 @@ ext4_mb_free_metadata(handle_t *handle, struct
> ext4_buddy *e4b,
> * @metadata: Are these metadata blocks
> */
> void ext4_free_blocks(handle_t *handle, struct inode *inode,
> - ext4_fsblk_t block, unsigned long count,
> - int metadata)
> + struct buffer_head *bh, ext4_fsblk_t block,
> + unsigned long count, int flags)
> {
> struct buffer_head *bitmap_bh = NULL;
> struct super_block *sb = inode->i_sb;
> @@ -4454,15 +4454,12 @@ void ext4_free_blocks(handle_t *handle,
> struct inode *inode,
> int err = 0;
> int ret;
>
> - /*
> - * We need to make sure we don't reuse the freed block until
> - * after the transaction is committed, which we can do by
> - * treating the block as metadata, below. We make an
> - * exception if the inode is to be written in writeback mode
> - * since writeback mode has weak data consistency guarantees.
> - */
> - if (!ext4_should_writeback_data(inode))
> - metadata = 1;
> + if (bh) {
> + if (block)
> + BUG_ON(block != bh->b_blocknr);
> + else
> + block = bh->b_blocknr;
> + }
>
> sbi = EXT4_SB(sb);
> es = EXT4_SB(sb)->s_es;
> @@ -4476,7 +4473,32 @@ void ext4_free_blocks(handle_t *handle,
> struct inode *inode,
> }
>
> ext4_debug("freeing block %llu\n", block);
> - trace_ext4_free_blocks(inode, block, count, metadata);
> + trace_ext4_free_blocks(inode, block, count, flags);
> +
> + if (flags & EXT4_FREE_BLOCKS_FORGET) {
> + struct buffer_head *tbh = bh;
> + int i;
> +
> + BUG_ON(bh && (count > 1));
> +
> + for (i = 0; i < count; i++) {
> + if (!bh)
> + tbh = sb_find_get_block(inode->i_sb,
> + block + i);
> + ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
> + inode, tbh, block + i);
> + }
> + }
> +
> + /*
> + * We need to make sure we don't reuse the freed block until
> + * after the transaction is committed, which we can do by
> + * treating the block as metadata, below. We make an
> + * exception if the inode is to be written in writeback mode
> + * since writeback mode has weak data consistency guarantees.
> + */
> + if (!ext4_should_writeback_data(inode))
> + flags |= EXT4_FREE_BLOCKS_METADATA;
>
> ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
> if (ac) {
> @@ -4552,7 +4574,8 @@ do_more:
> err = ext4_mb_load_buddy(sb, block_group, &e4b);
> if (err)
> goto error_return;
> - if (metadata && ext4_handle_valid(handle)) {
> +
> + if ((flags & EXT4_FREE_BLOCKS_METADATA) &&
> ext4_handle_valid(handle)) {
> struct ext4_free_data *new_entry;
> /*
> * blocks being freed are metadata. these blocks shouldn't
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index a93d5b8..d641e13 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
> for (i = 0; i < max_entries; i++) {
> if (tmp_idata[i]) {
> extend_credit_for_blkdel(handle, inode);
> - ext4_free_blocks(handle, inode,
> - le32_to_cpu(tmp_idata[i]), 1, 1);
> + ext4_free_blocks(handle, inode, 0,
> + le32_to_cpu(tmp_idata[i]), 1,
> + EXT4_FREE_BLOCKS_METADATA |
> + EXT4_FREE_BLOCKS_FORGET);
> }
> }
> put_bh(bh);
> extend_credit_for_blkdel(handle, inode);
> - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
> + EXT4_FREE_BLOCKS_METADATA |
> + EXT4_FREE_BLOCKS_FORGET);
> return 0;
> }
>
> @@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
> }
> put_bh(bh);
> extend_credit_for_blkdel(handle, inode);
> - ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
> + ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
> + EXT4_FREE_BLOCKS_METADATA |
> + EXT4_FREE_BLOCKS_FORGET);
> return 0;
> }
>
> @@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle,
> struct inode *inode, __le32 *i_data)
> /* ei->i_data[EXT4_IND_BLOCK] */
> if (i_data[0]) {
> extend_credit_for_blkdel(handle, inode);
> - ext4_free_blocks(handle, inode,
> - le32_to_cpu(i_data[0]), 1, 1);
> + ext4_free_blocks(handle, inode, 0,
> + le32_to_cpu(i_data[0]), 1,
> + EXT4_FREE_BLOCKS_METADATA |
> + EXT4_FREE_BLOCKS_FORGET);
> }
>
> /* ei->i_data[EXT4_DIND_BLOCK] */
> @@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct
> inode *inode,
> }
> put_bh(bh);
> extend_credit_for_blkdel(handle, inode);
> - ext4_free_blocks(handle, inode, block, 1, 1);
> + ext4_free_blocks(handle, inode, 0, block, 1,
> + EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
> return retval;
> }
>
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 0257019..910bf9a 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle,
> struct inode *inode,
> ea_bdebug(bh, "refcount now=0; freeing");
> if (ce)
> mb_cache_entry_free(ce);
> - ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
> get_bh(bh);
> - ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
> + ext4_free_blocks(handle, inode, bh, 0, 1,
> + EXT4_FREE_BLOCKS_METADATA |
> + EXT4_FREE_BLOCKS_FORGET);
> } else {
> le32_add_cpu(&BHDR(bh)->h_refcount, -1);
> error = ext4_handle_dirty_metadata(handle, inode, bh);
> @@ -832,7 +833,8 @@ inserted:
> new_bh = sb_getblk(sb, block);
> if (!new_bh) {
> getblk_failed:
> - ext4_free_blocks(handle, inode, block, 1, 1);
> + ext4_free_blocks(handle, inode, 0, block, 1,
> + EXT4_FREE_BLOCKS_METADATA);
> error = -EIO;
> goto cleanup;
> }
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index b390e1f..74f628b 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -650,30 +650,32 @@ TRACE_EVENT(ext4_allocate_blocks,
>
> TRACE_EVENT(ext4_free_blocks,
> TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
> - int metadata),
> + int flags),
>
> - TP_ARGS(inode, block, count, metadata),
> + TP_ARGS(inode, block, count, flags),
>
> TP_STRUCT__entry(
> __field( dev_t, dev )
> __field( ino_t, ino )
> + __field( umode_t, mode )
> __field( __u64, block )
> __field( unsigned long, count )
> - __field( int, metadata )
> -
> + __field( int, flags )
> ),
>
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->ino = inode->i_ino;
> + __entry->mode = inode->i_mode;
> __entry->block = block;
> __entry->count = count;
> - __entry->metadata = metadata;
> + __entry->flags = flags;
> ),
>
> - TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
> + TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
> jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
> - __entry->block, __entry->count, __entry->metadata)
> + __entry->mode, __entry->block, __entry->count,
> + __entry->flags)
> );
>
> TRACE_EVENT(ext4_sync_file,
> --
> 1.6.5.216.g5288a.dirty


Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.


2009-11-23 20:28:53

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 5/8] ext4: call ext4_forget() from ext4_free_blocks()

On Mon, Nov 23, 2009 at 12:22:29PM -0700, Andreas Dilger wrote:
> >Also fix a bug in the extents migration code; it wasn't calling
> >ext4_forget() when releasing the indirect blocks during the
> >conversion.
>
> Would this also solve Curt's problem, mentioned in "Bug in extent
> zeroout: blocks not marked as new" where bforget was not being
> called when the blocks are freed?

No, Curt was referring to the extents code which convert uninitialized
to initialized extents; the code I was referring to is the code which
migrates direct/indirect-mapped inodes to extent-mapped inodes.

- Ted


2010-06-27 02:40:25

by Amir G.

[permalink] [raw]
Subject: Re: [PATCH 5/8] ext4: call ext4_forget() from ext4_free_blocks()

Hi Ted,

There is a lurking bug that needs to be fixed in ext4_free_branches().
it can cause double freeing of blocks after crash.
please see bug description below.

Amir.

On Wed, Jun 23, 2010 at 10:01 PM, Amir G.
<[email protected]> wrote:
> Hi,
>
> We have experienced bitmap inconsistencies after crash during file
> delete under heavy load.
> The crash is not file system related and I the following patch in
> ext4_free_branches() fixes the recovery problem.
>
> If the transaction is restarted and there is a crash before the new
> transaction is committed,
> then after recovery, the blocks that this indirect block points to
> have been freed, but the indirect block itself
> has not been freed and may still point to some of the free blocks
> (because of the ext4_forget()).
>
> So ext4_forget() should be called inside ext4_free_blocks() to avoid
> this problem.
> Are there any consequences to this patch that I am not aware of?
>
> Amir.
>
> Signed-off-by: Amir Goldstein <[email protected]>
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 42272d6..682e2fa 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4458,6 +4458,7 @@ static void ext4_free_branches(handle_t *handle,
> struct inode *inode,
> ?{
> ? ? ? ?ext4_fsblk_t nr;
> ? ? ? ?__le32 *p;
> + ? ? ? int ? ? flags;
>
> ? ? ? ?if (ext4_handle_is_aborted(handle))
> ? ? ? ? ? ? ? ?return;
> @@ -4520,7 +4521,7 @@ static void ext4_free_branches(handle_t *handle,
> struct inode *inode,
> ? ? ? ? ? ? ? ? ? ? ? ? * revoke records must be emitted *before* clearing
> ? ? ? ? ? ? ? ? ? ? ? ? * this block's bit in the bitmaps.
> ? ? ? ? ? ? ? ? ? ? ? ? */
> - ? ? ? ? ? ? ? ? ? ? ? ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
> + ? ? ? ? ? ? ? ? ? ? ? flags =
> EXT4_FREE_BLOCKS_METADATA|EXT4_FREE_BLOCKS_FORGET;
>
> ? ? ? ? ? ? ? ? ? ? ? ?/*
> ? ? ? ? ? ? ? ? ? ? ? ? * Everything below this this pointer has been
> @@ -4546,8 +4547,7 @@ static void ext4_free_branches(handle_t *handle,
> struct inode *inode,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?blocks_for_truncate(inode));
> ? ? ? ? ? ? ? ? ? ? ? ?}
>
> - ? ? ? ? ? ? ? ? ? ? ? ext4_free_blocks(handle, inode, 0, nr, 1,
> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?EXT4_FREE_BLOCKS_METADATA);
> + ? ? ? ? ? ? ? ? ? ? ? ext4_free_blocks(handle, inode, 0, nr, 1, flags);
>
> ? ? ? ? ? ? ? ? ? ? ? ?if (parent_bh) {
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?/*
>

2010-07-23 13:07:27

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 5/8] ext4: call ext4_forget() from ext4_free_blocks()

On Sun, Jun 27, 2010 at 05:40:23AM +0300, Amir G. wrote:
> Hi Ted,
>
> There is a lurking bug that needs to be fixed in ext4_free_branches().
> it can cause double freeing of blocks after crash.
> please see bug description below.

Sorry, I've been travelling a lot lately and I'm a bit behind on
processing patch submissions. I'm going to try to catch up over the
next week or so in preparation for the next merge window. I'll send
you an update on this (and hopefully your Next3 e2fsprogs patch
submissions, although that doesn't have the same urgency due to
Linus's upcoming merge window for kernel patches) soon.

- Ted