LinuxLists.cc - [PATCH v2 1/3] ext4: Fix missing acl release in error path in acl.c

[permalink] [raw]

Subject: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

changes from v1 -> v2 :
*) Update start_this_handle to take extra parameter to specify whether
to retry the allocation or not.
*) Added jbd allocation flags for callers to control the transaction allocation
behavior. Callers can pass JBD2_TOPLEVEL if allocation needs to be done using GFP_KERNEL.

Pass extra flags in journal routines to specify if its ok to
fail in the journal transaction allocation. Passing JBD2_FAIL_OK means caller is
ok with journal start failures and can handle ENOMEM.

Update ocfs2 and ext4 routines to pass JBD2_NO_FAIL for the updated journal
interface by default, to retain the existing behavior.

Signed-off-by: Manish Katiyar <[email protected]>
---
fs/ext4/ext4_jbd2.h | 2 +-
fs/ext4/super.c | 2 +-
fs/jbd2/transaction.c | 44 ++++++++++++++++----------------------------
fs/ocfs2/journal.c | 8 ++++----
include/linux/jbd2.h | 13 +++++++++----
5 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index bb85757..14e6599 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -220,7 +220,7 @@ static inline int ext4_journal_extend(handle_t *handle, int nblocks)
static inline int ext4_journal_restart(handle_t *handle, int nblocks)
{
if (ext4_handle_valid(handle))
- return jbd2_journal_restart(handle, nblocks);
+ return jbd2_journal_restart(handle, nblocks, JBD2_NO_FAIL);
return 0;
}

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d9937df..aa842f3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -295,7 +295,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
}
- return jbd2_journal_start(journal, nblocks);
+ return jbd2_journal_start(journal, nblocks, JBD2_NO_FAIL);
}

/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3eec82d..7f53589 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -106,6 +106,11 @@ static inline void update_t_max_wait(transaction_t *transaction,
#endif
}

+static inline int __jbd_flags_to_mask(int jbd_alloc_flags)
+{
+ return (jbd_alloc_flags & JBD2_TOPLEVEL) ? GFP_KERNEL : GFP_NOFS;
+}
+
/*
* start_this_handle: Given a handle, deal with any locking or stalling
* needed to make sure that there is enough journal space for the handle
@@ -114,13 +119,14 @@ static inline void update_t_max_wait(transaction_t *transaction,
*/

static int start_this_handle(journal_t *journal, handle_t *handle,
- int gfp_mask)
+ int jbd_alloc_flags)
{
transaction_t *transaction, *new_transaction = NULL;
tid_t tid;
int needed, need_to_start;
int nblocks = handle->h_buffer_credits;
unsigned long ts = jiffies;
+ int gfp_mask = __jbd_flags_to_mask(jbd_alloc_flags);

if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -133,14 +139,7 @@ alloc_transaction:
if (!journal->j_running_transaction) {
new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
if (!new_transaction) {
- /*
- * If __GFP_FS is not present, then we may be
- * being called from inside the fs writeback
- * layer, so we MUST NOT fail. Since
- * __GFP_NOFAIL is going away, we will arrange
- * to retry the allocation ourselves.
- */
- if ((gfp_mask & __GFP_FS) == 0) {
+ if (jbd_alloc_flags & JBD2_NO_FAIL) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto alloc_transaction;
}
@@ -308,6 +307,7 @@ static handle_t *new_handle(int nblocks)
* handle_t *jbd2_journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
+ * @jbd_alloc_flags: jbd allocation flags for transaction.
*
* We make sure that the transaction can guarantee at least nblocks of
* modified buffers in the log. We block until the log can guarantee
@@ -319,7 +319,8 @@ static handle_t *new_handle(int nblocks)
* Return a pointer to a newly allocated handle, or an ERR_PTR() value
* on failure.
*/
-handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
+handle_t *jbd2_journal_start(journal_t *journal,
+ int nblocks, int jbd_alloc_flags)
{
handle_t *handle = journal_current_handle();
int err;
@@ -339,7 +340,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)

current->journal_info = handle;

- err = start_this_handle(journal, handle, gfp_mask);
+ err = start_this_handle(journal, handle, jbd_alloc_flags);
if (err < 0) {
jbd2_free_handle(handle);
current->journal_info = NULL;
@@ -347,13 +348,6 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
}
return handle;
}
-EXPORT_SYMBOL(jbd2__journal_start);
-
-
-handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
-{
- return jbd2__journal_start(journal, nblocks, GFP_NOFS);
-}
EXPORT_SYMBOL(jbd2_journal_start);

@@ -442,7 +436,8 @@ out:
* transaction capabable of guaranteeing the requested number of
* credits.
*/
-int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
+int jbd2_journal_restart(handle_t *handle,
+ int nblocks, int jbd_alloc_flags)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
@@ -478,16 +473,9 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)

lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
- ret = start_this_handle(journal, handle, gfp_mask);
+ ret = start_this_handle(journal, handle, jbd_alloc_flags);
return ret;
}
-EXPORT_SYMBOL(jbd2__journal_restart);
-
-
-int jbd2_journal_restart(handle_t *handle, int nblocks)
-{
- return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
-}
EXPORT_SYMBOL(jbd2_journal_restart);

/**
@@ -1437,7 +1425,7 @@ int jbd2_journal_force_commit(journal_t *journal)
handle_t *handle;
int ret;

- handle = jbd2_journal_start(journal, 1);
+ handle = jbd2_journal_start(journal, 1, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
} else {
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b141a44..a784a64 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -353,11 +353,11 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)

/* Nested transaction? Just return the handle... */
if (journal_current_handle())
- return jbd2_journal_start(journal, max_buffs);
+ return jbd2_journal_start(journal, max_buffs, JBD2_NO_FAIL);

down_read(&osb->journal->j_trans_barrier);

- handle = jbd2_journal_start(journal, max_buffs);
+ handle = jbd2_journal_start(journal, max_buffs, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
up_read(&osb->journal->j_trans_barrier);

@@ -437,8 +437,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)

if (status > 0) {
trace_ocfs2_extend_trans_restart(old_nblocks + nblocks);
- status = jbd2_journal_restart(handle,
- old_nblocks + nblocks);
+ status = jbd2_journal_restart(handle, old_nblocks + nblocks,
+ JBD2_NO_FAIL);
if (status < 0) {
mlog_errno(status);
goto bail;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 4ecb7b1..c11181b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1106,10 +1106,10 @@ static inline handle_t *journal_current_handle(void)
* Register buffer modifications against the current transaction.
*/

-extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
-extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
-extern int jbd2_journal_restart(handle_t *, int nblocks);
-extern int jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
+extern handle_t *jbd2_journal_start(journal_t *,
+ int nblocks, int jbd_alloc_flags);
+extern int jbd2_journal_restart(handle_t *,
+ int nblocks, int jbd_alloc_flags);
extern int jbd2_journal_extend (handle_t *, int nblocks);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
@@ -1322,6 +1322,11 @@ static inline int jbd_space_needed(journal_t *journal)

extern int jbd_blocks_per_page(struct inode *inode);

+/* JBD2 transaction allocation flags */
+#define JBD2_NO_FAIL 0x00000001
+#define JBD2_FAIL_OK 0x00000002
+#define JBD2_TOPLEVEL 0x00000004
+
#ifdef __KERNEL__

#define buffer_trace_init(bh) do {} while (0)
--
1.7.4.1

2011-05-28 06:22:05

[permalink] [raw]

Subject: [PATCH v2 3/3] ext4: Fix ext4 to use pass jbd allocation flags if they can handle ENOMEM.

This patch updates ext4 code to pass appropriate transaction
allocation flags to callers who are safe to deal with ENOMEM.
Pass JBD2_TOPLEVEL from the callers who are safe to do allocations
with GFP_KERNEL instead of GFP_NOFS.

Signed-off-by: Manish Katiyar <[email protected]>
---
fs/ext4/acl.c | 6 ++++--
fs/ext4/ext4_jbd2.h | 8 +++++---
fs/ext4/extents.c | 11 ++++++-----
fs/ext4/ialloc.c | 2 +-
fs/ext4/inode.c | 40 ++++++++++++++++++++++++----------------
fs/ext4/ioctl.c | 6 ++++--
fs/ext4/migrate.c | 10 +++++-----
fs/ext4/move_extent.c | 3 ++-
fs/ext4/namei.c | 47 ++++++++++++++++++++++++++++++-----------------
fs/ext4/resize.c | 8 ++++----
fs/ext4/super.c | 18 +++++++++++-------
fs/ext4/xattr.c | 3 ++-
12 files changed, 98 insertions(+), 64 deletions(-)

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 93dc9a6..95db569 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -351,7 +351,8 @@ ext4_acl_chmod(struct inode *inode)

retry:
handle = ext4_journal_start(inode,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
if (error != -ENOMEM)
@@ -450,7 +451,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
acl = NULL;

retry:
- handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+ handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto release_and_out;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 14e6599..55084c2 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -156,7 +156,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
#define ext4_handle_dirty_super(handle, sb) \
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))

-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
+handle_t *ext4_journal_start_sb(struct super_block *sb,
+ int nblocks, int jbd_alloc_flags);
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);

#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@@ -197,9 +198,10 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
return 1;
}

-static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
+static inline handle_t *ext4_journal_start(struct inode *inode,
+ int nblocks, int jbd_alloc_flags)
{
- return ext4_journal_start_sb(inode->i_sb, nblocks);
+ return ext4_journal_start_sb(inode->i_sb, nblocks, jbd_alloc_flags);
}

#define ext4_journal_stop(handle) \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5199bac..b610294 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2546,7 +2546,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
ext_debug("truncate since %u\n", start);

/* probably first extent we're gonna free will be last in block */
- handle = ext4_journal_start(inode, depth + 1);
+ handle = ext4_journal_start(inode, depth + 1, JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -3672,7 +3672,7 @@ void ext4_ext_truncate(struct inode *inode)
* probably first extent we're gonna free will be last in block
*/
err = ext4_writepage_trans_blocks(inode);
- handle = ext4_journal_start(inode, err);
+ handle = ext4_journal_start(inode, err, JBD2_NO_FAIL);
if (IS_ERR(handle))
return;

@@ -3811,7 +3811,8 @@ retry:
while (ret >= 0 && ret < max_blocks) {
map.m_lblk = map.m_lblk + ret;
map.m_len = max_blocks = max_blocks - ret;
- handle = ext4_journal_start(inode, credits);
+ handle = ext4_journal_start(inode, credits,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
@@ -3889,7 +3890,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
while (ret >= 0 && ret < max_blocks) {
map.m_lblk += ret;
map.m_len = (max_blocks -= ret);
- handle = ext4_journal_start(inode, credits);
+ handle = ext4_journal_start(inode, credits, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
@@ -4215,7 +4216,7 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
ext4_flush_completed_IO(inode);

credits = ext4_writepage_trans_blocks(inode);
- handle = ext4_journal_start(inode, credits);
+ handle = ext4_journal_start(inode, credits, JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 21bb2f6..09b73d7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1263,7 +1263,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
goto out;

- handle = ext4_journal_start_sb(sb, 1);
+ handle = ext4_journal_start_sb(sb, 1, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 50d0e9c..0355858 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -129,7 +129,8 @@ static handle_t *start_transaction(struct inode *inode)
{
handle_t *result;

- result = ext4_journal_start(inode, blocks_for_truncate(inode));
+ result = ext4_journal_start(inode,
+ blocks_for_truncate(inode), JBD2_NO_FAIL);
if (!IS_ERR(result))
return result;

@@ -204,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
if (is_bad_inode(inode))
goto no_delete;

- handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
+ handle = ext4_journal_start(inode,
+ blocks_for_truncate(inode)+3, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
@@ -1401,7 +1403,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
if (map.m_len > DIO_MAX_BLOCKS)
map.m_len = DIO_MAX_BLOCKS;
dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
- handle = ext4_journal_start(inode, dio_credits);
+ handle = ext4_journal_start(inode, dio_credits, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
return ret;
@@ -1624,7 +1626,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
to = from + len;

retry:
- handle = ext4_journal_start(inode, needed_blocks);
+ handle = ext4_journal_start(inode, needed_blocks, JBD2_FAIL_OK);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -2558,7 +2560,8 @@ static int __ext4_journalled_writepage(struct page *page,
* references to buffers so we are safe */
unlock_page(page);

- handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+ handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode),
+ JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -2988,7 +2991,7 @@ retry:
needed_blocks = ext4_da_writepages_trans_blocks(inode);

/* start a new transaction*/
- handle = ext4_journal_start(inode, needed_blocks);
+ handle = ext4_journal_start(inode, needed_blocks, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -3124,7 +3127,7 @@ retry:
* to journalling the i_disksize update if writes to the end
* of file which has an already mapped buffer.
*/
- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1, JBD2_FAIL_OK);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -3478,7 +3481,7 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,

if (final_size > inode->i_size) {
/* Credits for sb + inode write */
- handle = ext4_journal_start(inode, 2);
+ handle = ext4_journal_start(inode, 2, JBD2_FAIL_OK);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -3521,7 +3524,7 @@ retry:
int err;

/* Credits for sb + inode write */
- handle = ext4_journal_start(inode, 2);
+ handle = ext4_journal_start(inode, 2, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
/* This is really bad luck. We've written the data
* but cannot extend i_size. Bail out and pretend
@@ -5329,8 +5332,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)

/* (user+group)*(old+new) structure, inode write (sb,
* inode block, ? - but truncate inode update has it) */
- handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
- EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
+ handle = ext4_journal_start(inode,
+ (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+ EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
@@ -5364,7 +5369,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
(attr->ia_size < inode->i_size)) {
handle_t *handle;

- handle = ext4_journal_start(inode, 3);
+ handle = ext4_journal_start(inode, 3,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
@@ -5384,7 +5390,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size);
if (error) {
/* Do as much error cleanup as possible */
- handle = ext4_journal_start(inode, 3);
+ handle = ext4_journal_start(inode, 3,
+ JBD2_NO_FAIL);
if (IS_ERR(handle)) {
ext4_orphan_del(NULL, inode);
goto err_out;
@@ -5421,7 +5428,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
rc = ext4_acl_chmod(inode);

err_out:
- ext4_std_error(inode->i_sb, error);
+ if (error != -ENOMEM)
+ ext4_std_error(inode->i_sb, error);
if (!error)
error = rc;
return error;
@@ -5737,7 +5745,7 @@ void ext4_dirty_inode(struct inode *inode)
{
handle_t *handle;

- handle = ext4_journal_start(inode, 2);
+ handle = ext4_journal_start(inode, 2, JBD2_NO_FAIL);
if (IS_ERR(handle))
goto out;

@@ -5821,7 +5829,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)

/* Finally we can mark the inode as dirty. */

- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1, JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 808c554..b88e295 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -101,7 +101,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
} else if (oldflags & EXT4_EOFBLOCKS_FL)
ext4_truncate(inode);

- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto flags_out;
@@ -157,7 +158,8 @@ flags_out:
goto setversion_out;
}

- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto setversion_out;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b57b98f..f233f12 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -485,10 +485,10 @@ int ext4_ext_migrate(struct inode *inode)
return retval;

handle = ext4_journal_start(inode,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
- + 1);
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+ + 1, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
return retval;
@@ -533,7 +533,7 @@ int ext4_ext_migrate(struct inode *inode)
ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
up_read((&EXT4_I(inode)->i_data_sem));

- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
/*
* It is impossible to update on-disk structures without
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2b8304b..ff4289f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -813,7 +813,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
* inode and donor_inode may change each different metadata blocks.
*/
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
- handle = ext4_journal_start(orig_inode, jblocks);
+ handle = ext4_journal_start(orig_inode, jblocks,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
*err = PTR_ERR(handle);
return 0;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b77..101d5d5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1750,9 +1750,11 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
dquot_initialize(dir);

retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ handle = ext4_journal_start(dir,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -1786,9 +1788,11 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
dquot_initialize(dir);

retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ handle = ext4_journal_start(dir,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -1825,9 +1829,11 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
dquot_initialize(dir);

retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+ handle = ext4_journal_start(dir,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+ EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -2140,7 +2146,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
dquot_initialize(dir);
dquot_initialize(dentry->d_inode);

- handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
+ handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -2202,7 +2209,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
dquot_initialize(dir);
dquot_initialize(dentry->d_inode);

- handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
+ handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -2279,7 +2287,8 @@ static int ext4_symlink(struct inode *dir,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
}
retry:
- handle = ext4_journal_start(dir, credits);
+ handle = ext4_journal_start(dir,
+ credits, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -2319,7 +2328,8 @@ retry:
*/
handle = ext4_journal_start(dir,
EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto err_drop_inode;
@@ -2364,8 +2374,10 @@ static int ext4_link(struct dentry *old_dentry,
dquot_initialize(dir);

retry:
- handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS);
+ handle = ext4_journal_start(dir,
+ EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

@@ -2416,8 +2428,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dentry->d_inode)
dquot_initialize(new_dentry->d_inode);
handle = ext4_journal_start(old_dir, 2 *
- EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
- EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
+ EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
+ EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2,
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
return PTR_ERR(handle);

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 80bbc9c..6bc36ec 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -176,7 +176,7 @@ static int setup_new_group_blocks(struct super_block *sb,
int err = 0, err2;

/* This transaction may be extended/restarted along the way */
- handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
+ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA, JBD2_NO_FAIL);

if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -655,7 +655,7 @@ static void update_backups(struct super_block *sb,
handle_t *handle;
int err = 0, err2;

- handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
+ handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
group = 1;
err = PTR_ERR(handle);
@@ -793,7 +793,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
*/
handle = ext4_journal_start_sb(sb,
ext4_bg_has_super(sb, input->group) ?
- 3 + reserved_gdb : 4);
+ 3 + reserved_gdb : 4, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto exit_put;
@@ -1031,7 +1031,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
/* We will update the superblock, one block bitmap, and
* one group descriptor via ext4_free_blocks().
*/
- handle = ext4_journal_start_sb(sb, 3);
+ handle = ext4_journal_start_sb(sb, 3, JBD2_NO_FAIL);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_warning(sb, "error %d on journal start", err);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index aa842f3..8beb687 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -263,7 +263,8 @@ static void ext4_put_nojournal(handle_t *handle)
* ext4 prevents a new handle from being started by s_frozen, which
* is in an upper layer.
*/
-handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
+handle_t *ext4_journal_start_sb(struct super_block *sb,
+ int nblocks, int jbd_alloc_flags)
{
journal_t *journal;
handle_t *handle;
@@ -295,7 +296,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
}
- return jbd2_journal_start(journal, nblocks, JBD2_NO_FAIL);
+ return jbd2_journal_start(journal, nblocks, jbd_alloc_flags);
}

/*
@@ -4561,7 +4562,8 @@ static int ext4_write_dquot(struct dquot *dquot)

inode = dquot_to_inode(dquot);
handle = ext4_journal_start(inode,
- EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+ EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb),
+ JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit(dquot);
@@ -4577,7 +4579,8 @@ static int ext4_acquire_dquot(struct dquot *dquot)
handle_t *handle;

handle = ext4_journal_start(dquot_to_inode(dquot),
- EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+ EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb),
+ JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_acquire(dquot);
@@ -4593,7 +4596,8 @@ static int ext4_release_dquot(struct dquot *dquot)
handle_t *handle;

handle = ext4_journal_start(dquot_to_inode(dquot),
- EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+ EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb),
+ JBD2_NO_FAIL);
if (IS_ERR(handle)) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
@@ -4624,7 +4628,7 @@ static int ext4_write_info(struct super_block *sb, int type)
handle_t *handle;

/* Data block + inode block */
- handle = ext4_journal_start(sb->s_root->d_inode, 2);
+ handle = ext4_journal_start(sb->s_root->d_inode, 2, JBD2_NO_FAIL);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit_info(sb, type);
@@ -4702,7 +4706,7 @@ static int ext4_quota_off(struct super_block *sb, int type)

/* Update modification times of quota files when userspace can
* start looking at them */
- handle = ext4_journal_start(inode, 1);
+ handle = ext4_journal_start(inode, 1, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle))
goto out;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c757adc..e27b289 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1084,7 +1084,8 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
int error, retries = 0;

retry:
- handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+ handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
+ (JBD2_FAIL_OK | JBD2_TOPLEVEL));
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
} else {
--
1.7.4.1

2011-05-31 11:22:55

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Fri 27-05-11 23:20:57, Manish Katiyar wrote:
> changes from v1 -> v2 :
> *) Update start_this_handle to take extra parameter to specify whether
> to retry the allocation or not.
> *) Added jbd allocation flags for callers to control the transaction allocation
> behavior. Callers can pass JBD2_TOPLEVEL if allocation needs to be done using GFP_KERNEL.
The above changelog should be below where (*) is.

Also - this is mainly for Ted: I've looked at where we JBD2_TOPLEVEL could
actually be enabled and the results are: Pretty much nowhere.

The problem is that with ext4, we need i_mutex in io completion path to
end page writeback. So we cannot do GFP_KERNEL allocation whenever we hold
i_mutex because mm might wait in direct reclaim for IO to complete and that
cannot happen until we release i_mutex. And pretty much every write path in
ext4 holds i_mutex.

So JBD2_TOPLEVEL looks like a useless excercise to me and I'd just don't do
it.

> Pass extra flags in journal routines to specify if its ok to
> fail in the journal transaction allocation. Passing JBD2_FAIL_OK means caller is
> ok with journal start failures and can handle ENOMEM.
>
> Update ocfs2 and ext4 routines to pass JBD2_NO_FAIL for the updated journal
> interface by default, to retain the existing behavior.
>
> Signed-off-by: Manish Katiyar <[email protected]>
> ---
> fs/ext4/ext4_jbd2.h | 2 +-
> fs/ext4/super.c | 2 +-
> fs/jbd2/transaction.c | 44 ++++++++++++++++----------------------------
> fs/ocfs2/journal.c | 8 ++++----
> include/linux/jbd2.h | 13 +++++++++----
> 5 files changed, 31 insertions(+), 38 deletions(-)
(*) HERE

> +/* JBD2 transaction allocation flags */
> +#define JBD2_NO_FAIL 0x00000001
> +#define JBD2_FAIL_OK 0x00000002
> +#define JBD2_TOPLEVEL 0x00000004
> +
I guess there's no need for JBD2_FAIL_OK - if NOFAIL is not set, we can
fail. Otherwise the patch looks OK.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2011-05-31 12:10:23

[permalink] [raw]

Subject: Re: [PATCH v2 3/3] ext4: Fix ext4 to use pass jbd allocation flags if they can handle ENOMEM.

On Fri 27-05-11 23:21:54, Manish Katiyar wrote:
> This patch updates ext4 code to pass appropriate transaction
> allocation flags to callers who are safe to deal with ENOMEM.
> Pass JBD2_TOPLEVEL from the callers who are safe to do allocations
> with GFP_KERNEL instead of GFP_NOFS.
I've indicated places where TOPLEVEL cannot be below. There are about 4
or 5 which are probably OK. Otherwise the patch is OK.

> Signed-off-by: Manish Katiyar <[email protected]>
> ---
> fs/ext4/acl.c | 6 ++++--
> fs/ext4/ext4_jbd2.h | 8 +++++---
> fs/ext4/extents.c | 11 ++++++-----
> fs/ext4/ialloc.c | 2 +-
> fs/ext4/inode.c | 40 ++++++++++++++++++++++++----------------
> fs/ext4/ioctl.c | 6 ++++--
> fs/ext4/migrate.c | 10 +++++-----
> fs/ext4/move_extent.c | 3 ++-
> fs/ext4/namei.c | 47 ++++++++++++++++++++++++++++++-----------------
> fs/ext4/resize.c | 8 ++++----
> fs/ext4/super.c | 18 +++++++++++-------
> fs/ext4/xattr.c | 3 ++-
> 12 files changed, 98 insertions(+), 64 deletions(-)
>
> diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
> index 93dc9a6..95db569 100644
> --- a/fs/ext4/acl.c
> +++ b/fs/ext4/acl.c
> @@ -351,7 +351,8 @@ ext4_acl_chmod(struct inode *inode)
>
> retry:
> handle = ext4_journal_start(inode,
> - EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
> + EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
As I wrote in my previous email, JBD2_TOPLEVEL is not OK here because we
hold i_mutex.

> if (IS_ERR(handle)) {
> error = PTR_ERR(handle);
> if (error != -ENOMEM)
> @@ -450,7 +451,8 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
> acl = NULL;
>
> retry:
> - handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
> + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
The same here.

> if (IS_ERR(handle)) {
> error = PTR_ERR(handle);
> goto release_and_out;
...
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 5199bac..b610294 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -3811,7 +3811,8 @@ retry:
> while (ret >= 0 && ret < max_blocks) {
> map.m_lblk = map.m_lblk + ret;
> map.m_len = max_blocks = max_blocks - ret;
> - handle = ext4_journal_start(inode, credits);
> + handle = ext4_journal_start(inode, credits,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
And here as well.

> if (IS_ERR(handle)) {
> ret = PTR_ERR(handle);
> break;
...
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 50d0e9c..0355858 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -5329,8 +5332,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
>
> /* (user+group)*(old+new) structure, inode write (sb,
> * inode block, ? - but truncate inode update has it) */
> - handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
> - EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
> + handle = ext4_journal_start(inode,
> + (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
> + EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle)) {
> error = PTR_ERR(handle);
> goto err_out;
Again, TOPLEVEL cannot be here.

> @@ -5364,7 +5369,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
> (attr->ia_size < inode->i_size)) {
> handle_t *handle;
>
> - handle = ext4_journal_start(inode, 3);
> + handle = ext4_journal_start(inode, 3,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle)) {
> error = PTR_ERR(handle);
> goto err_out;
And here as well.

> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 808c554..b88e295 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -101,7 +101,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> } else if (oldflags & EXT4_EOFBLOCKS_FL)
> ext4_truncate(inode);
>
> - handle = ext4_journal_start(inode, 1);
> + handle = ext4_journal_start(inode, 1,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
And here as well.

> if (IS_ERR(handle)) {
> err = PTR_ERR(handle);
> goto flags_out;
> @@ -157,7 +158,8 @@ flags_out:
> goto setversion_out;
> }
>
> - handle = ext4_journal_start(inode, 1);
> + handle = ext4_journal_start(inode, 1,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
Here, actually TOPLEVEL is OK.

> if (IS_ERR(handle)) {
> err = PTR_ERR(handle);
> goto setversion_out;
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index b57b98f..f233f12 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -485,10 +485,10 @@ int ext4_ext_migrate(struct inode *inode)
> return retval;
>
> handle = ext4_journal_start(inode,
> - EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> - EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
> - + 1);
> + EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> + EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
> + + 1, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here.

> if (IS_ERR(handle)) {
> retval = PTR_ERR(handle);
> return retval;
...
> diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
> index 2b8304b..ff4289f 100644
> --- a/fs/ext4/move_extent.c
> +++ b/fs/ext4/move_extent.c
> @@ -813,7 +813,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
> * inode and donor_inode may change each different metadata blocks.
> */
> jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
> - handle = ext4_journal_start(orig_inode, jblocks);
> + handle = ext4_journal_start(orig_inode, jblocks,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here.

> if (IS_ERR(handle)) {
> *err = PTR_ERR(handle);
> return 0;
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index b754b77..101d5d5 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -1750,9 +1750,11 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
> dquot_initialize(dir);
>
> retry:
> - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
> + handle = ext4_journal_start(dir,
> + EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
OK, TOPLEVEL possibly could be here because we hold only directory mutex
so writeback won't be stalled. But it's a bit subtle. The same holds for
mknod, mkdir and rmdir below.

> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -1786,9 +1788,11 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
> dquot_initialize(dir);
>
> retry:
> - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
> + handle = ext4_journal_start(dir,
> + EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -1825,9 +1829,11 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
> dquot_initialize(dir);
>
> retry:
> - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
> + handle = ext4_journal_start(dir,
> + EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
> + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -2140,7 +2146,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
> dquot_initialize(dir);
> dquot_initialize(dentry->d_inode);
>
> - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
> + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -2202,7 +2209,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> dquot_initialize(dir);
> dquot_initialize(dentry->d_inode);
>
> - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
> + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here.

> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -2279,7 +2287,8 @@ static int ext4_symlink(struct inode *dir,
> EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
> }
> retry:
> - handle = ext4_journal_start(dir, credits);
> + handle = ext4_journal_start(dir,
> + credits, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -2319,7 +2328,8 @@ retry:
> */
> handle = ext4_journal_start(dir,
> EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
Above two cases should be OK.

> if (IS_ERR(handle)) {
> err = PTR_ERR(handle);
> goto err_drop_inode;
> @@ -2364,8 +2374,10 @@ static int ext4_link(struct dentry *old_dentry,
> dquot_initialize(dir);
>
> retry:
> - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS);
> + handle = ext4_journal_start(dir,
> + EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here.

> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> @@ -2416,8 +2428,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
> if (new_dentry->d_inode)
> dquot_initialize(new_dentry->d_inode);
> handle = ext4_journal_start(old_dir, 2 *
> - EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
> - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
> + EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
> + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2,
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
And it cannot be here either.

> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
...
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index aa842f3..8beb687 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
...
> @@ -4702,7 +4706,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
>
> /* Update modification times of quota files when userspace can
> * start looking at them */
> - handle = ext4_journal_start(inode, 1);
> + handle = ext4_journal_start(inode, 1, (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here because we hold s_umount on superblock.

> if (IS_ERR(handle))
> goto out;
> inode->i_mtime = inode->i_ctime = CURRENT_TIME;
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index c757adc..e27b289 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -1084,7 +1084,8 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
> int error, retries = 0;
>
> retry:
> - handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
> + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb),
> + (JBD2_FAIL_OK | JBD2_TOPLEVEL));
TOPLEVEL cannot be here.

> if (IS_ERR(handle)) {
> error = PTR_ERR(handle);
> } else {
--
Jan Kara <[email protected]>
SUSE Labs, CR

2011-06-01 02:22:29

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Tue, May 31, 2011 at 01:22:53PM +0200, Jan Kara wrote:
>
> The problem is that with ext4, we need i_mutex in io completion path to
> end page writeback. So we cannot do GFP_KERNEL allocation whenever we hold
> i_mutex because mm might wait in direct reclaim for IO to complete and that
> cannot happen until we release i_mutex.

OK, maybe I'm being dense, but I'm not seeing it. I see where we need
i_mutex on the ext4_da_writepages() codepath, but that's never used
for direct reclaim. Direct reclaim only calls ext4_writepage(), and
that doesn't seem to try to grab i_mutex as near as I can tell. Am I
missing something?

- Ted

2011-06-02 09:54:26

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Tue 31-05-11 18:27:20, Ted Tso wrote:
> On Tue, May 31, 2011 at 01:22:53PM +0200, Jan Kara wrote:
> >
> > The problem is that with ext4, we need i_mutex in io completion path to
> > end page writeback. So we cannot do GFP_KERNEL allocation whenever we hold
> > i_mutex because mm might wait in direct reclaim for IO to complete and that
> > cannot happen until we release i_mutex.
>
> OK, maybe I'm being dense, but I'm not seeing it. I see where we need
> i_mutex on the ext4_da_writepages() codepath, but that's never used
> for direct reclaim. Direct reclaim only calls ext4_writepage(), and
> that doesn't seem to try to grab i_mutex as near as I can tell. Am I
> missing something?
What happens is that direct reclaim sometimes does
wait_on_page_writeback() (e.g. shrink_page_list()) or it explicitely waits
for NR_WRITEBACK statistics to go below some threshold
(throttle_vm_writeout()). And that is deadlockable if we hold i_mutex while
doing this because we may need i_mutex to actually move the page from
PageWriteback state...

As I'm saying this, I've realized ext4 has this problem also with
stable-pages patches because there we can wait for PageWriteback in
grab_cache_page_write_begin() when we also hold i_mutex. So I think we'll
have to come up with a way to convert unwritten extents without having to
hold i_mutex. That's going to be interesting.

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2011-06-06 00:12:45

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Thu, Jun 2, 2011 at 2:54 AM, Jan Kara <[email protected]> wrote:
> On Tue 31-05-11 18:27:20, Ted Tso wrote:
>> On Tue, May 31, 2011 at 01:22:53PM +0200, Jan Kara wrote:
>> >
>> > The problem is that with ext4, we need i_mutex in io completion path to
>> > end page writeback. So we cannot do GFP_KERNEL allocation whenever we hold
>> > i_mutex because mm might wait in direct reclaim for IO to complete and that
>> > cannot happen until we release i_mutex.
>>
>> OK, maybe I'm being dense, but I'm not seeing it. ?I see where we need
>> i_mutex on the ext4_da_writepages() codepath, but that's never used
>> for direct reclaim. ?Direct reclaim only calls ext4_writepage(), and
>> that doesn't seem to try to grab i_mutex as near as I can tell. ?Am I
>> missing something?
> ?What happens is that direct reclaim sometimes does
> wait_on_page_writeback() (e.g. shrink_page_list()) or it explicitely waits
> for NR_WRITEBACK statistics to go below some threshold
> (throttle_vm_writeout()). And that is deadlockable if we hold i_mutex while
> doing this because we may need i_mutex to actually move the page from
> PageWriteback state...
>
> As I'm saying this, I've realized ext4 has this problem also with
> stable-pages patches because there we can wait for PageWriteback in
> grab_cache_page_write_begin() when we also hold i_mutex. So I think we'll
> have to come up with a way to convert unwritten extents without having to
> hold i_mutex. That's going to be interesting.

Hi Jan/Ted,

Does that mean I should remove the whole JBD2_TOPLEVEL thing from my
revised patch ? Or should I fix it as per your feedback in
the other patch ?

--
Thanks -
Manish

2011-06-06 03:21:19

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Thu, Jun 02, 2011 at 11:54:24AM +0200, Jan Kara wrote:
> What happens is that direct reclaim sometimes does
> wait_on_page_writeback() (e.g. shrink_page_list()) or it explicitely waits
> for NR_WRITEBACK statistics to go below some threshold
> (throttle_vm_writeout()). And that is deadlockable if we hold i_mutex while
> doing this because we may need i_mutex to actually move the page from
> PageWriteback state...

We don't actully call set_page_writeback() until right before we
submit the page for writeback. And we convert the unwritten extents
in a workqueue, which gets submitted after we call
end_page_writeback(). So I'm still not seeing a problem; sorry if I'm
being dense!

- Ted

> As I'm saying this, I've realized ext4 has this problem also with
> stable-pages patches because there we can wait for PageWriteback in
> grab_cache_page_write_begin() when we also hold i_mutex. So I think we'll
> have to come up with a way to convert unwritten extents without having to
> hold i_mutex. That's going to be interesting.
>
> Honza
> --
> Jan Kara <[email protected]>
> SUSE Labs, CR
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2011-06-08 14:10:09

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

Ping?

- Ted

On Sun, Jun 05, 2011 at 11:21:13PM -0400, Ted Ts'o wrote:
> On Thu, Jun 02, 2011 at 11:54:24AM +0200, Jan Kara wrote:
> > What happens is that direct reclaim sometimes does
> > wait_on_page_writeback() (e.g. shrink_page_list()) or it explicitely waits
> > for NR_WRITEBACK statistics to go below some threshold
> > (throttle_vm_writeout()). And that is deadlockable if we hold i_mutex while
> > doing this because we may need i_mutex to actually move the page from
> > PageWriteback state...
>
> We don't actully call set_page_writeback() until right before we
> submit the page for writeback. And we convert the unwritten extents
> in a workqueue, which gets submitted after we call
> end_page_writeback(). So I'm still not seeing a problem; sorry if I'm
> being dense!
>
> - Ted
>
> > As I'm saying this, I've realized ext4 has this problem also with
> > stable-pages patches because there we can wait for PageWriteback in
> > grab_cache_page_write_begin() when we also hold i_mutex. So I think we'll
> > have to come up with a way to convert unwritten extents without having to
> > hold i_mutex. That's going to be interesting.
> >
> > Honza
> > --
> > Jan Kara <[email protected]>
> > SUSE Labs, CR
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2011-06-17 06:32:32

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Wed, Jun 8, 2011 at 7:10 AM, Ted Ts'o <[email protected]> wrote:
> Ping?

Hi Jan,

Is there anything required from my side on this patch ? From your
other post it seems that holding i_mutex isn't a problem. Please
advise.

--
Thanks -
Manish

2011-06-20 14:32:04

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

Hi,

On Thu 16-06-11 23:32:12, Manish Katiyar wrote:
> On Wed, Jun 8, 2011 at 7:10 AM, Ted Ts'o <[email protected]> wrote:
> > Ping?
>
> Is there anything required from my side on this patch ? From your
> other post it seems that holding i_mutex isn't a problem. Please
> advise.
OK, after my discussion with Ted today, i_mutex really shouldn't be an
issue so your patch should be fine. I just suggest you resend it to Ted to
remind yourself :).

Honza
--
Jan Kara <[email protected]>
SUSE Labs, CR

2011-06-20 14:41:13

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Mon, Jun 20, 2011 at 7:32 AM, Jan Kara <[email protected]> wrote:
> ?Hi,
>
> On Thu 16-06-11 23:32:12, Manish Katiyar wrote:
>> On Wed, Jun 8, 2011 at 7:10 AM, Ted Ts'o <[email protected]> wrote:
>> > Ping?
>>
>> Is there anything required from my side on this patch ? From your
>> other post it seems that holding i_mutex isn't a problem. Please
>> advise.
> ?OK, after my discussion with Ted today, i_mutex really shouldn't be an
> issue so your patch should be fine. I just suggest you resend it to Ted to
> remind yourself :).

Thanks a lot Jan, Will resend the series to Ted.

--
Thanks -
Manish

2011-06-20 17:57:25

[permalink] [raw]

Subject: Re: [PATCH v2 2/3] jbd2: Add extra parameter in start_this_handle() to control allocation flags.

On Mon, Jun 20, 2011 at 07:40:53AM -0700, Manish Katiyar wrote:
> > ?OK, after my discussion with Ted today, i_mutex really shouldn't be an
> > issue so your patch should be fine. I just suggest you resend it to Ted to
> > remind yourself :).
>
> Thanks a lot Jan, Will resend the series to Ted.

Thanks, but don't bother resending it unless you have any changes to
update.

I do keep track of patches using patchwork[1], so I haven't forgotten. :-)

[1] http://patchwork.ozlabs.org/project/linux-ext4/list/

I'll be starting to go through patches for the next merge window this
week.

- Ted

2011-06-20 18:08:51