2022-03-08 11:09:49

by harshad shirwadkar

[permalink] [raw]
Subject: [PATCH 1/5] ext4: convert i_fc_lock to spinlock

From: Harshad Shirwadkar <[email protected]>

Convert ext4_inode_info->i_fc_lock from a mutex to a spinlock so that
it can be taken in contexts where sleeping is not allowed.

Signed-off-by: Harshad Shirwadkar <[email protected]>
---
fs/ext4/ext4.h | 7 +++++--
fs/ext4/fast_commit.c | 24 ++++++++++++++----------
fs/ext4/super.c | 2 +-
3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3f87cca49f0c..fb6d65f1176f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1065,8 +1065,11 @@ struct ext4_inode_info {
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;

- /* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */
- struct mutex i_fc_lock;
+ /*
+ * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len
+ * and inode's EXT4_STATE_FC_COMMITTING state bit.
+ */
+ spinlock_t i_fc_lock;

/*
* i_disksize keeps track of what the inode size is ON DISK, not
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 5ac594e03402..4f2caf6f987c 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -387,7 +387,7 @@ static int ext4_fc_track_template(
return -EINVAL;

tid = handle->h_transaction->t_tid;
- mutex_lock(&ei->i_fc_lock);
+ spin_lock(&ei->i_fc_lock);
if (tid == ei->i_sync_tid) {
update = true;
} else {
@@ -395,7 +395,7 @@ static int ext4_fc_track_template(
ei->i_sync_tid = tid;
}
ret = __fc_track_fn(inode, args, update);
- mutex_unlock(&ei->i_fc_lock);
+ spin_unlock(&ei->i_fc_lock);

if (!enqueue)
return ret;
@@ -427,11 +427,11 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
struct dentry *dentry = dentry_update->dentry;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

- mutex_unlock(&ei->i_fc_lock);
+ spin_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) {
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
- mutex_lock(&ei->i_fc_lock);
+ spin_lock(&ei->i_fc_lock);
return -ENOMEM;
}

@@ -444,7 +444,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_NOMEM, NULL);
- mutex_lock(&ei->i_fc_lock);
+ spin_lock(&ei->i_fc_lock);
return -ENOMEM;
}
memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
@@ -478,7 +478,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
}
spin_unlock(&sbi->s_fc_lock);
- mutex_lock(&ei->i_fc_lock);
+ spin_lock(&ei->i_fc_lock);

return 0;
}
@@ -867,15 +867,15 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
struct ext4_extent *ex;
int ret;

- mutex_lock(&ei->i_fc_lock);
+ spin_lock(&ei->i_fc_lock);
if (ei->i_fc_lblk_len == 0) {
- mutex_unlock(&ei->i_fc_lock);
+ spin_unlock(&ei->i_fc_lock);
return 0;
}
old_blk_size = ei->i_fc_lblk_start;
new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
ei->i_fc_lblk_len = 0;
- mutex_unlock(&ei->i_fc_lock);
+ spin_unlock(&ei->i_fc_lock);

cur_lblk_off = old_blk_size;
jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
@@ -972,9 +972,13 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)

spin_lock(&sbi->s_fc_lock);
list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ spin_lock(&pos->i_fc_lock);
if (!ext4_test_inode_state(&pos->vfs_inode,
- EXT4_STATE_FC_COMMITTING))
+ EXT4_STATE_FC_COMMITTING)) {
+ spin_unlock(&pos->i_fc_lock);
continue;
+ }
+ spin_unlock(&pos->i_fc_lock);
spin_unlock(&sbi->s_fc_lock);

ret = jbd2_wait_inode_data(journal, pos->jinode);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1e5f4994fe57..38d63113c383 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1346,7 +1346,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
ext4_fc_init_inode(&ei->vfs_inode);
- mutex_init(&ei->i_fc_lock);
+ spin_lock_init(&ei->i_fc_lock);
return &ei->vfs_inode;
}

--
2.35.1.616.g0bdcbb4464-goog


2022-03-08 12:36:40

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/5] ext4: convert i_fc_lock to spinlock

On Tue 08-03-22 02:51:08, Harshad Shirwadkar wrote:
> From: Harshad Shirwadkar <[email protected]>
>
> Convert ext4_inode_info->i_fc_lock to spinlock to avoid sleeping
> in invalid contexts.
>
> Signed-off-by: Harshad Shirwadkar <[email protected]>

I don't think ext4_debug() is safe under spinlock in __track_range(). But
otherwise the patch looks good to me. So feel free to add:

Reviewed-by: Jan Kara <[email protected]>

after fixing this.

Honza

> ---
> fs/ext4/ext4.h | 7 +++++--
> fs/ext4/fast_commit.c | 24 ++++++++++++++----------
> fs/ext4/super.c | 2 +-
> 3 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3f87cca49f0c..fb6d65f1176f 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1065,8 +1065,11 @@ struct ext4_inode_info {
> /* Fast commit wait queue for this inode */
> wait_queue_head_t i_fc_wait;
>
> - /* Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len */
> - struct mutex i_fc_lock;
> + /*
> + * Protect concurrent accesses on i_fc_lblk_start, i_fc_lblk_len
> + * and inode's EXT4_FC_STATE_COMMITTING state bit.
> + */
> + spinlock_t i_fc_lock;
>
> /*
> * i_disksize keeps track of what the inode size is ON DISK, not
> diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
> index 5ac594e03402..4f2caf6f987c 100644
> --- a/fs/ext4/fast_commit.c
> +++ b/fs/ext4/fast_commit.c
> @@ -387,7 +387,7 @@ static int ext4_fc_track_template(
> return -EINVAL;
>
> tid = handle->h_transaction->t_tid;
> - mutex_lock(&ei->i_fc_lock);
> + spin_lock(&ei->i_fc_lock);
> if (tid == ei->i_sync_tid) {
> update = true;
> } else {
> @@ -395,7 +395,7 @@ static int ext4_fc_track_template(
> ei->i_sync_tid = tid;
> }
> ret = __fc_track_fn(inode, args, update);
> - mutex_unlock(&ei->i_fc_lock);
> + spin_unlock(&ei->i_fc_lock);
>
> if (!enqueue)
> return ret;
> @@ -427,11 +427,11 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> struct dentry *dentry = dentry_update->dentry;
> struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
>
> - mutex_unlock(&ei->i_fc_lock);
> + spin_unlock(&ei->i_fc_lock);
> node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
> if (!node) {
> ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
> - mutex_lock(&ei->i_fc_lock);
> + spin_lock(&ei->i_fc_lock);
> return -ENOMEM;
> }
>
> @@ -444,7 +444,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> kmem_cache_free(ext4_fc_dentry_cachep, node);
> ext4_fc_mark_ineligible(inode->i_sb,
> EXT4_FC_REASON_NOMEM, NULL);
> - mutex_lock(&ei->i_fc_lock);
> + spin_lock(&ei->i_fc_lock);
> return -ENOMEM;
> }
> memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
> @@ -478,7 +478,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
> list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
> }
> spin_unlock(&sbi->s_fc_lock);
> - mutex_lock(&ei->i_fc_lock);
> + spin_lock(&ei->i_fc_lock);
>
> return 0;
> }
> @@ -867,15 +867,15 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
> struct ext4_extent *ex;
> int ret;
>
> - mutex_lock(&ei->i_fc_lock);
> + spin_lock(&ei->i_fc_lock);
> if (ei->i_fc_lblk_len == 0) {
> - mutex_unlock(&ei->i_fc_lock);
> + spin_unlock(&ei->i_fc_lock);
> return 0;
> }
> old_blk_size = ei->i_fc_lblk_start;
> new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
> ei->i_fc_lblk_len = 0;
> - mutex_unlock(&ei->i_fc_lock);
> + spin_unlock(&ei->i_fc_lock);
>
> cur_lblk_off = old_blk_size;
> jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
> @@ -972,9 +972,13 @@ static int ext4_fc_wait_inode_data_all(journal_t *journal)
>
> spin_lock(&sbi->s_fc_lock);
> list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
> + spin_lock(&pos->i_fc_lock);
> if (!ext4_test_inode_state(&pos->vfs_inode,
> - EXT4_STATE_FC_COMMITTING))
> + EXT4_STATE_FC_COMMITTING)) {
> + spin_unlock(&pos->i_fc_lock);
> continue;
> + }
> + spin_unlock(&pos->i_fc_lock);
> spin_unlock(&sbi->s_fc_lock);
>
> ret = jbd2_wait_inode_data(journal, pos->jinode);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 1e5f4994fe57..38d63113c383 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1346,7 +1346,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
> atomic_set(&ei->i_unwritten, 0);
> INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
> ext4_fc_init_inode(&ei->vfs_inode);
> - mutex_init(&ei->i_fc_lock);
> + spin_lock_init(&ei->i_fc_lock);
> return &ei->vfs_inode;
> }
>
> --
> 2.35.1.616.g0bdcbb4464-goog
>
--
Jan Kara <[email protected]>
SUSE Labs, CR