Use an rwsem to ensure serialization of the callers and to avoid
starvation of high-priority tasks when the system is under a heavy
IO workload.
Signed-off-by: Sahitya Tummala <[email protected]>
---
fs/f2fs/checkpoint.c | 8 ++++----
fs/f2fs/f2fs.h | 2 +-
fs/f2fs/gc.c | 4 ++--
fs/f2fs/recovery.c | 4 ++--
fs/f2fs/super.c | 2 +-
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 023462e..7fb8c80 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -348,13 +348,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
- if (!mutex_trylock(&sbi->cp_mutex))
+ if (!down_write_trylock(&sbi->cp_global_sem))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
- mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->cp_global_sem);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -1572,7 +1572,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_warn(sbi, "Start checkpoint disabled!");
}
if (cpc->reason != CP_RESIZE)
- mutex_lock(&sbi->cp_mutex);
+ down_write(&sbi->cp_global_sem);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1647,7 +1647,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
if (cpc->reason != CP_RESIZE)
- mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->cp_global_sem);
return err;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e082677..842c2ca 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1369,7 +1369,7 @@ struct f2fs_sb_info {
int cur_cp_pack; /* remain current cp pack */
spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode; /* cache meta blocks */
- struct mutex cp_mutex; /* checkpoint procedure lock */
+ struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
struct rw_semaphore node_change; /* locking node change */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 05641a1..3ef84e6 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1986,7 +1986,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
freeze_super(sbi->sb);
down_write(&sbi->gc_lock);
- mutex_lock(&sbi->cp_mutex);
+ down_write(&sbi->cp_global_sem);
spin_lock(&sbi->stat_lock);
if (shrunk_blocks + valid_user_blocks(sbi) +
@@ -2031,7 +2031,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
spin_unlock(&sbi->stat_lock);
}
out_err:
- mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->cp_global_sem);
up_write(&sbi->gc_lock);
thaw_super(sbi->sb);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 0947d36..da75d5d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -799,7 +799,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
- mutex_lock(&sbi->cp_mutex);
+ down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list, check_only);
@@ -850,7 +850,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
if (!err)
clear_sbi_flag(sbi, SBI_POR_DOING);
- mutex_unlock(&sbi->cp_mutex);
+ up_write(&sbi->cp_global_sem);
/* let's drop all the directory inodes for clean checkpoint */
destroy_fsync_dnodes(&dir_list, err);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 87f7a6e..e33b0da 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3552,7 +3552,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->valid_super_block = valid_super_block;
init_rwsem(&sbi->gc_lock);
mutex_init(&sbi->writepages);
- mutex_init(&sbi->cp_mutex);
+ init_rwsem(&sbi->cp_global_sem);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
--
From: Sahitya Tummala
> Sent: 23 November 2020 05:29
>
> Use an rwsem to ensure serialization of the callers and to avoid
> starvation of high-priority tasks when the system is under a heavy
> IO workload.
I can't see any read lock requests.
So why the change?
David
On 2020/11/23 13:28, Sahitya Tummala wrote:
> Use an rwsem to ensure serialization of the callers and to avoid
> starvation of high-priority tasks when the system is under a heavy
> IO workload.
>
> Signed-off-by: Sahitya Tummala <[email protected]>
Reviewed-by: Chao Yu <[email protected]>
Thanks,
On 2020/11/24 1:05, David Laight wrote:
> From: Sahitya Tummala
>> Sent: 23 November 2020 05:29
>>
>> Use an rwsem to ensure serialization of the callers and to avoid
>> starvation of high-priority tasks when the system is under a heavy
>> IO workload.
>
> I can't see any read lock requests.
>
> So why the change?
Hi David,
You can check the context of this patch in the link below:
https://lore.kernel.org/linux-f2fs-devel/[email protected]/T/#t
BTW, the root cause here is that a mutex won't serialize its callers, so there
can be a potential starvation problem when the lock is always grabbed by
high-priority tasks.
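To make the intent concrete, here is a condensed sketch of the post-patch
checkpoint entry point (based on the hunks above; error handling and the
actual checkpoint work are trimmed). Note that every caller takes the
semaphore exclusively - the rwsem is used here purely for its queueing of
waiters, not for reader/writer concurrency:

int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	int err = 0;

	if (cpc->reason != CP_RESIZE)
		down_write(&sbi->cp_global_sem);  /* sleepers queue behind the owner */

	/* ... write out the checkpoint ... */

	if (cpc->reason != CP_RESIZE)
		up_write(&sbi->cp_global_sem);
	return err;
}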
Thanks,
From: Chao Yu
> Sent: 24 November 2020 03:12
>
> On 2020/11/24 1:05, David Laight wrote:
> > From: Sahitya Tummala
> >> Sent: 23 November 2020 05:29
> >>
> >> Use an rwsem to ensure serialization of the callers and to avoid
> >> starvation of high-priority tasks when the system is under a heavy
> >> IO workload.
> >
> > I can't see any read lock requests.
> >
> > So why the change?
>
> Hi David,
>
> You can check the context of this patch in the link below:
>
> https://lore.kernel.org/linux-f2fs-devel/[email protected]/T/#t
>
> BTW, the root cause here is that a mutex won't serialize its callers, so there
> can be a potential starvation problem when the lock is always grabbed by
> high-priority tasks.
That doesn't seem right.
If I read the above correctly, it was high-priority tasks that were
being 'starved' precisely because the mutex lock serializes wakers.
If you have a lock that is contended so much that it is held 100%
of the time, you need a different locking strategy.
IIRC mutex locks are 'ticket' locks so that only one thread is woken
each time the mutex is released, and they are woken in the order
they went to sleep.
While this behaviour might not be the one you want, relying on
an rwsem (which might currently happen to work differently) doesn't
seem like the correct long-term fix.
David
Hi David,
On Tue, Nov 24, 2020 at 09:12:12AM +0000, David Laight wrote:
> From: Chao Yu
> > Sent: 24 November 2020 03:12
> >
> > On 2020/11/24 1:05, David Laight wrote:
> > > From: Sahitya Tummala
> > >> Sent: 23 November 2020 05:29
> > >>
> > >> Use an rwsem to ensure serialization of the callers and to avoid
> > >> starvation of high-priority tasks when the system is under a heavy
> > >> IO workload.
> > >
> > > I can't see any read lock requests.
> > >
> > > So why the change?
> >
> > Hi David,
> >
> > You can check the context of this patch in the link below:
> >
> > https://lore.kernel.org/linux-f2fs-devel/[email protected]/T/#t
> >
> > BTW, the root cause here is that a mutex won't serialize its callers, so there
> > can be a potential starvation problem when the lock is always grabbed by
> > high-priority tasks.
>
> That doesn't seem right.
>
> If I read the above correctly, it was high-priority tasks that were
> being 'starved' precisely because the mutex lock serializes wakers.
Actually, it can happen to any random task, irrespective of priority.
In my case, I observed that the thread that went to sleep first was not
able to acquire the lock first, while other new threads that came in
just around the mutex unlock time were acquiring the lock.
>
> If you have a lock that is contended so much that it is held 100%
> of the time, you need a different locking strategy.
>
> IIRC mutex locks are 'ticket' locks so that only one thread is woken
> each time the mutex is released, and they are woken in the order
> they went to sleep.
AFAIK, mutex locks don't *strictly* enforce FIFO order. The lock is released
before the first waiting task is woken, and that task has to run before it
can claim the lock, so the lock is available to other tasks in this *short*
window.
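As a userspace analogue (a hypothetical illustration, not kernel code):
POSIX mutexes behave the same way in that unlock does not hand the lock
directly to the longest waiter, so a program like the one below will
often print acquisitions out of arrival order on a loaded system:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
	long id = (long)arg;

	pthread_mutex_lock(&lock);
	/* Under contention, acquisition order need not match arrival order. */
	printf("thread %ld got the lock\n", id);
	usleep(1000);		/* hold the lock briefly to force contention */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[8];
	long i;

	for (i = 0; i < 8; i++) {
		pthread_create(&t[i], NULL, worker, (void *)i);
		usleep(100);	/* stagger arrival so a wait queue builds up */
	}
	for (i = 0; i < 8; i++)
		pthread_join(t[i], NULL);
	return 0;
}

Build with 'gcc -pthread'; the exact ordering is timing-dependent, which
is precisely the window described above.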
Thanks,
>
> While this behaviour might not be the one you want, relying on
> an rwsem (which might currently happen to work differently) doesn't
> seem like the correct long-term fix.
>
> David