2020-06-24 01:24:21

by Jaegeuk Kim

[permalink] [raw]
Subject: [PATCH] f2fs: avoid readahead race condition

If two readahead threads with the same offset enter readpages, every read
IO is split before being issued to the disk, which gives lower bandwidth.

This patch tries to avoid redundant readahead calls.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
fs/f2fs/data.c | 15 +++++++++++++++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/super.c | 2 ++
3 files changed, 18 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index dfd3225153570..1886d83bc5f15 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2292,6 +2292,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
unsigned nr_pages = rac ? readahead_count(rac) : 1;
unsigned max_nr_pages = nr_pages;
int ret = 0;
+ bool drop_ra = false;

map.m_pblk = 0;
map.m_lblk = 0;
@@ -2302,6 +2303,17 @@ static int f2fs_mpage_readpages(struct inode *inode,
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;

+ /*
+ * Two readahead threads for same address range can cause race condition
+ * which fragments sequential read IOs. So let's avoid each other.
+ */
+ if (rac && readahead_count(rac)) {
+ if (F2FS_I(inode)->ra_offset == readahead_index(rac))
+ drop_ra = true;
+ else
+ F2FS_I(inode)->ra_offset = readahead_index(rac);
+ }
+
for (; nr_pages; nr_pages--) {
if (rac) {
page = readahead_page(rac);
@@ -2368,6 +2380,9 @@ static int f2fs_mpage_readpages(struct inode *inode,
}
if (bio)
__submit_bio(F2FS_I_SB(inode), bio, DATA);
+
+ if (rac && readahead_count(rac) && !drop_ra)
+ F2FS_I(inode)->ra_offset = -1;
return ret;
}

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fb2a1a334388..753782426feac 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -809,6 +809,7 @@ struct f2fs_inode_info {
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
+ pgoff_t ra_offset; /* ongoing readahead offset */
struct extent_tree *extent_tree; /* cached extent_tree entry */

/* avoid racing between foreground op and gc */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7326522057378..80cb7cd358f84 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1015,6 +1015,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;

+ fi->ra_offset = -1;
+
return &fi->vfs_inode;
}

--
2.27.0.111.gc72c7da667-goog


2020-06-28 02:36:37

by Chao Yu

[permalink] [raw]
Subject: Re: [f2fs-dev] [PATCH] f2fs: avoid readahead race condition

On 2020/6/24 9:21, Jaegeuk Kim wrote:
> If two readahead threads having same offset enter in readpages, every read
> IOs are split and issued to the disk which giving lower bandwidth.
>
> This patch tries to avoid redundant readahead calls.
>
> Signed-off-by: Jaegeuk Kim <[email protected]>
> ---
> fs/f2fs/data.c | 15 +++++++++++++++
> fs/f2fs/f2fs.h | 1 +
> fs/f2fs/super.c | 2 ++
> 3 files changed, 18 insertions(+)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index dfd3225153570..1886d83bc5f15 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -2292,6 +2292,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
> unsigned nr_pages = rac ? readahead_count(rac) : 1;
> unsigned max_nr_pages = nr_pages;
> int ret = 0;
> + bool drop_ra = false;
>
> map.m_pblk = 0;
> map.m_lblk = 0;
> @@ -2302,6 +2303,17 @@ static int f2fs_mpage_readpages(struct inode *inode,
> map.m_seg_type = NO_CHECK_TYPE;
> map.m_may_create = false;
>
> + /*
> + * Two readahead threads for same address range can cause race condition
> + * which fragments sequential read IOs. So let's avoid each other.
> + */
> + if (rac && readahead_count(rac)) {
> + if (F2FS_I(inode)->ra_offset == readahead_index(rac))
> + drop_ra = true;

I guess you forgot to return somewhere when drop_ra is true?

thanks,

> + else
> + F2FS_I(inode)->ra_offset = readahead_index(rac);
> + }
> +
> for (; nr_pages; nr_pages--) {
> if (rac) {
> page = readahead_page(rac);
> @@ -2368,6 +2380,9 @@ static int f2fs_mpage_readpages(struct inode *inode,
> }
> if (bio)
> __submit_bio(F2FS_I_SB(inode), bio, DATA);
> +
> + if (rac && readahead_count(rac) && !drop_ra)
> + F2FS_I(inode)->ra_offset = -1;
> return ret;
> }
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7fb2a1a334388..753782426feac 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -809,6 +809,7 @@ struct f2fs_inode_info {
> struct list_head inmem_pages; /* inmemory pages managed by f2fs */
> struct task_struct *inmem_task; /* store inmemory task */
> struct mutex inmem_lock; /* lock for inmemory pages */
> + pgoff_t ra_offset; /* ongoing readahead offset */
> struct extent_tree *extent_tree; /* cached extent_tree entry */
>
> /* avoid racing between foreground op and gc */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 7326522057378..80cb7cd358f84 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1015,6 +1015,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> /* Will be used by directory only */
> fi->i_dir_level = F2FS_SB(sb)->dir_level;
>
> + fi->ra_offset = -1;
> +
> return &fi->vfs_inode;
> }
>
>

2020-06-29 21:35:43

by Jaegeuk Kim

[permalink] [raw]
Subject: Re: [PATCH v2] f2fs: avoid readahead race condition

If two readahead threads with the same offset enter readpages, every read
IO is split before being issued to the disk, which gives lower bandwidth.

This patch tries to avoid redundant readahead calls.

Signed-off-by: Jaegeuk Kim <[email protected]>
---
v2:
- add missing code to bypass read

fs/f2fs/data.c | 18 +++++++++++++++++-
fs/f2fs/f2fs.h | 1 +
fs/f2fs/super.c | 2 ++
3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d6094b9f3916..9b69a159cc6c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2403,6 +2403,7 @@ int f2fs_mpage_readpages(struct address_space *mapping,
#endif
unsigned max_nr_pages = nr_pages;
int ret = 0;
+ bool drop_ra = false;

map.m_pblk = 0;
map.m_lblk = 0;
@@ -2413,13 +2414,25 @@ int f2fs_mpage_readpages(struct address_space *mapping,
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;

+ /*
+ * Two readahead threads for same address range can cause race condition
+ * which fragments sequential read IOs. So let's avoid each other.
+ */
+ if (pages && is_readahead) {
+ page = list_last_entry(pages, struct page, lru);
+ if (F2FS_I(inode)->ra_offset == page_index(page))
+ drop_ra = true;
+ else
+ F2FS_I(inode)->ra_offset = page_index(page);
+ }
+
for (; nr_pages; nr_pages--) {
if (pages) {
page = list_last_entry(pages, struct page, lru);

prefetchw(&page->flags);
list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
+ if (drop_ra || add_to_page_cache_lru(page, mapping,
page_index(page),
readahead_gfp_mask(mapping)))
goto next_page;
@@ -2484,6 +2497,9 @@ int f2fs_mpage_readpages(struct address_space *mapping,
BUG_ON(pages && !list_empty(pages));
if (bio)
__f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
+
+ if (pages && is_readahead && !drop_ra)
+ F2FS_I(inode)->ra_offset = -1;
return pages ? 0 : ret;
}

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 35afa13124b8..a95f84d72a55 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -806,6 +806,7 @@ struct f2fs_inode_info {
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
+ pgoff_t ra_offset; /* ongoing readahead offset */
struct extent_tree *extent_tree; /* cached extent_tree entry */

/* avoid racing between foreground op and gc */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 0e860186a9c5..6fd2ad43d9e4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1011,6 +1011,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;

+ fi->ra_offset = -1;
+
return &fi->vfs_inode;
}

--
2.27.0.111.gc72c7da667-goog