Quoted from Jing Xia's report, there is a potential deadlock may happen
between kworker and checkpoint as below:
[T:writeback] [T:checkpoint]
- wb_writeback
- blk_start_plug
bio contains NodeA was plugged in writeback threads
- do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
- f2fs_write_data_pages
- f2fs_write_single_data_page -- write last dirty page
- f2fs_do_write_data_page
- set_page_writeback -- clear page dirty flag and
PAGECACHE_TAG_DIRTY tag in radix tree
- f2fs_outplace_write_data
- f2fs_update_data_blkaddr
- f2fs_wait_on_page_writeback -- wait NodeA to writeback here
- inode_dec_dirty_pages
- writeback_sb_inodes
- writeback_single_inode
- do_writepages
- f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
- wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
- requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
- blk_finish_plug
Let's try to avoid deadlock condition by forcing unplugging previous bio via
blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
due to valid sbi->wb_sync_req[DATA/NODE].
Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
Signed-off-by: Zhiguo Niu <[email protected]>
Signed-off-by: Jing Xia <[email protected]>
Signed-off-by: Chao Yu <[email protected]>
---
fs/f2fs/data.c | 6 +++++-
fs/f2fs/node.c | 6 +++++-
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 76d6fe7b0c8f..932a4c81acaf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_inc(&sbi->wb_sync_req[DATA]);
- else if (atomic_read(&sbi->wb_sync_req[DATA]))
+ else if (atomic_read(&sbi->wb_sync_req[DATA])) {
+ /* to avoid potential deadlock */
+ if (current->plug)
+ blk_finish_plug(current->plug);
goto skip_write;
+ }
if (__should_serialize_io(inode, wbc)) {
mutex_lock(&sbi->writepages);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 556fcd8457f3..69c6bcaf5aae 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_inc(&sbi->wb_sync_req[NODE]);
- else if (atomic_read(&sbi->wb_sync_req[NODE]))
+ else if (atomic_read(&sbi->wb_sync_req[NODE])) {
+ /* to avoid potential deadlock */
+ if (current->plug)
+ blk_finish_plug(current->plug);
goto skip_write;
+ }
trace_f2fs_writepages(mapping->host, wbc, NODE);
--
2.32.0
On 01/27, Chao Yu wrote:
> Quoted from Jing Xia's report, there is a potential deadlock may happen
> between kworker and checkpoint as below:
>
> [T:writeback] [T:checkpoint]
> - wb_writeback
> - blk_start_plug
> bio contains NodeA was plugged in writeback threads
I'm still trying to understand more precisely. So, how is it possible to
have bio having node write in this current context?
> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
> - f2fs_write_data_pages
> - f2fs_write_single_data_page -- write last dirty page
> - f2fs_do_write_data_page
> - set_page_writeback -- clear page dirty flag and
> PAGECACHE_TAG_DIRTY tag in radix tree
> - f2fs_outplace_write_data
> - f2fs_update_data_blkaddr
> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
> - inode_dec_dirty_pages
> - writeback_sb_inodes
> - writeback_single_inode
> - do_writepages
> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
> - blk_finish_plug
>
> Let's try to avoid deadlock condition by forcing unplugging previous bio via
> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
> due to valid sbi->wb_sync_req[DATA/NODE].
>
> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
> Signed-off-by: Zhiguo Niu <[email protected]>
> Signed-off-by: Jing Xia <[email protected]>
> Signed-off-by: Chao Yu <[email protected]>
> ---
> fs/f2fs/data.c | 6 +++++-
> fs/f2fs/node.c | 6 +++++-
> 2 files changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 76d6fe7b0c8f..932a4c81acaf 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> if (wbc->sync_mode == WB_SYNC_ALL)
> atomic_inc(&sbi->wb_sync_req[DATA]);
> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
> + /* to avoid potential deadlock */
> + if (current->plug)
> + blk_finish_plug(current->plug);
> goto skip_write;
> + }
>
> if (__should_serialize_io(inode, wbc)) {
> mutex_lock(&sbi->writepages);
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 556fcd8457f3..69c6bcaf5aae 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>
> if (wbc->sync_mode == WB_SYNC_ALL)
> atomic_inc(&sbi->wb_sync_req[NODE]);
> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
> + /* to avoid potential deadlock */
> + if (current->plug)
> + blk_finish_plug(current->plug);
> goto skip_write;
> + }
>
> trace_f2fs_writepages(mapping->host, wbc, NODE);
>
> --
> 2.32.0
On 2022/1/28 5:59, Jaegeuk Kim wrote:
> On 01/27, Chao Yu wrote:
>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>> between kworker and checkpoint as below:
>>
>> [T:writeback] [T:checkpoint]
>> - wb_writeback
>> - blk_start_plug
>> bio contains NodeA was plugged in writeback threads
>
> I'm still trying to understand more precisely. So, how is it possible to
> have bio having node write in this current context?
IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
during writebacking node_inode's data page (which should be node page)?
Thanks,
>
>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>> - f2fs_write_data_pages
>> - f2fs_write_single_data_page -- write last dirty page
>> - f2fs_do_write_data_page
>> - set_page_writeback -- clear page dirty flag and
>> PAGECACHE_TAG_DIRTY tag in radix tree
>> - f2fs_outplace_write_data
>> - f2fs_update_data_blkaddr
>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>> - inode_dec_dirty_pages
>> - writeback_sb_inodes
>> - writeback_single_inode
>> - do_writepages
>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>> - blk_finish_plug
>>
>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>> due to valid sbi->wb_sync_req[DATA/NODE].
>>
>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>> Signed-off-by: Zhiguo Niu <[email protected]>
>> Signed-off-by: Jing Xia <[email protected]>
>> Signed-off-by: Chao Yu <[email protected]>
>> ---
>> fs/f2fs/data.c | 6 +++++-
>> fs/f2fs/node.c | 6 +++++-
>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 76d6fe7b0c8f..932a4c81acaf 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>> if (wbc->sync_mode == WB_SYNC_ALL)
>> atomic_inc(&sbi->wb_sync_req[DATA]);
>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>> + /* to avoid potential deadlock */
>> + if (current->plug)
>> + blk_finish_plug(current->plug);
>> goto skip_write;
>> + }
>>
>> if (__should_serialize_io(inode, wbc)) {
>> mutex_lock(&sbi->writepages);
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 556fcd8457f3..69c6bcaf5aae 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>
>> if (wbc->sync_mode == WB_SYNC_ALL)
>> atomic_inc(&sbi->wb_sync_req[NODE]);
>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>> + /* to avoid potential deadlock */
>> + if (current->plug)
>> + blk_finish_plug(current->plug);
>> goto skip_write;
>> + }
>>
>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>
>> --
>> 2.32.0
On 01/28, Chao Yu wrote:
> On 2022/1/28 5:59, Jaegeuk Kim wrote:
> > On 01/27, Chao Yu wrote:
> > > Quoted from Jing Xia's report, there is a potential deadlock may happen
> > > between kworker and checkpoint as below:
> > >
> > > [T:writeback] [T:checkpoint]
> > > - wb_writeback
> > > - blk_start_plug
> > > bio contains NodeA was plugged in writeback threads
> >
> > I'm still trying to understand more precisely. So, how is it possible to
> > have bio having node write in this current context?
>
> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
> during writebacking node_inode's data page (which should be node page)?
Wasn't that added into a different task->plug?
>
> Thanks,
>
> >
> > > - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
> > > - f2fs_write_data_pages
> > > - f2fs_write_single_data_page -- write last dirty page
> > > - f2fs_do_write_data_page
> > > - set_page_writeback -- clear page dirty flag and
> > > PAGECACHE_TAG_DIRTY tag in radix tree
> > > - f2fs_outplace_write_data
> > > - f2fs_update_data_blkaddr
> > > - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
> > > - inode_dec_dirty_pages
> > > - writeback_sb_inodes
> > > - writeback_single_inode
> > > - do_writepages
> > > - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
> > > - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
> > > - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
> > > - blk_finish_plug
> > >
> > > Let's try to avoid deadlock condition by forcing unplugging previous bio via
> > > blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
> > > due to valid sbi->wb_sync_req[DATA/NODE].
> > >
> > > Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
> > > Signed-off-by: Zhiguo Niu <[email protected]>
> > > Signed-off-by: Jing Xia <[email protected]>
> > > Signed-off-by: Chao Yu <[email protected]>
> > > ---
> > > fs/f2fs/data.c | 6 +++++-
> > > fs/f2fs/node.c | 6 +++++-
> > > 2 files changed, 10 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > index 76d6fe7b0c8f..932a4c81acaf 100644
> > > --- a/fs/f2fs/data.c
> > > +++ b/fs/f2fs/data.c
> > > @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> > > /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> > > if (wbc->sync_mode == WB_SYNC_ALL)
> > > atomic_inc(&sbi->wb_sync_req[DATA]);
> > > - else if (atomic_read(&sbi->wb_sync_req[DATA]))
> > > + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
> > > + /* to avoid potential deadlock */
> > > + if (current->plug)
> > > + blk_finish_plug(current->plug);
> > > goto skip_write;
> > > + }
> > > if (__should_serialize_io(inode, wbc)) {
> > > mutex_lock(&sbi->writepages);
> > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > index 556fcd8457f3..69c6bcaf5aae 100644
> > > --- a/fs/f2fs/node.c
> > > +++ b/fs/f2fs/node.c
> > > @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
> > > if (wbc->sync_mode == WB_SYNC_ALL)
> > > atomic_inc(&sbi->wb_sync_req[NODE]);
> > > - else if (atomic_read(&sbi->wb_sync_req[NODE]))
> > > + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
> > > + /* to avoid potential deadlock */
> > > + if (current->plug)
> > > + blk_finish_plug(current->plug);
> > > goto skip_write;
> > > + }
> > > trace_f2fs_writepages(mapping->host, wbc, NODE);
> > > --
> > > 2.32.0
On 2022/1/29 8:37, Jaegeuk Kim wrote:
> On 01/28, Chao Yu wrote:
>> On 2022/1/28 5:59, Jaegeuk Kim wrote:
>>> On 01/27, Chao Yu wrote:
>>>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>>>> between kworker and checkpoint as below:
>>>>
>>>> [T:writeback] [T:checkpoint]
>>>> - wb_writeback
>>>> - blk_start_plug
>>>> bio contains NodeA was plugged in writeback threads
>>>
>>> I'm still trying to understand more precisely. So, how is it possible to
>>> have bio having node write in this current context?
>>
>> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
>> during writebacking node_inode's data page (which should be node page)?
>
> Wasn't that added into a different task->plug?
I'm not sure I've got your concern correctly...
Do you mean NodeA and other IOs from do_writepages() were plugged in
different local plug variables?
Thanks,
>
>>
>> Thanks,
>>
>>>
>>>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>>>> - f2fs_write_data_pages
>>>> - f2fs_write_single_data_page -- write last dirty page
>>>> - f2fs_do_write_data_page
>>>> - set_page_writeback -- clear page dirty flag and
>>>> PAGECACHE_TAG_DIRTY tag in radix tree
>>>> - f2fs_outplace_write_data
>>>> - f2fs_update_data_blkaddr
>>>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>>>> - inode_dec_dirty_pages
>>>> - writeback_sb_inodes
>>>> - writeback_single_inode
>>>> - do_writepages
>>>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>>>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>>>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>>>> - blk_finish_plug
>>>>
>>>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>>>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>>>> due to valid sbi->wb_sync_req[DATA/NODE].
>>>>
>>>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>>>> Signed-off-by: Zhiguo Niu <[email protected]>
>>>> Signed-off-by: Jing Xia <[email protected]>
>>>> Signed-off-by: Chao Yu <[email protected]>
>>>> ---
>>>> fs/f2fs/data.c | 6 +++++-
>>>> fs/f2fs/node.c | 6 +++++-
>>>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 76d6fe7b0c8f..932a4c81acaf 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>> atomic_inc(&sbi->wb_sync_req[DATA]);
>>>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>>>> + /* to avoid potential deadlock */
>>>> + if (current->plug)
>>>> + blk_finish_plug(current->plug);
>>>> goto skip_write;
>>>> + }
>>>> if (__should_serialize_io(inode, wbc)) {
>>>> mutex_lock(&sbi->writepages);
>>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>>> index 556fcd8457f3..69c6bcaf5aae 100644
>>>> --- a/fs/f2fs/node.c
>>>> +++ b/fs/f2fs/node.c
>>>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>> atomic_inc(&sbi->wb_sync_req[NODE]);
>>>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>>>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>>>> + /* to avoid potential deadlock */
>>>> + if (current->plug)
>>>> + blk_finish_plug(current->plug);
>>>> goto skip_write;
>>>> + }
>>>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>>> --
>>>> 2.32.0
On 01/29, Chao Yu wrote:
> On 2022/1/29 8:37, Jaegeuk Kim wrote:
> > On 01/28, Chao Yu wrote:
> > > On 2022/1/28 5:59, Jaegeuk Kim wrote:
> > > > On 01/27, Chao Yu wrote:
> > > > > Quoted from Jing Xia's report, there is a potential deadlock may happen
> > > > > between kworker and checkpoint as below:
> > > > >
> > > > > [T:writeback] [T:checkpoint]
> > > > > - wb_writeback
> > > > > - blk_start_plug
> > > > > bio contains NodeA was plugged in writeback threads
> > > >
> > > > I'm still trying to understand more precisely. So, how is it possible to
> > > > have bio having node write in this current context?
> > >
> > > IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
> > > during writebacking node_inode's data page (which should be node page)?
> >
> > Wasn't that added into a different task->plug?
>
> I'm not sure I've got your concern correctly...
>
> Do you mean NodeA and other IOs from do_writepages() were plugged in
> different local plug variables?
I think so.
>
> Thanks,
>
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > > - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
> > > > > - f2fs_write_data_pages
> > > > > - f2fs_write_single_data_page -- write last dirty page
> > > > > - f2fs_do_write_data_page
> > > > > - set_page_writeback -- clear page dirty flag and
> > > > > PAGECACHE_TAG_DIRTY tag in radix tree
> > > > > - f2fs_outplace_write_data
> > > > > - f2fs_update_data_blkaddr
> > > > > - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
> > > > > - inode_dec_dirty_pages
> > > > > - writeback_sb_inodes
> > > > > - writeback_single_inode
> > > > > - do_writepages
> > > > > - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
> > > > > - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
> > > > > - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
> > > > > - blk_finish_plug
> > > > >
> > > > > Let's try to avoid deadlock condition by forcing unplugging previous bio via
> > > > > blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
> > > > > due to valid sbi->wb_sync_req[DATA/NODE].
> > > > >
> > > > > Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
> > > > > Signed-off-by: Zhiguo Niu <[email protected]>
> > > > > Signed-off-by: Jing Xia <[email protected]>
> > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > ---
> > > > > fs/f2fs/data.c | 6 +++++-
> > > > > fs/f2fs/node.c | 6 +++++-
> > > > > 2 files changed, 10 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > index 76d6fe7b0c8f..932a4c81acaf 100644
> > > > > --- a/fs/f2fs/data.c
> > > > > +++ b/fs/f2fs/data.c
> > > > > @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> > > > > /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> > > > > if (wbc->sync_mode == WB_SYNC_ALL)
> > > > > atomic_inc(&sbi->wb_sync_req[DATA]);
> > > > > - else if (atomic_read(&sbi->wb_sync_req[DATA]))
> > > > > + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
> > > > > + /* to avoid potential deadlock */
> > > > > + if (current->plug)
> > > > > + blk_finish_plug(current->plug);
> > > > > goto skip_write;
> > > > > + }
> > > > > if (__should_serialize_io(inode, wbc)) {
> > > > > mutex_lock(&sbi->writepages);
> > > > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > > > index 556fcd8457f3..69c6bcaf5aae 100644
> > > > > --- a/fs/f2fs/node.c
> > > > > +++ b/fs/f2fs/node.c
> > > > > @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
> > > > > if (wbc->sync_mode == WB_SYNC_ALL)
> > > > > atomic_inc(&sbi->wb_sync_req[NODE]);
> > > > > - else if (atomic_read(&sbi->wb_sync_req[NODE]))
> > > > > + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
> > > > > + /* to avoid potential deadlock */
> > > > > + if (current->plug)
> > > > > + blk_finish_plug(current->plug);
> > > > > goto skip_write;
> > > > > + }
> > > > > trace_f2fs_writepages(mapping->host, wbc, NODE);
> > > > > --
> > > > > 2.32.0
On 2022/2/3 9:51, Jaegeuk Kim wrote:
> On 01/29, Chao Yu wrote:
>> On 2022/1/29 8:37, Jaegeuk Kim wrote:
>>> On 01/28, Chao Yu wrote:
>>>> On 2022/1/28 5:59, Jaegeuk Kim wrote:
>>>>> On 01/27, Chao Yu wrote:
>>>>>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>>>>>> between kworker and checkpoint as below:
>>>>>>
>>>>>> [T:writeback] [T:checkpoint]
>>>>>> - wb_writeback
>>>>>> - blk_start_plug
>>>>>> bio contains NodeA was plugged in writeback threads
>>>>>
>>>>> I'm still trying to understand more precisely. So, how is it possible to
>>>>> have bio having node write in this current context?
>>>>
>>>> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
>>>> during writebacking node_inode's data page (which should be node page)?
>>>
>>> Wasn't that added into a different task->plug?
>>
>> I'm not sure I've got your concern correctly...
>>
>> Do you mean NodeA and other IOs from do_writepages() were plugged in
>> different local plug variables?
>
> I think so.
I guess block plug helper says it doesn't allow to use nested plug, so there
is only one plug in kworker thread?
void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
{
struct task_struct *tsk = current;
/*
* If this is a nested plug, don't actually assign it.
*/
if (tsk->plug)
return;
...
}
Thanks,
>
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>>>>>> - f2fs_write_data_pages
>>>>>> - f2fs_write_single_data_page -- write last dirty page
>>>>>> - f2fs_do_write_data_page
>>>>>> - set_page_writeback -- clear page dirty flag and
>>>>>> PAGECACHE_TAG_DIRTY tag in radix tree
>>>>>> - f2fs_outplace_write_data
>>>>>> - f2fs_update_data_blkaddr
>>>>>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>>>>>> - inode_dec_dirty_pages
>>>>>> - writeback_sb_inodes
>>>>>> - writeback_single_inode
>>>>>> - do_writepages
>>>>>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>>>>>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>>>>>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>>>>>> - blk_finish_plug
>>>>>>
>>>>>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>>>>>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>>>>>> due to valid sbi->wb_sync_req[DATA/NODE].
>>>>>>
>>>>>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>>>>>> Signed-off-by: Zhiguo Niu <[email protected]>
>>>>>> Signed-off-by: Jing Xia <[email protected]>
>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>> ---
>>>>>> fs/f2fs/data.c | 6 +++++-
>>>>>> fs/f2fs/node.c | 6 +++++-
>>>>>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 76d6fe7b0c8f..932a4c81acaf 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>> atomic_inc(&sbi->wb_sync_req[DATA]);
>>>>>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>>>>>> + /* to avoid potential deadlock */
>>>>>> + if (current->plug)
>>>>>> + blk_finish_plug(current->plug);
>>>>>> goto skip_write;
>>>>>> + }
>>>>>> if (__should_serialize_io(inode, wbc)) {
>>>>>> mutex_lock(&sbi->writepages);
>>>>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>>>>> index 556fcd8457f3..69c6bcaf5aae 100644
>>>>>> --- a/fs/f2fs/node.c
>>>>>> +++ b/fs/f2fs/node.c
>>>>>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>> atomic_inc(&sbi->wb_sync_req[NODE]);
>>>>>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>>>>>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>>>>>> + /* to avoid potential deadlock */
>>>>>> + if (current->plug)
>>>>>> + blk_finish_plug(current->plug);
>>>>>> goto skip_write;
>>>>>> + }
>>>>>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>>>>> --
>>>>>> 2.32.0
On 2022/2/3 22:57, Chao Yu wrote:
> On 2022/2/3 9:51, Jaegeuk Kim wrote:
>> On 01/29, Chao Yu wrote:
>>> On 2022/1/29 8:37, Jaegeuk Kim wrote:
>>>> On 01/28, Chao Yu wrote:
>>>>> On 2022/1/28 5:59, Jaegeuk Kim wrote:
>>>>>> On 01/27, Chao Yu wrote:
>>>>>>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>>>>>>> between kworker and checkpoint as below:
>>>>>>>
>>>>>>> [T:writeback] [T:checkpoint]
>>>>>>> - wb_writeback
>>>>>>> - blk_start_plug
>>>>>>> bio contains NodeA was plugged in writeback threads
>>>>>>
>>>>>> I'm still trying to understand more precisely. So, how is it possible to
>>>>>> have bio having node write in this current context?
>>>>>
>>>>> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
>>>>> during writebacking node_inode's data page (which should be node page)?
>>>>
>>>> Wasn't that added into a different task->plug?
>>>
>>> I'm not sure I've got your concern correctly...
>>>
>>> Do you mean NodeA and other IOs from do_writepages() were plugged in
>>> different local plug variables?
>>
>> I think so.
>
> I guess block plug helper says it doesn't allow to use nested plug, so there
> is only one plug in kworker thread?
>
> void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> {
> struct task_struct *tsk = current;
>
> /*
> * If this is a nested plug, don't actually assign it.
> */
> if (tsk->plug)
> return;
> ...
> }
Any further comments?
Thanks,
>
> Thanks,
>
>>
>>>
>>> Thanks,
>>>
>>>>
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>>>>>>> - f2fs_write_data_pages
>>>>>>> - f2fs_write_single_data_page -- write last dirty page
>>>>>>> - f2fs_do_write_data_page
>>>>>>> - set_page_writeback -- clear page dirty flag and
>>>>>>> PAGECACHE_TAG_DIRTY tag in radix tree
>>>>>>> - f2fs_outplace_write_data
>>>>>>> - f2fs_update_data_blkaddr
>>>>>>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>>>>>>> - inode_dec_dirty_pages
>>>>>>> - writeback_sb_inodes
>>>>>>> - writeback_single_inode
>>>>>>> - do_writepages
>>>>>>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>>>>>>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>>>>>>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>>>>>>> - blk_finish_plug
>>>>>>>
>>>>>>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>>>>>>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>>>>>>> due to valid sbi->wb_sync_req[DATA/NODE].
>>>>>>>
>>>>>>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>>>>>>> Signed-off-by: Zhiguo Niu <[email protected]>
>>>>>>> Signed-off-by: Jing Xia <[email protected]>
>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>> ---
>>>>>>> fs/f2fs/data.c | 6 +++++-
>>>>>>> fs/f2fs/node.c | 6 +++++-
>>>>>>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>> index 76d6fe7b0c8f..932a4c81acaf 100644
>>>>>>> --- a/fs/f2fs/data.c
>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>> atomic_inc(&sbi->wb_sync_req[DATA]);
>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>>>>>>> + /* to avoid potential deadlock */
>>>>>>> + if (current->plug)
>>>>>>> + blk_finish_plug(current->plug);
>>>>>>> goto skip_write;
>>>>>>> + }
>>>>>>> if (__should_serialize_io(inode, wbc)) {
>>>>>>> mutex_lock(&sbi->writepages);
>>>>>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>>>>>> index 556fcd8457f3..69c6bcaf5aae 100644
>>>>>>> --- a/fs/f2fs/node.c
>>>>>>> +++ b/fs/f2fs/node.c
>>>>>>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>> atomic_inc(&sbi->wb_sync_req[NODE]);
>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>>>>>>> + /* to avoid potential deadlock */
>>>>>>> + if (current->plug)
>>>>>>> + blk_finish_plug(current->plug);
>>>>>>> goto skip_write;
>>>>>>> + }
>>>>>>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>>>>>> --
>>>>>>> 2.32.0
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
ping,
On 2022/2/25 11:02, Chao Yu wrote:
> On 2022/2/3 22:57, Chao Yu wrote:
>> On 2022/2/3 9:51, Jaegeuk Kim wrote:
>>> On 01/29, Chao Yu wrote:
>>>> On 2022/1/29 8:37, Jaegeuk Kim wrote:
>>>>> On 01/28, Chao Yu wrote:
>>>>>> On 2022/1/28 5:59, Jaegeuk Kim wrote:
>>>>>>> On 01/27, Chao Yu wrote:
>>>>>>>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>>>>>>>> between kworker and checkpoint as below:
>>>>>>>>
>>>>>>>> [T:writeback] [T:checkpoint]
>>>>>>>> - wb_writeback
>>>>>>>> - blk_start_plug
>>>>>>>> bio contains NodeA was plugged in writeback threads
>>>>>>>
>>>>>>> I'm still trying to understand more precisely. So, how is it possible to
>>>>>>> have bio having node write in this current context?
>>>>>>
>>>>>> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
>>>>>> during writebacking node_inode's data page (which should be node page)?
>>>>>
>>>>> Wasn't that added into a different task->plug?
>>>>
>>>> I'm not sure I've got your concern correctly...
>>>>
>>>> Do you mean NodeA and other IOs from do_writepages() were plugged in
>>>> different local plug variables?
>>>
>>> I think so.
>>
>> I guess block plug helper says it doesn't allow to use nested plug, so there
>> is only one plug in kworker thread?
>>
>> void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>> {
>> struct task_struct *tsk = current;
>>
>> /*
>> * If this is a nested plug, don't actually assign it.
>> */
>> if (tsk->plug)
>> return;
>> ...
>> }
>
> Any further comments?
>
> Thanks,
>
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>>>>>>>> - f2fs_write_data_pages
>>>>>>>> - f2fs_write_single_data_page -- write last dirty page
>>>>>>>> - f2fs_do_write_data_page
>>>>>>>> - set_page_writeback -- clear page dirty flag and
>>>>>>>> PAGECACHE_TAG_DIRTY tag in radix tree
>>>>>>>> - f2fs_outplace_write_data
>>>>>>>> - f2fs_update_data_blkaddr
>>>>>>>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>>>>>>>> - inode_dec_dirty_pages
>>>>>>>> - writeback_sb_inodes
>>>>>>>> - writeback_single_inode
>>>>>>>> - do_writepages
>>>>>>>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>>>>>>>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>>>>>>>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>>>>>>>> - blk_finish_plug
>>>>>>>>
>>>>>>>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>>>>>>>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>>>>>>>> due to valid sbi->wb_sync_req[DATA/NODE].
>>>>>>>>
>>>>>>>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>>>>>>>> Signed-off-by: Zhiguo Niu <[email protected]>
>>>>>>>> Signed-off-by: Jing Xia <[email protected]>
>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/f2fs/data.c | 6 +++++-
>>>>>>>> fs/f2fs/node.c | 6 +++++-
>>>>>>>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>> index 76d6fe7b0c8f..932a4c81acaf 100644
>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>> atomic_inc(&sbi->wb_sync_req[DATA]);
>>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>>>>>>>> + /* to avoid potential deadlock */
>>>>>>>> + if (current->plug)
>>>>>>>> + blk_finish_plug(current->plug);
>>>>>>>> goto skip_write;
>>>>>>>> + }
>>>>>>>> if (__should_serialize_io(inode, wbc)) {
>>>>>>>> mutex_lock(&sbi->writepages);
>>>>>>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>>>>>>> index 556fcd8457f3..69c6bcaf5aae 100644
>>>>>>>> --- a/fs/f2fs/node.c
>>>>>>>> +++ b/fs/f2fs/node.c
>>>>>>>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>> atomic_inc(&sbi->wb_sync_req[NODE]);
>>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>>>>>>>> + /* to avoid potential deadlock */
>>>>>>>> + if (current->plug)
>>>>>>>> + blk_finish_plug(current->plug);
>>>>>>>> goto skip_write;
>>>>>>>> + }
>>>>>>>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>>>>>>> --
>>>>>>>> 2.32.0
>>
>>
>> _______________________________________________
>> Linux-f2fs-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>
>
> _______________________________________________
> Linux-f2fs-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 03/02, Chao Yu wrote:
> ping,
>
> On 2022/2/25 11:02, Chao Yu wrote:
> > On 2022/2/3 22:57, Chao Yu wrote:
> > > On 2022/2/3 9:51, Jaegeuk Kim wrote:
> > > > On 01/29, Chao Yu wrote:
> > > > > On 2022/1/29 8:37, Jaegeuk Kim wrote:
> > > > > > On 01/28, Chao Yu wrote:
> > > > > > > On 2022/1/28 5:59, Jaegeuk Kim wrote:
> > > > > > > > On 01/27, Chao Yu wrote:
> > > > > > > > > Quoted from Jing Xia's report, there is a potential deadlock may happen
> > > > > > > > > between kworker and checkpoint as below:
> > > > > > > > >
> > > > > > > > > [T:writeback]??????????????? [T:checkpoint]
> > > > > > > > > - wb_writeback
> > > > > > > > > ??? - blk_start_plug
> > > > > > > > > bio contains NodeA was plugged in writeback threads
> > > > > > > >
> > > > > > > > I'm still trying to understand more precisely. So, how is it possible to
> > > > > > > > have bio having node write in this current context?
> > > > > > >
> > > > > > > IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
> > > > > > > during writebacking node_inode's data page (which should be node page)?
> > > > > >
> > > > > > Wasn't that added into a different task->plug?
> > > > >
> > > > > I'm not sure I've got your concern correctly...
> > > > >
> > > > > Do you mean NodeA and other IOs from do_writepages() were plugged in
> > > > > different local plug variables?
> > > >
> > > > I think so.
> > >
> > > I guess block plug helper says it doesn't allow to use nested plug, so there
> > > is only one plug in kworker thread?
Is there only one kworker thread that flushes node and inode pages?
> > >
> > > void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> > > {
> > > ?????struct task_struct *tsk = current;
> > >
> > > ?????/*
> > > ????? * If this is a nested plug, don't actually assign it.
> > > ????? */
> > > ?????if (tsk->plug)
> > > ???????? return;
> > > ...
> > > }
> >
> > Any further comments?
> >
> > Thanks,
> >
> > >
> > > Thanks,
> > >
> > > >
> > > > >
> > > > > Thanks,
> > > > >
> > > > > >
> > > > > > >
> > > > > > > Thanks,
> > > > > > >
> > > > > > > >
> > > > > > > > > ??????????????????? - do_writepages? -- sync write inodeB, inc wb_sync_req[DATA]
> > > > > > > > > ???????????????????? - f2fs_write_data_pages
> > > > > > > > > ????????????????????? - f2fs_write_single_data_page -- write last dirty page
> > > > > > > > > ?????????????????????? - f2fs_do_write_data_page
> > > > > > > > > ??????????????????????? - set_page_writeback? -- clear page dirty flag and
> > > > > > > > > ??????????????????????? PAGECACHE_TAG_DIRTY tag in radix tree
> > > > > > > > > ??????????????????????? - f2fs_outplace_write_data
> > > > > > > > > ???????????????????????? - f2fs_update_data_blkaddr
> > > > > > > > > ????????????????????????? - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
> > > > > > > > > ?????????????????????? - inode_dec_dirty_pages
> > > > > > > > > ??? - writeback_sb_inodes
> > > > > > > > > ???? - writeback_single_inode
> > > > > > > > > ????? - do_writepages
> > > > > > > > > ?????? - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
> > > > > > > > > ??????? - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
> > > > > > > > > ???? - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
> > > > > > > > > ??? - blk_finish_plug
> > > > > > > > >
> > > > > > > > > Let's try to avoid deadlock condition by forcing unplugging previous bio via
> > > > > > > > > blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
> > > > > > > > > due to valid sbi->wb_sync_req[DATA/NODE].
> > > > > > > > >
> > > > > > > > > Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
> > > > > > > > > Signed-off-by: Zhiguo Niu <[email protected]>
> > > > > > > > > Signed-off-by: Jing Xia <[email protected]>
> > > > > > > > > Signed-off-by: Chao Yu <[email protected]>
> > > > > > > > > ---
> > > > > > > > > ??? fs/f2fs/data.c | 6 +++++-
> > > > > > > > > ??? fs/f2fs/node.c | 6 +++++-
> > > > > > > > > ??? 2 files changed, 10 insertions(+), 2 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > > > > > > > > index 76d6fe7b0c8f..932a4c81acaf 100644
> > > > > > > > > --- a/fs/f2fs/data.c
> > > > > > > > > +++ b/fs/f2fs/data.c
> > > > > > > > > @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
> > > > > > > > > ??????? /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
> > > > > > > > > ??????? if (wbc->sync_mode == WB_SYNC_ALL)
> > > > > > > > > ??????????? atomic_inc(&sbi->wb_sync_req[DATA]);
> > > > > > > > > -??? else if (atomic_read(&sbi->wb_sync_req[DATA]))
> > > > > > > > > +??? else if (atomic_read(&sbi->wb_sync_req[DATA])) {
> > > > > > > > > +??????? /* to avoid potential deadlock */
> > > > > > > > > +??????? if (current->plug)
> > > > > > > > > +??????????? blk_finish_plug(current->plug);
> > > > > > > > > ??????????? goto skip_write;
> > > > > > > > > +??? }
> > > > > > > > > ??????? if (__should_serialize_io(inode, wbc)) {
> > > > > > > > > ??????????? mutex_lock(&sbi->writepages);
> > > > > > > > > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > > > > > > > > index 556fcd8457f3..69c6bcaf5aae 100644
> > > > > > > > > --- a/fs/f2fs/node.c
> > > > > > > > > +++ b/fs/f2fs/node.c
> > > > > > > > > @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
> > > > > > > > > ??????? if (wbc->sync_mode == WB_SYNC_ALL)
> > > > > > > > > ??????????? atomic_inc(&sbi->wb_sync_req[NODE]);
> > > > > > > > > -??? else if (atomic_read(&sbi->wb_sync_req[NODE]))
> > > > > > > > > +??? else if (atomic_read(&sbi->wb_sync_req[NODE])) {
> > > > > > > > > +??????? /* to avoid potential deadlock */
> > > > > > > > > +??????? if (current->plug)
> > > > > > > > > +??????????? blk_finish_plug(current->plug);
> > > > > > > > > ??????????? goto skip_write;
> > > > > > > > > +??? }
> > > > > > > > > ??????? trace_f2fs_writepages(mapping->host, wbc, NODE);
> > > > > > > > > --
> > > > > > > > > 2.32.0
> > >
> > >
> > > _______________________________________________
> > > Linux-f2fs-devel mailing list
> > > [email protected]
> > > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> >
> >
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
On 2022/3/2 13:26, Jaegeuk Kim wrote:
> On 03/02, Chao Yu wrote:
>> ping,
>>
>> On 2022/2/25 11:02, Chao Yu wrote:
>>> On 2022/2/3 22:57, Chao Yu wrote:
>>>> On 2022/2/3 9:51, Jaegeuk Kim wrote:
>>>>> On 01/29, Chao Yu wrote:
>>>>>> On 2022/1/29 8:37, Jaegeuk Kim wrote:
>>>>>>> On 01/28, Chao Yu wrote:
>>>>>>>> On 2022/1/28 5:59, Jaegeuk Kim wrote:
>>>>>>>>> On 01/27, Chao Yu wrote:
>>>>>>>>>> Quoted from Jing Xia's report, there is a potential deadlock may happen
>>>>>>>>>> between kworker and checkpoint as below:
>>>>>>>>>>
>>>>>>>>>> [T:writeback] [T:checkpoint]
>>>>>>>>>> - wb_writeback
>>>>>>>>>> - blk_start_plug
>>>>>>>>>> bio contains NodeA was plugged in writeback threads
>>>>>>>>>
>>>>>>>>> I'm still trying to understand more precisely. So, how is it possible to
>>>>>>>>> have bio having node write in this current context?
>>>>>>>>
>>>>>>>> IMO, after above blk_start_plug(), it may plug some inode's node page in kworker
>>>>>>>> during writebacking node_inode's data page (which should be node page)?
>>>>>>>
>>>>>>> Wasn't that added into a different task->plug?
>>>>>>
>>>>>> I'm not sure I've got your concern correctly...
>>>>>>
>>>>>> Do you mean NodeA and other IOs from do_writepages() were plugged in
>>>>>> different local plug variables?
>>>>>
>>>>> I think so.
>>>>
>>>> I guess block plug helper says it doesn't allow to use nested plug, so there
>>>> is only one plug in kworker thread?
>
> Is there only one kworker thread that flushes node and inode pages?
IIRC, =one kworker per block device?
Thanks,
>
>>>>
>>>> void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>>>> {
>>>> struct task_struct *tsk = current;
>>>>
>>>> /*
>>>> * If this is a nested plug, don't actually assign it.
>>>> */
>>>> if (tsk->plug)
>>>> return;
>>>> ...
>>>> }
>>>
>>> Any further comments?
>>>
>>> Thanks,
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>>>
>>>>>>>>>> - do_writepages -- sync write inodeB, inc wb_sync_req[DATA]
>>>>>>>>>> - f2fs_write_data_pages
>>>>>>>>>> - f2fs_write_single_data_page -- write last dirty page
>>>>>>>>>> - f2fs_do_write_data_page
>>>>>>>>>> - set_page_writeback -- clear page dirty flag and
>>>>>>>>>> PAGECACHE_TAG_DIRTY tag in radix tree
>>>>>>>>>> - f2fs_outplace_write_data
>>>>>>>>>> - f2fs_update_data_blkaddr
>>>>>>>>>> - f2fs_wait_on_page_writeback -- wait NodeA to writeback here
>>>>>>>>>> - inode_dec_dirty_pages
>>>>>>>>>> - writeback_sb_inodes
>>>>>>>>>> - writeback_single_inode
>>>>>>>>>> - do_writepages
>>>>>>>>>> - f2fs_write_data_pages -- skip writepages due to wb_sync_req[DATA]
>>>>>>>>>> - wbc->pages_skipped += get_dirty_pages() -- PAGECACHE_TAG_DIRTY is not set but get_dirty_pages() returns one
>>>>>>>>>> - requeue_inode -- requeue inode to wb->b_dirty queue due to non-zero.pages_skipped
>>>>>>>>>> - blk_finish_plug
>>>>>>>>>>
>>>>>>>>>> Let's try to avoid deadlock condition by forcing unplugging previous bio via
>>>>>>>>>> blk_finish_plug(current->plug) once we'v skipped writeback in writepages()
>>>>>>>>>> due to valid sbi->wb_sync_req[DATA/NODE].
>>>>>>>>>>
>>>>>>>>>> Fixes: 687de7f1010c ("f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE")
>>>>>>>>>> Signed-off-by: Zhiguo Niu <[email protected]>
>>>>>>>>>> Signed-off-by: Jing Xia <[email protected]>
>>>>>>>>>> Signed-off-by: Chao Yu <[email protected]>
>>>>>>>>>> ---
>>>>>>>>>> fs/f2fs/data.c | 6 +++++-
>>>>>>>>>> fs/f2fs/node.c | 6 +++++-
>>>>>>>>>> 2 files changed, 10 insertions(+), 2 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>>>> index 76d6fe7b0c8f..932a4c81acaf 100644
>>>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>>>> @@ -3174,8 +3174,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
>>>>>>>>>> /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
>>>>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>>>> atomic_inc(&sbi->wb_sync_req[DATA]);
>>>>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[DATA]))
>>>>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[DATA])) {
>>>>>>>>>> + /* to avoid potential deadlock */
>>>>>>>>>> + if (current->plug)
>>>>>>>>>> + blk_finish_plug(current->plug);
>>>>>>>>>> goto skip_write;
>>>>>>>>>> + }
>>>>>>>>>> if (__should_serialize_io(inode, wbc)) {
>>>>>>>>>> mutex_lock(&sbi->writepages);
>>>>>>>>>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>>>>>>>>>> index 556fcd8457f3..69c6bcaf5aae 100644
>>>>>>>>>> --- a/fs/f2fs/node.c
>>>>>>>>>> +++ b/fs/f2fs/node.c
>>>>>>>>>> @@ -2106,8 +2106,12 @@ static int f2fs_write_node_pages(struct address_space *mapping,
>>>>>>>>>> if (wbc->sync_mode == WB_SYNC_ALL)
>>>>>>>>>> atomic_inc(&sbi->wb_sync_req[NODE]);
>>>>>>>>>> - else if (atomic_read(&sbi->wb_sync_req[NODE]))
>>>>>>>>>> + else if (atomic_read(&sbi->wb_sync_req[NODE])) {
>>>>>>>>>> + /* to avoid potential deadlock */
>>>>>>>>>> + if (current->plug)
>>>>>>>>>> + blk_finish_plug(current->plug);
>>>>>>>>>> goto skip_write;
>>>>>>>>>> + }
>>>>>>>>>> trace_f2fs_writepages(mapping->host, wbc, NODE);
>>>>>>>>>> --
>>>>>>>>>> 2.32.0
>>>>
>>>>
>>>> _______________________________________________
>>>> Linux-f2fs-devel mailing list
>>>> [email protected]
>>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
>>>
>>>
>>> _______________________________________________
>>> Linux-f2fs-devel mailing list
>>> [email protected]
>>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel