If inode has already started to atomic commit, then set_page_dirty will
not mix the gc pages with the inmem atomic pages, so the page can be
gced safely.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/data.c | 5 ++---
fs/f2fs/gc.c | 6 ++++--
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7435830..edafcb6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (S_ISDIR(inode->i_mode))
return true;
- if (f2fs_is_atomic_file(inode))
- return true;
if (fio) {
if (is_cold_data(fio->page))
return true;
if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
return true;
- }
+ } else if (f2fs_is_atomic_file(inode))
+ return true;
return false;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b9d93fd..84ab3ff 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode) &&
+ !f2fs_is_commit_atomic_write(inode))
goto out;
if (f2fs_is_pinned_file(inode)) {
@@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode) &&
+ !f2fs_is_commit_atomic_write(inode))
goto out;
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
--
1.8.5.2
This patch enables to gc atomic file by adding GC_WRITTEN_PAGE to
identify the gced pages of atomic file, which can avoid
register_inmem_page in set_page_dirty, so the gced pages will not mix
with the inmem pages.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/data.c | 7 ++++++-
fs/f2fs/gc.c | 25 ++++++++++++++++++-------
fs/f2fs/segment.h | 3 +++
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index edafcb6..5e1fc5d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -120,6 +120,10 @@ static void f2fs_write_end_io(struct bio *bio)
dec_page_count(sbi, type);
clear_cold_data(page);
+ if (IS_GC_WRITTEN_PAGE(page)) {
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ }
end_page_writeback(page);
}
if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
@@ -2418,7 +2422,8 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
- if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
+ if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)
+ && !IS_GC_WRITTEN_PAGE(page)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
register_inmem_page(inode, page);
return 1;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 84ab3ff..9d54ddb 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,10 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_is_commit_atomic_write(inode))
- goto out;
-
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
goto out;
@@ -680,6 +676,12 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
+ if (f2fs_is_atomic_file(inode) &&
+ !f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(fio.encrypted_page)) {
+ set_page_private(fio.encrypted_page, (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(fio.encrypted_page);
+ }
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
@@ -730,9 +732,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_is_commit_atomic_write(inode))
- goto out;
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
@@ -742,6 +741,12 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
+ if (f2fs_is_atomic_file(inode) &&
+ !f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(page);
+ }
set_page_dirty(page);
set_cold_data(page);
} else {
@@ -762,6 +767,12 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
int err;
retry:
+ if (f2fs_is_atomic_file(inode) &&
+ !f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(page);
+ }
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index f11c4bc..f0a6432 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -203,11 +203,14 @@ struct segment_allocation {
*/
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
+#define GC_WRITTEN_PAGE ((unsigned long)-3)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
+#define IS_GC_WRITTEN_PAGE(page) \
+ (page_private(page) == (unsigned long)GC_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
--
1.8.5.2
On 2018/2/3 10:47, Yunlong Song wrote:
> If inode has already started to atomic commit, then set_page_dirty will
> not mix the gc pages with the inmem atomic pages, so the page can be
> gced safely.
Let's avoid Ccing fs mailing list if the patch didn't change vfs common
codes.
As you know, the problem here is mixed dnode block flushing w/o writebacking
gced data block, result in making transaction unintegrated.
So how about just using dio_rwsem[WRITE] during atomic committing to exclude
GCing data block of atomic opened file?
Thanks,
>
> Signed-off-by: Yunlong Song <[email protected]>
> ---
> fs/f2fs/data.c | 5 ++---
> fs/f2fs/gc.c | 6 ++++--
> 2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 7435830..edafcb6 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
> return true;
> if (S_ISDIR(inode->i_mode))
> return true;
> - if (f2fs_is_atomic_file(inode))
> - return true;
> if (fio) {
> if (is_cold_data(fio->page))
> return true;
> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
> return true;
> - }
> + } else if (f2fs_is_atomic_file(inode))
> + return true;
> return false;
> }
>
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index b9d93fd..84ab3ff 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
> goto out;
>
> - if (f2fs_is_atomic_file(inode))
> + if (f2fs_is_atomic_file(inode) &&
> + !f2fs_is_commit_atomic_write(inode))
> goto out;
>
> if (f2fs_is_pinned_file(inode)) {
> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
> goto out;
>
> - if (f2fs_is_atomic_file(inode))
> + if (f2fs_is_atomic_file(inode) &&
> + !f2fs_is_commit_atomic_write(inode))
> goto out;
> if (f2fs_is_pinned_file(inode)) {
> if (gc_type == FG_GC)
>
Is it necessary to add a lock here? What's the problem of this patch (no
lock at all)? Anyway, the problem is expected to be fixed asap, since
attackers can easily write an app with only atomic start and no atomic
commit, which will cause f2fs run into loop gc if the disk layout is
much fragmented, since f2fs_gc will select the same target victim all
the time (e.g. one block of target victim belongs to the opened atomic
file, and it will not be moved and do_garbage_collect will finally
return 0, and that victim is selected again next time) and goto gc_more
time and time again, which will block all the fs ops (all the fs ops
will hang up in f2fs_balance_fs).
On 2018/2/4 22:56, Chao Yu wrote:
> On 2018/2/3 10:47, Yunlong Song wrote:
>> If inode has already started to atomic commit, then set_page_dirty will
>> not mix the gc pages with the inmem atomic pages, so the page can be
>> gced safely.
>
> Let's avoid Ccing fs mailing list if the patch didn't change vfs common
> codes.
>
> As you know, the problem here is mixed dnode block flushing w/o writebacking
> gced data block, result in making transaction unintegrated.
>
> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
> GCing data block of atomic opened file?
>
> Thanks,
>
>>
>> Signed-off-by: Yunlong Song <[email protected]>
>> ---
>> fs/f2fs/data.c | 5 ++---
>> fs/f2fs/gc.c | 6 ++++--
>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>> index 7435830..edafcb6 100644
>> --- a/fs/f2fs/data.c
>> +++ b/fs/f2fs/data.c
>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>> return true;
>> if (S_ISDIR(inode->i_mode))
>> return true;
>> - if (f2fs_is_atomic_file(inode))
>> - return true;
>> if (fio) {
>> if (is_cold_data(fio->page))
>> return true;
>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>> return true;
>> - }
>> + } else if (f2fs_is_atomic_file(inode))
>> + return true;
>> return false;
>> }
>>
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index b9d93fd..84ab3ff 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>> goto out;
>>
>> - if (f2fs_is_atomic_file(inode))
>> + if (f2fs_is_atomic_file(inode) &&
>> + !f2fs_is_commit_atomic_write(inode))
>> goto out;
>>
>> if (f2fs_is_pinned_file(inode)) {
>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>> goto out;
>>
>> - if (f2fs_is_atomic_file(inode))
>> + if (f2fs_is_atomic_file(inode) &&
>> + !f2fs_is_commit_atomic_write(inode))
>> goto out;
>> if (f2fs_is_pinned_file(inode)) {
>> if (gc_type == FG_GC)
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/2/5 10:53, Yunlong Song wrote:
> Is it necessary to add a lock here? What's the problem of this patch (no
> lock at all)? Anyway, the problem is expected to be fixed asap, since
> attackers can easily write an app with only atomic start and no atomic
> commit, which will cause f2fs run into loop gc if the disk layout is
> much fragmented, since f2fs_gc will select the same target victim all
> the time (e.g. one block of target victim belongs to the opened atomic
> file, and it will not be moved and do_garbage_collect will finally
> return 0, and that victim is selected again next time) and goto gc_more
> time and time again, which will block all the fs ops (all the fs ops
> will hang up in f2fs_balance_fs).
Hmm.. w/ original commit log and implementation, I supposed that the patch
intended to fix to make atomic write be isolated from other IOs like GC
triggered writes...
Alright, we have discuss the problem before in below link:
https://www.mail-archive.com/[email protected]/msg1571951.html
I meant, for example:
f2fs_ioc_start_atomic_write()
inode->atomic_open_time = get_mtime();
f2fs_ioc_commit_atomic_write()
inode->atomic_open_time = 0;
f2fs_balance_fs_bg()
for_each_atomic_open_file()
if (inode->atomic_open_time &&
inode->atomic_open_time > threshold) {
drop_inmem_pages();
f2fs_msg();
}
threshold = 30s
Any thoughts?
Thanks,
>
> On 2018/2/4 22:56, Chao Yu wrote:
>> On 2018/2/3 10:47, Yunlong Song wrote:
>>> If inode has already started to atomic commit, then set_page_dirty will
>>> not mix the gc pages with the inmem atomic pages, so the page can be
>>> gced safely.
>>
>> Let's avoid Ccing fs mailing list if the patch didn't change vfs common
>> codes.
>>
>> As you know, the problem here is mixed dnode block flushing w/o writebacking
>> gced data block, result in making transaction unintegrated.
>>
>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>> GCing data block of atomic opened file?
>>
>> Thanks,
>>
>>>
>>> Signed-off-by: Yunlong Song <[email protected]>
>>> ---
>>> fs/f2fs/data.c | 5 ++---
>>> fs/f2fs/gc.c | 6 ++++--
>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index 7435830..edafcb6 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>> return true;
>>> if (S_ISDIR(inode->i_mode))
>>> return true;
>>> - if (f2fs_is_atomic_file(inode))
>>> - return true;
>>> if (fio) {
>>> if (is_cold_data(fio->page))
>>> return true;
>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>> return true;
>>> - }
>>> + } else if (f2fs_is_atomic_file(inode))
>>> + return true;
>>> return false;
>>> }
>>>
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index b9d93fd..84ab3ff 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>> goto out;
>>>
>>> - if (f2fs_is_atomic_file(inode))
>>> + if (f2fs_is_atomic_file(inode) &&
>>> + !f2fs_is_commit_atomic_write(inode))
>>> goto out;
>>>
>>> if (f2fs_is_pinned_file(inode)) {
>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>> goto out;
>>>
>>> - if (f2fs_is_atomic_file(inode))
>>> + if (f2fs_is_atomic_file(inode) &&
>>> + !f2fs_is_commit_atomic_write(inode))
>>> goto out;
>>> if (f2fs_is_pinned_file(inode)) {
>>> if (gc_type == FG_GC)
>>>
>>
>> .
>>
>
Is it necessary to make atomic commit fail? What's the problem of this
patch (no lock at all and does not make atomic fail)? These two patches
aims to provide ability to gc old blocks of opened atomic file, with no
affection to original atomic commit and no mix with inmem pages.
On 2018/2/5 14:29, Chao Yu wrote:
> On 2018/2/5 10:53, Yunlong Song wrote:
>> Is it necessary to add a lock here? What's the problem of this patch (no
>> lock at all)? Anyway, the problem is expected to be fixed asap, since
>> attackers can easily write an app with only atomic start and no atomic
>> commit, which will cause f2fs run into loop gc if the disk layout is
>> much fragmented, since f2fs_gc will select the same target victim all
>> the time (e.g. one block of target victim belongs to the opened atomic
>> file, and it will not be moved and do_garbage_collect will finally
>> return 0, and that victim is selected again next time) and goto gc_more
>> time and time again, which will block all the fs ops (all the fs ops
>> will hang up in f2fs_balance_fs).
>
> Hmm.. w/ original commit log and implementation, I supposed that the patch
> intended to fix to make atomic write be isolated from other IOs like GC
> triggered writes...
>
> Alright, we have discuss the problem before in below link:
> https://www.mail-archive.com/[email protected]/msg1571951.html
>
> I meant, for example:
>
> f2fs_ioc_start_atomic_write()
> inode->atomic_open_time = get_mtime();
>
> f2fs_ioc_commit_atomic_write()
> inode->atomic_open_time = 0;
>
> f2fs_balance_fs_bg()
> for_each_atomic_open_file()
> if (inode->atomic_open_time &&
> inode->atomic_open_time > threshold) {
> drop_inmem_pages();
> f2fs_msg();
> }
>
> threshold = 30s
>
> Any thoughts?
>
> Thanks,
>
>>
>> On 2018/2/4 22:56, Chao Yu wrote:
>>> On 2018/2/3 10:47, Yunlong Song wrote:
>>>> If inode has already started to atomic commit, then set_page_dirty will
>>>> not mix the gc pages with the inmem atomic pages, so the page can be
>>>> gced safely.
>>>
>>> Let's avoid Ccing fs mailing list if the patch didn't change vfs common
>>> codes.
>>>
>>> As you know, the problem here is mixed dnode block flushing w/o writebacking
>>> gced data block, result in making transaction unintegrated.
>>>
>>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>>> GCing data block of atomic opened file?
>>>
>>> Thanks,
>>>
>>>>
>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>> ---
>>>> fs/f2fs/data.c | 5 ++---
>>>> fs/f2fs/gc.c | 6 ++++--
>>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>> index 7435830..edafcb6 100644
>>>> --- a/fs/f2fs/data.c
>>>> +++ b/fs/f2fs/data.c
>>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>> return true;
>>>> if (S_ISDIR(inode->i_mode))
>>>> return true;
>>>> - if (f2fs_is_atomic_file(inode))
>>>> - return true;
>>>> if (fio) {
>>>> if (is_cold_data(fio->page))
>>>> return true;
>>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>>> return true;
>>>> - }
>>>> + } else if (f2fs_is_atomic_file(inode))
>>>> + return true;
>>>> return false;
>>>> }
>>>>
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index b9d93fd..84ab3ff 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>> goto out;
>>>>
>>>> - if (f2fs_is_atomic_file(inode))
>>>> + if (f2fs_is_atomic_file(inode) &&
>>>> + !f2fs_is_commit_atomic_write(inode))
>>>> goto out;
>>>>
>>>> if (f2fs_is_pinned_file(inode)) {
>>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>> goto out;
>>>>
>>>> - if (f2fs_is_atomic_file(inode))
>>>> + if (f2fs_is_atomic_file(inode) &&
>>>> + !f2fs_is_commit_atomic_write(inode))
>>>> goto out;
>>>> if (f2fs_is_pinned_file(inode)) {
>>>> if (gc_type == FG_GC)
>>>>
>>>
>>> .
>>>
>>
>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/2/5 14:40, Yunlong Song wrote:
> Is it necessary to make atomic commit fail? What's the problem of this
> patch (no lock at all and does not make atomic fail)? These two patches
> aims to provide ability to gc old blocks of opened atomic file, with no
> affection to original atomic commit and no mix with inmem pages.
>
> On 2018/2/5 14:29, Chao Yu wrote:
>> On 2018/2/5 10:53, Yunlong Song wrote:
>>> Is it necessary to add a lock here? What's the problem of this patch (no
>>> lock at all)? Anyway, the problem is expected to be fixed asap, since
>>> attackers can easily write an app with only atomic start and no atomic
>>> commit, which will cause f2fs run into loop gc if the disk layout is
>>> much fragmented, since f2fs_gc will select the same target victim all
>>> the time (e.g. one block of target victim belongs to the opened atomic
>>> file, and it will not be moved and do_garbage_collect will finally
>>> return 0, and that victim is selected again next time) and goto gc_more
>>> time and time again, which will block all the fs ops (all the fs ops
>>> will hang up in f2fs_balance_fs).
>>
>> Hmm.. w/ original commit log and implementation, I supposed that the patch
>> intended to fix to make atomic write be isolated from other IOs like GC
>> triggered writes...
>>
>> Alright, we have discuss the problem before in below link:
>> https://www.mail-archive.com/[email protected]/msg1571951.html
>>
>> I meant, for example:
>>
>> f2fs_ioc_start_atomic_write()
>> inode->atomic_open_time = get_mtime();
>>
>> f2fs_ioc_commit_atomic_write()
>> inode->atomic_open_time = 0;
>>
>> f2fs_balance_fs_bg()
>> for_each_atomic_open_file()
>> if (inode->atomic_open_time &&
>> inode->atomic_open_time > threshold) {
>> drop_inmem_pages();
>> f2fs_msg();
>> }
>>
>> threshold = 30s
>>
>> Any thoughts?
>>
>> Thanks,
>>
>>>
>>> On 2018/2/4 22:56, Chao Yu wrote:
>>>> On 2018/2/3 10:47, Yunlong Song wrote:
>>>>> If inode has already started to atomic commit, then set_page_dirty will
>>>>> not mix the gc pages with the inmem atomic pages, so the page can be
>>>>> gced safely.
>>>>
>>>> Let's avoid Ccing fs mailing list if the patch didn't change vfs common
>>>> codes.
>>>>
>>>> As you know, the problem here is mixed dnode block flushing w/o writebacking
>>>> gced data block, result in making transaction unintegrated.
OK, details as I explained before:
atomic_commit GC
- file_write_and_wait_range
- move_data_block
- f2fs_submit_page_write
- f2fs_update_data_blkaddr
- set_page_dirty
- fsync_node_pages
1. atomic writes data page #1 & update node #1
2. GC data page #2 & update node #2
3. page #1 & node #1 & #2 can be committed into nand flash before page #2 be
committed.
After a sudden pow-cut, database transaction will be inconsistent. So I think
there will be better to exclude gc/atomic_write to each other, with a lock
instead of flag checking.
Thanks,
>>>>
>>>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>>>> GCing data block of atomic opened file?
>>>>
>>>> Thanks,
>>>>
>>>>>
>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>> ---
>>>>> fs/f2fs/data.c | 5 ++---
>>>>> fs/f2fs/gc.c | 6 ++++--
>>>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index 7435830..edafcb6 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>> return true;
>>>>> if (S_ISDIR(inode->i_mode))
>>>>> return true;
>>>>> - if (f2fs_is_atomic_file(inode))
>>>>> - return true;
>>>>> if (fio) {
>>>>> if (is_cold_data(fio->page))
>>>>> return true;
>>>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>>>> return true;
>>>>> - }
>>>>> + } else if (f2fs_is_atomic_file(inode))
>>>>> + return true;
>>>>> return false;
>>>>> }
>>>>>
>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>> index b9d93fd..84ab3ff 100644
>>>>> --- a/fs/f2fs/gc.c
>>>>> +++ b/fs/f2fs/gc.c
>>>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>> goto out;
>>>>>
>>>>> - if (f2fs_is_atomic_file(inode))
>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>> goto out;
>>>>>
>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>> goto out;
>>>>>
>>>>> - if (f2fs_is_atomic_file(inode))
>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>> goto out;
>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>> if (gc_type == FG_GC)
>>>>>
>>>>
>>>> .
>>>>
>>>
>>
>>
>> .
>>
>
> OK, details as I explained before:
>
> atomic_commit GC
> - file_write_and_wait_range
> - move_data_block
> - f2fs_submit_page_write
> - f2fs_update_data_blkaddr
> - set_page_dirty
> - fsync_node_pages
>
> 1. atomic writes data page #1 & update node #1
> 2. GC data page #2 & update node #2
> 3. page #1 & node #1 & #2 can be committed into nand flash before page #2 be
> committed.
>
> After a sudden pow-cut, database transaction will be inconsistent. So I think
> there will be better to exclude gc/atomic_write to each other, with a lock
> instead of flag checking.
>
I do not understand why this transaction is inconsistent, is it a
problem that page #2 is not committed into nand flash? Since normal
gc also has this problem:
Suppose that there is db file A, f2fs_gc moves data page #1 of db file
A. But if write checkpoint only commit node page #1 and then a sudden
power-cut happens. Data page #1 is not committed to nand flash, but
node page #1 is committed. Is the db transaction broken and
inconsistent?
Come back to your example, I think data page 2 of atomic file does not
belong to this transaction, so even node page 2 is committed, it is just
the same problem as what I have listed above(db file A), and it does not
break this transaction.
> Thanks,
>
>>>>>
>>>>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>>>>> GCing data block of atomic opened file?
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>
>>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>>> ---
>>>>>> fs/f2fs/data.c | 5 ++---
>>>>>> fs/f2fs/gc.c | 6 ++++--
>>>>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>> index 7435830..edafcb6 100644
>>>>>> --- a/fs/f2fs/data.c
>>>>>> +++ b/fs/f2fs/data.c
>>>>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>>> return true;
>>>>>> if (S_ISDIR(inode->i_mode))
>>>>>> return true;
>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>> - return true;
>>>>>> if (fio) {
>>>>>> if (is_cold_data(fio->page))
>>>>>> return true;
>>>>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>>>>> return true;
>>>>>> - }
>>>>>> + } else if (f2fs_is_atomic_file(inode))
>>>>>> + return true;
>>>>>> return false;
>>>>>> }
>>>>>>
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index b9d93fd..84ab3ff 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>> goto out;
>>>>>>
>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>> goto out;
>>>>>>
>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>> goto out;
>>>>>>
>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>> goto out;
>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>> if (gc_type == FG_GC)
>>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>>
>>> .
>>>
>>
>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/2/5 17:37, Yunlong Song wrote:
>
>> OK, details as I explained before:
>>
>> atomic_commit GC
>> - file_write_and_wait_range
>> - move_data_block
>> - f2fs_submit_page_write
>> - f2fs_update_data_blkaddr
>> - set_page_dirty
>> - fsync_node_pages
>>
>> 1. atomic writes data page #1 & update node #1
>> 2. GC data page #2 & update node #2
>> 3. page #1 & node #1 & #2 can be committed into nand flash before page #2 be
>> committed.
>>
>> After a sudden pow-cut, database transaction will be inconsistent. So I think
>> there will be better to exclude gc/atomic_write to each other, with a lock
>> instead of flag checking.
>>
>
> I do not understand why this transaction is inconsistent, is it a
> problem that page #2 is not committed into nand flash? Since normal
Yes, node #2 contains newly updated LBAx of page #2, but if page #2 is not
committed to LBAx, after recovery, page #2 's block address in node #2 will
point to LBAx which contains random data, result in corrupted db file.
> gc also has this problem:
>
> Suppose that there is db file A, f2fs_gc moves data page #1 of db file
> A. But if write checkpoint only commit node page #1 and then a sudden
f2fs will ensure GCed data being persisted during checkpoint, so migrated page
#1 and updated node #1 will both be committed in this checkpoint.
Please check WB_DATA_TYPE macro to see how we define data type that cp
guarantees to writeback.
> power-cut happens. Data page #1 is not committed to nand flash, but
> node page #1 is committed. Is the db transaction broken and
> inconsistent?
>
> Come back to your example, I think data page 2 of atomic file does not
> belong to this transaction, so even node page 2 is committed, it is just
If node #2 is committed only, it will be harmful to db transaction due to the
reason I said above.
Thanks,
> the same problem as what I have listed above(db file A), and it does not
> break this transaction.
>
>> Thanks,
>>
>>>>>>
>>>>>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>>>>>> GCing data block of atomic opened file?
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>>>
>>>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>>>> ---
>>>>>>> fs/f2fs/data.c | 5 ++---
>>>>>>> fs/f2fs/gc.c | 6 ++++--
>>>>>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>> index 7435830..edafcb6 100644
>>>>>>> --- a/fs/f2fs/data.c
>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>>>> return true;
>>>>>>> if (S_ISDIR(inode->i_mode))
>>>>>>> return true;
>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>> - return true;
>>>>>>> if (fio) {
>>>>>>> if (is_cold_data(fio->page))
>>>>>>> return true;
>>>>>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>>>>>> return true;
>>>>>>> - }
>>>>>>> + } else if (f2fs_is_atomic_file(inode))
>>>>>>> + return true;
>>>>>>> return false;
>>>>>>> }
>>>>>>>
>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>> index b9d93fd..84ab3ff 100644
>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>> goto out;
>>>>>>>
>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>>> goto out;
>>>>>>>
>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>> goto out;
>>>>>>>
>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>>> goto out;
>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>> if (gc_type == FG_GC)
>>>>>>>
>>>>>>
>>>>>> .
>>>>>>
>>>>>
>>>>
>>>>
>>>> .
>>>>
>>>
>>
>>
>> .
>>
>
OK, now I got it, thanks for the explanation. Then the point is to avoid
set_page_dirty between file_write_and_wait_range and fsync_node_pages,
so we can lock before file_write_and_wait_range and unlock after
fsync_node_pages, and lock before set_page_dirty and unlock after
set_page_dirty. These patches and the locks can make sure the GCed data
pages are all committed to nand flash with their nodes.
On 2018/2/5 19:10, Chao Yu wrote:
> On 2018/2/5 17:37, Yunlong Song wrote:
>>
>>> OK, details as I explained before:
>>>
>>> atomic_commit GC
>>> - file_write_and_wait_range
>>> - move_data_block
>>> - f2fs_submit_page_write
>>> - f2fs_update_data_blkaddr
>>> - set_page_dirty
>>> - fsync_node_pages
>>>
>>> 1. atomic writes data page #1 & update node #1
>>> 2. GC data page #2 & update node #2
>>> 3. page #1 & node #1 & #2 can be committed into nand flash before page #2 be
>>> committed.
>>>
>>> After a sudden pow-cut, database transaction will be inconsistent. So I think
>>> there will be better to exclude gc/atomic_write to each other, with a lock
>>> instead of flag checking.
>>>
>>
>> I do not understand why this transaction is inconsistent, is it a
>> problem that page #2 is not committed into nand flash? Since normal
>
> Yes, node #2 contains newly updated LBAx of page #2, but if page #2 is not
> committed to LBAx, after recovery, page #2 's block address in node #2 will
> point to LBAx which contains random data, result in corrupted db file.
>
>> gc also has this problem:
>>
>> Suppose that there is db file A, f2fs_gc moves data page #1 of db file
>> A. But if write checkpoint only commit node page #1 and then a sudden
>
> f2fs will ensure GCed data being persisted during checkpoint, so migrated page
> #1 and updated node #1 will both be committed in this checkpoint.
>
> Please check WB_DATA_TYPE macro to see how we define data type that cp
> guarantees to writeback.
>
>> power-cut happens. Data page #1 is not committed to nand flash, but
>> node page #1 is committed. Is the db transaction broken and
>> inconsistent?
>>
>> Come back to your example, I think data page 2 of atomic file does not
>> belong to this transaction, so even node page 2 is committed, it is just
>
> If node #2 is committed only, it will be harmful to db transaction due to the
> reason I said above.
>
> Thanks,
>
>> the same problem as what I have listed above(db file A), and it does not
>> break this transaction.
>>
>>> Thanks,
>>>
>>>>>>>
>>>>>>> So how about just using dio_rwsem[WRITE] during atomic committing to exclude
>>>>>>> GCing data block of atomic opened file?
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>>>
>>>>>>>> Signed-off-by: Yunlong Song <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/f2fs/data.c | 5 ++---
>>>>>>>> fs/f2fs/gc.c | 6 ++++--
>>>>>>>> 2 files changed, 6 insertions(+), 5 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>>>>> index 7435830..edafcb6 100644
>>>>>>>> --- a/fs/f2fs/data.c
>>>>>>>> +++ b/fs/f2fs/data.c
>>>>>>>> @@ -1580,14 +1580,13 @@ bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
>>>>>>>> return true;
>>>>>>>> if (S_ISDIR(inode->i_mode))
>>>>>>>> return true;
>>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>>> - return true;
>>>>>>>> if (fio) {
>>>>>>>> if (is_cold_data(fio->page))
>>>>>>>> return true;
>>>>>>>> if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
>>>>>>>> return true;
>>>>>>>> - }
>>>>>>>> + } else if (f2fs_is_atomic_file(inode))
>>>>>>>> + return true;
>>>>>>>> return false;
>>>>>>>> }
>>>>>>>>
>>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>>> index b9d93fd..84ab3ff 100644
>>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>>> @@ -622,7 +622,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>>> goto out;
>>>>>>>>
>>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>>>> goto out;
>>>>>>>>
>>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>>> @@ -729,7 +730,8 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>>> goto out;
>>>>>>>>
>>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>>> + if (f2fs_is_atomic_file(inode) &&
>>>>>>>> + !f2fs_is_commit_atomic_write(inode))
>>>>>>>> goto out;
>>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>>> if (gc_type == FG_GC)
>>>>>>>>
>>>>>>>
>>>>>>> .
>>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>>
>>> .
>>>
>>
>
>
> .
>
--
Thanks,
Yunlong Song
This patch adds fi->commit_lock to avoid the case that GCed node pages
are committed but GCed data pages are not committed. This can avoid the
db file run into inconsistent state when sudden-power-off happens if
data pages of atomic file is allowed to be GCed before.
Signed-off-by: Yunlong Song <[email protected]>
---
fs/f2fs/f2fs.h | 1 +
fs/f2fs/file.c | 15 ++++++++++++--
fs/f2fs/gc.c | 61 +++++++++++++++++++++++++++++++++++++--------------------
fs/f2fs/super.c | 1 +
4 files changed, 55 insertions(+), 23 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index dbe87c7..b58a8f2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -628,6 +628,7 @@ struct f2fs_inode_info {
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
+ struct mutex commit_lock; /* lock for commit GCed pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
struct rw_semaphore i_mmap_sem;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542..7e14724 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -202,6 +202,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
{
struct inode *inode = file->f_mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t ino = inode->i_ino;
int ret = 0;
enum cp_reason_type cp_reason = 0;
@@ -219,11 +220,13 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
/* if fdatasync is triggered, let's do in-place-update */
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(inode, FI_NEED_IPU);
+ mutex_lock(&fi->commit_lock);
ret = file_write_and_wait_range(file, start, end);
clear_inode_flag(inode, FI_NEED_IPU);
if (ret) {
trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
+ mutex_unlock(&fi->commit_lock);
return ret;
}
@@ -244,8 +247,11 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
goto go_write;
if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
- exist_written_data(sbi, ino, UPDATE_INO))
+ exist_written_data(sbi, ino, UPDATE_INO)) {
+ mutex_unlock(&fi->commit_lock);
goto flush_out;
+ }
+ mutex_unlock(&fi->commit_lock);
goto out;
}
go_write:
@@ -268,16 +274,20 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
try_to_fix_pino(inode);
clear_inode_flag(inode, FI_APPEND_WRITE);
clear_inode_flag(inode, FI_UPDATE_WRITE);
+ mutex_unlock(&fi->commit_lock);
goto out;
}
sync_nodes:
ret = fsync_node_pages(sbi, inode, &wbc, atomic);
- if (ret)
+ if (ret) {
+ mutex_unlock(&fi->commit_lock);
goto out;
+ }
/* if cp_error was enabled, we should avoid infinite loop */
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
+ mutex_unlock(&fi->commit_lock);
goto out;
}
@@ -286,6 +296,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
f2fs_write_inode(inode, NULL);
goto sync_nodes;
}
+ mutex_unlock(&fi->commit_lock);
/*
* If it's atomic_write, it's just fine to keep write ordering. So
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 9d54ddb..b98aff5 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -676,13 +676,20 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_is_commit_atomic_write(inode) &&
- !IS_GC_WRITTEN_PAGE(fio.encrypted_page)) {
- set_page_private(fio.encrypted_page, (unsigned long)GC_WRITTEN_PAGE);
- SetPagePrivate(fio.encrypted_page);
- }
- set_page_dirty(fio.encrypted_page);
+ if (f2fs_is_atomic_file(inode)) {
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ mutex_lock(&fi->commit_lock);
+ if (!f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(fio.encrypted_page)) {
+ set_page_private(fio.encrypted_page,
+ (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(fio.encrypted_page);
+ }
+ set_page_dirty(fio.encrypted_page);
+ mutex_unlock(&fi->commit_lock);
+ } else
+ set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -741,13 +748,19 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_is_commit_atomic_write(inode) &&
- !IS_GC_WRITTEN_PAGE(page)) {
- set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
- SetPagePrivate(page);
- }
- set_page_dirty(page);
+ if (f2fs_is_atomic_file(inode)) {
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ mutex_lock(&fi->commit_lock);
+ if (!f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(page);
+ }
+ set_page_dirty(page);
+ mutex_unlock(&fi->commit_lock);
+ } else
+ set_page_dirty(page);
set_cold_data(page);
} else {
struct f2fs_io_info fio = {
@@ -767,13 +780,19 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
int err;
retry:
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_is_commit_atomic_write(inode) &&
- !IS_GC_WRITTEN_PAGE(page)) {
- set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
- SetPagePrivate(page);
- }
- set_page_dirty(page);
+ if (f2fs_is_atomic_file(inode)) {
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ mutex_lock(&fi->commit_lock);
+ if (!f2fs_is_commit_atomic_write(inode) &&
+ !IS_GC_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)GC_WRITTEN_PAGE);
+ SetPagePrivate(page);
+ }
+ set_page_dirty(page);
+ mutex_unlock(&fi->commit_lock);
+ } else
+ set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7966cf7..ed6afd1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -772,6 +772,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
+ mutex_init(&fi->commit_lock);
init_rwsem(&fi->dio_rwsem[READ]);
init_rwsem(&fi->dio_rwsem[WRITE]);
init_rwsem(&fi->i_mmap_sem);
--
1.8.5.2
On 2018/2/6 11:49, Yunlong Song wrote:
> This patch adds fi->commit_lock to avoid the case that GCed node pages
> are committed but GCed data pages are not committed. This can avoid the
> db file run into inconsistent state when sudden-power-off happens if
> data pages of atomic file is allowed to be GCed before.
do_fsync: GC:
- mutex_lock(&fi->commit_lock);
- lock_page()
- mutex_lock(&fi->commit_lock);
- lock_page()
Well, please consider lock dependency & code complexity, IMO, reuse
fi->dio_rwsem[WRITE] will be enough as below:
---
fs/f2fs/file.c | 3 +++
fs/f2fs/gc.c | 5 -----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 672a542e5464..1bdc11feb8d0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
inode_lock(inode);
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
if (f2fs_is_volatile_file(inode))
goto err_out;
@@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
}
err_out:
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index b9d93fd532a9..e49416283563 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
- goto out;
-
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
goto out;
@@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
- if (f2fs_is_atomic_file(inode))
- goto out;
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
--
2.14.1.145.gb3622a4ee
Thanks,
Then the GCed data pages are totally mixed with the inmem atomic pages,
this will cause the atomic commit ops write the GCed data pages twice
(the first write happens in GC).
How about using the early two patches to separate the inmem data pages
and GCed data pages, and use dio_rwsem instead of this patch to fix the
dnode page problem (dnode page commited but data page are not committed
for the GCed page)?
On 2018/2/7 20:16, Chao Yu wrote:
> On 2018/2/6 11:49, Yunlong Song wrote:
>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>> are committed but GCed data pages are not committed. This can avoid the
>> db file run into inconsistent state when sudden-power-off happens if
>> data pages of atomic file is allowed to be GCed before.
>
> do_fsync: GC:
> - mutex_lock(&fi->commit_lock);
> - lock_page()
> - mutex_lock(&fi->commit_lock);
> - lock_page()
>
>
> Well, please consider lock dependency & code complexity, IMO, reuse
> fi->dio_rwsem[WRITE] will be enough as below:
>
> ---
> fs/f2fs/file.c | 3 +++
> fs/f2fs/gc.c | 5 -----
> 2 files changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 672a542e5464..1bdc11feb8d0 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>
> inode_lock(inode);
>
> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
> +
> if (f2fs_is_volatile_file(inode))
> goto err_out;
>
> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
> }
> err_out:
> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
> inode_unlock(inode);
> mnt_drop_write_file(filp);
> return ret;
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index b9d93fd532a9..e49416283563 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
> goto out;
>
> - if (f2fs_is_atomic_file(inode))
> - goto out;
> -
> if (f2fs_is_pinned_file(inode)) {
> f2fs_pin_file_control(inode, true);
> goto out;
> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
> goto out;
>
> - if (f2fs_is_atomic_file(inode))
> - goto out;
> if (f2fs_is_pinned_file(inode)) {
> if (gc_type == FG_GC)
> f2fs_pin_file_control(inode, true);
>
--
Thanks,
Yunlong Song
On 2018/2/8 11:11, Yunlong Song wrote:
> Then the GCed data pages are totally mixed with the inmem atomic pages,
If we add dio_rwsem, GC flow is exclude with atomic write flow. There
will be not race case to mix GCed page into atomic pages.
Or you mean:
- gc_data_segment
- move_data_page
- f2fs_is_atomic_file
- f2fs_ioc_start_atomic_write
- set_inode_flag(inode, FI_ATOMIC_FILE);
- f2fs_set_data_page_dirty
- register_inmem_page
In this case, GCed page can be mixed into database transaction, but could
it cause any problem except break rule of isolation for transaction.
> this will cause the atomic commit ops write the GCed data pages twice
> (the first write happens in GC).
>
> How about using the early two patches to separate the inmem data pages
> and GCed data pages, and use dio_rwsem instead of this patch to fix the
> dnode page problem (dnode page commited but data page are not committed
> for the GCed page)?
Could we fix the race case first, based on that fixing, and then find the
place that we can improve?
>
>
> On 2018/2/7 20:16, Chao Yu wrote:
>> On 2018/2/6 11:49, Yunlong Song wrote:
>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>> are committed but GCed data pages are not committed. This can avoid the
>>> db file run into inconsistent state when sudden-power-off happens if
>>> data pages of atomic file is allowed to be GCed before.
>>
>> do_fsync: GC:
>> - mutex_lock(&fi->commit_lock);
>> - lock_page()
>> - mutex_lock(&fi->commit_lock);
>> - lock_page()
>>
>>
>> Well, please consider lock dependency & code complexity, IMO, reuse
>> fi->dio_rwsem[WRITE] will be enough as below:
>>
>> ---
>> fs/f2fs/file.c | 3 +++
>> fs/f2fs/gc.c | 5 -----
>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>> index 672a542e5464..1bdc11feb8d0 100644
>> --- a/fs/f2fs/file.c
>> +++ b/fs/f2fs/file.c
>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>
>> inode_lock(inode);
>>
>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>> +
>> if (f2fs_is_volatile_file(inode))
>> goto err_out;
>>
>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>> }
>> err_out:
>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>> inode_unlock(inode);
>> mnt_drop_write_file(filp);
>> return ret;
>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>> index b9d93fd532a9..e49416283563 100644
>> --- a/fs/f2fs/gc.c
>> +++ b/fs/f2fs/gc.c
>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>> goto out;
>>
>> - if (f2fs_is_atomic_file(inode))
>> - goto out;
Seems that we need this check.
>> -
>> if (f2fs_is_pinned_file(inode)) {
>> f2fs_pin_file_control(inode, true);
>> goto out;
>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>> goto out;
>>
>> - if (f2fs_is_atomic_file(inode))
>> - goto out;
Ditto.
Thanks,
>> if (f2fs_is_pinned_file(inode)) {
>> if (gc_type == FG_GC)
>> f2fs_pin_file_control(inode, true);
>>
>
As what I point in last mail, if the atomic file is not committed
yet, gc_data_segment will register_inmem_page the GCed data pages.
This will cause these data pages written twice, the first write
happens in move_data_page->do_write_data_page, and the second
write happens in later __commit_inmem_pages->do_write_data_page.
On 2018/2/9 20:44, Chao Yu wrote:
> On 2018/2/8 11:11, Yunlong Song wrote:
>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>
> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
> will be not race case to mix GCed page into atomic pages.
>
> Or you mean:
>
> - gc_data_segment
> - move_data_page
> - f2fs_is_atomic_file
> - f2fs_ioc_start_atomic_write
> - set_inode_flag(inode, FI_ATOMIC_FILE);
> - f2fs_set_data_page_dirty
> - register_inmem_page
>
> In this case, GCed page can be mixed into database transaction, but could
> it cause any problem except break rule of isolation for transaction.
>
>> this will cause the atomic commit ops write the GCed data pages twice
>> (the first write happens in GC).
>>
>> How about using the early two patches to separate the inmem data pages
>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>> dnode page problem (dnode page commited but data page are not committed
>> for the GCed page)?
>
> Could we fix the race case first, based on that fixing, and then find the
> place that we can improve?
>
>>
>>
>> On 2018/2/7 20:16, Chao Yu wrote:
>>> On 2018/2/6 11:49, Yunlong Song wrote:
>>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>>> are committed but GCed data pages are not committed. This can avoid the
>>>> db file run into inconsistent state when sudden-power-off happens if
>>>> data pages of atomic file is allowed to be GCed before.
>>>
>>> do_fsync: GC:
>>> - mutex_lock(&fi->commit_lock);
>>> - lock_page()
>>> - mutex_lock(&fi->commit_lock);
>>> - lock_page()
>>>
>>>
>>> Well, please consider lock dependency & code complexity, IMO, reuse
>>> fi->dio_rwsem[WRITE] will be enough as below:
>>>
>>> ---
>>> fs/f2fs/file.c | 3 +++
>>> fs/f2fs/gc.c | 5 -----
>>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 672a542e5464..1bdc11feb8d0 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>
>>> inode_lock(inode);
>>>
>>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>> +
>>> if (f2fs_is_volatile_file(inode))
>>> goto err_out;
>>>
>>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>> }
>>> err_out:
>>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>> inode_unlock(inode);
>>> mnt_drop_write_file(filp);
>>> return ret;
>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>> index b9d93fd532a9..e49416283563 100644
>>> --- a/fs/f2fs/gc.c
>>> +++ b/fs/f2fs/gc.c
>>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>> goto out;
>>>
>>> - if (f2fs_is_atomic_file(inode))
>>> - goto out;
>
> Seems that we need this check.
>
>>> -
>>> if (f2fs_is_pinned_file(inode)) {
>>> f2fs_pin_file_control(inode, true);
>>> goto out;
>>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>> goto out;
>>>
>>> - if (f2fs_is_atomic_file(inode))
>>> - goto out;
>
> Ditto.
>
> Thanks,
>
>>> if (f2fs_is_pinned_file(inode)) {
>>> if (gc_type == FG_GC)
>>> f2fs_pin_file_control(inode, true);
>>>
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/2/9 20:56, Yunlong Song wrote:
> As what I point in last mail, if the atomic file is not committed
> yet, gc_data_segment will register_inmem_page the GCed data pages.
We will skip GCing that page as below check:
- move_data_{page,block}
- f2fs_is_atomic_file()
skip out;
No?
Thanks,
> This will cause these data pages written twice, the first write
> happens in move_data_page->do_write_data_page, and the second
> write happens in later __commit_inmem_pages->do_write_data_page.
>
> On 2018/2/9 20:44, Chao Yu wrote:
>> On 2018/2/8 11:11, Yunlong Song wrote:
>>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>>
>> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
>> will be not race case to mix GCed page into atomic pages.
>>
>> Or you mean:
>>
>> - gc_data_segment
>> - move_data_page
>> - f2fs_is_atomic_file
>> - f2fs_ioc_start_atomic_write
>> - set_inode_flag(inode, FI_ATOMIC_FILE);
>> - f2fs_set_data_page_dirty
>> - register_inmem_page
>>
>> In this case, GCed page can be mixed into database transaction, but could
>> it cause any problem except break rule of isolation for transaction.
>>
>>> this will cause the atomic commit ops write the GCed data pages twice
>>> (the first write happens in GC).
>>>
>>> How about using the early two patches to separate the inmem data pages
>>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>>> dnode page problem (dnode page commited but data page are not committed
>>> for the GCed page)?
>>
>> Could we fix the race case first, based on that fixing, and then find the
>> place that we can improve?
>>
>>>
>>>
>>> On 2018/2/7 20:16, Chao Yu wrote:
>>>> On 2018/2/6 11:49, Yunlong Song wrote:
>>>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>>>> are committed but GCed data pages are not committed. This can avoid the
>>>>> db file run into inconsistent state when sudden-power-off happens if
>>>>> data pages of atomic file is allowed to be GCed before.
>>>>
>>>> do_fsync: GC:
>>>> - mutex_lock(&fi->commit_lock);
>>>> - lock_page()
>>>> - mutex_lock(&fi->commit_lock);
>>>> - lock_page()
>>>>
>>>>
>>>> Well, please consider lock dependency & code complexity, IMO, reuse
>>>> fi->dio_rwsem[WRITE] will be enough as below:
>>>>
>>>> ---
>>>> fs/f2fs/file.c | 3 +++
>>>> fs/f2fs/gc.c | 5 -----
>>>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 672a542e5464..1bdc11feb8d0 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>
>>>> inode_lock(inode);
>>>>
>>>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>> +
>>>> if (f2fs_is_volatile_file(inode))
>>>> goto err_out;
>>>>
>>>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>>> }
>>>> err_out:
>>>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>> inode_unlock(inode);
>>>> mnt_drop_write_file(filp);
>>>> return ret;
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index b9d93fd532a9..e49416283563 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>> goto out;
>>>>
>>>> - if (f2fs_is_atomic_file(inode))
>>>> - goto out;
>>
>> Seems that we need this check.
>>
>>>> -
>>>> if (f2fs_is_pinned_file(inode)) {
>>>> f2fs_pin_file_control(inode, true);
>>>> goto out;
>>>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>> goto out;
>>>>
>>>> - if (f2fs_is_atomic_file(inode))
>>>> - goto out;
>>
>> Ditto.
>>
>> Thanks,
>>
>>>> if (f2fs_is_pinned_file(inode)) {
>>>> if (gc_type == FG_GC)
>>>> f2fs_pin_file_control(inode, true);
>>>>
>>>
>>
>> .
>>
>
Back to the problem, if we skip out, then the f2fs_gc will go
into dead loop if the apps only atomic start but never atomic
commit. The main aim of my two patches is to remove the skip
action to avoid the dead loop.
On 2018/2/9 21:26, Chao Yu wrote:
> On 2018/2/9 20:56, Yunlong Song wrote:
>> As what I point in last mail, if the atomic file is not committed
>> yet, gc_data_segment will register_inmem_page the GCed data pages.
>
> We will skip GCing that page as below check:
>
> - move_data_{page,block}
> - f2fs_is_atomic_file()
> skip out;
>
> No?
>
> Thanks,
>
>> This will cause these data pages written twice, the first write
>> happens in move_data_page->do_write_data_page, and the second
>> write happens in later __commit_inmem_pages->do_write_data_page.
>>
>> On 2018/2/9 20:44, Chao Yu wrote:
>>> On 2018/2/8 11:11, Yunlong Song wrote:
>>>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>>>
>>> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
>>> will be not race case to mix GCed page into atomic pages.
>>>
>>> Or you mean:
>>>
>>> - gc_data_segment
>>> - move_data_page
>>> - f2fs_is_atomic_file
>>> - f2fs_ioc_start_atomic_write
>>> - set_inode_flag(inode, FI_ATOMIC_FILE);
>>> - f2fs_set_data_page_dirty
>>> - register_inmem_page
>>>
>>> In this case, GCed page can be mixed into database transaction, but could
>>> it cause any problem except break rule of isolation for transaction.
>>>
>>>> this will cause the atomic commit ops write the GCed data pages twice
>>>> (the first write happens in GC).
>>>>
>>>> How about using the early two patches to separate the inmem data pages
>>>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>>>> dnode page problem (dnode page commited but data page are not committed
>>>> for the GCed page)?
>>>
>>> Could we fix the race case first, based on that fixing, and then find the
>>> place that we can improve?
>>>
>>>>
>>>>
>>>> On 2018/2/7 20:16, Chao Yu wrote:
>>>>> On 2018/2/6 11:49, Yunlong Song wrote:
>>>>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>>>>> are committed but GCed data pages are not committed. This can avoid the
>>>>>> db file run into inconsistent state when sudden-power-off happens if
>>>>>> data pages of atomic file is allowed to be GCed before.
>>>>>
>>>>> do_fsync: GC:
>>>>> - mutex_lock(&fi->commit_lock);
>>>>> - lock_page()
>>>>> - mutex_lock(&fi->commit_lock);
>>>>> - lock_page()
>>>>>
>>>>>
>>>>> Well, please consider lock dependency & code complexity, IMO, reuse
>>>>> fi->dio_rwsem[WRITE] will be enough as below:
>>>>>
>>>>> ---
>>>>> fs/f2fs/file.c | 3 +++
>>>>> fs/f2fs/gc.c | 5 -----
>>>>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>> index 672a542e5464..1bdc11feb8d0 100644
>>>>> --- a/fs/f2fs/file.c
>>>>> +++ b/fs/f2fs/file.c
>>>>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>>
>>>>> inode_lock(inode);
>>>>>
>>>>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>> +
>>>>> if (f2fs_is_volatile_file(inode))
>>>>> goto err_out;
>>>>>
>>>>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>>>> }
>>>>> err_out:
>>>>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>> inode_unlock(inode);
>>>>> mnt_drop_write_file(filp);
>>>>> return ret;
>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>> index b9d93fd532a9..e49416283563 100644
>>>>> --- a/fs/f2fs/gc.c
>>>>> +++ b/fs/f2fs/gc.c
>>>>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>> goto out;
>>>>>
>>>>> - if (f2fs_is_atomic_file(inode))
>>>>> - goto out;
>>>
>>> Seems that we need this check.
>>>
>>>>> -
>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>> f2fs_pin_file_control(inode, true);
>>>>> goto out;
>>>>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>> goto out;
>>>>>
>>>>> - if (f2fs_is_atomic_file(inode))
>>>>> - goto out;
>>>
>>> Ditto.
>>>
>>> Thanks,
>>>
>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>> if (gc_type == FG_GC)
>>>>> f2fs_pin_file_control(inode, true);
>>>>>
>>>>
>>>
>>> .
>>>
>>
>
> .
>
--
Thanks,
Yunlong Song
On 2018/2/9 21:29, Yunlong Song wrote:
> Back to the problem, if we skip out, then the f2fs_gc will go
> into dead loop if the apps only atomic start but never atomic
That's another issue, which I have suggest to set a threshold time
to release atomic/volatile pages by balance_fs_bg.
Thanks,
> commit. The main aim of my two patches is to remove the skip
> action to avoid the dead loop.
>
> On 2018/2/9 21:26, Chao Yu wrote:
>> On 2018/2/9 20:56, Yunlong Song wrote:
>>> As what I point in last mail, if the atomic file is not committed
>>> yet, gc_data_segment will register_inmem_page the GCed data pages.
>>
>> We will skip GCing that page as below check:
>>
>> - move_data_{page,block}
>> - f2fs_is_atomic_file()
>> skip out;
>>
>> No?
>>
>> Thanks,
>>
>>> This will cause these data pages written twice, the first write
>>> happens in move_data_page->do_write_data_page, and the second
>>> write happens in later __commit_inmem_pages->do_write_data_page.
>>>
>>> On 2018/2/9 20:44, Chao Yu wrote:
>>>> On 2018/2/8 11:11, Yunlong Song wrote:
>>>>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>>>>
>>>> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
>>>> will be not race case to mix GCed page into atomic pages.
>>>>
>>>> Or you mean:
>>>>
>>>> - gc_data_segment
>>>> - move_data_page
>>>> - f2fs_is_atomic_file
>>>> - f2fs_ioc_start_atomic_write
>>>> - set_inode_flag(inode, FI_ATOMIC_FILE);
>>>> - f2fs_set_data_page_dirty
>>>> - register_inmem_page
>>>>
>>>> In this case, GCed page can be mixed into database transaction, but could
>>>> it cause any problem except break rule of isolation for transaction.
>>>>
>>>>> this will cause the atomic commit ops write the GCed data pages twice
>>>>> (the first write happens in GC).
>>>>>
>>>>> How about using the early two patches to separate the inmem data pages
>>>>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>>>>> dnode page problem (dnode page commited but data page are not committed
>>>>> for the GCed page)?
>>>>
>>>> Could we fix the race case first, based on that fixing, and then find the
>>>> place that we can improve?
>>>>
>>>>>
>>>>>
>>>>> On 2018/2/7 20:16, Chao Yu wrote:
>>>>>> On 2018/2/6 11:49, Yunlong Song wrote:
>>>>>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>>>>>> are committed but GCed data pages are not committed. This can avoid the
>>>>>>> db file run into inconsistent state when sudden-power-off happens if
>>>>>>> data pages of atomic file is allowed to be GCed before.
>>>>>>
>>>>>> do_fsync: GC:
>>>>>> - mutex_lock(&fi->commit_lock);
>>>>>> - lock_page()
>>>>>> - mutex_lock(&fi->commit_lock);
>>>>>> - lock_page()
>>>>>>
>>>>>>
>>>>>> Well, please consider lock dependency & code complexity, IMO, reuse
>>>>>> fi->dio_rwsem[WRITE] will be enough as below:
>>>>>>
>>>>>> ---
>>>>>> fs/f2fs/file.c | 3 +++
>>>>>> fs/f2fs/gc.c | 5 -----
>>>>>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>> index 672a542e5464..1bdc11feb8d0 100644
>>>>>> --- a/fs/f2fs/file.c
>>>>>> +++ b/fs/f2fs/file.c
>>>>>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>>>
>>>>>> inode_lock(inode);
>>>>>>
>>>>>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>>> +
>>>>>> if (f2fs_is_volatile_file(inode))
>>>>>> goto err_out;
>>>>>>
>>>>>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>>>>> }
>>>>>> err_out:
>>>>>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>>> inode_unlock(inode);
>>>>>> mnt_drop_write_file(filp);
>>>>>> return ret;
>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>> index b9d93fd532a9..e49416283563 100644
>>>>>> --- a/fs/f2fs/gc.c
>>>>>> +++ b/fs/f2fs/gc.c
>>>>>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>> goto out;
>>>>>>
>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>> - goto out;
>>>>
>>>> Seems that we need this check.
>>>>
>>>>>> -
>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>> f2fs_pin_file_control(inode, true);
>>>>>> goto out;
>>>>>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>> goto out;
>>>>>>
>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>> - goto out;
>>>>
>>>> Ditto.
>>>>
>>>> Thanks,
>>>>
>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>> if (gc_type == FG_GC)
>>>>>> f2fs_pin_file_control(inode, true);
>>>>>>
>>>>>
>>>>
>>>> .
>>>>
>>>
>>
>> .
>>
>
The problem is that you can not find a proper value of the threshold
time, when f2fs_gc select the GCed data page of the atomic file (which
has atomic started but not atomic committed yet), then f2fs_gc will
run into loop, and all the f2fs ops will be blocked in f2fs_balane_fs.
If the threshold time is set larger (e.g. 5s? Then all the f2fs ops will
block 5s, which will cause unexpected bad result of user experience).
And if the threshold time is set smaller (e.g. 500ms? Then the atomic
ops will probably fail frequently). BTW, some more patches are needed
to notify the atomic ops itself that it has run time out, and should
handle the inmem pages....
Back to these two patches, why not use them to separate inmem pages
and GCed data pages in such a simple way.
On 2018/2/9 21:38, Chao Yu wrote:
> On 2018/2/9 21:29, Yunlong Song wrote:
>> Back to the problem, if we skip out, then the f2fs_gc will go
>> into dead loop if the apps only atomic start but never atomic
>
> That's another issue, which I have suggest to set a threshold time
> to release atomic/volatile pages by balance_fs_bg.
>
> Thanks,
>
>> commit. The main aim of my two patches is to remove the skip
>> action to avoid the dead loop.
>>
>> On 2018/2/9 21:26, Chao Yu wrote:
>>> On 2018/2/9 20:56, Yunlong Song wrote:
>>>> As what I point in last mail, if the atomic file is not committed
>>>> yet, gc_data_segment will register_inmem_page the GCed data pages.
>>>
>>> We will skip GCing that page as below check:
>>>
>>> - move_data_{page,block}
>>> - f2fs_is_atomic_file()
>>> skip out;
>>>
>>> No?
>>>
>>> Thanks,
>>>
>>>> This will cause these data pages written twice, the first write
>>>> happens in move_data_page->do_write_data_page, and the second
>>>> write happens in later __commit_inmem_pages->do_write_data_page.
>>>>
>>>> On 2018/2/9 20:44, Chao Yu wrote:
>>>>> On 2018/2/8 11:11, Yunlong Song wrote:
>>>>>> Then the GCed data pages are totally mixed with the inmem atomic pages,
>>>>>
>>>>> If we add dio_rwsem, GC flow is exclude with atomic write flow. There
>>>>> will be not race case to mix GCed page into atomic pages.
>>>>>
>>>>> Or you mean:
>>>>>
>>>>> - gc_data_segment
>>>>> - move_data_page
>>>>> - f2fs_is_atomic_file
>>>>> - f2fs_ioc_start_atomic_write
>>>>> - set_inode_flag(inode, FI_ATOMIC_FILE);
>>>>> - f2fs_set_data_page_dirty
>>>>> - register_inmem_page
>>>>>
>>>>> In this case, GCed page can be mixed into database transaction, but could
>>>>> it cause any problem except break rule of isolation for transaction.
>>>>>
>>>>>> this will cause the atomic commit ops write the GCed data pages twice
>>>>>> (the first write happens in GC).
>>>>>>
>>>>>> How about using the early two patches to separate the inmem data pages
>>>>>> and GCed data pages, and use dio_rwsem instead of this patch to fix the
>>>>>> dnode page problem (dnode page commited but data page are not committed
>>>>>> for the GCed page)?
>>>>>
>>>>> Could we fix the race case first, based on that fixing, and then find the
>>>>> place that we can improve?
>>>>>
>>>>>>
>>>>>>
>>>>>> On 2018/2/7 20:16, Chao Yu wrote:
>>>>>>> On 2018/2/6 11:49, Yunlong Song wrote:
>>>>>>>> This patch adds fi->commit_lock to avoid the case that GCed node pages
>>>>>>>> are committed but GCed data pages are not committed. This can avoid the
>>>>>>>> db file run into inconsistent state when sudden-power-off happens if
>>>>>>>> data pages of atomic file is allowed to be GCed before.
>>>>>>>
>>>>>>> do_fsync: GC:
>>>>>>> - mutex_lock(&fi->commit_lock);
>>>>>>> - lock_page()
>>>>>>> - mutex_lock(&fi->commit_lock);
>>>>>>> - lock_page()
>>>>>>>
>>>>>>>
>>>>>>> Well, please consider lock dependency & code complexity, IMO, reuse
>>>>>>> fi->dio_rwsem[WRITE] will be enough as below:
>>>>>>>
>>>>>>> ---
>>>>>>> fs/f2fs/file.c | 3 +++
>>>>>>> fs/f2fs/gc.c | 5 -----
>>>>>>> 2 files changed, 3 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>>>> index 672a542e5464..1bdc11feb8d0 100644
>>>>>>> --- a/fs/f2fs/file.c
>>>>>>> +++ b/fs/f2fs/file.c
>>>>>>> @@ -1711,6 +1711,8 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>>>>
>>>>>>> inode_lock(inode);
>>>>>>>
>>>>>>> + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>>>> +
>>>>>>> if (f2fs_is_volatile_file(inode))
>>>>>>> goto err_out;
>>>>>>>
>>>>>>> @@ -1729,6 +1731,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
>>>>>>> ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
>>>>>>> }
>>>>>>> err_out:
>>>>>>> + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
>>>>>>> inode_unlock(inode);
>>>>>>> mnt_drop_write_file(filp);
>>>>>>> return ret;
>>>>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>>>>> index b9d93fd532a9..e49416283563 100644
>>>>>>> --- a/fs/f2fs/gc.c
>>>>>>> +++ b/fs/f2fs/gc.c
>>>>>>> @@ -622,9 +622,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>> goto out;
>>>>>>>
>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>> - goto out;
>>>>>
>>>>> Seems that we need this check.
>>>>>
>>>>>>> -
>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>> f2fs_pin_file_control(inode, true);
>>>>>>> goto out;
>>>>>>> @@ -729,8 +726,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
>>>>>>> if (!check_valid_map(F2FS_I_SB(inode), segno, off))
>>>>>>> goto out;
>>>>>>>
>>>>>>> - if (f2fs_is_atomic_file(inode))
>>>>>>> - goto out;
>>>>>
>>>>> Ditto.
>>>>>
>>>>> Thanks,
>>>>>
>>>>>>> if (f2fs_is_pinned_file(inode)) {
>>>>>>> if (gc_type == FG_GC)
>>>>>>> f2fs_pin_file_control(inode, true);
>>>>>>>
>>>>>>
>>>>>
>>>>> .
>>>>>
>>>>
>>>
>>> .
>>>
>>
>
> .
>
--
Thanks,
Yunlong Song