2022-03-31 15:35:19

by Jan Kara

[permalink] [raw]
Subject: Re: [PATCH 1/2] nfsd: Fix a write performance regression

On Thu 31-03-22 09:54:01, [email protected] wrote:
> From: Trond Myklebust <[email protected]>
>
> The call to filemap_flush() in nfsd_file_put() is there to ensure that
> we clear out any writes belonging to a NFSv3 client relatively quickly
> and avoid situations where the file can't be evicted by the garbage
> collector. It also ensures that we detect write errors quickly.
>
> The problem is this causes a regression in performance for some
> workloads.
>
> So try to improve matters by deferring writeback until we're ready to
> close the file, and need to detect errors so that we can force the
> client to resend.
>
> Tested-by: Jan Kara <[email protected]>
> Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()")
> Signed-off-by: Trond Myklebust <[email protected]>

Perphaps you could add:

Link: https://lore.kernel.org/all/[email protected]

To make life of Thorsten Leemhuis doing regression tracking simpler :) (I
think his automation will close the regression once it sees a patch like
that merged).

Honza

> ---
> fs/nfsd/filecache.c | 18 +++++++++++++++---
> 1 file changed, 15 insertions(+), 3 deletions(-)
>
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 8bc807c5fea4..9578a6317709 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
> return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
> }
>
> +static void
> +nfsd_file_flush(struct nfsd_file *nf)
> +{
> + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
> + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> +}
> +
> static void
> nfsd_file_do_unhash(struct nfsd_file *nf)
> {
> @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf)
> return;
> }
>
> - filemap_flush(nf->nf_file->f_mapping);
> is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
> - nfsd_file_put_noref(nf);
> - if (is_hashed)
> + if (!is_hashed) {
> + nfsd_file_flush(nf);
> + nfsd_file_put_noref(nf);
> + } else {
> + nfsd_file_put_noref(nf);
> nfsd_file_schedule_laundrette();
> + }
> if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
> nfsd_file_gc();
> }
> @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
> while(!list_empty(dispose)) {
> nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
> list_del(&nf->nf_lru);
> + nfsd_file_flush(nf);
> nfsd_file_put_noref(nf);
> }
> }
> @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
> while(!list_empty(dispose)) {
> nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
> list_del(&nf->nf_lru);
> + nfsd_file_flush(nf);
> if (!refcount_dec_and_test(&nf->nf_ref))
> continue;
> if (nfsd_file_free(nf))
> --
> 2.35.1
>
--
Jan Kara <[email protected]>
SUSE Labs, CR


2022-03-31 18:43:13

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 1/2] nfsd: Fix a write performance regression



> On Mar 31, 2022, at 10:36 AM, Jan Kara <[email protected]> wrote:
>
> On Thu 31-03-22 09:54:01, [email protected] wrote:
>> From: Trond Myklebust <[email protected]>
>>
>> The call to filemap_flush() in nfsd_file_put() is there to ensure that
>> we clear out any writes belonging to a NFSv3 client relatively quickly
>> and avoid situations where the file can't be evicted by the garbage
>> collector. It also ensures that we detect write errors quickly.
>>
>> The problem is this causes a regression in performance for some
>> workloads.
>>
>> So try to improve matters by deferring writeback until we're ready to
>> close the file, and need to detect errors so that we can force the
>> client to resend.
>>
>> Tested-by: Jan Kara <[email protected]>
>> Fixes: b6669305d35a ("nfsd: Reduce the number of calls to nfsd_file_gc()")
>> Signed-off-by: Trond Myklebust <[email protected]>
>
> Perphaps you could add:
>
> Link: https://lore.kernel.org/all/[email protected]
>
> To make life of Thorsten Leemhuis doing regression tracking simpler :) (I
> think his automation will close the regression once it sees a patch like
> that merged).

I'll add that (no need to resend).


>
> Honza
>
>> ---
>> fs/nfsd/filecache.c | 18 +++++++++++++++---
>> 1 file changed, 15 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>> index 8bc807c5fea4..9578a6317709 100644
>> --- a/fs/nfsd/filecache.c
>> +++ b/fs/nfsd/filecache.c
>> @@ -235,6 +235,13 @@ nfsd_file_check_write_error(struct nfsd_file *nf)
>> return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
>> }
>>
>> +static void
>> +nfsd_file_flush(struct nfsd_file *nf)
>> +{
>> + if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
>> + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
>> +}
>> +
>> static void
>> nfsd_file_do_unhash(struct nfsd_file *nf)
>> {
>> @@ -302,11 +309,14 @@ nfsd_file_put(struct nfsd_file *nf)
>> return;
>> }
>>
>> - filemap_flush(nf->nf_file->f_mapping);
>> is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
>> - nfsd_file_put_noref(nf);
>> - if (is_hashed)
>> + if (!is_hashed) {
>> + nfsd_file_flush(nf);
>> + nfsd_file_put_noref(nf);
>> + } else {
>> + nfsd_file_put_noref(nf);
>> nfsd_file_schedule_laundrette();
>> + }
>> if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
>> nfsd_file_gc();
>> }
>> @@ -327,6 +337,7 @@ nfsd_file_dispose_list(struct list_head *dispose)
>> while(!list_empty(dispose)) {
>> nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
>> list_del(&nf->nf_lru);
>> + nfsd_file_flush(nf);
>> nfsd_file_put_noref(nf);
>> }
>> }
>> @@ -340,6 +351,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose)
>> while(!list_empty(dispose)) {
>> nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
>> list_del(&nf->nf_lru);
>> + nfsd_file_flush(nf);
>> if (!refcount_dec_and_test(&nf->nf_ref))
>> continue;
>> if (nfsd_file_free(nf))
>> --
>> 2.35.1
>>
> --
> Jan Kara <[email protected]>
> SUSE Labs, CR

--
Chuck Lever