2022-02-19 00:54:18

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v5 0/6] Readdir improvements

From: Trond Myklebust <[email protected]>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

--
v2: Remove reset of dtsize when NFS_INO_FORCE_READDIR is set
v3: Avoid excessive window shrinking in uncached_readdir case
v4: Track 'ls -l' cache hit/miss statistics
    Improved algorithm for falling back to uncached readdir
    Skip readdirplus when files are being written to
v5: bugfixes
    Skip readdirplus when the acdirmax/acregmax values are low
    Request a full XDR buffer when doing READDIRPLUS

Trond Myklebust (6):
  NFS: Adjust the amount of readahead performed by NFS readdir
  NFS: Simplify nfs_readdir_xdr_to_array()
  NFS: Improve algorithm for falling back to uncached readdir
  NFS: Improve heuristic for readdirplus
  NFS: Don't ask for readdirplus unless it can help nfs_getattr()
  NFSv4: Ask for a full XDR buffer of readdir goodness

fs/nfs/dir.c | 214 +++++++++++++++++++++++++++--------------
fs/nfs/inode.c | 37 +++----
fs/nfs/internal.h | 4 +-
fs/nfs/nfs3xdr.c | 7 +-
fs/nfs/nfs4xdr.c | 6 +-
fs/nfs/nfstrace.h | 1 -
include/linux/nfs_fs.h | 7 +-
7 files changed, 173 insertions(+), 103 deletions(-)

--
2.35.1


2022-02-19 11:00:07

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v5 1/6] NFS: Adjust the amount of readahead performed by NFS readdir

From: Trond Myklebust <[email protected]>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

This patch therefore tries to tone down the amount of readahead we
perform, and adjust it to try to match the amount of data being
requested by user space.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 55 +++++++++++++++++++++++++++++++++++++++++-
include/linux/nfs_fs.h | 1 +
2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8b190c8e4a45..b0ee3a0e0f81 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,6 +69,8 @@ const struct address_space_operations nfs_dir_aops = {
.freepage = nfs_readdir_clear_array,
};

+#define NFS_INIT_DTSIZE PAGE_SIZE
+
static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
@@ -80,6 +82,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
ctx->dir_cookie = 0;
ctx->dup_cookie = 0;
ctx->page_index = 0;
+ ctx->dtsize = NFS_INIT_DTSIZE;
ctx->eof = false;
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
@@ -155,6 +158,7 @@ struct nfs_readdir_descriptor {
struct page *page;
struct dir_context *ctx;
pgoff_t page_index;
+ pgoff_t page_index_max;
u64 dir_cookie;
u64 last_cookie;
u64 dup_cookie;
@@ -167,12 +171,36 @@ struct nfs_readdir_descriptor {
unsigned long gencount;
unsigned long attr_gencount;
unsigned int cache_entry_index;
+ unsigned int buffer_fills;
+ unsigned int dtsize;
signed char duped;
bool plus;
bool eob;
bool eof;
};

+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+ unsigned int maxsize = server->dtsize;
+
+ if (sz > maxsize)
+ sz = maxsize;
+ if (sz < NFS_MIN_FILE_IO_SIZE)
+ sz = NFS_MIN_FILE_IO_SIZE;
+ desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+ nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
+{
+ nfs_set_dtsize(desc, desc->dtsize << 1);
+}
+
static void nfs_readdir_array_init(struct nfs_cache_array *array)
{
memset(array, 0, sizeof(struct nfs_cache_array));
@@ -759,6 +787,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
break;
arrays++;
*arrays = page = new;
+ desc->page_index_max++;
} else {
new = nfs_readdir_page_get_next(mapping,
page->index + 1,
@@ -768,6 +797,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
if (page != *arrays)
nfs_readdir_page_unlock_and_put(page);
page = new;
+ desc->page_index_max = new->index;
}
status = nfs_readdir_add_to_array(entry, page);
} while (!status && !entry->eof);
@@ -833,7 +863,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry;
size_t array_size;
struct inode *inode = file_inode(desc->file);
- size_t dtsize = NFS_SERVER(inode)->dtsize;
+ unsigned int dtsize = desc->dtsize;
int status = -ENOMEM;

entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -869,6 +899,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,

status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
+ desc->buffer_fills++;
} while (!status && nfs_readdir_page_needs_filling(page) &&
page_mapping(page));

@@ -916,6 +947,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
if (!desc->page)
return -ENOMEM;
if (nfs_readdir_page_needs_filling(desc->page)) {
+ desc->page_index_max = desc->page_index;
res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
&desc->page, 1);
if (res < 0) {
@@ -1047,6 +1079,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
desc->duped = 0;
+ desc->page_index_max = 0;

status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);

@@ -1056,10 +1089,22 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
}
desc->page = NULL;

+ /*
+ * Grow the dtsize if we have to go back for more pages,
+ * or shrink it if we're reading too many.
+ */
+ if (!desc->eof) {
+ if (!desc->eob)
+ nfs_grow_dtsize(desc);
+ else if (desc->buffer_fills == 1 &&
+ i < (desc->page_index_max >> 1))
+ nfs_shrink_dtsize(desc);
+ }

for (i = 0; i < sz && arrays[i]; i++)
nfs_readdir_page_array_free(arrays[i]);
out:
+ desc->page_index_max = -1;
kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
return status;
@@ -1102,6 +1147,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->file = file;
desc->ctx = ctx;
desc->plus = nfs_use_readdirplus(inode, ctx);
+ desc->page_index_max = -1;

spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1110,6 +1156,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
page_index = dir_ctx->page_index;
desc->attr_gencount = dir_ctx->attr_gencount;
desc->eof = dir_ctx->eof;
+ nfs_set_dtsize(desc, dir_ctx->dtsize);
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
spin_unlock(&file->f_lock);

@@ -1151,6 +1198,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)

nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
+ if (desc->eob || desc->eof)
+ break;
+ /* Grow the dtsize if we have to go back for more pages */
+ if (desc->page_index == desc->page_index_max)
+ nfs_grow_dtsize(desc);
} while (!desc->eob && !desc->eof);

spin_lock(&file->f_lock);
@@ -1160,6 +1212,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
dir_ctx->eof = desc->eof;
+ dir_ctx->dtsize = desc->dtsize;
memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
spin_unlock(&file->f_lock);
out_free:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6e10725887d1..d27f7e788624 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,6 +106,7 @@ struct nfs_open_dir_context {
__u64 dir_cookie;
__u64 dup_cookie;
pgoff_t page_index;
+ unsigned int dtsize;
signed char duped;
bool eof;
};
--
2.35.1

2022-02-21 09:32:02

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v5 2/6] NFS: Simplify nfs_readdir_xdr_to_array()

From: Trond Myklebust <[email protected]>

Recent changes to readdir mean that we can cope with partially filled
page cache entries, so we no longer need to rely on looping in
nfs_readdir_xdr_to_array().

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 29 +++++++++++------------------
1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b0ee3a0e0f81..10421b5331ca 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -864,6 +864,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
size_t array_size;
struct inode *inode = file_inode(desc->file);
unsigned int dtsize = desc->dtsize;
+ unsigned int pglen;
int status = -ENOMEM;

entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -881,28 +882,20 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
if (!pages)
goto out;

- do {
- unsigned int pglen;
- status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
- pages, dtsize,
- verf_res);
- if (status < 0)
- break;
-
- pglen = status;
- if (pglen == 0) {
- nfs_readdir_page_set_eof(page);
- break;
- }
-
- verf_arg = verf_res;
+ status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
+ dtsize, verf_res);
+ if (status < 0)
+ goto free_pages;

+ pglen = status;
+ if (pglen != 0)
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
- desc->buffer_fills++;
- } while (!status && nfs_readdir_page_needs_filling(page) &&
- page_mapping(page));
+ else
+ nfs_readdir_page_set_eof(page);
+ desc->buffer_fills++;

+free_pages:
nfs_readdir_free_pages(pages, array_size);
out:
nfs_free_fattr(entry->fattr);
--
2.35.1