2022-03-13 19:11:25

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v10 08/26] NFS: Don't re-read the entire page cache to find the next cookie

From: Trond Myklebust <[email protected]>

If the page cache entry that was last read gets invalidated for some
reason, then make sure we can re-create it on the next call to readdir.
This, combined with the cache page validation, allows us to reuse the
cached value of page-index on successive calls to nfs_readdir.

Credit is due to Benjamin Coddington for showing that the concept works,
and that it allows for improved cache sharing between processes even in
the case where pages are lost due to LRU or active invalidation.

Suggested-by: Benjamin Coddington <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 10 +++++++---
include/linux/nfs_fs.h | 1 +
2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1767f755460..93f70698e401 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1120,6 +1120,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->dup_cookie = dir_ctx->dup_cookie;
desc->duped = dir_ctx->duped;
page_index = dir_ctx->page_index;
+ desc->page_index = page_index;
+ desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount;
desc->eof = dir_ctx->eof;
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
@@ -1168,6 +1170,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
spin_lock(&file->f_lock);
dir_ctx->dir_cookie = desc->dir_cookie;
dir_ctx->dup_cookie = desc->dup_cookie;
+ dir_ctx->last_cookie = desc->last_cookie;
dir_ctx->duped = desc->duped;
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
@@ -1209,10 +1212,11 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
}
if (offset != filp->f_pos) {
filp->f_pos = offset;
- if (nfs_readdir_use_cookie(filp))
- dir_ctx->dir_cookie = offset;
- else
+ if (!nfs_readdir_use_cookie(filp)) {
dir_ctx->dir_cookie = 0;
+ dir_ctx->page_index = 0;
+ } else
+ dir_ctx->dir_cookie = offset;
if (offset == 0)
memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
dir_ctx->duped = 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6e10725887d1..1c533f2c1f36 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -105,6 +105,7 @@ struct nfs_open_dir_context {
__be32 verf[NFS_DIR_VERIFIER_SIZE];
__u64 dir_cookie;
__u64 dup_cookie;
+ __u64 last_cookie;
pgoff_t page_index;
signed char duped;
bool eof;
--
2.35.1


2022-03-14 07:39:28

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v10 09/26] NFS: Don't advance the page pointer unless the page is full

From: Trond Myklebust <[email protected]>

When we hit the end of the data in the readdir page, we don't want to
start filling a new page, unless this one is full.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 32 ++++++++++++++++++++++----------
1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 93f70698e401..60f7feee0a16 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -417,6 +417,18 @@ bool nfs_readdir_use_cookie(const struct file *filp)
return true;
}

+static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
+ struct nfs_readdir_descriptor *desc)
+{
+ if (array->page_full) {
+ desc->last_cookie = array->last_cookie;
+ desc->current_index += array->size;
+ desc->cache_entry_index = 0;
+ desc->page_index++;
+ } else
+ desc->last_cookie = array->array[0].cookie;
+}
+
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc)
{
@@ -428,6 +440,7 @@ static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
if (diff >= array->size) {
if (array->page_is_eof)
goto out_eof;
+ nfs_readdir_seek_next_array(array, desc);
return -EAGAIN;
}

@@ -500,7 +513,8 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
status = -EBADCOOKIE;
if (desc->dir_cookie == array->last_cookie)
desc->eof = true;
- }
+ } else
+ nfs_readdir_seek_next_array(array, desc);
out:
return status;
}
@@ -517,11 +531,6 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
else
status = nfs_readdir_search_for_cookie(array, desc);

- if (status == -EAGAIN) {
- desc->last_cookie = array->last_cookie;
- desc->current_index += array->size;
- desc->page_index++;
- }
kunmap_atomic(array);
return status;
}
@@ -998,7 +1007,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
{
struct file *file = desc->file;
struct nfs_cache_array *array;
- unsigned int i = 0;
+ unsigned int i;

array = kmap(desc->page);
for (i = desc->cache_entry_index; i < array->size; i++) {
@@ -1011,10 +1020,13 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
break;
}
memcpy(desc->verf, verf, sizeof(desc->verf));
- if (i < (array->size-1))
- desc->dir_cookie = array->array[i+1].cookie;
- else
+ if (i == array->size - 1) {
desc->dir_cookie = array->last_cookie;
+ nfs_readdir_seek_next_array(array, desc);
+ } else {
+ desc->dir_cookie = array->array[i + 1].cookie;
+ desc->last_cookie = array->array[0].cookie;
+ }
if (nfs_readdir_use_cookie(file))
desc->ctx->pos = desc->dir_cookie;
else
--
2.35.1

2022-03-14 12:14:28

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v10 10/26] NFS: Adjust the amount of readahead performed by NFS readdir

From: Trond Myklebust <[email protected]>

The current NFS readdir code will always try to maximise the amount of
readahead it performs on the assumption that we can cache anything that
isn't immediately read by the process.
There are several cases where this assumption breaks down, including
when the 'ls -l' heuristic kicks in to try to force use of readdirplus
as a batch replacement for lookup/getattr.

This patch therefore tries to tone down the amount of readahead we
perform, and adjust it to try to match the amount of data being
requested by user space.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 53 +++++++++++++++++++++++++++++++++++++++++-
include/linux/nfs_fs.h | 1 +
2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 60f7feee0a16..520dc3ec4aef 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -69,6 +69,8 @@ const struct address_space_operations nfs_dir_aops = {
.freepage = nfs_readdir_clear_array,
};

+#define NFS_INIT_DTSIZE PAGE_SIZE
+
static struct nfs_open_dir_context *
alloc_nfs_open_dir_context(struct inode *dir)
{
@@ -78,6 +80,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
if (ctx != NULL) {
ctx->attr_gencount = nfsi->attr_gencount;
+ ctx->dtsize = NFS_INIT_DTSIZE;
spin_lock(&dir->i_lock);
if (list_empty(&nfsi->open_files) &&
(nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -154,6 +157,7 @@ struct nfs_readdir_descriptor {
struct page *page;
struct dir_context *ctx;
pgoff_t page_index;
+ pgoff_t page_index_max;
u64 dir_cookie;
u64 last_cookie;
u64 dup_cookie;
@@ -166,12 +170,36 @@ struct nfs_readdir_descriptor {
unsigned long gencount;
unsigned long attr_gencount;
unsigned int cache_entry_index;
+ unsigned int buffer_fills;
+ unsigned int dtsize;
signed char duped;
bool plus;
bool eob;
bool eof;
};

+static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
+ unsigned int maxsize = server->dtsize;
+
+ if (sz > maxsize)
+ sz = maxsize;
+ if (sz < NFS_MIN_FILE_IO_SIZE)
+ sz = NFS_MIN_FILE_IO_SIZE;
+ desc->dtsize = sz;
+}
+
+static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
+{
+ nfs_set_dtsize(desc, desc->dtsize >> 1);
+}
+
+static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
+{
+ nfs_set_dtsize(desc, desc->dtsize << 1);
+}
+
static void nfs_readdir_array_init(struct nfs_cache_array *array)
{
memset(array, 0, sizeof(struct nfs_cache_array));
@@ -784,6 +812,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
break;
arrays++;
*arrays = page = new;
+ desc->page_index_max++;
} else {
new = nfs_readdir_page_get_next(mapping,
page->index + 1,
@@ -793,6 +822,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
if (page != *arrays)
nfs_readdir_page_unlock_and_put(page);
page = new;
+ desc->page_index_max = new->index;
}
status = nfs_readdir_add_to_array(entry, page);
} while (!status && !entry->eof);
@@ -858,7 +888,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry;
size_t array_size;
struct inode *inode = file_inode(desc->file);
- size_t dtsize = NFS_SERVER(inode)->dtsize;
+ unsigned int dtsize = desc->dtsize;
int status = -ENOMEM;

entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -894,6 +924,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,

status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
+ desc->buffer_fills++;
} while (!status && nfs_readdir_page_needs_filling(page) &&
page_mapping(page));

@@ -941,6 +972,10 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
if (!desc->page)
return -ENOMEM;
if (nfs_readdir_page_needs_filling(desc->page)) {
+ /* Grow the dtsize if we had to go back for more pages */
+ if (desc->page_index == desc->page_index_max)
+ nfs_grow_dtsize(desc);
+ desc->page_index_max = desc->page_index;
res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
&desc->page, 1);
if (res < 0) {
@@ -1075,6 +1110,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
desc->duped = 0;
+ desc->page_index_max = 0;

status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);

@@ -1084,10 +1120,22 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
}
desc->page = NULL;

+ /*
+ * Grow the dtsize if we have to go back for more pages,
+ * or shrink it if we're reading too many.
+ */
+ if (!desc->eof) {
+ if (!desc->eob)
+ nfs_grow_dtsize(desc);
+ else if (desc->buffer_fills == 1 &&
+ i < (desc->page_index_max >> 1))
+ nfs_shrink_dtsize(desc);
+ }

for (i = 0; i < sz && arrays[i]; i++)
nfs_readdir_page_array_free(arrays[i]);
out:
+ desc->page_index_max = -1;
kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
return status;
@@ -1126,6 +1174,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->file = file;
desc->ctx = ctx;
desc->plus = nfs_use_readdirplus(inode, ctx);
+ desc->page_index_max = -1;

spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie;
@@ -1136,6 +1185,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount;
desc->eof = dir_ctx->eof;
+ nfs_set_dtsize(desc, dir_ctx->dtsize);
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
spin_unlock(&file->f_lock);

@@ -1187,6 +1237,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
dir_ctx->eof = desc->eof;
+ dir_ctx->dtsize = desc->dtsize;
memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
spin_unlock(&file->f_lock);
out_free:
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1c533f2c1f36..691a27936849 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -107,6 +107,7 @@ struct nfs_open_dir_context {
__u64 dup_cookie;
__u64 last_cookie;
pgoff_t page_index;
+ unsigned int dtsize;
signed char duped;
bool eof;
};
--
2.35.1