2022-02-25 19:24:47

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v8 20/24] NFS: Trace effects of the readdirplus heuristic

From: Trond Myklebust <[email protected]>

Enable tracking of when the readdirplus heuristic causes a page cache
invalidation.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 11 ++++++++++-
fs/nfs/nfstrace.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fa32eb5f6391..2571d2c03116 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -988,6 +988,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
if (res == -EBADCOOKIE || res == -ENOTSYNC) {
invalidate_inode_pages2(desc->file->f_mapping);
desc->page_index = 0;
+ trace_nfs_readdir_invalidate_cache_range(
+ inode, 0, MAX_LFS_FILESIZE);
return -EAGAIN;
}
return res;
@@ -1002,6 +1004,9 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
invalidate_inode_pages2_range(desc->file->f_mapping,
desc->page_index_max + 1,
-1);
+ trace_nfs_readdir_invalidate_cache_range(
+ inode, desc->page_index_max + 1,
+ MAX_LFS_FILESIZE);
}
}
res = nfs_readdir_search_array(desc);
@@ -1148,7 +1153,11 @@ static void nfs_readdir_handle_cache_misses(struct inode *inode,
if (desc->ctx->pos == 0 ||
cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD)
return;
- invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+ if (invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1) == 0)
+ return;
+ trace_nfs_readdir_invalidate_cache_range(
+ inode, (loff_t)(page_index + 1) << PAGE_SHIFT,
+ MAX_LFS_FILESIZE);
}

/* The file offset position represents the dirent entry number. A
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 7c1102b991d0..ec2645d20abf 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -273,6 +273,56 @@ DEFINE_NFS_UPDATE_SIZE_EVENT(wcc);
DEFINE_NFS_UPDATE_SIZE_EVENT(update);
DEFINE_NFS_UPDATE_SIZE_EVENT(grow);

+DECLARE_EVENT_CLASS(nfs_inode_range_event,
+ TP_PROTO(
+ const struct inode *inode,
+ loff_t range_start,
+ loff_t range_end
+ ),
+
+ TP_ARGS(inode, range_start, range_end),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __field(u64, version)
+ __field(loff_t, range_start)
+ __field(loff_t, range_end)
+ ),
+
+ TP_fast_assign(
+ const struct nfs_inode *nfsi = NFS_I(inode);
+
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+ __entry->fileid = nfsi->fileid;
+ __entry->version = inode_peek_iversion_raw(inode);
+ __entry->range_start = range_start;
+ __entry->range_end = range_end;
+ ),
+
+ TP_printk(
+ "fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
+ "range=[%lld, %lld]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle, __entry->version,
+ __entry->range_start, __entry->range_end
+ )
+);
+
+#define DEFINE_NFS_INODE_RANGE_EVENT(name) \
+ DEFINE_EVENT(nfs_inode_range_event, name, \
+ TP_PROTO( \
+ const struct inode *inode, \
+ loff_t range_start, \
+ loff_t range_end \
+ ), \
+ TP_ARGS(inode, range_start, range_end))
+
+DEFINE_NFS_INODE_RANGE_EVENT(nfs_readdir_invalidate_cache_range);
+
DECLARE_EVENT_CLASS(nfs_readdir_event,
TP_PROTO(
const struct file *file,
--
2.35.1


2022-02-26 02:27:29

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v8 21/24] NFS: Convert readdir page cache to use a cookie based index

From: Trond Myklebust <[email protected]>

Instead of using a linear index to address the pages, use the cookie of
the first entry, since that is what we use to match the page anyway.

This allows us to avoid re-reading the entire cache on a seekdir() type
of operation. The latter is very common when re-exporting NFS, and is a
major performance drain.

The change does affect our duplicate cookie detection, since we can no
longer rely on the page index as a linear offset for detecting whether
we looped backwards. However, since we no longer do a linear search
through all the pages on each call to nfs_readdir(), this is less of a
concern than it was previously.
The other downside is that invalidate_mapping_pages() can no longer use
the page index to avoid clearing pages that have been read. A subsequent
patch will restore the functionality this provides to the 'ls -l'
heuristic.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/Kconfig | 4 ++
fs/nfs/dir.c | 141 ++++++++++++++++++-----------------------
include/linux/nfs_fs.h | 2 -
3 files changed, 67 insertions(+), 80 deletions(-)

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 14a72224b657..47a53b3362b6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -4,6 +4,10 @@ config NFS_FS
depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD
select SUNRPC
+ select CRYPTO
+ select CRYPTO_HASH
+ select XXHASH
+ select CRYPTO_XXHASH
select NFS_ACL_SUPPORT if NFS_V3_ACL
help
Choose Y here if you want to access files residing on other
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2571d2c03116..827ca1ed0cb7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -39,6 +39,7 @@
#include <linux/sched.h>
#include <linux/kmemleak.h>
#include <linux/xattr.h>
+#include <linux/xxhash.h>

#include "delegation.h"
#include "iostat.h"
@@ -159,9 +160,7 @@ struct nfs_readdir_descriptor {
pgoff_t page_index_max;
u64 dir_cookie;
u64 last_cookie;
- u64 dup_cookie;
loff_t current_index;
- loff_t prev_index;

__be32 verf[NFS_DIR_VERIFIER_SIZE];
unsigned long dir_verifier;
@@ -171,7 +170,6 @@ struct nfs_readdir_descriptor {
unsigned int cache_entry_index;
unsigned int buffer_fills;
unsigned int dtsize;
- signed char duped;
bool plus;
bool eob;
bool eof;
@@ -334,6 +332,28 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
return ret;
}

+#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
+/*
+ * Hash algorithm allowing content-addressable access to sequences
+ * of directory cookies. Content is addressed by the value of the
+ * cookie index of the first readdir entry in a page.
+ *
+ * The xxhash algorithm is chosen because it is fast, and is supposed
+ * to result in a decent flat distribution of hashes.
+ *
+ * We then select only the lowest 18 bits to avoid issues with excessive
+ * memory use for the page cache XArray. 18 bits should allow the caching
+ * of 262144 pages of sequences of readdir entries. Since each page holds
+ * 127 readdir entries for a typical 64-bit system, that works out to a
+ * cache of ~ 33 million entries per directory.
+ */
+static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
+{
+ if (cookie == 0)
+ return 0;
+ return xxhash(&cookie, sizeof(cookie), 0) & NFS_READDIR_COOKIE_MASK;
+}
+
static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
u64 change_attr)
{
@@ -355,15 +375,15 @@ static void nfs_readdir_page_unlock_and_put(struct page *page)
}

static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
- pgoff_t index, u64 last_cookie)
+ u64 last_cookie,
+ u64 change_attr)
{
+ pgoff_t index = nfs_readdir_page_cookie_hash(last_cookie);
struct page *page;
- u64 change_attr;

page = grab_cache_page(mapping, index);
if (!page)
return NULL;
- change_attr = inode_peek_iversion_raw(mapping->host);
if (PageUptodate(page)) {
if (nfs_readdir_page_validate(page, last_cookie, change_attr))
return page;
@@ -374,11 +394,6 @@ static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
return page;
}

-static loff_t nfs_readdir_page_offset(struct page *page)
-{
- return (loff_t)page->index * (loff_t)nfs_readdir_array_maxentries();
-}
-
static u64 nfs_readdir_page_last_cookie(struct page *page)
{
struct nfs_cache_array *array;
@@ -411,11 +426,11 @@ static void nfs_readdir_page_set_eof(struct page *page)
}

static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
- pgoff_t index, u64 cookie)
+ u64 cookie, u64 change_attr)
{
struct page *page;

- page = nfs_readdir_page_get_locked(mapping, index, cookie);
+ page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
if (page) {
if (nfs_readdir_page_last_cookie(page) == cookie)
return page;
@@ -443,6 +458,13 @@ bool nfs_readdir_use_cookie(const struct file *filp)
return true;
}

+static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
+{
+ desc->current_index = 0;
+ desc->last_cookie = 0;
+ desc->page_index = 0;
+}
+
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc)
{
@@ -491,32 +513,11 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,

for (i = 0; i < array->size; i++) {
if (array->array[i].cookie == desc->dir_cookie) {
- struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
-
- new_pos = nfs_readdir_page_offset(desc->page) + i;
- if (desc->attr_gencount != nfsi->attr_gencount) {
- desc->duped = 0;
- desc->attr_gencount = nfsi->attr_gencount;
- } else if (new_pos < desc->prev_index) {
- if (desc->duped > 0
- && desc->dup_cookie == desc->dir_cookie) {
- if (printk_ratelimit()) {
- pr_notice("NFS: directory %pD2 contains a readdir loop."
- "Please contact your server vendor. "
- "The file: %s has duplicate cookie %llu\n",
- desc->file, array->array[i].name, desc->dir_cookie);
- }
- status = -ELOOP;
- goto out;
- }
- desc->dup_cookie = desc->dir_cookie;
- desc->duped = -1;
- }
+ new_pos = desc->current_index + i;
if (nfs_readdir_use_cookie(desc->file))
desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos = new_pos;
- desc->prev_index = new_pos;
desc->cache_entry_index = i;
return 0;
}
@@ -527,7 +528,6 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
if (desc->dir_cookie == array->last_cookie)
desc->eof = true;
}
-out:
return status;
}

@@ -777,10 +777,9 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
/* Perform conversion from xdr to cache array */
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry,
- struct page **xdr_pages,
- unsigned int buflen,
- struct page **arrays,
- size_t narrays)
+ struct page **xdr_pages, unsigned int buflen,
+ struct page **arrays, size_t narrays,
+ u64 change_attr)
{
struct address_space *mapping = desc->file->f_mapping;
struct xdr_stream stream;
@@ -820,18 +819,16 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
break;
arrays++;
*arrays = page = new;
- desc->page_index_max++;
} else {
- new = nfs_readdir_page_get_next(mapping,
- page->index + 1,
- entry->prev_cookie);
+ new = nfs_readdir_page_get_next(
+ mapping, entry->prev_cookie, change_attr);
if (!new)
break;
if (page != *arrays)
nfs_readdir_page_unlock_and_put(page);
page = new;
- desc->page_index_max = new->index;
}
+ desc->page_index_max++;
status = nfs_readdir_add_to_array(entry, page);
} while (!status && !entry->eof);

@@ -891,6 +888,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
__be32 *verf_arg, __be32 *verf_res,
struct page **arrays, size_t narrays)
{
+ u64 change_attr;
struct page **pages;
struct page *page = *arrays;
struct nfs_entry *entry;
@@ -915,6 +913,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
if (!pages)
goto out;

+ change_attr = inode_peek_iversion_raw(inode);
status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
dtsize, verf_res);
if (status < 0)
@@ -923,7 +922,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
pglen = status;
if (pglen != 0)
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
- arrays, narrays);
+ arrays, narrays, change_attr);
else
nfs_readdir_page_set_eof(page);
desc->buffer_fills++;
@@ -953,9 +952,11 @@ nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
static struct page *
nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
{
- return nfs_readdir_page_get_locked(desc->file->f_mapping,
- desc->page_index,
- desc->last_cookie);
+ struct address_space *mapping = desc->file->f_mapping;
+ u64 change_attr = inode_peek_iversion_raw(mapping->host);
+
+ return nfs_readdir_page_get_locked(mapping, desc->last_cookie,
+ change_attr);
}

/*
@@ -987,7 +988,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
trace_nfs_readdir_cache_fill_done(inode, res);
if (res == -EBADCOOKIE || res == -ENOTSYNC) {
invalidate_inode_pages2(desc->file->f_mapping);
- desc->page_index = 0;
+ nfs_readdir_rewind_search(desc);
trace_nfs_readdir_invalidate_cache_range(
inode, 0, MAX_LFS_FILESIZE);
return -EAGAIN;
@@ -1001,12 +1002,10 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
memcpy(nfsi->cookieverf, verf,
sizeof(nfsi->cookieverf));
- invalidate_inode_pages2_range(desc->file->f_mapping,
- desc->page_index_max + 1,
+ invalidate_inode_pages2_range(desc->file->f_mapping, 1,
-1);
trace_nfs_readdir_invalidate_cache_range(
- inode, desc->page_index_max + 1,
- MAX_LFS_FILESIZE);
+ inode, 1, MAX_LFS_FILESIZE);
}
}
res = nfs_readdir_search_array(desc);
@@ -1022,11 +1021,6 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
int res;

do {
- if (desc->page_index == 0) {
- desc->current_index = 0;
- desc->prev_index = 0;
- desc->last_cookie = 0;
- }
res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN);
return res;
@@ -1061,8 +1055,6 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
desc->ctx->pos = desc->dir_cookie;
else
desc->ctx->pos++;
- if (desc->duped != 0)
- desc->duped = 1;
}
if (array->page_is_eof)
desc->eof = !desc->eob;
@@ -1104,7 +1096,6 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
desc->page_index = 0;
desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie;
- desc->duped = 0;
desc->page_index_max = 0;

trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
@@ -1137,6 +1128,8 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
for (i = 0; i < sz && arrays[i]; i++)
nfs_readdir_page_array_free(arrays[i]);
out:
+ if (!nfs_readdir_use_cookie(desc->file))
+ nfs_readdir_rewind_search(desc);
desc->page_index_max = -1;
kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
@@ -1147,17 +1140,14 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)

static void nfs_readdir_handle_cache_misses(struct inode *inode,
struct nfs_readdir_descriptor *desc,
- pgoff_t page_index,
unsigned int cache_misses)
{
if (desc->ctx->pos == 0 ||
cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD)
return;
- if (invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1) == 0)
+ if (invalidate_mapping_pages(inode->i_mapping, 0, -1) == 0)
return;
- trace_nfs_readdir_invalidate_cache_range(
- inode, (loff_t)(page_index + 1) << PAGE_SHIFT,
- MAX_LFS_FILESIZE);
+ trace_nfs_readdir_invalidate_cache_range(inode, 0, MAX_LFS_FILESIZE);
}

/* The file offset position represents the dirent entry number. A
@@ -1197,8 +1187,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)

spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie;
- desc->dup_cookie = dir_ctx->dup_cookie;
- desc->duped = dir_ctx->duped;
page_index = dir_ctx->page_index;
desc->page_index = page_index;
desc->last_cookie = dir_ctx->last_cookie;
@@ -1216,7 +1204,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
}

desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
- nfs_readdir_handle_cache_misses(inode, desc, page_index, cache_misses);
+ nfs_readdir_handle_cache_misses(inode, desc, cache_misses);

do {
res = readdir_search_pagecache(desc);
@@ -1236,7 +1224,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
}
if (res == -ETOOSMALL && desc->plus) {
nfs_zap_caches(inode);
- desc->page_index = 0;
desc->plus = false;
desc->eof = false;
continue;
@@ -1250,9 +1237,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)

spin_lock(&file->f_lock);
dir_ctx->dir_cookie = desc->dir_cookie;
- dir_ctx->dup_cookie = desc->dup_cookie;
dir_ctx->last_cookie = desc->last_cookie;
- dir_ctx->duped = desc->duped;
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
dir_ctx->eof = desc->eof;
@@ -1295,13 +1280,13 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
if (offset != filp->f_pos) {
filp->f_pos = offset;
dir_ctx->page_index = 0;
- if (!nfs_readdir_use_cookie(filp))
+ if (!nfs_readdir_use_cookie(filp)) {
dir_ctx->dir_cookie = 0;
- else
+ dir_ctx->last_cookie = 0;
+ } else {
dir_ctx->dir_cookie = offset;
- if (offset == 0)
- memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
- dir_ctx->duped = 0;
+ dir_ctx->last_cookie = offset;
+ }
dir_ctx->eof = false;
}
spin_unlock(&filp->f_lock);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 20a4cf0acad2..42aad886d3c0 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,11 +106,9 @@ struct nfs_open_dir_context {
unsigned long attr_gencount;
__be32 verf[NFS_DIR_VERIFIER_SIZE];
__u64 dir_cookie;
- __u64 dup_cookie;
__u64 last_cookie;
pgoff_t page_index;
unsigned int dtsize;
- signed char duped;
bool eof;
struct rcu_head rcu_head;
};
--
2.35.1

2022-02-26 02:38:00

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v8 22/24] NFS: Fix up forced readdirplus

From: Trond Myklebust <[email protected]>

Avoid clearing the entire readdir page cache if we're just doing forced
readdirplus for the 'ls -l' heuristic.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 57 ++++++++++++++++++++++++++++--------------
fs/nfs/nfstrace.h | 1 +
include/linux/nfs_fs.h | 1 +
3 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 827ca1ed0cb7..4af00465806f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -170,6 +170,8 @@ struct nfs_readdir_descriptor {
unsigned int cache_entry_index;
unsigned int buffer_fills;
unsigned int dtsize;
+ bool clear_cache;
+ bool force_plus;
bool plus;
bool eob;
bool eof;
@@ -368,6 +370,16 @@ static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
return ret;
}

+static bool nfs_readdir_page_is_full(struct page *page)
+{
+ struct nfs_cache_array *array = kmap_atomic(page);
+ int ret;
+
+ ret = nfs_readdir_array_is_full(array);
+ kunmap_atomic(array);
+ return ret;
+}
+
static void nfs_readdir_page_unlock_and_put(struct page *page)
{
unlock_page(page);
@@ -376,7 +388,7 @@ static void nfs_readdir_page_unlock_and_put(struct page *page)

static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
u64 last_cookie,
- u64 change_attr)
+ u64 change_attr, bool clear)
{
pgoff_t index = nfs_readdir_page_cookie_hash(last_cookie);
struct page *page;
@@ -385,8 +397,10 @@ static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
if (!page)
return NULL;
if (PageUptodate(page)) {
- if (nfs_readdir_page_validate(page, last_cookie, change_attr))
- return page;
+ if (nfs_readdir_page_validate(page, last_cookie, change_attr)) {
+ if (!clear || !nfs_readdir_page_is_full(page))
+ return page;
+ }
nfs_readdir_clear_array(page);
}
nfs_readdir_page_init_array(page, last_cookie, change_attr);
@@ -407,13 +421,7 @@ static u64 nfs_readdir_page_last_cookie(struct page *page)

static bool nfs_readdir_page_needs_filling(struct page *page)
{
- struct nfs_cache_array *array;
- bool ret;
-
- array = kmap_atomic(page);
- ret = !nfs_readdir_array_is_full(array);
- kunmap_atomic(array);
- return ret;
+ return !nfs_readdir_page_is_full(page);
}

static void nfs_readdir_page_set_eof(struct page *page)
@@ -426,11 +434,12 @@ static void nfs_readdir_page_set_eof(struct page *page)
}

static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
- u64 cookie, u64 change_attr)
+ u64 cookie, u64 change_attr,
+ bool clear)
{
struct page *page;

- page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
+ page = nfs_readdir_page_get_locked(mapping, cookie, change_attr, clear);
if (page) {
if (nfs_readdir_page_last_cookie(page) == cookie)
return page;
@@ -820,8 +829,10 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
arrays++;
*arrays = page = new;
} else {
- new = nfs_readdir_page_get_next(
- mapping, entry->prev_cookie, change_attr);
+ new = nfs_readdir_page_get_next(mapping,
+ entry->prev_cookie,
+ change_attr,
+ desc->clear_cache);
if (!new)
break;
if (page != *arrays)
@@ -956,7 +967,7 @@ nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
u64 change_attr = inode_peek_iversion_raw(mapping->host);

return nfs_readdir_page_get_locked(mapping, desc->last_cookie,
- change_attr);
+ change_attr, desc->clear_cache);
}

/*
@@ -1007,6 +1018,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
trace_nfs_readdir_invalidate_cache_range(
inode, 1, MAX_LFS_FILESIZE);
}
+ desc->clear_cache = false;
}
res = nfs_readdir_search_array(desc);
if (res == 0)
@@ -1145,9 +1157,8 @@ static void nfs_readdir_handle_cache_misses(struct inode *inode,
if (desc->ctx->pos == 0 ||
cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD)
return;
- if (invalidate_mapping_pages(inode->i_mapping, 0, -1) == 0)
- return;
- trace_nfs_readdir_invalidate_cache_range(inode, 0, MAX_LFS_FILESIZE);
+ desc->force_plus = true;
+ trace_nfs_readdir_force_readdirplus(inode);
}

/* The file offset position represents the dirent entry number. A
@@ -1191,6 +1202,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->page_index = page_index;
desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount;
+ desc->force_plus = dir_ctx->force_plus;
desc->eof = dir_ctx->eof;
nfs_set_dtsize(desc, dir_ctx->dtsize);
memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
@@ -1204,7 +1216,11 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
}

desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
- nfs_readdir_handle_cache_misses(inode, desc, cache_misses);
+ if (desc->plus)
+ nfs_readdir_handle_cache_misses(inode, desc, cache_misses);
+ else
+ desc->force_plus = false;
+ desc->clear_cache = desc->force_plus;

do {
res = readdir_search_pagecache(desc);
@@ -1233,6 +1249,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)

nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
+ if (desc->page_index == desc->page_index_max)
+ desc->clear_cache = desc->force_plus;
} while (!desc->eob && !desc->eof);

spin_lock(&file->f_lock);
@@ -1240,6 +1258,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
dir_ctx->last_cookie = desc->last_cookie;
dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index;
+ dir_ctx->force_plus = desc->force_plus;
dir_ctx->eof = desc->eof;
dir_ctx->dtsize = desc->dtsize;
memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index ec2645d20abf..59f4ca803fd0 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -160,6 +160,7 @@ DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
DEFINE_NFS_INODE_EVENT(nfs_access_enter);
DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid);
+DEFINE_NFS_INODE_EVENT(nfs_readdir_force_readdirplus);
DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_cache_fill_done);
DEFINE_NFS_INODE_EVENT_DONE(nfs_readdir_uncached_done);

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 42aad886d3c0..3f9625c7d0ef 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -109,6 +109,7 @@ struct nfs_open_dir_context {
__u64 last_cookie;
pgoff_t page_index;
unsigned int dtsize;
+ bool force_plus;
bool eof;
struct rcu_head rcu_head;
};
--
2.35.1