Attempt to address two performance issues:
1) The regression pointed out by Ben when doing 'ls -l' on a directory
that is being modified.
2) When using readdir() to iterate through a directory, avoid lookups
using the same method we use to avoid attribute revalidation.
The last patch is a cleanup.
Trond Myklebust (3):
NFS: Fix a performance regression in readdir
NFS: Be more targeted about readdirplus use when doing
lookup/revalidation
NFS: Replace nfs_force_use_readdirplus() with
nfs_advise_use_readdirplus()
fs/nfs/dir.c | 47 ++++++++++++++---------------------------------
fs/nfs/inode.c | 2 +-
fs/nfs/internal.h | 2 +-
3 files changed, 16 insertions(+), 35 deletions(-)
--
2.7.4
Ben Coddington reports that commit 311324ad1713, by adding the function
nfs_dir_mapping_need_revalidate() that checks page cache validity on
each call to nfs_readdir() causes a performance regression when
the directory is being modified.
If the directory is changing while we're iterating through the directory,
POSIX does not require us to invalidate the page cache unless the user
calls rewinddir(). However, we still do want to ensure that we use
readdirplus in order to avoid a load of stat() calls when the user
is doing an 'ls -l' workload.
The fix should be to invalidate the page cache immediately when we're
setting the NFS_INO_ADVISE_RDPLUS bit.
Reported-by: Benjamin Coddington <[email protected]>
Fixes: 311324ad1713 ("NFS: Be more aggressive in using readdirplus...")
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 15 ++-------------
1 file changed, 2 insertions(+), 13 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5f1af4cd1a33..53e02b8bd9bd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -477,7 +477,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
{
if (!list_empty(&NFS_I(dir)->open_files)) {
nfs_advise_use_readdirplus(dir);
- nfs_zap_mapping(dir, dir->i_mapping);
+ invalidate_mapping_pages(dir->i_mapping, 0, -1);
}
}
@@ -886,17 +886,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
goto out;
}
-static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
-{
- struct nfs_inode *nfsi = NFS_I(dir);
-
- if (nfs_attribute_cache_expired(dir))
- return true;
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- return true;
- return false;
-}
-
/* The file offset position represents the dirent entry number. A
last cookie cache takes care of the common case of reading the
whole directory.
@@ -928,7 +917,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->decode = NFS_PROTO(inode)->decode_dirent;
desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
- if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
+ if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
goto out;
--
2.7.4
There is little point in setting NFS_INO_ADVISE_RDPLUS in nfs_lookup and
nfs_lookup_revalidate() unless a process is actually doing readdir on the
parent directory.
Furthermore, there is little point in using readdirplus if we're trying
to revalidate a negative dentry.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 53e02b8bd9bd..5befd382be7d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -455,14 +455,23 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
}
/*
- * This function is called by the lookup code to request the use of
- * readdirplus to accelerate any future lookups in the same
+ * This function is called by the lookup and getattr code to request the
+ * use of readdirplus to accelerate any future lookups in the same
* directory.
+ * Do this by checking if there is an active file descriptor
+ * and calling nfs_advise_use_readdirplus, then forcing a
+ * cache flush.
*/
static
void nfs_advise_use_readdirplus(struct inode *dir)
{
- set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
+ struct nfs_inode *nfsi = NFS_I(dir);
+
+ if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
+ !list_empty(&nfsi->open_files)) {
+ set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
+ invalidate_mapping_pages(dir->i_mapping, 0, -1);
+ }
}
/*
@@ -475,10 +484,7 @@ void nfs_advise_use_readdirplus(struct inode *dir)
*/
void nfs_force_use_readdirplus(struct inode *dir)
{
- if (!list_empty(&NFS_I(dir)->open_files)) {
- nfs_advise_use_readdirplus(dir);
- invalidate_mapping_pages(dir->i_mapping, 0, -1);
- }
+ nfs_advise_use_readdirplus(dir);
}
static
@@ -1150,7 +1156,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
goto out_bad;
}
- goto out_valid_noent;
+ goto out_valid;
}
if (is_bad_inode(inode)) {
@@ -1192,6 +1198,9 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (IS_ERR(label))
goto out_error;
+ /* We need to force a revalidation: set a readdirplus hint */
+ nfs_advise_use_readdirplus(dir);
+
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
@@ -1211,9 +1220,6 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
out_set_verifier:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
- /* Success: notify readdir to use READDIRPLUS */
- nfs_advise_use_readdirplus(dir);
- out_valid_noent:
if (flags & LOOKUP_RCU) {
if (parent != ACCESS_ONCE(dentry->d_parent))
return -ECHILD;
--
2.7.4
Now that the two functions are the same, just merge them.
Signed-off-by: Trond Myklebust <[email protected]
---
fs/nfs/dir.c | 14 --------------
fs/nfs/inode.c | 2 +-
fs/nfs/internal.h | 2 +-
3 files changed, 2 insertions(+), 16 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5befd382be7d..5ee9666fa8b4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -462,7 +462,6 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
* and calling nfs_advise_use_readdirplus, then forcing a
* cache flush.
*/
-static
void nfs_advise_use_readdirplus(struct inode *dir)
{
struct nfs_inode *nfsi = NFS_I(dir);
@@ -474,19 +473,6 @@ void nfs_advise_use_readdirplus(struct inode *dir)
}
}
-/*
- * This function is mainly for use by nfs_getattr().
- *
- * If this is an 'ls -l', we want to force use of readdirplus.
- * Do this by checking if there is an active file descriptor
- * and calling nfs_advise_use_readdirplus, then forcing a
- * cache flush.
- */
-void nfs_force_use_readdirplus(struct inode *dir)
-{
- nfs_advise_use_readdirplus(dir);
-}
-
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3575e3408bd7..02453d76bfea 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -639,7 +639,7 @@ static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
struct dentry *parent;
parent = dget_parent(dentry);
- nfs_force_use_readdirplus(d_inode(parent));
+ nfs_advise_use_readdirplus(d_inode(parent));
dput(parent);
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 80bcc0befb07..f625a7b2ef14 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -346,7 +346,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
const struct nfs_client_initdata *);
/* dir.c */
-extern void nfs_force_use_readdirplus(struct inode *dir);
+extern void nfs_advise_use_readdirplus(struct inode *dir);
extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
struct shrink_control *sc);
extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
--
2.7.4
.. this one breaks things again:
On 19 Nov 2016, at 11:54, Trond Myklebust wrote:
> There is little point in setting NFS_INO_ADVISE_RDPLUS in nfs_lookup
> and
> nfs_lookup_revalidate() unless a process is actually doing readdir on
> the
> parent directory.
> Furthermore, there is little point in using readdirplus if we're
> trying
> to revalidate a negative dentry.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/dir.c | 28 +++++++++++++++++-----------
> 1 file changed, 17 insertions(+), 11 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 53e02b8bd9bd..5befd382be7d 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -455,14 +455,23 @@ bool nfs_use_readdirplus(struct inode *dir,
> struct dir_context *ctx)
> }
>
> /*
> - * This function is called by the lookup code to request the use of
> - * readdirplus to accelerate any future lookups in the same
> + * This function is called by the lookup and getattr code to request
> the
> + * use of readdirplus to accelerate any future lookups in the same
> * directory.
> + * Do this by checking if there is an active file descriptor
> + * and calling nfs_advise_use_readdirplus, then forcing a
> + * cache flush.
> */
> static
> void nfs_advise_use_readdirplus(struct inode *dir)
> {
> - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
> + struct nfs_inode *nfsi = NFS_I(dir);
> +
> + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> + !list_empty(&nfsi->open_files)) {
> + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> + invalidate_mapping_pages(dir->i_mapping, 0, -1);
> + }
> }
So every time advise_use_readdirplus it drops the mapping.. but what
about
subsequent calls into nfs_readdir() to get the next batch of entries?
For
the ls -l case, we want to keep setting NFS_INO_ADVISE_RDPLUS, but we
shouldn't start over filling the mapping from the beginning again.
>
> /*
> @@ -475,10 +484,7 @@ void nfs_advise_use_readdirplus(struct inode
> *dir)
> */
> void nfs_force_use_readdirplus(struct inode *dir)
> {
> - if (!list_empty(&NFS_I(dir)->open_files)) {
> - nfs_advise_use_readdirplus(dir);
> - invalidate_mapping_pages(dir->i_mapping, 0, -1);
> - }
> + nfs_advise_use_readdirplus(dir);
> }
>
> static
> @@ -1150,7 +1156,7 @@ static int nfs_lookup_revalidate(struct dentry
> *dentry, unsigned int flags)
> return -ECHILD;
> goto out_bad;
> }
> - goto out_valid_noent;
> + goto out_valid;
> }
>
> if (is_bad_inode(inode)) {
> @@ -1192,6 +1198,9 @@ static int nfs_lookup_revalidate(struct dentry
> *dentry, unsigned int flags)
> if (IS_ERR(label))
> goto out_error;
>
> + /* We need to force a revalidation: set a readdirplus hint */
> + nfs_advise_use_readdirplus(dir);
> +
> trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
> error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr,
> label);
> trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
> @@ -1211,9 +1220,6 @@ static int nfs_lookup_revalidate(struct dentry
> *dentry, unsigned int flags)
> out_set_verifier:
> nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
> out_valid:
> - /* Success: notify readdir to use READDIRPLUS */
> - nfs_advise_use_readdirplus(dir);
> - out_valid_noent:
> if (flags & LOOKUP_RCU) {
> if (parent != ACCESS_ONCE(dentry->d_parent))
> return -ECHILD;
Now when listing with `ls -l`: the second call into nfs_readdir() to
get
the next batch of entries will not have NFS_INO_ADVISE_RDPLUS.
I think this removes nfs_advise_use_readdirplus(dir) from the common
"goto
out_valid" path through nfs_lookup_revalidate() (the block with the
'iff'
typo).
Ben
On 19 Nov 2016, at 11:54, Trond Myklebust wrote:
> Ben Coddington reports that commit 311324ad1713, by adding the
> function
> nfs_dir_mapping_need_revalidate() that checks page cache validity on
> each call to nfs_readdir() causes a performance regression when
> the directory is being modified.
>
> If the directory is changing while we're iterating through the
> directory,
> POSIX does not require us to invalidate the page cache unless the user
> calls rewinddir(). However, we still do want to ensure that we use
> readdirplus in order to avoid a load of stat() calls when the user
> is doing an 'ls -l' workload.
>
> The fix should be to invalidate the page cache immediately when we're
> setting the NFS_INO_ADVISE_RDPLUS bit.
>
> Reported-by: Benjamin Coddington <[email protected]>
> Fixes: 311324ad1713 ("NFS: Be more aggressive in using
> readdirplus...")
> Signed-off-by: Trond Myklebust <[email protected]>
Hi Trond, I apologize for the delay, thanks for these! This one works
as
expected.
Tested-by: Benjamin Coddington <[email protected]>
Ben
> ---
> fs/nfs/dir.c | 15 ++-------------
> 1 file changed, 2 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 5f1af4cd1a33..53e02b8bd9bd 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -477,7 +477,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
> {
> if (!list_empty(&NFS_I(dir)->open_files)) {
> nfs_advise_use_readdirplus(dir);
> - nfs_zap_mapping(dir, dir->i_mapping);
> + invalidate_mapping_pages(dir->i_mapping, 0, -1);
> }
> }
>
> @@ -886,17 +886,6 @@ int uncached_readdir(nfs_readdir_descriptor_t
> *desc)
> goto out;
> }
>
> -static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
> -{
> - struct nfs_inode *nfsi = NFS_I(dir);
> -
> - if (nfs_attribute_cache_expired(dir))
> - return true;
> - if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
> - return true;
> - return false;
> -}
> -
> /* The file offset position represents the dirent entry number. A
> last cookie cache takes care of the common case of reading the
> whole directory.
> @@ -928,7 +917,7 @@ static int nfs_readdir(struct file *file, struct
> dir_context *ctx)
> desc->decode = NFS_PROTO(inode)->decode_dirent;
> desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
>
> - if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
> + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
> res = nfs_revalidate_mapping(inode, file->f_mapping);
> if (res < 0)
> goto out;
> --
> 2.7.4
T24gV2VkLCAyMDE2LTExLTMwIGF0IDE0OjA5IC0wNTAwLCBCZW5qYW1pbiBDb2RkaW5ndG9uIHdy
b3RlOg0KPiAuLiB0aGlzIG9uZSBicmVha3MgdGhpbmdzIGFnYWluOg0KPiANCj4gT24gMTkgTm92
IDIwMTYsIGF0IDExOjU0LCBUcm9uZCBNeWtsZWJ1c3Qgd3JvdGU6DQo+IA0KPiA+IFRoZXJlIGlz
IGxpdHRsZSBwb2ludCBpbiBzZXR0aW5nIE5GU19JTk9fQURWSVNFX1JEUExVUyBpbg0KPiA+IG5m
c19sb29rdXDCoA0KPiA+IGFuZA0KPiA+IG5mc19sb29rdXBfcmV2YWxpZGF0ZSgpIHVubGVzcyBh
IHByb2Nlc3MgaXMgYWN0dWFsbHkgZG9pbmcgcmVhZGRpcg0KPiA+IG9uwqANCj4gPiB0aGUNCj4g
PiBwYXJlbnQgZGlyZWN0b3J5Lg0KPiA+IEZ1cnRoZXJtb3JlLCB0aGVyZSBpcyBsaXR0bGUgcG9p
bnQgaW4gdXNpbmcgcmVhZGRpcnBsdXMgaWYgd2UncmXCoA0KPiA+IHRyeWluZw0KPiA+IHRvIHJl
dmFsaWRhdGUgYSBuZWdhdGl2ZSBkZW50cnkuDQo+ID4gDQo+ID4gU2lnbmVkLW9mZi1ieTogVHJv
bmQgTXlrbGVidXN0IDx0cm9uZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tPg0KPiA+IC0tLQ0K
PiA+IMKgZnMvbmZzL2Rpci5jIHwgMjggKysrKysrKysrKysrKysrKystLS0tLS0tLS0tLQ0KPiA+
IMKgMSBmaWxlIGNoYW5nZWQsIDE3IGluc2VydGlvbnMoKyksIDExIGRlbGV0aW9ucygtKQ0KPiA+
IA0KPiA+IGRpZmYgLS1naXQgYS9mcy9uZnMvZGlyLmMgYi9mcy9uZnMvZGlyLmMNCj4gPiBpbmRl
eCA1M2UwMmI4YmQ5YmQuLjViZWZkMzgyYmU3ZCAxMDA2NDQNCj4gPiAtLS0gYS9mcy9uZnMvZGly
LmMNCj4gPiArKysgYi9mcy9uZnMvZGlyLmMNCj4gPiBAQCAtNDU1LDE0ICs0NTUsMjMgQEAgYm9v
bCBuZnNfdXNlX3JlYWRkaXJwbHVzKHN0cnVjdCBpbm9kZSAqZGlyLMKgDQo+ID4gc3RydWN0IGRp
cl9jb250ZXh0ICpjdHgpDQo+ID4gwqB9DQo+ID4gDQo+ID4gwqAvKg0KPiA+IC0gKiBUaGlzIGZ1
bmN0aW9uIGlzIGNhbGxlZCBieSB0aGUgbG9va3VwIGNvZGUgdG8gcmVxdWVzdCB0aGUgdXNlDQo+
ID4gb2YNCj4gPiAtICogcmVhZGRpcnBsdXMgdG8gYWNjZWxlcmF0ZSBhbnkgZnV0dXJlIGxvb2t1
cHMgaW4gdGhlIHNhbWUNCj4gPiArICogVGhpcyBmdW5jdGlvbiBpcyBjYWxsZWQgYnkgdGhlIGxv
b2t1cCBhbmQgZ2V0YXR0ciBjb2RlIHRvDQo+ID4gcmVxdWVzdMKgDQo+ID4gdGhlDQo+ID4gKyAq
IHVzZSBvZiByZWFkZGlycGx1cyB0byBhY2NlbGVyYXRlIGFueSBmdXR1cmUgbG9va3VwcyBpbiB0
aGUgc2FtZQ0KPiA+IMKgICogZGlyZWN0b3J5Lg0KPiA+ICsgKiBEbyB0aGlzIGJ5IGNoZWNraW5n
IGlmIHRoZXJlIGlzIGFuIGFjdGl2ZSBmaWxlIGRlc2NyaXB0b3INCj4gPiArICogYW5kIGNhbGxp
bmcgbmZzX2FkdmlzZV91c2VfcmVhZGRpcnBsdXMsIHRoZW4gZm9yY2luZyBhDQo+ID4gKyAqIGNh
Y2hlIGZsdXNoLg0KPiA+IMKgICovDQo+ID4gwqBzdGF0aWMNCj4gPiDCoHZvaWQgbmZzX2Fkdmlz
ZV91c2VfcmVhZGRpcnBsdXMoc3RydWN0IGlub2RlICpkaXIpDQo+ID4gwqB7DQo+ID4gLQlzZXRf
Yml0KE5GU19JTk9fQURWSVNFX1JEUExVUywgJk5GU19JKGRpciktPmZsYWdzKTsNCj4gPiArCXN0
cnVjdCBuZnNfaW5vZGUgKm5mc2kgPSBORlNfSShkaXIpOw0KPiA+ICsNCj4gPiArCWlmIChuZnNf
c2VydmVyX2NhcGFibGUoZGlyLCBORlNfQ0FQX1JFQURESVJQTFVTKSAmJg0KPiA+ICsJwqDCoMKg
wqAhbGlzdF9lbXB0eSgmbmZzaS0+b3Blbl9maWxlcykpIHsNCj4gPiArCQlzZXRfYml0KE5GU19J
Tk9fQURWSVNFX1JEUExVUywgJm5mc2ktPmZsYWdzKTsNCj4gPiArCQlpbnZhbGlkYXRlX21hcHBp
bmdfcGFnZXMoZGlyLT5pX21hcHBpbmcsIDAsIC0xKTsNCj4gPiArCX0NCj4gPiDCoH0NCj4gDQo+
IFNvIGV2ZXJ5IHRpbWUgYWR2aXNlX3VzZV9yZWFkZGlycGx1cyBpdCBkcm9wcyB0aGUgbWFwcGlu
Zy4uIGJ1dCB3aGF0wqANCj4gYWJvdXQNCj4gc3Vic2VxdWVudCBjYWxscyBpbnRvIG5mc19yZWFk
ZGlyKCkgdG8gZ2V0IHRoZSBuZXh0IGJhdGNoIG9mDQo+IGVudHJpZXM/wqDCoA0KPiBGb3INCj4g
dGhlIGxzIC1sIGNhc2UsIHdlIHdhbnQgdG8ga2VlcCBzZXR0aW5nIE5GU19JTk9fQURWSVNFX1JE
UExVUywgYnV0IHdlDQo+IHNob3VsZG4ndCBzdGFydCBvdmVyIGZpbGxpbmcgdGhlIG1hcHBpbmcg
ZnJvbSB0aGUgYmVnaW5uaW5nIGFnYWluLg0KDQpIb3cgZG8gSSBlbnN1cmUgdGhhdCB0aGUgcmVh
ZGRpciBpc24ndCBiZWluZyBzZXJ2ZWQgZnJvbSBjYWNoZSwgaWYgSQ0KZG9uJ3QgaW52YWxpZGF0
ZSB0aGUgbWFwcGluZz8gVGhlIGludGVudGlvbiBvZiB0aGUgcGF0Y2ggaXMgdG8gZW5zdXJlDQp0
aGF0IHdlIG9ubHkgY2FsbCB0aGlzIG9uIGEgZGNhY2hlIG9yIGlub2RlIGF0dHJpYnV0ZSBjYWNo
ZSBtaXNzLg0KDQo+IA0KPiA+IA0KPiA+IMKgLyoNCj4gPiBAQCAtNDc1LDEwICs0ODQsNyBAQCB2
b2lkIG5mc19hZHZpc2VfdXNlX3JlYWRkaXJwbHVzKHN0cnVjdCBpbm9kZcKgDQo+ID4gKmRpcikN
Cj4gPiDCoCAqLw0KPiA+IMKgdm9pZCBuZnNfZm9yY2VfdXNlX3JlYWRkaXJwbHVzKHN0cnVjdCBp
bm9kZSAqZGlyKQ0KPiA+IMKgew0KPiA+IC0JaWYgKCFsaXN0X2VtcHR5KCZORlNfSShkaXIpLT5v
cGVuX2ZpbGVzKSkgew0KPiA+IC0JCW5mc19hZHZpc2VfdXNlX3JlYWRkaXJwbHVzKGRpcik7DQo+
ID4gLQkJaW52YWxpZGF0ZV9tYXBwaW5nX3BhZ2VzKGRpci0+aV9tYXBwaW5nLCAwLCAtMSk7DQo+
ID4gLQl9DQo+ID4gKwluZnNfYWR2aXNlX3VzZV9yZWFkZGlycGx1cyhkaXIpOw0KPiA+IMKgfQ0K
PiA+IA0KPiA+IMKgc3RhdGljDQo+ID4gQEAgLTExNTAsNyArMTE1Niw3IEBAIHN0YXRpYyBpbnQg
bmZzX2xvb2t1cF9yZXZhbGlkYXRlKHN0cnVjdA0KPiA+IGRlbnRyecKgDQo+ID4gKmRlbnRyeSwg
dW5zaWduZWQgaW50IGZsYWdzKQ0KPiA+IMKgCQkJCXJldHVybiAtRUNISUxEOw0KPiA+IMKgCQkJ
Z290byBvdXRfYmFkOw0KPiA+IMKgCQl9DQo+ID4gLQkJZ290byBvdXRfdmFsaWRfbm9lbnQ7DQo+
ID4gKwkJZ290byBvdXRfdmFsaWQ7DQo+ID4gwqAJfQ0KPiA+IA0KPiA+IMKgCWlmIChpc19iYWRf
aW5vZGUoaW5vZGUpKSB7DQo+ID4gQEAgLTExOTIsNiArMTE5OCw5IEBAIHN0YXRpYyBpbnQgbmZz
X2xvb2t1cF9yZXZhbGlkYXRlKHN0cnVjdA0KPiA+IGRlbnRyecKgDQo+ID4gKmRlbnRyeSwgdW5z
aWduZWQgaW50IGZsYWdzKQ0KPiA+IMKgCWlmIChJU19FUlIobGFiZWwpKQ0KPiA+IMKgCQlnb3Rv
IG91dF9lcnJvcjsNCj4gPiANCj4gPiArCS8qIFdlIG5lZWQgdG8gZm9yY2UgYSByZXZhbGlkYXRp
b246IHNldCBhIHJlYWRkaXJwbHVzIGhpbnQNCj4gPiAqLw0KPiA+ICsJbmZzX2FkdmlzZV91c2Vf
cmVhZGRpcnBsdXMoZGlyKTsNCj4gPiArDQo+ID4gwqAJdHJhY2VfbmZzX2xvb2t1cF9yZXZhbGlk
YXRlX2VudGVyKGRpciwgZGVudHJ5LCBmbGFncyk7DQo+ID4gwqAJZXJyb3IgPSBORlNfUFJPVE8o
ZGlyKS0+bG9va3VwKGRpciwgJmRlbnRyeS0+ZF9uYW1lLA0KPiA+IGZoYW5kbGUsIGZhdHRyLMKg
DQo+ID4gbGFiZWwpOw0KPiA+IMKgCXRyYWNlX25mc19sb29rdXBfcmV2YWxpZGF0ZV9leGl0KGRp
ciwgZGVudHJ5LCBmbGFncywNCj4gPiBlcnJvcik7DQo+ID4gQEAgLTEyMTEsOSArMTIyMCw2IEBA
IHN0YXRpYyBpbnQgbmZzX2xvb2t1cF9yZXZhbGlkYXRlKHN0cnVjdA0KPiA+IGRlbnRyecKgDQo+
ID4gKmRlbnRyeSwgdW5zaWduZWQgaW50IGZsYWdzKQ0KPiA+IMKgb3V0X3NldF92ZXJpZmllcjoN
Cj4gPiDCoAluZnNfc2V0X3ZlcmlmaWVyKGRlbnRyeSwgbmZzX3NhdmVfY2hhbmdlX2F0dHJpYnV0
ZShkaXIpKTsNCj4gPiDCoCBvdXRfdmFsaWQ6DQo+ID4gLQkvKiBTdWNjZXNzOiBub3RpZnkgcmVh
ZGRpciB0byB1c2UgUkVBRERJUlBMVVMgKi8NCj4gPiAtCW5mc19hZHZpc2VfdXNlX3JlYWRkaXJw
bHVzKGRpcik7DQo+ID4gLSBvdXRfdmFsaWRfbm9lbnQ6DQo+ID4gwqAJaWYgKGZsYWdzICYgTE9P
S1VQX1JDVSkgew0KPiA+IMKgCQlpZiAocGFyZW50ICE9IEFDQ0VTU19PTkNFKGRlbnRyeS0+ZF9w
YXJlbnQpKQ0KPiA+IMKgCQkJcmV0dXJuIC1FQ0hJTEQ7DQo+IA0KPiANCj4gTm93IHdoZW4gbGlz
dGluZyB3aXRoIGBscyAtbGA6wqDCoHRoZSBzZWNvbmQgY2FsbCBpbnRvIG5mc19yZWFkZGlyKCkN
Cj4gdG/CoA0KPiBnZXQNCj4gdGhlIG5leHQgYmF0Y2ggb2YgZW50cmllcyB3aWxsIG5vdCBoYXZl
IE5GU19JTk9fQURWSVNFX1JEUExVUy4NCj4gDQo+IEkgdGhpbmsgdGhpcyByZW1vdmVzIG5mc19h
ZHZpc2VfdXNlX3JlYWRkaXJwbHVzKGRpcikgZnJvbSB0aGUgY29tbW9uwqANCj4gImdvdG8NCj4g
b3V0X3ZhbGlkIiBwYXRoIHRocm91Z2ggbmZzX2xvb2t1cF9yZXZhbGlkYXRlKCkgKHRoZSBibG9j
ayB3aXRoIHRoZcKgDQo+ICdpZmYnDQo+IHR5cG8pLg0KPiANCg0KQWN0dWFsbHksICdpZmYnIGlz
IGludGVudGlvbmFsbHkgdXNlZCB0aGVyZSBhcyB0aGUgY29tbW9uIHNob3J0aGFuZCBmb3INCidp
ZiBhbmQgb25seSBpZicgKGh0dHBzOi8vd3d3Lm1lcnJpYW0td2Vic3Rlci5jb20vZGljdGlvbmFy
eS9pZmYpLg0KDQpBcyBJIHNhaWQgYWJvdmUsIHRoZSBwb2ludCBpcyB0byBvbmx5IHRyaWdnZXIg
UkVBRERJUlBMVVMgd2hlbiB3ZSBrbm93DQp0aGUgZGNhY2hlIG9yIHRoZSBpbm9kZSBjYWNoZSBu
ZWVkcyByZXZhbGlkYXRpb24uIE90aGVyd2lzZSB3ZSB3YW50IHRvDQp1c2UgdGhlIGxlc3MgZXhw
ZW5zaXZlIFJFQURESVIuIEknbSBvcGVuIGZvciBzdWdnZXN0aW9ucyBhcyB0byBob3cgd2UNCmNh
biBpbXByb3ZlIHRoYXQgaGV1cmlzdGljLg0KDQpDaGVlcnMNCiAgVHJvbmQ=
On 1 Dec 2016, at 15:47, Trond Myklebust wrote:
> On Wed, 2016-11-30 at 14:09 -0500, Benjamin Coddington wrote:
>> .. this one breaks things again:
>>
>> On 19 Nov 2016, at 11:54, Trond Myklebust wrote:
>>
>>> static
>>> void nfs_advise_use_readdirplus(struct inode *dir)
>>> {
>>> - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags);
>>> + struct nfs_inode *nfsi = NFS_I(dir);
>>> +
>>> + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
>>> + !list_empty(&nfsi->open_files)) {
>>> + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
>>> + invalidate_mapping_pages(dir->i_mapping, 0, -1);
>>> + }
>>> }
>>
>> So every time advise_use_readdirplus it drops the mapping.. but what
>> about
>> subsequent calls into nfs_readdir() to get the next batch of
>> entries?
>> For
>> the ls -l case, we want to keep setting NFS_INO_ADVISE_RDPLUS, but we
>> shouldn't start over filling the mapping from the beginning again.
>
> How do I ensure that the readdir isn't being served from cache, if I
> don't invalidate the mapping?
The way it is now. Isn't advise_use_readdirplus() just a marker to say
"continue to use readdirplus" after nfs_force_use_readdirplus() has
invalidated the mapping?
> The intention of the patch is to ensure that we only call this on a dcache
> or inode attribute cache miss.
I think it also needs to be called when we detect if a child of the
directory is looked up/revalidated in order to set NFS_INO_ADVISE_RDPLUS
again. Otherwise we lose the optimization in commit 311324ad1713.
Maybe I am just really confused..
>>> @@ -1211,9 +1220,6 @@ static int nfs_lookup_revalidate(struct
>>> dentry
>>> *dentry, unsigned int flags)
>>> out_set_verifier:
>>> nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
>>> out_valid:
>>> - /* Success: notify readdir to use READDIRPLUS */
>>> - nfs_advise_use_readdirplus(dir);
>>> - out_valid_noent:
>>> if (flags & LOOKUP_RCU) {
>>> if (parent != ACCESS_ONCE(dentry->d_parent))
>>> return -ECHILD;
>>
>>
>> Now when listing with `ls -l`: the second call into nfs_readdir()
>> to
>> get
>> the next batch of entries will not have NFS_INO_ADVISE_RDPLUS.
>>
>> I think this removes nfs_advise_use_readdirplus(dir) from the common
>> "goto
>> out_valid" path through nfs_lookup_revalidate() (the block with the
>> 'iff'
>> typo).
>>
>
> Actually, 'iff' is intentionally used there as the common shorthand for
> 'if and only if' (https://www.merriam-webster.com/dictionary/iff).
Ah, and now I see it used everywhere. Thank you for filling in that hole
in my head.
> As I said above, the point is to only trigger READDIRPLUS when we know
> the dcache or the inode cache needs revalidation. Otherwise we want to
> use the less expensive READDIR.
Not if using ls -l. With this patch, an ls -l of a directory of 2000
entries follows this pattern:
- nfs_readdir() returns first 1024 entries obtained with READDIRPLUS
- nfs_readdir() returns last 976 entries with READDIR
- stat()/LOOKUPs are done on those 976
- ls -l does its final getdents() on the last position, but we've now
invalidated the pages, so finally:
- nfs_readdir() is called with position 2000, and we do READDIRPLUS for
_every_ entry all over again.
This seems to break commit 311324ad1713's optimization, since now we'll send
RPC to read the entire directory twice for ls -l.
> I'm open for suggestions as to how we can improve that heuristic.
OK. Right now I've got nothing to suggest, but I'll spend some time thinking
about it.
Ben
DQo+IE9uIERlYyAyLCAyMDE2LCBhdCAwODo1NiwgQmVuamFtaW4gQ29kZGluZ3RvbiA8YmNvZGRp
bmdAcmVkaGF0LmNvbT4gd3JvdGU6DQo+IA0KPiBPbiAxIERlYyAyMDE2LCBhdCAxNTo0NywgVHJv
bmQgTXlrbGVidXN0IHdyb3RlOg0KPiANCj4+IE9uIFdlZCwgMjAxNi0xMS0zMCBhdCAxNDowOSAt
MDUwMCwgQmVuamFtaW4gQ29kZGluZ3RvbiB3cm90ZToNCj4+PiAuLiB0aGlzIG9uZSBicmVha3Mg
dGhpbmdzIGFnYWluOg0KPj4+IA0KPj4+IE9uIDE5IE5vdiAyMDE2LCBhdCAxMTo1NCwgVHJvbmQg
TXlrbGVidXN0IHdyb3RlOg0KPj4+IA0KPj4+PiAgc3RhdGljDQo+Pj4+ICB2b2lkIG5mc19hZHZp
c2VfdXNlX3JlYWRkaXJwbHVzKHN0cnVjdCBpbm9kZSAqZGlyKQ0KPj4+PiAgew0KPj4+PiAtCXNl
dF9iaXQoTkZTX0lOT19BRFZJU0VfUkRQTFVTLCAmTkZTX0koZGlyKS0+ZmxhZ3MpOw0KPj4+PiAr
CXN0cnVjdCBuZnNfaW5vZGUgKm5mc2kgPSBORlNfSShkaXIpOw0KPj4+PiArDQo+Pj4+ICsJaWYg
KG5mc19zZXJ2ZXJfY2FwYWJsZShkaXIsIE5GU19DQVBfUkVBRERJUlBMVVMpICYmDQo+Pj4+ICsJ
ICAgICFsaXN0X2VtcHR5KCZuZnNpLT5vcGVuX2ZpbGVzKSkgew0KPj4+PiArCQlzZXRfYml0KE5G
U19JTk9fQURWSVNFX1JEUExVUywgJm5mc2ktPmZsYWdzKTsNCj4+Pj4gKwkJaW52YWxpZGF0ZV9t
YXBwaW5nX3BhZ2VzKGRpci0+aV9tYXBwaW5nLCAwLCAtMSk7DQo+Pj4+ICsJfQ0KPj4+PiAgfQ0K
Pj4+IA0KPj4+IFNvIGV2ZXJ5IHRpbWUgYWR2aXNlX3VzZV9yZWFkZGlycGx1cyBpdCBkcm9wcyB0
aGUgbWFwcGluZy4uIGJ1dCB3aGF0IA0KPj4+IGFib3V0DQo+Pj4gc3Vic2VxdWVudCBjYWxscyBp
bnRvIG5mc19yZWFkZGlyKCkgdG8gZ2V0IHRoZSBuZXh0IGJhdGNoIG9mDQo+Pj4gZW50cmllcz8g
IA0KPj4+IEZvcg0KPj4+IHRoZSBscyAtbCBjYXNlLCB3ZSB3YW50IHRvIGtlZXAgc2V0dGluZyBO
RlNfSU5PX0FEVklTRV9SRFBMVVMsIGJ1dCB3ZQ0KPj4+IHNob3VsZG4ndCBzdGFydCBvdmVyIGZp
bGxpbmcgdGhlIG1hcHBpbmcgZnJvbSB0aGUgYmVnaW5uaW5nIGFnYWluLg0KPj4gDQo+PiBIb3cg
ZG8gSSBlbnN1cmUgdGhhdCB0aGUgcmVhZGRpciBpc24ndCBiZWluZyBzZXJ2ZWQgZnJvbSBjYWNo
ZSwgaWYgSQ0KPj4gZG9uJ3QgaW52YWxpZGF0ZSB0aGUgbWFwcGluZz8NCj4gDQo+IFRoZSB3YXkg
aXQgaXMgbm93LiAgSXNuJ3QgYWR2aXNlX3VzZV9yZWFkZGlycGx1cygpIGp1c3QgYSBtYXJrZXIg
dG8gc2F5DQo+ICJjb250aW51ZSB0byB1c2UgcmVhZGRpcnBsdXMiIGFmdGVyIG5mc19mb3JjZV91
c2VfcmVhZGRpcnBsdXMoKSBoYXMNCj4gaW52YWxpZGF0ZWQgdGhlIG1hcHBpbmc/DQoNClNvLCB5
ZXMuIEkgd2FzIGNvbnNpZGVyaW5nIHJlbmFtaW5nIHRoZSBmdW5jdGlvbnMgaW4gdGhlIHYyIHBh
dGNoIHNlcmllcywgYnV0IEkgY291bGRu4oCZdCByZWFsbHkgZmluZCBhIGdvb2QgbmFtZS4NCklu
IGVzc2VuY2U6DQotIG5mc19mb3JjZV91c2VfcmVhZGRpcnBsdXMoKSBpcyBoaW50aW5nIHRvIHJl
YWRkaXIgdGhhdCB3ZSBoYWQgYSBjYWNoZSBtaXNzLCBhbmQgc28gd2Ugc2hvdWxkIGNsZWFyIHRo
ZSByZWFkaXIgY2FjaGUgYW5kIHVzZSByZWFkZGlycGx1cyBpbiBvcmRlciB0byBwb3B1bGF0ZSB0
aGUgY2FjaGUuDQotIG5mc19hZHZpc2VfdXNlX3JlYWRkaXJwbHVzKCkgaXMgaGludGluZyB0aGF0
IHdlIGhhZCBhIGNhY2hlIGhpdCwgYW5kIHNvIHdlIHNob3VsZCBjb250aW51ZSB0byB1c2UgcmVh
ZGRpcnBsdXMNCg0KPiANCj4+IFRoZSBpbnRlbnRpb24gb2YgdGhlIHBhdGNoIGlzIHRvIGVuc3Vy
ZSB0aGF0IHdlIG9ubHkgY2FsbCB0aGlzIG9uIGEgZGNhY2hlDQo+PiBvciBpbm9kZSBhdHRyaWJ1
dGUgY2FjaGUgbWlzcy4NCj4gDQo+IEkgdGhpbmsgaXQgYWxzbyBuZWVkcyB0byBiZSBjYWxsZWQg
d2hlbiB3ZSBkZXRlY3QgaWYgYSBjaGlsZCBvZiB0aGUNCj4gZGlyZWN0b3J5IGlzIGxvb2tlZCB1
cC9yZXZhbGlkYXRlZCBpbiBvcmRlciB0byBzZXQgTkZTX0lOT19BRFZJU0VfUkRQTFVTDQo+IGFn
YWluLiAgT3RoZXJ3aXNlIHdlIGxvc2UgdGhlIG9wdGltaXphdGlvbiBpbiBjb21taXQgMzExMzI0
YWQxNzEzLg0KPiANCj4gTWF5YmUgSSBhbSBqdXN0IHJlYWxseSBjb25mdXNlZC4uDQoNCk5vLCBJ
IHRoaW5rIHRoYXQgaXMgY29ycmVjdC4gSXQganVzdCBuZWVkcyB0byBiZSBkb25lIG1vcmUgY29u
c2lzdGVudGx5LiBBZ2Fpbiwgc2VlIHRoZSB2MiBwYXRjaCBzZXJpZXMNCg0KQ2hlZXJzDQogIFRy
b25k