2020-01-22 23:49:52

by Dai Ngo

[permalink] [raw]
Subject: [PATCH RFC] nfs: optimise readdir cache page invalidation

When the directory is large and it's being modified by one client
while another client is doing the 'ls -l' on the same directory then
the cache page invalidation from nfs_force_use_readdirplus causes
the reading client to keep restarting READDIRPLUS from cookie 0
which causes the 'ls -l' to take a very long time to complete,
possibly never completing.

Currently when nfs_force_use_readdirplus is called to switch from
READDIR to READDIRPLUS, it invalidates all the cached pages of the
directory. This cache page invalidation causes the next nfs_readdir
to re-read the directory content from cookie 0.

This patch is to optimise the cache invalidation in
nfs_force_use_readdirplus by only truncating the cached pages from
last page index accessed to the end the file. It also marks the
inode to delay invalidating all the cached page of the directory
until the next initial nfs_readdir of the next 'ls' instance.

Signed-off-by: Dai Ngo <[email protected]>

---
fs/nfs/dir.c | 14 +++++++++++++-
include/linux/nfs_fs.h | 3 +++
2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e180033e35cf..3fbf2e41d523 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode *dir)
if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
!list_empty(&nfsi->open_files)) {
set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
- invalidate_mapping_pages(dir->i_mapping, 0, -1);
+ nfs_zap_mapping(dir, dir->i_mapping);
+ if (NFS_PROTO(dir)->version == 3)
+ invalidate_mapping_pages(dir->i_mapping,
+ nfsi->page_index + 1, -1);
}
}

@@ -709,6 +712,9 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
int find_cache_page(nfs_readdir_descriptor_t *desc)
{
int res;
+ struct inode *inode;
+ struct nfs_inode *nfsi;
+ struct dentry *dentry;

desc->page = get_cache_page(desc);
if (IS_ERR(desc->page))
@@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
res = nfs_readdir_search_array(desc);
if (res != 0)
cache_page_release(desc);
+
+ dentry = file_dentry(desc->file);
+ inode = d_inode(dentry);
+ nfsi = NFS_I(inode);
+ nfsi->page_index = desc->page_index;
+
return res;
}

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c06b1fd130f3..a5f8f03ecd59 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -168,6 +168,9 @@ struct nfs_inode {
struct rw_semaphore rmdir_sem;
struct mutex commit_mutex;

+ /* track last access to cached pages */
+ unsigned long page_index;
+
#if IS_ENABLED(CONFIG_NFS_V4)
struct nfs4_cached_acl *nfs4_acl;
/* NFSv4 state */
--
1.8.3.1


2020-01-23 00:10:59

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH RFC] nfs: optimise readdir cache page invalidation

On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> When the directory is large and it's being modified by one client
> while another client is doing the 'ls -l' on the same directory then
> the cache page invalidation from nfs_force_use_readdirplus causes
> the reading client to keep restarting READDIRPLUS from cookie 0
> which causes the 'ls -l' to take a very long time to complete,
> possibly never completing.
>
> Currently when nfs_force_use_readdirplus is called to switch from
> READDIR to READDIRPLUS, it invalidates all the cached pages of the
> directory. This cache page invalidation causes the next nfs_readdir
> to re-read the directory content from cookie 0.
>
> This patch is to optimise the cache invalidation in
> nfs_force_use_readdirplus by only truncating the cached pages from
> last page index accessed to the end the file. It also marks the
> inode to delay invalidating all the cached page of the directory
> until the next initial nfs_readdir of the next 'ls' instance.
>
> Signed-off-by: Dai Ngo <[email protected]>
>
> ---
> fs/nfs/dir.c | 14 +++++++++++++-
> include/linux/nfs_fs.h | 3 +++
> 2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..3fbf2e41d523 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> *dir)
> if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> !list_empty(&nfsi->open_files)) {
> set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> - invalidate_mapping_pages(dir->i_mapping, 0, -1);
> + nfs_zap_mapping(dir, dir->i_mapping);
> + if (NFS_PROTO(dir)->version == 3)
> + invalidate_mapping_pages(dir->i_mapping,
> + nfsi->page_index + 1, -1);

Let's do this for NFSv4 as well. The motivation is the same: to allow
us to replace a series of GETATTR calls to one or more READDIRPLUS
calls.

> }
> }
>
> @@ -709,6 +712,9 @@ struct page
> *get_cache_page(nfs_readdir_descriptor_t *desc)
> int find_cache_page(nfs_readdir_descriptor_t *desc)
> {
> int res;
> + struct inode *inode;
> + struct nfs_inode *nfsi;
> + struct dentry *dentry;
>
> desc->page = get_cache_page(desc);
> if (IS_ERR(desc->page))
> @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> *desc)
> res = nfs_readdir_search_array(desc);
> if (res != 0)
> cache_page_release(desc);
> +
> + dentry = file_dentry(desc->file);
> + inode = d_inode(dentry);
> + nfsi = NFS_I(inode);
> + nfsi->page_index = desc->page_index;
> +
> return res;
> }
>
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index c06b1fd130f3..a5f8f03ecd59 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -168,6 +168,9 @@ struct nfs_inode {
> struct rw_semaphore rmdir_sem;
> struct mutex commit_mutex;
>
> + /* track last access to cached pages */
> + unsigned long page_index;
> +
> #if IS_ENABLED(CONFIG_NFS_V4)
> struct nfs4_cached_acl *nfs4_acl;
> /* NFSv4 state */

Otherwise, the patch looks good to me.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2020-01-23 18:37:21

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH RFC] nfs: optimise readdir cache page invalidation

On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> When the directory is large and it's being modified by one client
> while another client is doing the 'ls -l' on the same directory then
> the cache page invalidation from nfs_force_use_readdirplus causes
> the reading client to keep restarting READDIRPLUS from cookie 0
> which causes the 'ls -l' to take a very long time to complete,
> possibly never completing.
>
> Currently when nfs_force_use_readdirplus is called to switch from
> READDIR to READDIRPLUS, it invalidates all the cached pages of the
> directory. This cache page invalidation causes the next nfs_readdir
> to re-read the directory content from cookie 0.
>
> This patch is to optimise the cache invalidation in
> nfs_force_use_readdirplus by only truncating the cached pages from
> last page index accessed to the end the file. It also marks the
> inode to delay invalidating all the cached page of the directory
> until the next initial nfs_readdir of the next 'ls' instance.
>
> Signed-off-by: Dai Ngo <[email protected]>

Reviewed-by: Trond Myklebust <[email protected]>

>
> ---
> fs/nfs/dir.c | 14 +++++++++++++-
> include/linux/nfs_fs.h | 3 +++
> 2 files changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index e180033e35cf..3fbf2e41d523 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> *dir)
> if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> !list_empty(&nfsi->open_files)) {
> set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> - invalidate_mapping_pages(dir->i_mapping, 0, -1);
> + nfs_zap_mapping(dir, dir->i_mapping);
> + if (NFS_PROTO(dir)->version == 3)
> + invalidate_mapping_pages(dir->i_mapping,
> + nfsi->page_index + 1, -1);
> }
> }
>
> @@ -709,6 +712,9 @@ struct page
> *get_cache_page(nfs_readdir_descriptor_t *desc)
> int find_cache_page(nfs_readdir_descriptor_t *desc)
> {
> int res;
> + struct inode *inode;
> + struct nfs_inode *nfsi;
> + struct dentry *dentry;
>
> desc->page = get_cache_page(desc);
> if (IS_ERR(desc->page))
> @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> *desc)
> res = nfs_readdir_search_array(desc);
> if (res != 0)
> cache_page_release(desc);
> +
> + dentry = file_dentry(desc->file);
> + inode = d_inode(dentry);
> + nfsi = NFS_I(inode);
> + nfsi->page_index = desc->page_index;
> +
> return res;
> }
>
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index c06b1fd130f3..a5f8f03ecd59 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -168,6 +168,9 @@ struct nfs_inode {
> struct rw_semaphore rmdir_sem;
> struct mutex commit_mutex;
>
> + /* track last access to cached pages */
> + unsigned long page_index;
> +
> #if IS_ENABLED(CONFIG_NFS_V4)
> struct nfs4_cached_acl *nfs4_acl;
> /* NFSv4 state */
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2020-01-23 18:39:03

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH RFC] nfs: optimise readdir cache page invalidation

On Thu, 2020-01-23 at 10:35 -0800, Trond Myklebust wrote:
> On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> > When the directory is large and it's being modified by one client
> > while another client is doing the 'ls -l' on the same directory
> > then
> > the cache page invalidation from nfs_force_use_readdirplus causes
> > the reading client to keep restarting READDIRPLUS from cookie 0
> > which causes the 'ls -l' to take a very long time to complete,
> > possibly never completing.
> >
> > Currently when nfs_force_use_readdirplus is called to switch from
> > READDIR to READDIRPLUS, it invalidates all the cached pages of the
> > directory. This cache page invalidation causes the next nfs_readdir
> > to re-read the directory content from cookie 0.
> >
> > This patch is to optimise the cache invalidation in
> > nfs_force_use_readdirplus by only truncating the cached pages from
> > last page index accessed to the end the file. It also marks the
> > inode to delay invalidating all the cached page of the directory
> > until the next initial nfs_readdir of the next 'ls' instance.
> >
> > Signed-off-by: Dai Ngo <[email protected]>
>
> Reviewed-by: Trond Myklebust <[email protected]>

Sorry I intended this for the PATCHv2... I've looked through that
version and it looks OK to me.

Anna, please apply the later v2 instead of this one...

>
> > ---
> > fs/nfs/dir.c | 14 +++++++++++++-
> > include/linux/nfs_fs.h | 3 +++
> > 2 files changed, 16 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > index e180033e35cf..3fbf2e41d523 100644
> > --- a/fs/nfs/dir.c
> > +++ b/fs/nfs/dir.c
> > @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> > *dir)
> > if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> > !list_empty(&nfsi->open_files)) {
> > set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> > - invalidate_mapping_pages(dir->i_mapping, 0, -1);
> > + nfs_zap_mapping(dir, dir->i_mapping);
> > + if (NFS_PROTO(dir)->version == 3)
> > + invalidate_mapping_pages(dir->i_mapping,
> > + nfsi->page_index + 1, -1);
> > }
> > }
> >
> > @@ -709,6 +712,9 @@ struct page
> > *get_cache_page(nfs_readdir_descriptor_t *desc)
> > int find_cache_page(nfs_readdir_descriptor_t *desc)
> > {
> > int res;
> > + struct inode *inode;
> > + struct nfs_inode *nfsi;
> > + struct dentry *dentry;
> >
> > desc->page = get_cache_page(desc);
> > if (IS_ERR(desc->page))
> > @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> > *desc)
> > res = nfs_readdir_search_array(desc);
> > if (res != 0)
> > cache_page_release(desc);
> > +
> > + dentry = file_dentry(desc->file);
> > + inode = d_inode(dentry);
> > + nfsi = NFS_I(inode);
> > + nfsi->page_index = desc->page_index;
> > +
> > return res;
> > }
> >
> > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> > index c06b1fd130f3..a5f8f03ecd59 100644
> > --- a/include/linux/nfs_fs.h
> > +++ b/include/linux/nfs_fs.h
> > @@ -168,6 +168,9 @@ struct nfs_inode {
> > struct rw_semaphore rmdir_sem;
> > struct mutex commit_mutex;
> >
> > + /* track last access to cached pages */
> > + unsigned long page_index;
> > +
> > #if IS_ENABLED(CONFIG_NFS_V4)
> > struct nfs4_cached_acl *nfs4_acl;
> > /* NFSv4 state */
--
Trond Myklebust
CTO, Hammerspace Inc
4300 El Camino Real, Suite 105
Los Altos, CA 94022
http://www.hammer.space


2020-01-23 18:56:32

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH RFC] nfs: optimise readdir cache page invalidation

On Thu, 2020-01-23 at 18:37 +0000, Trond Myklebust wrote:
> NetApp Security WARNING: This is an external email. Do not click links or open
> attachments unless you recognize the sender and know the content is safe.
>
>
>
>
> On Thu, 2020-01-23 at 10:35 -0800, Trond Myklebust wrote:
> > On Wed, 2020-01-22 at 18:48 -0500, Dai Ngo wrote:
> > > When the directory is large and it's being modified by one client
> > > while another client is doing the 'ls -l' on the same directory
> > > then
> > > the cache page invalidation from nfs_force_use_readdirplus causes
> > > the reading client to keep restarting READDIRPLUS from cookie 0
> > > which causes the 'ls -l' to take a very long time to complete,
> > > possibly never completing.
> > >
> > > Currently when nfs_force_use_readdirplus is called to switch from
> > > READDIR to READDIRPLUS, it invalidates all the cached pages of the
> > > directory. This cache page invalidation causes the next nfs_readdir
> > > to re-read the directory content from cookie 0.
> > >
> > > This patch is to optimise the cache invalidation in
> > > nfs_force_use_readdirplus by only truncating the cached pages from
> > > last page index accessed to the end the file. It also marks the
> > > inode to delay invalidating all the cached page of the directory
> > > until the next initial nfs_readdir of the next 'ls' instance.
> > >
> > > Signed-off-by: Dai Ngo <[email protected]>
> >
> > Reviewed-by: Trond Myklebust <[email protected]>
>
> Sorry I intended this for the PATCHv2... I've looked through that
> version and it looks OK to me.
>
> Anna, please apply the later v2 instead of this one...

I applied the v2 about 5 minutes before you sent this. I'll make sure your
Reviewed-by gets added to it, too :)

Anna
>
> > > ---
> > > fs/nfs/dir.c | 14 +++++++++++++-
> > > include/linux/nfs_fs.h | 3 +++
> > > 2 files changed, 16 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> > > index e180033e35cf..3fbf2e41d523 100644
> > > --- a/fs/nfs/dir.c
> > > +++ b/fs/nfs/dir.c
> > > @@ -437,7 +437,10 @@ void nfs_force_use_readdirplus(struct inode
> > > *dir)
> > > if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> > > !list_empty(&nfsi->open_files)) {
> > > set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> > > - invalidate_mapping_pages(dir->i_mapping, 0, -1);
> > > + nfs_zap_mapping(dir, dir->i_mapping);
> > > + if (NFS_PROTO(dir)->version == 3)
> > > + invalidate_mapping_pages(dir->i_mapping,
> > > + nfsi->page_index + 1, -1);
> > > }
> > > }
> > >
> > > @@ -709,6 +712,9 @@ struct page
> > > *get_cache_page(nfs_readdir_descriptor_t *desc)
> > > int find_cache_page(nfs_readdir_descriptor_t *desc)
> > > {
> > > int res;
> > > + struct inode *inode;
> > > + struct nfs_inode *nfsi;
> > > + struct dentry *dentry;
> > >
> > > desc->page = get_cache_page(desc);
> > > if (IS_ERR(desc->page))
> > > @@ -717,6 +723,12 @@ int find_cache_page(nfs_readdir_descriptor_t
> > > *desc)
> > > res = nfs_readdir_search_array(desc);
> > > if (res != 0)
> > > cache_page_release(desc);
> > > +
> > > + dentry = file_dentry(desc->file);
> > > + inode = d_inode(dentry);
> > > + nfsi = NFS_I(inode);
> > > + nfsi->page_index = desc->page_index;
> > > +
> > > return res;
> > > }
> > >
> > > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> > > index c06b1fd130f3..a5f8f03ecd59 100644
> > > --- a/include/linux/nfs_fs.h
> > > +++ b/include/linux/nfs_fs.h
> > > @@ -168,6 +168,9 @@ struct nfs_inode {
> > > struct rw_semaphore rmdir_sem;
> > > struct mutex commit_mutex;
> > >
> > > + /* track last access to cached pages */
> > > + unsigned long page_index;
> > > +
> > > #if IS_ENABLED(CONFIG_NFS_V4)
> > > struct nfs4_cached_acl *nfs4_acl;
> > > /* NFSv4 state */
> --
> Trond Myklebust
> CTO, Hammerspace Inc
> 4300 El Camino Real, Suite 105
> Los Altos, CA 94022
> http://www.hammer.space
>
>