From: Trond Myklebust <[email protected]>
Refactor to use pagecache_get_page() so that we can fill the page
in multiple stages.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 138 ++++++++++++++++++++++++++++-----------------------
1 file changed, 77 insertions(+), 61 deletions(-)
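
Condensed, the flow this gives find_and_lock_cache_page() in the diff below
is roughly the following (a sketch only, not part of the patch):

	desc->page = nfs_readdir_page_get_cached(desc); /* locked, array initialised */
	if (!desc->page)
		return -ENOMEM;
	if (nfs_readdir_page_needs_filling(desc->page))
		res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
	/* ... nfs_readdir_search_array(), then unlock and put the page ... */
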
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 68acbde3f914..842f69120a01 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -149,7 +149,7 @@ typedef struct nfs_readdir_descriptor {
struct file *file;
struct page *page;
struct dir_context *ctx;
- unsigned long page_index;
+ pgoff_t page_index;
u64 dir_cookie;
u64 last_cookie;
u64 dup_cookie;
@@ -166,13 +166,18 @@ typedef struct nfs_readdir_descriptor {
bool eof;
} nfs_readdir_descriptor_t;
-static
-void nfs_readdir_init_array(struct page *page)
+static void nfs_readdir_array_init(struct nfs_cache_array *array)
+{
+ memset(array, 0, sizeof(struct nfs_cache_array));
+}
+
+static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
{
struct nfs_cache_array *array;
array = kmap_atomic(page);
- memset(array, 0, sizeof(struct nfs_cache_array));
+ nfs_readdir_array_init(array);
+ array->last_cookie = last_cookie;
kunmap_atomic(array);
}
@@ -188,7 +193,7 @@ void nfs_readdir_clear_array(struct page *page)
array = kmap_atomic(page);
for (i = 0; i < array->size; i++)
kfree(array->array[i].string.name);
- array->size = 0;
+ nfs_readdir_array_init(array);
kunmap_atomic(array);
}
@@ -268,6 +273,44 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
return ret;
}
+static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
+ pgoff_t index, u64 last_cookie)
+{
+ struct page *page;
+
+ page = grab_cache_page(mapping, index);
+ if (page && !PageUptodate(page)) {
+ nfs_readdir_page_init_array(page, last_cookie);
+ if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
+ nfs_zap_mapping(mapping->host, mapping);
+ SetPageUptodate(page);
+ }
+
+ return page;
+}
+
+static u64 nfs_readdir_page_last_cookie(struct page *page)
+{
+ struct nfs_cache_array *array;
+ u64 ret;
+
+ array = kmap_atomic(page);
+ ret = array->last_cookie;
+ kunmap_atomic(array);
+ return ret;
+}
+
+static bool nfs_readdir_page_needs_filling(struct page *page)
+{
+ struct nfs_cache_array *array;
+ bool ret;
+
+ array = kmap_atomic(page);
+ ret = !nfs_readdir_array_is_full(array);
+ kunmap_atomic(array);
+ return ret;
+}
+
static void nfs_readdir_page_set_eof(struct page *page)
{
struct nfs_cache_array *array;
@@ -682,10 +725,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
int status = -ENOMEM;
unsigned int array_size = ARRAY_SIZE(pages);
- nfs_readdir_init_array(page);
-
entry.prev_cookie = 0;
- entry.cookie = desc->last_cookie;
+ entry.cookie = nfs_readdir_page_last_cookie(page);
entry.eof = 0;
entry.fh = nfs_alloc_fhandle();
entry.fattr = nfs_alloc_fattr();
@@ -730,48 +771,25 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
return status;
}
-/*
- * Now we cache directories properly, by converting xdr information
- * to an array that can be used for lookups later. This results in
- * fewer cache pages, since we can store more information on each page.
- * We only need to convert from xdr once so future lookups are much simpler
- */
-static
-int nfs_readdir_filler(void *data, struct page* page)
+static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
{
- nfs_readdir_descriptor_t *desc = data;
- struct inode *inode = file_inode(desc->file);
- int ret;
-
- ret = nfs_readdir_xdr_to_array(desc, page, inode);
- if (ret < 0)
- goto error;
- SetPageUptodate(page);
-
- if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
- /* Should never happen */
- nfs_zap_mapping(inode, inode->i_mapping);
- }
- unlock_page(page);
- return 0;
- error:
- nfs_readdir_clear_array(page);
- unlock_page(page);
- return ret;
+ put_page(desc->page);
+ desc->page = NULL;
}
-static
-void cache_page_release(nfs_readdir_descriptor_t *desc)
+static void
+nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
{
- put_page(desc->page);
- desc->page = NULL;
+ unlock_page(desc->page);
+ nfs_readdir_page_put(desc);
}
-static
-struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+static struct page *
+nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
{
- return read_cache_page(desc->file->f_mapping, desc->page_index,
- nfs_readdir_filler, desc);
+ return nfs_readdir_page_get_locked(desc->file->f_mapping,
+ desc->page_index,
+ desc->last_cookie);
}
/*
@@ -785,23 +803,21 @@ int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
struct nfs_inode *nfsi = NFS_I(inode);
int res;
- desc->page = get_cache_page(desc);
- if (IS_ERR(desc->page))
- return PTR_ERR(desc->page);
- res = lock_page_killable(desc->page);
- if (res != 0)
- goto error;
- res = -EAGAIN;
- if (desc->page->mapping != NULL) {
- res = nfs_readdir_search_array(desc);
- if (res == 0) {
- nfsi->page_index = desc->page_index;
- return 0;
- }
+ desc->page = nfs_readdir_page_get_cached(desc);
+ if (!desc->page)
+ return -ENOMEM;
+ if (nfs_readdir_page_needs_filling(desc->page)) {
+ res = nfs_readdir_xdr_to_array(desc, desc->page, inode);
+ if (res < 0)
+ goto error;
+ }
+ res = nfs_readdir_search_array(desc);
+ if (res == 0) {
+ nfsi->page_index = desc->page_index;
+ return 0;
}
- unlock_page(desc->page);
error:
- cache_page_release(desc);
+ nfs_readdir_page_unlock_and_put_cached(desc);
return res;
}
@@ -896,6 +912,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
desc->page = page;
desc->duped = 0;
+ nfs_readdir_page_init_array(page, desc->dir_cookie);
status = nfs_readdir_xdr_to_array(desc, page, inode);
if (status < 0)
goto out_release;
@@ -904,7 +921,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
out_release:
nfs_readdir_clear_array(desc->page);
- cache_page_release(desc);
+ nfs_readdir_page_put(desc);
out:
dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
__func__, status);
@@ -976,8 +993,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
break;
res = nfs_do_filldir(desc);
- unlock_page(desc->page);
- cache_page_release(desc);
+ nfs_readdir_page_unlock_and_put_cached(desc);
if (res < 0)
break;
} while (!desc->eof);
--
2.28.0
From: Trond Myklebust <[email protected]>
If a readdir call returns more data than we can fit into one page
cache page, then allocate a new one for that data rather than
discarding the data.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 42 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 842f69120a01..f7248145c333 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct page *page)
kunmap_atomic(array);
}
+static void nfs_readdir_page_unlock_and_put(struct page *page)
+{
+ unlock_page(page);
+ put_page(page);
+}
+
+static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
+ pgoff_t index, u64 cookie)
+{
+ struct page *page;
+
+ page = nfs_readdir_page_get_locked(mapping, index, cookie);
+ if (page) {
+ if (nfs_readdir_page_last_cookie(page) == cookie)
+ return page;
+ nfs_readdir_page_unlock_and_put(page);
+ }
+ return NULL;
+}
+
static inline
int is_32bit_api(void)
{
@@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
}
/* Perform conversion from xdr to cache array */
-static
-int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
- struct page **xdr_pages, struct page *page, unsigned int buflen)
+static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
+ struct nfs_entry *entry,
+ struct page **xdr_pages,
+ struct page *fillme, unsigned int buflen)
{
+ struct address_space *mapping = desc->file->f_mapping;
struct xdr_stream stream;
struct xdr_buf buf;
- struct page *scratch;
+ struct page *scratch, *new, *page = fillme;
int status;
scratch = alloc_page(GFP_KERNEL);
@@ -666,6 +688,19 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
desc->dir_verifier);
status = nfs_readdir_add_to_array(entry, page);
+ if (status != -ENOSPC)
+ continue;
+
+ if (page->mapping != mapping)
+ break;
+ new = nfs_readdir_page_get_next(mapping, page->index + 1,
+ entry->prev_cookie);
+ if (!new)
+ break;
+ if (page != fillme)
+ nfs_readdir_page_unlock_and_put(page);
+ page = new;
+ status = nfs_readdir_add_to_array(entry, page);
} while (!status && !entry->eof);
switch (status) {
@@ -681,6 +716,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
break;
}
+ if (page != fillme)
+ nfs_readdir_page_unlock_and_put(page);
+
put_page(scratch);
return status;
}
--
2.28.0
On Wed, Nov 4, 2020 at 11:27 AM <[email protected]> wrote:
>
> From: Trond Myklebust <[email protected]>
>
> If a readdir call returns more data than we can fit into one page
> cache page, then allocate a new one for that data rather than
> discarding the data.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> [...]
>
It doesn't look like this handles uncached_readdir. Were you planning
on addressing that somehow, or should we think about something like
this, which moves dtsize up as a parameter to nfs_readdir_xdr_to_array()
and forces uncached_readdir() to a single page:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b6c3501e8f61..ca30e2dbb9c3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -791,13 +791,12 @@ static struct page **nfs_readdir_alloc_pages(size_t npages)
 
 static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 				    struct page *page, __be32 *verf_arg,
-				    __be32 *verf_res)
+				    __be32 *verf_res, size_t dtsize)
 {
 	struct page **pages;
 	struct nfs_entry *entry;
 	size_t array_size;
 	struct inode *inode = file_inode(desc->file);
-	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
@@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	__be32 verf[NFS_DIR_VERIFIER_SIZE];
 	int res;
+	size_t dtsize = NFS_SERVER(inode)->dtsize;
 
 	desc->page = nfs_readdir_page_get_cached(desc);
 	if (!desc->page)
 		return -ENOMEM;
 	if (nfs_readdir_page_needs_filling(desc->page)) {
 		res = nfs_readdir_xdr_to_array(desc, desc->page,
-					       nfsi->cookieverf, verf);
+					       nfsi->cookieverf, verf,
+					       dtsize);
 		if (res < 0) {
 			nfs_readdir_page_unlock_and_put_cached(desc);
 			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
@@ -995,7 +996,8 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	desc->duped = 0;
 
 	nfs_readdir_page_init_array(page, desc->dir_cookie);
-	status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf);
+	status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf,
+					  PAGE_SIZE);
 	if (status < 0)
 		goto out_release;
On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote:
> On Wed, Nov 4, 2020 at 11:27 AM <[email protected]> wrote:
> > [...]
>
> It doesn't look like this handles uncached_readdir. Were you planning
> on addressing that somehow, or should we think about something like
> this, which moves dtsize up as a parameter to nfs_readdir_xdr_to_array()
> and forces uncached_readdir() to a single page:
> [...]
>
Actually for uncached readdir, I was thinking we might want to convert
nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an
array of pages + buffer size.
IOW: convert uncached_readdir() to allocate an array of pages, and pass
in a 'struct page **' + a buffer length.
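
Something along these lines, purely illustrative (the names and exact
parameter list are still to be decided):

	static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
					    struct page **pages, size_t buflen,
					    struct inode *inode);

The cached path would hand in the pages it grabs from the page cache,
while uncached_readdir() would allocate its own small array of pages and
free it once nfs_do_filldir() has consumed the entries.
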
I don't like the idea of passing in a dtsize because that restricts the
size of the READDIR RPC request buffer instead of restricting the
number of entries the server returns. For any given buffer size, that
number of entries fluctuates wildly depending on the filenames in that
directory and their differing lengths, whereas your page can take a
fixed number of entries irrespective of the filename lengths (in fact
it can always take 127 entries on an x86_64).
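
For reference, that 127 falls out of the array layout; assuming a 16-byte
struct nfs_cache_array header and 32-byte entries on x86_64:

	(PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)
		= (4096 - 16) / 32 = 127 entries per page
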
It is true that the number of entries that nfs_do_filldir() can handle
also depends on the filename length, but the filesystem has no
information about the buffer size that was passed in to the getdents()
system call, or about how much space remains in that buffer. All of that
is hidden behind the opaque 'struct dir_context', so we can't use it to
set a dtsize either.
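
For reference, all the VFS shows the filesystem is roughly this (from
include/linux/fs.h):

	struct dir_context {
		filldir_t actor;
		loff_t pos;
	};

The user buffer pointer and the remaining byte count live in the getdents
callback structure private to fs/readdir.c, which is why we never see how
much room is left.
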
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]
On Fri, Nov 6, 2020 at 10:05 AM Trond Myklebust <[email protected]> wrote:
>
> On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote:
> > [...]
>
> Actually for uncached readdir, I was thinking we might want to convert
> nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an
> array of pages + buffer size.
> IOW: convert uncached_readdir() to allocate an array of pages, and pass
> in a 'struct page **' + a buffer length.
>
Yes, I agree; that looks more like the right way to fix it than the
single-page approach.
> [...]
>