2020-04-15 21:40:45

by Matthew Wilcox

[permalink] [raw]
Subject: [PATCH v11 24/25] fuse: Convert from readpages to readahead

From: "Matthew Wilcox (Oracle)" <[email protected]>

Implement the new readahead operation in fuse by using __readahead_batch()
to fill the array of pages in fuse_args_pages directly. This lets us
inline fuse_readpages_fill() into fuse_readahead().

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
Reviewed-by: Dave Chinner <[email protected]>
Reviewed-by: William Kucharski <[email protected]>
---
fs/fuse/file.c | 99 ++++++++++++++------------------------------------
1 file changed, 27 insertions(+), 72 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9d67b830fb7a..db82fb29dd39 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -915,84 +915,39 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
fuse_readpages_end(fc, &ap->args, err);
}

-struct fuse_fill_data {
- struct fuse_io_args *ia;
- struct file *file;
- struct inode *inode;
- unsigned int nr_pages;
- unsigned int max_pages;
-};
-
-static int fuse_readpages_fill(void *_data, struct page *page)
+static void fuse_readahead(struct readahead_control *rac)
{
- struct fuse_fill_data *data = _data;
- struct fuse_io_args *ia = data->ia;
- struct fuse_args_pages *ap = &ia->ap;
- struct inode *inode = data->inode;
+ struct inode *inode = rac->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ unsigned int i, max_pages, nr_pages = 0;

- fuse_wait_on_page_writeback(inode, page->index);
-
- if (ap->num_pages &&
- (ap->num_pages == fc->max_pages ||
- (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
- ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
- data->max_pages = min_t(unsigned int, data->nr_pages,
- fc->max_pages);
- fuse_send_readpages(ia, data->file);
- data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
- if (!ia) {
- unlock_page(page);
- return -ENOMEM;
- }
- ap = &ia->ap;
- }
-
- if (WARN_ON(ap->num_pages >= data->max_pages)) {
- unlock_page(page);
- fuse_io_free(ia);
- return -EIO;
- }
-
- get_page(page);
- ap->pages[ap->num_pages] = page;
- ap->descs[ap->num_pages].length = PAGE_SIZE;
- ap->num_pages++;
- data->nr_pages--;
- return 0;
-}
-
-static int fuse_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
-{
- struct inode *inode = mapping->host;
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_fill_data data;
- int err;
-
- err = -EIO;
if (is_bad_inode(inode))
- goto out;
+ return;

- data.file = file;
- data.inode = inode;
- data.nr_pages = nr_pages;
- data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
-;
- data.ia = fuse_io_alloc(NULL, data.max_pages);
- err = -ENOMEM;
- if (!data.ia)
- goto out;
+ max_pages = min(fc->max_pages, fc->max_read / PAGE_SIZE);

- err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
- if (!err) {
- if (data.ia->ap.num_pages)
- fuse_send_readpages(data.ia, file);
- else
- fuse_io_free(data.ia);
+ for (;;) {
+ struct fuse_io_args *ia;
+ struct fuse_args_pages *ap;
+
+ nr_pages = readahead_count(rac) - nr_pages;
+ if (nr_pages > max_pages)
+ nr_pages = max_pages;
+ if (nr_pages == 0)
+ break;
+ ia = fuse_io_alloc(NULL, nr_pages);
+ if (!ia)
+ return;
+ ap = &ia->ap;
+ nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
+ for (i = 0; i < nr_pages; i++) {
+ fuse_wait_on_page_writeback(inode,
+ readahead_index(rac) + i);
+ ap->descs[i].length = PAGE_SIZE;
+ }
+ ap->num_pages = nr_pages;
+ fuse_send_readpages(ia, rac->file);
}
-out:
- return err;
}

static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -3373,10 +3328,10 @@ static const struct file_operations fuse_file_operations = {

static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
+ .readahead = fuse_readahead,
.writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_page = fuse_launder_page,
- .readpages = fuse_readpages,
.set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
--
2.25.1


2020-04-20 11:15:38

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH v11 24/25] fuse: Convert from readpages to readahead

On Tue, Apr 14, 2020 at 5:08 PM Matthew Wilcox <[email protected]> wrote:
>
> From: "Matthew Wilcox (Oracle)" <[email protected]>
>
> Implement the new readahead operation in fuse by using __readahead_batch()
> to fill the array of pages in fuse_args_pages directly. This lets us
> inline fuse_readpages_fill() into fuse_readahead().
>
> Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
> Reviewed-by: Dave Chinner <[email protected]>
> Reviewed-by: William Kucharski <[email protected]>
> ---
> fs/fuse/file.c | 99 ++++++++++++++------------------------------------
> 1 file changed, 27 insertions(+), 72 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 9d67b830fb7a..db82fb29dd39 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -915,84 +915,39 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file)
> fuse_readpages_end(fc, &ap->args, err);
> }
>
> -struct fuse_fill_data {
> - struct fuse_io_args *ia;
> - struct file *file;
> - struct inode *inode;
> - unsigned int nr_pages;
> - unsigned int max_pages;
> -};
> -
> -static int fuse_readpages_fill(void *_data, struct page *page)
> +static void fuse_readahead(struct readahead_control *rac)
> {
> - struct fuse_fill_data *data = _data;
> - struct fuse_io_args *ia = data->ia;
> - struct fuse_args_pages *ap = &ia->ap;
> - struct inode *inode = data->inode;
> + struct inode *inode = rac->mapping->host;
> struct fuse_conn *fc = get_fuse_conn(inode);
> + unsigned int i, max_pages, nr_pages = 0;
>
> - fuse_wait_on_page_writeback(inode, page->index);
> -
> - if (ap->num_pages &&
> - (ap->num_pages == fc->max_pages ||
> - (ap->num_pages + 1) * PAGE_SIZE > fc->max_read ||
> - ap->pages[ap->num_pages - 1]->index + 1 != page->index)) {
> - data->max_pages = min_t(unsigned int, data->nr_pages,
> - fc->max_pages);
> - fuse_send_readpages(ia, data->file);
> - data->ia = ia = fuse_io_alloc(NULL, data->max_pages);
> - if (!ia) {
> - unlock_page(page);
> - return -ENOMEM;
> - }
> - ap = &ia->ap;
> - }
> -
> - if (WARN_ON(ap->num_pages >= data->max_pages)) {
> - unlock_page(page);
> - fuse_io_free(ia);
> - return -EIO;
> - }
> -
> - get_page(page);
> - ap->pages[ap->num_pages] = page;
> - ap->descs[ap->num_pages].length = PAGE_SIZE;
> - ap->num_pages++;
> - data->nr_pages--;
> - return 0;
> -}
> -
> -static int fuse_readpages(struct file *file, struct address_space *mapping,
> - struct list_head *pages, unsigned nr_pages)
> -{
> - struct inode *inode = mapping->host;
> - struct fuse_conn *fc = get_fuse_conn(inode);
> - struct fuse_fill_data data;
> - int err;
> -
> - err = -EIO;
> if (is_bad_inode(inode))
> - goto out;
> + return;
>
> - data.file = file;
> - data.inode = inode;
> - data.nr_pages = nr_pages;
> - data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages);
> -;
> - data.ia = fuse_io_alloc(NULL, data.max_pages);
> - err = -ENOMEM;
> - if (!data.ia)
> - goto out;
> + max_pages = min(fc->max_pages, fc->max_read / PAGE_SIZE);
>
> - err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
> - if (!err) {
> - if (data.ia->ap.num_pages)
> - fuse_send_readpages(data.ia, file);
> - else
> - fuse_io_free(data.ia);
> + for (;;) {
> + struct fuse_io_args *ia;
> + struct fuse_args_pages *ap;
> +
> + nr_pages = readahead_count(rac) - nr_pages;

Hmm. I see what's going on here, but it's confusing. Why is
__readahead_batch() decrementing the readahead count at the start,
rather than at the end?

At the very least it needs a comment about why nr_pages is calculated this way.

> + if (nr_pages > max_pages)
> + nr_pages = max_pages;
> + if (nr_pages == 0)
> + break;
> + ia = fuse_io_alloc(NULL, nr_pages);
> + if (!ia)
> + return;
> + ap = &ia->ap;
> + nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
> + for (i = 0; i < nr_pages; i++) {
> + fuse_wait_on_page_writeback(inode,
> + readahead_index(rac) + i);

What's wrong with ap->pages[i]->index? Are we trying to wean off using ->index?

Thanks,
Miklos

2020-04-20 11:46:45

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v11 24/25] fuse: Convert from readpages to readahead

On Mon, Apr 20, 2020 at 01:14:17PM +0200, Miklos Szeredi wrote:
> > + for (;;) {
> > + struct fuse_io_args *ia;
> > + struct fuse_args_pages *ap;
> > +
> > + nr_pages = readahead_count(rac) - nr_pages;
>
> Hmm. I see what's going on here, but it's confusing. Why is
> __readahead_batch() decrementing the readahead count at the start,
> rather than at the end?
>
> At the very least it needs a comment about why nr_pages is calculated this way.

Because usually that's what we want. See, for example, fs/mpage.c:

	while ((page = readahead_page(rac))) {
		prefetchw(&page->flags);
		args.page = page;
		args.nr_pages = readahead_count(rac);
		args.bio = do_mpage_readpage(&args);
		put_page(page);
	}

fuse is different because it's trying to allocate for the next batch,
not for the batch we're currently on.

I'm a little annoyed because I posted almost this exact loop here:

https://lore.kernel.org/linux-fsdevel/CAJfpegtrhGamoSqD-3Svfj3-iTdAbfD8TP44H_o+HE+g+CAnCA@mail.gmail.com/

and you said "I think that's fine", modified only by your concern
for it not being obvious that nr_pages couldn't be decremented by
__readahead_batch(), so I modified the loop slightly to assign to
nr_pages. The part you're now complaining about is unchanged.

> > + if (nr_pages > max_pages)
> > + nr_pages = max_pages;
> > + if (nr_pages == 0)
> > + break;
> > + ia = fuse_io_alloc(NULL, nr_pages);
> > + if (!ia)
> > + return;
> > + ap = &ia->ap;
> > + nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
> > + for (i = 0; i < nr_pages; i++) {
> > + fuse_wait_on_page_writeback(inode,
> > + readahead_index(rac) + i);
>
> What's wrong with ap->pages[i]->index? Are we trying to wean off using ->index?

It saves reading from a cacheline? I wouldn't be surprised if the
compiler hoisted the read from rac->_index to outside the loop and just
iterated from rac->_index to rac->_index + nr_pages.

2020-04-20 11:56:24

by Miklos Szeredi

[permalink] [raw]
Subject: Re: [PATCH v11 24/25] fuse: Convert from readpages to readahead

On Mon, Apr 20, 2020 at 1:43 PM Matthew Wilcox <[email protected]> wrote:
>
> On Mon, Apr 20, 2020 at 01:14:17PM +0200, Miklos Szeredi wrote:
> > > + for (;;) {
> > > + struct fuse_io_args *ia;
> > > + struct fuse_args_pages *ap;
> > > +
> > > + nr_pages = readahead_count(rac) - nr_pages;
> >
> > Hmm. I see what's going on here, but it's confusing. Why is
> > __readahead_batch() decrementing the readahead count at the start,
> > rather than at the end?
> >
> > At the very least it needs a comment about why nr_pages is calculated this way.
>
> Because usually that's what we want. See, for example, fs/mpage.c:
>
> 	while ((page = readahead_page(rac))) {
> 		prefetchw(&page->flags);
> 		args.page = page;
> 		args.nr_pages = readahead_count(rac);
> 		args.bio = do_mpage_readpage(&args);
> 		put_page(page);
> 	}
>
> fuse is different because it's trying to allocate for the next batch,
> not for the batch we're currently on.
>
> I'm a little annoyed because I posted almost this exact loop here:
>
> https://lore.kernel.org/linux-fsdevel/CAJfpegtrhGamoSqD-3Svfj3-iTdAbfD8TP44H_o+HE+g+CAnCA@mail.gmail.com/
>
> and you said "I think that's fine", modified only by your concern
> for it not being obvious that nr_pages couldn't be decremented by
> __readahead_batch(), so I modified the loop slightly to assign to
> nr_pages. The part you're now complaining about is unchanged.

Your annoyance is perfectly understandable. This is something I
noticed now, not back then.

>
> > > + if (nr_pages > max_pages)
> > > + nr_pages = max_pages;
> > > + if (nr_pages == 0)
> > > + break;
> > > + ia = fuse_io_alloc(NULL, nr_pages);
> > > + if (!ia)
> > > + return;
> > > + ap = &ia->ap;
> > > + nr_pages = __readahead_batch(rac, ap->pages, nr_pages);
> > > + for (i = 0; i < nr_pages; i++) {
> > > + fuse_wait_on_page_writeback(inode,
> > > + readahead_index(rac) + i);
> >
> > What's wrong with ap->pages[i]->index? Are we trying to wean off using ->index?
>
> It saves reading from a cacheline? I wouldn't be surprised if the
> compiler hoisted the read from rac->_index to outside the loop and just
> iterated from rac->_index to rac->_index + nr_pages.

Hah, if such optimizations were worth anything with codepaths
involving roundtrips to userspace...

Anyway, I'll let these be, and maybe clean them up later.

Acked-by: Miklos Szeredi <[email protected]>

Thanks,
Miklos