Hi Willy,
Here's a set of patches to expand the use of the readahead_control struct,
essentially from do_sync_mmap_readahead() down. Note that I've been
passing the number of pages to read in rac->_nr_pages, and then saving it
and changing it certain points, e.g. page_cache_readahead_unbounded().
Also pass file_ra_state into force_page_cache_readahead().
Also there's an apparent minor bug in khugepaged.c that I've included a
patch for: page_cache_sync_readahead() looks to be given the wrong size in
collapse_file().
David
---
David Howells (7):
Fix khugepaged's request size in collapse_file()
mm: Make ondemand_readahead() take a readahead_control struct
mm: Push readahead_control down into force_page_cache_readahead()
mm: Pass readahead_control into page_cache_{sync,async}_readahead()
mm: Make __do_page_cache_readahead() use rac->_nr_pages
mm: Fold ra_submit() into do_sync_mmap_readahead()
mm: Pass a file_ra_state struct into force_page_cache_readahead()
fs/btrfs/free-space-cache.c | 7 +--
fs/btrfs/ioctl.c | 10 +++--
fs/btrfs/relocation.c | 14 +++---
fs/btrfs/send.c | 15 ++++---
fs/ext4/dir.c | 12 ++---
fs/ext4/verity.c | 8 ++--
fs/f2fs/dir.c | 10 +++--
fs/f2fs/verity.c | 8 ++--
include/linux/pagemap.h | 11 ++---
mm/fadvise.c | 6 ++-
mm/filemap.c | 33 +++++++-------
mm/internal.h | 16 +------
mm/khugepaged.c | 6 +--
mm/readahead.c | 89 ++++++++++++++++++-------------------
14 files changed, 127 insertions(+), 118 deletions(-)
collapse_file() in khugepaged passes PAGE_SIZE as the number of pages to be
read ahead to page_cache_sync_readahead(). It seems this was expressed as a
number of bytes rather than a number of pages.
Fix it to use the number of pages to the end of the window instead.
Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS")
Signed-off-by: David Howells <[email protected]>
cc: Matthew Wilcox <[email protected]>
cc: Song Liu <[email protected]>
---
mm/khugepaged.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6d199c353281..f2d243077b74 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1706,7 +1706,7 @@ static void collapse_file(struct mm_struct *mm,
xas_unlock_irq(&xas);
page_cache_sync_readahead(mapping, &file->f_ra,
file, index,
- PAGE_SIZE);
+ end - index);
/* drain pagevecs to help isolate_lru_page() */
lru_add_drain();
page = find_lock_page(mapping, index);
Make ondemand_readahead() take a readahead_control struct in preparation
for making do_sync_mmap_readahead() pass down an RAC struct.
Signed-off-by: David Howells <[email protected]>
---
mm/readahead.c | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/mm/readahead.c b/mm/readahead.c
index 91859e6e2b7d..0e16fb4809f5 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -511,14 +511,15 @@ static bool page_cache_readahead_order(struct readahead_control *rac,
/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
-static void ondemand_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- struct page *page, pgoff_t index, unsigned long req_size)
+static void ondemand_readahead(struct readahead_control *rac,
+ struct file_ra_state *ra,
+ struct page *page)
{
- DEFINE_READAHEAD(rac, file, mapping, index);
- struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+ struct backing_dev_info *bdi = inode_to_bdi(rac->mapping->host);
unsigned long max_pages = ra->ra_pages;
unsigned long add_pages;
+ unsigned long req_size = rac->_nr_pages;
+ unsigned long index = rac->_index;
pgoff_t prev_index;
/*
@@ -556,7 +557,7 @@ static void ondemand_readahead(struct address_space *mapping,
pgoff_t start;
rcu_read_lock();
- start = page_cache_next_miss(mapping, index + 1, max_pages);
+ start = page_cache_next_miss(rac->mapping, index + 1, max_pages);
rcu_read_unlock();
if (!start || start - index > max_pages)
@@ -589,14 +590,14 @@ static void ondemand_readahead(struct address_space *mapping,
* Query the page cache and look for the traces(cached history pages)
* that a sequential stream would leave behind.
*/
- if (try_context_readahead(mapping, ra, index, req_size, max_pages))
+ if (try_context_readahead(rac->mapping, ra, index, req_size, max_pages))
goto readit;
/*
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
- __do_page_cache_readahead(&rac, req_size, 0);
+ __do_page_cache_readahead(rac, req_size, 0);
return;
initial_readahead:
@@ -622,10 +623,10 @@ static void ondemand_readahead(struct address_space *mapping,
}
}
- rac._index = ra->start;
- if (page && page_cache_readahead_order(&rac, ra, thp_order(page)))
+ rac->_index = ra->start;
+ if (page && page_cache_readahead_order(rac, ra, thp_order(page)))
return;
- __do_page_cache_readahead(&rac, ra->size, ra->async_size);
+ __do_page_cache_readahead(rac, ra->size, ra->async_size);
}
/**
@@ -645,6 +646,9 @@ void page_cache_sync_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *filp,
pgoff_t index, unsigned long req_count)
{
+ DEFINE_READAHEAD(rac, filp, mapping, index);
+ rac._nr_pages = req_count;
+
/* no read-ahead */
if (!ra->ra_pages)
return;
@@ -659,7 +663,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
}
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, NULL, index, req_count);
+ ondemand_readahead(&rac, ra, NULL);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
@@ -683,7 +687,10 @@ page_cache_async_readahead(struct address_space *mapping,
struct page *page, pgoff_t index,
unsigned long req_count)
{
- /* no read-ahead */
+ DEFINE_READAHEAD(rac, filp, mapping, index);
+ rac._nr_pages = req_count;
+
+ /* No Read-ahead */
if (!ra->ra_pages)
return;
@@ -705,7 +712,7 @@ page_cache_async_readahead(struct address_space *mapping,
return;
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, page, index, req_count);
+ ondemand_readahead(&rac, ra, page);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
Pass a file_ra_state struct into force_page_cache_readahead(). One caller
has one that should be passed in and the other doesn't, but the former
needs to pass its in.
Signed-off-by: David Howells <[email protected]>
---
mm/fadvise.c | 2 +-
mm/internal.h | 2 +-
mm/readahead.c | 6 +++---
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/mm/fadvise.c b/mm/fadvise.c
index b68d2f2959d5..2f1550279757 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -107,7 +107,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
DEFINE_READAHEAD(rac, file, mapping, start_index);
rac._nr_pages = nrpages;
- force_page_cache_readahead(&rac);
+ force_page_cache_readahead(&rac, &rac.file->f_ra);
}
break;
case POSIX_FADV_NOREUSE:
diff --git a/mm/internal.h b/mm/internal.h
index de3b2ce2743a..977ad7d81b1b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,7 +49,7 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
struct zap_details *details);
-void force_page_cache_readahead(struct readahead_control *);
+void force_page_cache_readahead(struct readahead_control *, struct file_ra_state *);
void __do_page_cache_readahead(struct readahead_control *, unsigned long);
/**
diff --git a/mm/readahead.c b/mm/readahead.c
index 28ff80304a21..b001720c13aa 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -273,11 +273,11 @@ void __do_page_cache_readahead(struct readahead_control *rac,
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
* memory at once.
*/
-void force_page_cache_readahead(struct readahead_control *rac)
+void force_page_cache_readahead(struct readahead_control *rac,
+ struct file_ra_state *ra)
{
struct address_space *mapping = rac->mapping;
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
- struct file_ra_state *ra = &rac->file->f_ra;
unsigned long max_pages, index, nr_to_read;
if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
@@ -657,7 +657,7 @@ void page_cache_sync_readahead(struct readahead_control *rac,
/* be dumb */
if (rac->file && (rac->file->f_mode & FMODE_RANDOM)) {
- force_page_cache_readahead(rac);
+ force_page_cache_readahead(rac, ra);
return;
}
Pass struct readahead_control into the page_cache_{sync,async}_readahead()
functions in preparation for making do_sync_mmap_readahead() pass down an
RAC struct.
Signed-off-by: David Howells <[email protected]>
---
fs/btrfs/free-space-cache.c | 7 ++++---
fs/btrfs/ioctl.c | 10 +++++++---
fs/btrfs/relocation.c | 14 ++++++++------
fs/btrfs/send.c | 15 +++++++++------
fs/ext4/dir.c | 12 +++++++-----
fs/f2fs/dir.c | 10 +++++++---
include/linux/pagemap.h | 8 +++-----
mm/filemap.c | 27 +++++++++++++++------------
mm/khugepaged.c | 6 +++---
mm/readahead.c | 40 ++++++++++++----------------------------
10 files changed, 75 insertions(+), 74 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index dc82fd0c80cb..0ca9361acf30 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -286,16 +286,17 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
static void readahead_cache(struct inode *inode)
{
struct file_ra_state *ra;
- unsigned long last_index;
+
+ DEFINE_READAHEAD(rac, NULL, inode->i_mapping, 0);
ra = kzalloc(sizeof(*ra), GFP_NOFS);
if (!ra)
return;
file_ra_state_init(ra, inode->i_mapping);
- last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+ rac._nr_pages = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+ page_cache_sync_readahead(&rac, ra);
kfree(ra);
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bd3511c5ca81..5025a6a800e9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1428,6 +1428,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
struct page **pages = NULL;
bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;
+ DEFINE_READAHEAD(rac, file, inode->i_mapping, 0);
+
if (isize == 0)
return 0;
@@ -1534,9 +1536,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (i + cluster > ra_index) {
ra_index = max(i, ra_index);
- if (ra)
- page_cache_sync_readahead(inode->i_mapping, ra,
- file, ra_index, cluster);
+ if (ra) {
+ rac._index = ra_index;
+ rac._nr_pages = cluster;
+ page_cache_sync_readahead(&rac, ra);
+ }
ra_index += cluster;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4ba1ab9cc76d..1979803fd475 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2684,6 +2684,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
int nr = 0;
int ret = 0;
+ DEFINE_READAHEAD(rac, NULL, inode->i_mapping, 0);
+
if (!cluster->nr)
return 0;
@@ -2712,9 +2714,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
page = find_lock_page(inode->i_mapping, index);
if (!page) {
- page_cache_sync_readahead(inode->i_mapping,
- ra, NULL, index,
- last_index + 1 - index);
+ rac._index = index;
+ rac._nr_pages = last_index + 1 - index;
+ page_cache_sync_readahead(&rac, ra);
page = find_or_create_page(inode->i_mapping, index,
mask);
if (!page) {
@@ -2728,9 +2730,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
}
if (PageReadahead(page)) {
- page_cache_async_readahead(inode->i_mapping,
- ra, NULL, page, index,
- last_index + 1 - index);
+ rac._index = index;
+ rac._nr_pages = last_index + 1 - index;
+ page_cache_async_readahead(&rac, ra, page);
}
if (!PageUptodate(page)) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d9813a5b075a..ee0a9a2b5d08 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4811,6 +4811,8 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
unsigned pg_offset = offset_in_page(offset);
ssize_t ret = 0;
+ DEFINE_READAHEAD(rac, NULL, NULL, 0);
+
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -4829,15 +4831,18 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
/* initial readahead */
memset(&sctx->ra, 0, sizeof(struct file_ra_state));
file_ra_state_init(&sctx->ra, inode->i_mapping);
+ rac.mapping = inode->i_mapping;
while (index <= last_index) {
unsigned cur_len = min_t(unsigned, len,
PAGE_SIZE - pg_offset);
+ rac._index = index;
+ rac._nr_pages = last_index + 1 - index;
+
page = find_lock_page(inode->i_mapping, index);
if (!page) {
- page_cache_sync_readahead(inode->i_mapping, &sctx->ra,
- NULL, index, last_index + 1 - index);
+ page_cache_sync_readahead(&rac, &sctx->ra);
page = find_or_create_page(inode->i_mapping, index,
GFP_KERNEL);
@@ -4847,10 +4852,8 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
}
}
- if (PageReadahead(page)) {
- page_cache_async_readahead(inode->i_mapping, &sctx->ra,
- NULL, page, index, last_index + 1 - index);
- }
+ if (PageReadahead(page))
+ page_cache_async_readahead(&rac, &sctx->ra, page);
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd4..6205c6830454 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -118,6 +118,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
+ DEFINE_READAHEAD(rac, file, sb->s_bdev->bd_inode->i_mapping, 0);
+
if (IS_ENCRYPTED(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err)
@@ -176,11 +178,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
if (err > 0) {
pgoff_t index = map.m_pblk >>
(PAGE_SHIFT - inode->i_blkbits);
- if (!ra_has_index(&file->f_ra, index))
- page_cache_sync_readahead(
- sb->s_bdev->bd_inode->i_mapping,
- &file->f_ra, file,
- index, 1);
+ if (!ra_has_index(&file->f_ra, index)) {
+ rac._index = index;
+ rac._nr_pages = 1;
+ page_cache_sync_readahead(&rac, &file->f_ra);
+ }
file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh)) {
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 069f498af1e3..982f6d37454a 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1027,6 +1027,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
int err = 0;
+ DEFINE_READAHEAD(rac, file, inode->i_mapping, 0);
+
if (IS_ENCRYPTED(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err)
@@ -1052,9 +1054,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
cond_resched();
/* readahead for multi pages of dir */
- if (npages - n > 1 && !ra_has_index(ra, n))
- page_cache_sync_readahead(inode->i_mapping, ra, file, n,
- min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
+ if (npages - n > 1 && !ra_has_index(ra, n)) {
+ rac._index = n;
+ rac._nr_pages = min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES);
+ page_cache_sync_readahead(&rac, ra);
+ }
dentry_page = f2fs_find_data_page(inode, n);
if (IS_ERR(dentry_page)) {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8bf048a76c43..cd7bde29d4cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -769,11 +769,9 @@ void delete_from_page_cache_batch(struct address_space *mapping,
#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
-void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
- struct file *, pgoff_t index, unsigned long req_count);
-void page_cache_async_readahead(struct address_space *, struct file_ra_state *,
- struct file *, struct page *, pgoff_t index,
- unsigned long req_count);
+void page_cache_sync_readahead(struct readahead_control *, struct file_ra_state *);
+void page_cache_async_readahead(struct readahead_control *, struct file_ra_state *,
+ struct page *);
void page_cache_readahead_unbounded(struct readahead_control *,
unsigned long nr_to_read, unsigned long lookahead_count);
diff --git a/mm/filemap.c b/mm/filemap.c
index 82b97cf4306c..9f2f99db7318 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2070,6 +2070,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
unsigned int prev_offset;
int error = 0;
+ DEFINE_READAHEAD(rac, filp, mapping, 0);
+
if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
return 0;
iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
@@ -2097,9 +2099,9 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
if (!page) {
if (iocb->ki_flags & IOCB_NOIO)
goto would_block;
- page_cache_sync_readahead(mapping,
- ra, filp,
- index, last_index - index);
+ rac._index = index;
+ rac._nr_pages = last_index - index;
+ page_cache_sync_readahead(&rac, ra);
page = find_get_page(mapping, index);
if (unlikely(page == NULL))
goto no_cached_page;
@@ -2109,9 +2111,9 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
put_page(page);
goto out;
}
- page_cache_async_readahead(mapping,
- ra, filp, thp_head(page),
- index, last_index - index);
+ rac._index = index;
+ rac._nr_pages = last_index - index;
+ page_cache_async_readahead(&rac, ra, thp_head(page));
}
if (!PageUptodate(page)) {
/*
@@ -2469,6 +2471,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
pgoff_t offset = vmf->pgoff;
unsigned int mmap_miss;
+ DEFINE_READAHEAD(rac, file, mapping, offset);
+
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ)
return fpin;
@@ -2477,8 +2481,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
if (vmf->vma->vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_sync_readahead(mapping, ra, file, offset,
- ra->ra_pages);
+ rac._nr_pages = ra->ra_pages;
+ page_cache_sync_readahead(&rac, ra);
return fpin;
}
@@ -2515,10 +2519,10 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
{
struct file *file = vmf->vma->vm_file;
struct file_ra_state *ra = &file->f_ra;
- struct address_space *mapping = file->f_mapping;
struct file *fpin = NULL;
unsigned int mmap_miss;
- pgoff_t offset = vmf->pgoff;
+
+ DEFINE_READAHEAD(rac, file, file->f_mapping, vmf->pgoff);
/* If we don't want any read-ahead, don't bother */
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
@@ -2528,8 +2532,7 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
WRITE_ONCE(ra->mmap_miss, --mmap_miss);
if (PageReadahead(thp_head(page))) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- page_cache_async_readahead(mapping, ra, file,
- thp_head(page), offset, ra->ra_pages);
+ page_cache_async_readahead(&rac, ra, thp_head(page));
}
return fpin;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f2d243077b74..0bece7ab0ce7 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1703,10 +1703,10 @@ static void collapse_file(struct mm_struct *mm,
}
} else { /* !is_shmem */
if (!page || xa_is_value(page)) {
+ DEFINE_READAHEAD(rac, file, mapping, index);
+ rac._nr_pages = end - index;
xas_unlock_irq(&xas);
- page_cache_sync_readahead(mapping, &file->f_ra,
- file, index,
- end - index);
+ page_cache_sync_readahead(&rac, &file->f_ra);
/* drain pagevecs to help isolate_lru_page() */
lru_add_drain();
page = find_lock_page(mapping, index);
diff --git a/mm/readahead.c b/mm/readahead.c
index e557c6d5a183..7114246b4e41 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -635,24 +635,17 @@ static void ondemand_readahead(struct readahead_control *rac,
/**
* page_cache_sync_readahead - generic file readahead
- * @mapping: address_space which holds the pagecache and I/O vectors
+ * @rac: Readahead control.
* @ra: file_ra_state which holds the readahead state
- * @filp: passed on to ->readpage() and ->readpages()
- * @index: Index of first page to be read.
- * @req_count: Total number of pages being read by the caller.
*
* page_cache_sync_readahead() should be called when a cache miss happened:
* it will submit the read. The readahead logic may decide to piggyback more
* pages onto the read request if access patterns suggest it will improve
* performance.
*/
-void page_cache_sync_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- pgoff_t index, unsigned long req_count)
+void page_cache_sync_readahead(struct readahead_control *rac,
+ struct file_ra_state *ra)
{
- DEFINE_READAHEAD(rac, filp, mapping, index);
- rac._nr_pages = req_count;
-
/* no read-ahead */
if (!ra->ra_pages)
return;
@@ -661,39 +654,30 @@ void page_cache_sync_readahead(struct address_space *mapping,
return;
/* be dumb */
- if (filp && (filp->f_mode & FMODE_RANDOM)) {
- force_page_cache_readahead(&rac);
+ if (rac->file && (rac->file->f_mode & FMODE_RANDOM)) {
+ force_page_cache_readahead(rac);
return;
}
/* do read-ahead */
- ondemand_readahead(&rac, ra, NULL);
+ ondemand_readahead(rac, ra, NULL);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
/**
* page_cache_async_readahead - file readahead for marked pages
- * @mapping: address_space which holds the pagecache and I/O vectors
+ * @rac: Readahead control.
* @ra: file_ra_state which holds the readahead state
- * @filp: passed on to ->readpage() and ->readpages()
- * @page: The page at @index which triggered the readahead call.
- * @index: Index of first page to be read.
- * @req_count: Total number of pages being read by the caller.
*
* page_cache_async_readahead() should be called when a page is used which
* is marked as PageReadahead; this is a marker to suggest that the application
* has used up enough of the readahead window that we should start pulling in
* more pages.
*/
-void
-page_cache_async_readahead(struct address_space *mapping,
- struct file_ra_state *ra, struct file *filp,
- struct page *page, pgoff_t index,
- unsigned long req_count)
+void page_cache_async_readahead(struct readahead_control *rac,
+ struct file_ra_state *ra,
+ struct page *page)
{
- DEFINE_READAHEAD(rac, filp, mapping, index);
- rac._nr_pages = req_count;
-
/* No Read-ahead */
if (!ra->ra_pages)
return;
@@ -709,14 +693,14 @@ page_cache_async_readahead(struct address_space *mapping,
/*
* Defer asynchronous read-ahead on IO congestion.
*/
- if (inode_read_congested(mapping->host))
+ if (inode_read_congested(rac->mapping->host))
return;
if (blk_cgroup_congested())
return;
/* do read-ahead */
- ondemand_readahead(&rac, ra, page);
+ ondemand_readahead(rac, ra, page);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);
Push readahead_control down into force_page_cache_readahead() from its
callers in preparation for making do_sync_mmap_readahead() pass down an RAC
struct.
Signed-off-by: David Howells <[email protected]>
---
mm/fadvise.c | 6 +++++-
mm/internal.h | 3 +--
mm/readahead.c | 20 ++++++++++++--------
3 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 0e66f2aaeea3..b68d2f2959d5 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -104,7 +104,11 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
if (!nrpages)
nrpages = ~0UL;
- force_page_cache_readahead(mapping, file, start_index, nrpages);
+ {
+ DEFINE_READAHEAD(rac, file, mapping, start_index);
+ rac._nr_pages = nrpages;
+ force_page_cache_readahead(&rac);
+ }
break;
case POSIX_FADV_NOREUSE:
break;
diff --git a/mm/internal.h b/mm/internal.h
index bf2bee6c42a1..2eb9f7f5f134 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,8 +49,7 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long addr, unsigned long end,
struct zap_details *details);
-void force_page_cache_readahead(struct address_space *, struct file *,
- pgoff_t index, unsigned long nr_to_read);
+void force_page_cache_readahead(struct readahead_control *);
void __do_page_cache_readahead(struct readahead_control *,
unsigned long nr_to_read, unsigned long lookahead_size);
diff --git a/mm/readahead.c b/mm/readahead.c
index 0e16fb4809f5..e557c6d5a183 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -271,13 +271,12 @@ void __do_page_cache_readahead(struct readahead_control *rac,
* Chunk the readahead into 2 megabyte units, so that we don't pin too much
* memory at once.
*/
-void force_page_cache_readahead(struct address_space *mapping,
- struct file *file, pgoff_t index, unsigned long nr_to_read)
+void force_page_cache_readahead(struct readahead_control *rac)
{
- DEFINE_READAHEAD(rac, file, mapping, index);
+ struct address_space *mapping = rac->mapping;
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
- struct file_ra_state *ra = &file->f_ra;
- unsigned long max_pages;
+ struct file_ra_state *ra = &rac->file->f_ra;
+ unsigned long max_pages, index, nr_to_read;
if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages &&
!mapping->a_ops->readahead))
@@ -287,14 +286,19 @@ void force_page_cache_readahead(struct address_space *mapping,
* If the request exceeds the readahead window, allow the read to
* be up to the optimal hardware IO size
*/
+ index = readahead_index(rac);
max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
- nr_to_read = min(nr_to_read, max_pages);
+ nr_to_read = min_t(unsigned long, readahead_count(rac), max_pages);
while (nr_to_read) {
unsigned long this_chunk = (2 * 1024 * 1024) / PAGE_SIZE;
if (this_chunk > nr_to_read)
this_chunk = nr_to_read;
- __do_page_cache_readahead(&rac, this_chunk, 0);
+
+ rac->_index = index;
+ rac->_nr_pages = this_chunk;
+ // Do I need to modify rac->_batch_count?
+ __do_page_cache_readahead(rac, this_chunk, 0);
index += this_chunk;
nr_to_read -= this_chunk;
@@ -658,7 +662,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
/* be dumb */
if (filp && (filp->f_mode & FMODE_RANDOM)) {
- force_page_cache_readahead(mapping, filp, index, req_count);
+ force_page_cache_readahead(&rac);
return;
}
Make __do_page_cache_readahead() use rac->_nr_pages rather than passing in
nr_to_read argument.
Signed-off-by: David Howells <[email protected]>
---
fs/ext4/verity.c | 8 +++++---
fs/f2fs/verity.c | 8 +++++---
include/linux/pagemap.h | 3 +--
mm/internal.h | 6 +++---
mm/readahead.c | 20 +++++++++++---------
5 files changed, 25 insertions(+), 20 deletions(-)
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 6fc2dbc87c0b..3d377110e839 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -356,10 +356,12 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
if (!page || !PageUptodate(page)) {
- if (page)
+ if (page) {
put_page(page);
- else if (num_ra_pages > 1)
- page_cache_readahead_unbounded(&rac, num_ra_pages, 0);
+ } else if (num_ra_pages > 1) {
+ rac._nr_pages = num_ra_pages;
+ page_cache_readahead_unbounded(&rac, 0);
+ }
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 392dd07f4214..8445eed5a1bc 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -235,10 +235,12 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
if (!page || !PageUptodate(page)) {
- if (page)
+ if (page) {
put_page(page);
- else if (num_ra_pages > 1)
- page_cache_readahead_unbounded(&rac, num_ra_pages, 0);
+ } else if (num_ra_pages > 1) {
+ rac._nr_pages = num_ra_pages;
+ page_cache_readahead_unbounded(&rac, 0);
+ }
page = read_mapping_page(inode->i_mapping, index, NULL);
}
return page;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cd7bde29d4cc..72e9c44d62bb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -772,8 +772,7 @@ void delete_from_page_cache_batch(struct address_space *mapping,
void page_cache_sync_readahead(struct readahead_control *, struct file_ra_state *);
void page_cache_async_readahead(struct readahead_control *, struct file_ra_state *,
struct page *);
-void page_cache_readahead_unbounded(struct readahead_control *,
- unsigned long nr_to_read, unsigned long lookahead_count);
+void page_cache_readahead_unbounded(struct readahead_control *, unsigned long);
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
diff --git a/mm/internal.h b/mm/internal.h
index 2eb9f7f5f134..e1d296e76fb0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -50,8 +50,7 @@ void unmap_page_range(struct mmu_gather *tlb,
struct zap_details *details);
void force_page_cache_readahead(struct readahead_control *);
-void __do_page_cache_readahead(struct readahead_control *,
- unsigned long nr_to_read, unsigned long lookahead_size);
+void __do_page_cache_readahead(struct readahead_control *, unsigned long);
/*
* Submit IO for the read-ahead request in file_ra_state.
@@ -60,7 +59,8 @@ static inline void ra_submit(struct file_ra_state *ra,
struct address_space *mapping, struct file *file)
{
DEFINE_READAHEAD(rac, file, mapping, ra->start);
- __do_page_cache_readahead(&rac, ra->size, ra->async_size);
+ rac._nr_pages = ra->size;
+ __do_page_cache_readahead(&rac, ra->async_size);
}
/**
diff --git a/mm/readahead.c b/mm/readahead.c
index 7114246b4e41..28ff80304a21 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -172,10 +172,11 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages,
* May sleep, but will not reenter filesystem to reclaim memory.
*/
void page_cache_readahead_unbounded(struct readahead_control *rac,
- unsigned long nr_to_read, unsigned long lookahead_size)
+ unsigned long lookahead_size)
{
struct address_space *mapping = rac->mapping;
unsigned long index = readahead_index(rac);
+ unsigned long nr_to_read = readahead_count(rac);
LIST_HEAD(page_pool);
gfp_t gfp_mask = readahead_gfp_mask(mapping);
unsigned long i;
@@ -195,6 +196,7 @@ void page_cache_readahead_unbounded(struct readahead_control *rac,
/*
* Preallocate as many pages as we will need.
*/
+ rac->_nr_pages = 0;
for (i = 0; i < nr_to_read; i++) {
struct page *page = xa_load(&mapping->i_pages, index + i);
@@ -247,7 +249,7 @@ EXPORT_SYMBOL_GPL(page_cache_readahead_unbounded);
* We really don't want to intermingle reads and writes like that.
*/
void __do_page_cache_readahead(struct readahead_control *rac,
- unsigned long nr_to_read, unsigned long lookahead_size)
+ unsigned long lookahead_size)
{
struct inode *inode = rac->mapping->host;
unsigned long index = readahead_index(rac);
@@ -261,10 +263,10 @@ void __do_page_cache_readahead(struct readahead_control *rac,
if (index > end_index)
return;
/* Don't read past the page containing the last byte of the file */
- if (nr_to_read > end_index - index)
- nr_to_read = end_index - index + 1;
+ if (readahead_count(rac) > end_index - index)
+ rac->_nr_pages = end_index - index + 1;
- page_cache_readahead_unbounded(rac, nr_to_read, lookahead_size);
+ page_cache_readahead_unbounded(rac, lookahead_size);
}
/*
@@ -297,8 +299,7 @@ void force_page_cache_readahead(struct readahead_control *rac)
rac->_index = index;
rac->_nr_pages = this_chunk;
- // Do I need to modify rac->_batch_count?
- __do_page_cache_readahead(rac, this_chunk, 0);
+ __do_page_cache_readahead(rac, 0);
index += this_chunk;
nr_to_read -= this_chunk;
@@ -601,7 +602,7 @@ static void ondemand_readahead(struct readahead_control *rac,
* standalone, small random read
* Read as is, and do not pollute the readahead state.
*/
- __do_page_cache_readahead(rac, req_size, 0);
+ __do_page_cache_readahead(rac, 0);
return;
initial_readahead:
@@ -630,7 +631,8 @@ static void ondemand_readahead(struct readahead_control *rac,
rac->_index = ra->start;
if (page && page_cache_readahead_order(rac, ra, thp_order(page)))
return;
- __do_page_cache_readahead(rac, ra->size, ra->async_size);
+ rac->_nr_pages = ra->size;
+ __do_page_cache_readahead(rac, ra->async_size);
}
/**
Fold ra_submit() into its last remaining user and pass the previously added
readahead_control struct down into __do_page_cache_readahead().
Signed-off-by: David Howells <[email protected]>
---
mm/filemap.c | 6 +++---
mm/internal.h | 11 -----------
2 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 9f2f99db7318..c22bb01e8ba6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2502,10 +2502,10 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
* mmap read-around
*/
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
- ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
- ra->size = ra->ra_pages;
+ ra->start = rac._index = max_t(long, 0, offset - ra->ra_pages / 2);
+ ra->size = rac._nr_pages = ra->ra_pages;
ra->async_size = ra->ra_pages / 4;
- ra_submit(ra, mapping, file);
+ __do_page_cache_readahead(&rac, ra->async_size);
return fpin;
}
diff --git a/mm/internal.h b/mm/internal.h
index e1d296e76fb0..de3b2ce2743a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -52,17 +52,6 @@ void unmap_page_range(struct mmu_gather *tlb,
void force_page_cache_readahead(struct readahead_control *);
void __do_page_cache_readahead(struct readahead_control *, unsigned long);
-/*
- * Submit IO for the read-ahead request in file_ra_state.
- */
-static inline void ra_submit(struct file_ra_state *ra,
- struct address_space *mapping, struct file *file)
-{
- DEFINE_READAHEAD(rac, file, mapping, ra->start);
- rac._nr_pages = ra->size;
- __do_page_cache_readahead(&rac, ra->async_size);
-}
-
/**
* page_evictable - test whether a page is evictable
* @page: the page to test
On Tue, Sep 01, 2020 at 05:28:15PM +0100, David Howells wrote:
>
> Hi Willy,
>
> Here's a set of patches to expand the use of the readahead_control struct,
> essentially from do_sync_mmap_readahead() down. Note that I've been
> passing the number of pages to read in rac->_nr_pages, and then saving it
> and changing it certain points, e.g. page_cache_readahead_unbounded().
>
> Also pass file_ra_state into force_page_cache_readahead().
>
> Also there's an apparent minor bug in khugepaged.c that I've included a
> patch for: page_cache_sync_readahead() looks to be given the wrong size in
> collapse_file().
>
What branch does this apply to?
- Eric
On Tue, Sep 01, 2020 at 09:41:32AM -0700, Eric Biggers wrote:
> On Tue, Sep 01, 2020 at 05:28:15PM +0100, David Howells wrote:
> >
> > Hi Willy,
> >
> > Here's a set of patches to expand the use of the readahead_control struct,
> > essentially from do_sync_mmap_readahead() down. Note that I've been
> > passing the number of pages to read in rac->_nr_pages, and then saving it
> > and changing it certain points, e.g. page_cache_readahead_unbounded().
> >
> > Also pass file_ra_state into force_page_cache_readahead().
> >
> > Also there's an apparent minor bug in khugepaged.c that I've included a
> > patch for: page_cache_sync_readahead() looks to be given the wrong size in
> > collapse_file().
> >
>
> What branch does this apply to?
He's done it on top of http://git.infradead.org/users/willy/pagecache.git
I was hoping he'd include
http://git.infradead.org/users/willy/pagecache.git/commitdiff/c71de787328809026cfabbcc5485cb01caca8646
http://git.infradead.org/users/willy/pagecache.git/commitdiff/f3a1cd6447e29a54b03efc2189d943f12ac1c9b9
http://git.infradead.org/users/willy/pagecache.git/commitdiff/c03d3a5a5716bb0df2fe15ec528bbd895cd18e6e
as the first three patches in the series; then it should apply to Linus'
tree.
On Tue, Sep 01, 2020 at 05:28:15PM +0100, David Howells wrote:
> Here's a set of patches to expand the use of the readahead_control struct,
> essentially from do_sync_mmap_readahead() down.
I like this.
> Note that I've been
> passing the number of pages to read in rac->_nr_pages, and then saving it
> and changing it certain points, e.g. page_cache_readahead_unbounded().
I do not like this. You're essentially mutating the meaning of _nr_pages
as the ractl moves down the stack, and that's going to lead to bugs.
I'd keep it as a separate argument.
> Also there's an apparent minor bug in khugepaged.c that I've included a
> patch for: page_cache_sync_readahead() looks to be given the wrong size in
> collapse_file().
This needs to go in as an independent fix. But you didn't actually cc Song.
On Tue, Sep 1, 2020 at 9:28 AM David Howells <[email protected]> wrote:
>
> collapse_file() in khugepaged passes PAGE_SIZE as the number of pages to be
> read ahead to page_cache_sync_readahead(). It seems this was expressed as a
> number of bytes rather than a number of pages.
>
> Fix it to use the number of pages to the end of the window instead.
>
> Fixes: 99cb0dbd47a1 ("mm,thp: add read-only THP support for (non-shmem) FS")
> Signed-off-by: David Howells <[email protected]>
> cc: Matthew Wilcox <[email protected]>
> cc: Song Liu <[email protected]>
Thanks for the fix!
Acked-by: Song Liu <[email protected]>
> ---
>
> mm/khugepaged.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 6d199c353281..f2d243077b74 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1706,7 +1706,7 @@ static void collapse_file(struct mm_struct *mm,
> xas_unlock_irq(&xas);
> page_cache_sync_readahead(mapping, &file->f_ra,
> file, index,
> - PAGE_SIZE);
> + end - index);
> /* drain pagevecs to help isolate_lru_page() */
> lru_add_drain();
> page = find_lock_page(mapping, index);
>
>
>
Matthew Wilcox <[email protected]> wrote:
> > Also there's an apparent minor bug in khugepaged.c that I've included a
> > patch for: page_cache_sync_readahead() looks to be given the wrong size in
> > collapse_file().
>
> This needs to go in as an independent fix. But you didn't actually cc Song.
Bah. I forgot to pass --auto to stgit. No matter, he saw it anyway.
David
Matthew Wilcox <[email protected]> wrote:
> > Note that I've been
> > passing the number of pages to read in rac->_nr_pages, and then saving it
> > and changing it certain points, e.g. page_cache_readahead_unbounded().
>
> I do not like this. You're essentially mutating the meaning of _nr_pages
> as the ractl moves down the stack, and that's going to lead to bugs.
> I'd keep it as a separate argument.
The meaning isn't specified in linux/pagemap.h. Can you adjust the comments
on the struct and on readahead_count() to make it more clear what the value
represents?
Thanks,
David
On Tue, Sep 01, 2020 at 08:44:14PM +0100, David Howells wrote:
> Matthew Wilcox <[email protected]> wrote:
>
> > > Note that I've been
> > > passing the number of pages to read in rac->_nr_pages, and then saving it
> > > and changing it certain points, e.g. page_cache_readahead_unbounded().
> >
> > I do not like this. You're essentially mutating the meaning of _nr_pages
> > as the ractl moves down the stack, and that's going to lead to bugs.
> > I'd keep it as a separate argument.
>
> The meaning isn't specified in linux/pagemap.h. Can you adjust the comments
> on the struct and on readahead_count() to make it more clear what the value
> represents?
It's intentionally not documented. This documentation is for the
filesystem author, who should not be looking at it. Neither should
you :-P
Matthew Wilcox <[email protected]> wrote:
> He's done it on top of http://git.infradead.org/users/willy/pagecache.git
Sorry, yes, I should've mentioned that.
> I was hoping he'd include
> http://git.infradead.org/users/willy/pagecache.git/commitdiff/c71de787328809026cfabbcc5485cb01caca8646
> http://git.infradead.org/users/willy/pagecache.git/commitdiff/f3a1cd6447e29a54b03efc2189d943f12ac1c9b9
> http://git.infradead.org/users/willy/pagecache.git/commitdiff/c03d3a5a5716bb0df2fe15ec528bbd895cd18e6e
>
> as the first three patches in the series; then it should apply to Linus'
> tree.
Did you want me to carry those patches and pass them to Linus through my
fscache branch?
David