2022-06-06 03:58:35

by Matthew Wilcox

Subject: [PATCH 00/10] Convert to filemap_get_folios()

This patch series removes find_get_pages_range(), pagevec_lookup()
and pagevec_lookup_range(), converting all callers to use the new
filemap_get_folios(). I've only run xfstests over ext4, so some
additional testing might be appropriate.
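
For reference, every conversion in this series follows the same basic
loop. A minimal sketch of the pattern (process_folio() stands in for
whatever per-folio work the caller does; it is not a function from this
series):

    struct folio_batch fbatch;
    pgoff_t index = start;
    unsigned i;

    folio_batch_init(&fbatch);
    while (filemap_get_folios(mapping, &index, end, &fbatch)) {
        for (i = 0; i < folio_batch_count(&fbatch); i++) {
            struct folio *folio = fbatch.folios[i];

            /* Each folio is returned with an elevated refcount. */
            process_folio(folio);
        }
        /* Drop the references taken by filemap_get_folios(). */
        folio_batch_release(&fbatch);
        cond_resched();
    }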

Matthew Wilcox (Oracle) (10):
filemap: Add filemap_get_folios()
buffer: Convert clean_bdev_aliases() to use filemap_get_folios()
ext4: Convert mpage_release_unused_pages() to use filemap_get_folios()
ext4: Convert mpage_map_and_submit_buffers() to use
filemap_get_folios()
f2fs: Convert f2fs_invalidate_compress_pages() to use
filemap_get_folios()
hugetlbfs: Convert remove_inode_hugepages() to use
filemap_get_folios()
nilfs2: Convert nilfs_copy_back_pages() to use filemap_get_folios()
vmscan: Add check_move_unevictable_folios()
shmem: Convert shmem_unlock_mapping() to use filemap_get_folios()
filemap: Remove find_get_pages_range() and associated functions

fs/buffer.c | 26 +++++++--------
fs/ext4/inode.c | 40 ++++++++++++-----------
fs/f2fs/compress.c | 35 +++++++++-----------
fs/hugetlbfs/inode.c | 44 ++++++++-----------------
fs/nilfs2/page.c | 60 +++++++++++++++++-----------------
include/linux/pagemap.h | 5 ++-
include/linux/pagevec.h | 10 ------
include/linux/swap.h | 3 +-
mm/filemap.c | 72 +++++++++++++++++------------------------
mm/shmem.c | 13 ++++----
mm/swap.c | 29 -----------------
mm/vmscan.c | 55 ++++++++++++++++++-------------
12 files changed, 166 insertions(+), 226 deletions(-)

--
2.35.1


2022-06-06 03:59:51

by Matthew Wilcox

Subject: [PATCH 01/10] filemap: Add filemap_get_folios()

This is the equivalent of find_get_pages() but fills a folio_batch
instead of an array of pages.
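
As a worked example of the boundary behaviour (the cache contents here
are hypothetical): suppose the only folio in range is an order-2 folio
covering indices 4-7. Then

    pgoff_t start = 5;
    struct folio_batch fbatch;

    folio_batch_init(&fbatch);
    filemap_get_folios(mapping, &start, 10, &fbatch);

returns 1, fbatch.folios[0] is the folio with ->index == 4 (it contains
index 5), and start is advanced to 11 (end + 1) because the search ran
off the end of the range without filling the batch. The caller keeps
the elevated references until it calls folio_batch_release().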

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
include/linux/pagemap.h | 2 ++
mm/filemap.c | 55 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5555689ea809..50e57b2d845f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -718,6 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
return head + (index & (thp_nr_pages(head) - 1));
}

+unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
+ pgoff_t end, struct folio_batch *fbatch);
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
pgoff_t end, unsigned int nr_pages,
struct page **pages);
diff --git a/mm/filemap.c b/mm/filemap.c
index 1e66eea98a7e..ea4145b7a84c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2127,6 +2127,61 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
return folio_batch_count(fbatch);
}

+/**
+ * filemap_get_folios - Get a batch of folios
+ * @mapping: The address_space to search
+ * @start: The starting page index
+ * @end: The final page index (inclusive)
+ * @fbatch: The batch to fill.
+ *
+ * Search for and return a batch of folios in the mapping starting at
+ * index @start and up to index @end (inclusive). The folios are returned
+ * in @fbatch with an elevated reference count.
+ *
+ * The first folio may start before @start; if it does, it will contain
+ * @start. The final folio may extend beyond @end; if it does, it will
+ * contain @end. The folios have ascending indices. There may be gaps
+ * between the folios if there are indices which have no folio in the
+ * page cache. If folios are added to or removed from the page cache
+ * while this is running, they may or may not be found by this call.
+ *
+ * Return: The number of folios which were found.
+ * We also update @start to index the next folio for the traversal.
+ */
+unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
+ pgoff_t end, struct folio_batch *fbatch)
+{
+ XA_STATE(xas, &mapping->i_pages, *start);
+ struct folio *folio;
+
+ rcu_read_lock();
+ while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
+ /* Skip over shadow, swap and DAX entries */
+ if (xa_is_value(folio))
+ continue;
+ if (!folio_batch_add(fbatch, folio)) {
+ *start = folio->index + folio_nr_pages(folio);
+ goto out;
+ }
+ }
+
+ /*
+ * We come here when there is no page beyond @end. We take care to not
+ * overflow the index @start as it confuses some of the callers. This
+ * breaks the iteration when there is a page at index -1 but that is
+ * already broken anyway.
+ */
+ if (end == (pgoff_t)-1)
+ *start = (pgoff_t)-1;
+ else
+ *start = end + 1;
+out:
+ rcu_read_unlock();
+
+ return folio_batch_count(fbatch);
+}
+EXPORT_SYMBOL(filemap_get_folios);
+
static inline
bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
{
--
2.35.1

2022-06-06 04:46:24

by Matthew Wilcox

Subject: [PATCH 06/10] hugetlbfs: Convert remove_inode_hugepages() to use filemap_get_folios()

Use folios throughout this function. That removes the last caller of
huge_pagevec_release(), so delete that too.
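
For reference, folio_batch_release() now does the job that
huge_pagevec_release() used to do here. A rough sketch of the
equivalent operations (the real helper batches the puts rather than
dropping each reference individually):

    unsigned i;

    for (i = 0; i < folio_batch_count(&fbatch); i++)
        folio_put(fbatch.folios[i]);
    folio_batch_init(&fbatch);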

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/hugetlbfs/inode.c | 44 ++++++++++++++------------------------------
1 file changed, 14 insertions(+), 30 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ae2524480f23..14d33f725e05 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,16 +108,6 @@ static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
}
#endif

-static void huge_pagevec_release(struct pagevec *pvec)
-{
- int i;
-
- for (i = 0; i < pagevec_count(pvec); ++i)
- put_page(pvec->pages[i]);
-
- pagevec_reinit(pvec);
-}
-
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
@@ -480,25 +470,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> huge_page_shift(h);
const pgoff_t end = lend >> huge_page_shift(h);
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);

- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
next = start;
- while (next < end) {
- /*
- * When no more pages are found, we are done.
- */
- if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); ++i) {
- struct page *page = pvec.pages[i];
+ while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); ++i) {
+ struct folio *folio = fbatch.folios[i];
u32 hash = 0;

- index = page->index;
+ index = folio->index;
if (!truncate_op) {
/*
* Only need to hold the fault mutex in the
@@ -511,15 +495,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}

/*
- * If page is mapped, it was faulted in after being
+ * If folio is mapped, it was faulted in after being
* unmapped in caller. Unmap (again) now after taking
* the fault mutex. The mutex will prevent faults
- * until we finish removing the page.
+ * until we finish removing the folio.
*
* This race can only happen in the hole punch case.
* Getting here in a truncate operation is a bug.
*/
- if (unlikely(page_mapped(page))) {
+ if (unlikely(folio_mapped(folio))) {
BUG_ON(truncate_op);

mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -532,7 +516,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
i_mmap_unlock_write(mapping);
}

- lock_page(page);
+ folio_lock(folio);
/*
* We must free the huge page and remove from page
* cache (remove_huge_page) BEFORE removing the
@@ -542,8 +526,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
* the subpool and global reserve usage count can need
* to be adjusted.
*/
- VM_BUG_ON(HPageRestoreReserve(page));
- remove_huge_page(page);
+ VM_BUG_ON(HPageRestoreReserve(&folio->page));
+ remove_huge_page(&folio->page);
freed++;
if (!truncate_op) {
if (unlikely(hugetlb_unreserve_pages(inode,
@@ -551,11 +535,11 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
hugetlb_fix_reserve_counts(inode);
}

- unlock_page(page);
+ folio_unlock(folio);
if (!truncate_op)
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
}
- huge_pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
}

--
2.35.1

2022-06-06 05:00:19

by Matthew Wilcox

Subject: [PATCH 03/10] ext4: Convert mpage_release_unused_pages() to use filemap_get_folios()

If the folio is large, it may overlap the beginning or end of the
unused range. If it does, we need to avoid invalidating it.
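
For example (numbers chosen purely for illustration): if mpd->first_page
is 20 and the cache holds an order-4 folio covering indices 16-31, then
folio->index is 16, which lies before the start of the unused range.
Invalidating that folio would also discard pages 16-19, which the caller
did not ask to release, so the checks added below skip it instead.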

Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
---
fs/ext4/inode.c | 21 ++++++++++++---------
1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3dce7d058985..32a7f5e024d6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1554,9 +1554,9 @@ struct mpage_da_data {
static void mpage_release_unused_pages(struct mpage_da_data *mpd,
bool invalidate)
{
- int nr_pages, i;
+ unsigned nr, i;
pgoff_t index, end;
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;

@@ -1574,15 +1574,18 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
ext4_es_remove_extent(inode, start, last - start + 1);
}

- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
while (index <= end) {
- nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
- if (nr_pages == 0)
+ nr = filemap_get_folios(mapping, &index, end, &fbatch);
+ if (nr == 0)
break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- struct folio *folio = page_folio(page);
+ for (i = 0; i < nr; i++) {
+ struct folio *folio = fbatch.folios[i];

+ if (folio->index < mpd->first_page)
+ continue;
+ if (folio->index + folio_nr_pages(folio) - 1 > end)
+ continue;
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
if (invalidate) {
@@ -1594,7 +1597,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
}
folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
}
}

--
2.35.1

2022-06-13 07:02:09

by Sumanth Korikkar

Subject: Re: [PATCH 06/10] hugetlbfs: Convert remove_inode_hugepages() to use filemap_get_folios()

On Fri, Jun 10, 2022 at 10:17:36PM +0100, Matthew Wilcox wrote:
> On Fri, Jun 10, 2022 at 05:52:05PM +0200, Sumanth Korikkar wrote:
> > To reproduce:
> > * Clone libhugetlbfs
> > * Execute: PATH=$PATH:"obj64/" LD_LIBRARY_PATH=../obj64/ alloc-instantiate-race shared
>
> ... it's a lot harder to set up hugetlb than that ...
>
> Anyway, I figured it out without being able to run the reproducer.
>
> Can you try this?
>
> diff --git a/mm/filemap.c b/mm/filemap.c
> index a30587f2e598..8ef861297ffb 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2160,7 +2160,11 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
> if (xa_is_value(folio))
> continue;
> if (!folio_batch_add(fbatch, folio)) {
> - *start = folio->index + folio_nr_pages(folio);
> + unsigned long nr = folio_nr_pages(folio);
> +
> + if (folio_test_hugetlb(folio))
> + nr = 1;
> + *start = folio->index + nr;
> goto out;
> }
> }

Yes, with the patch, the above test works fine.

--
Thanks,
Sumanth