This patchset replaces all calls of lru_cache_add() with the folio
equivalent: folio_add_lru(). This allows us to get rid of the lru_cache_add()
wrapper.
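The conversion itself is mechanical. As a rough sketch (not lifted from any
single patch), a caller that today does lru_cache_add(page) ends up doing
something like:

	struct folio *folio = page_folio(page);

	folio_add_lru(folio);

with the folio then reused for the surrounding page calls where possible.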
The series passes xfstests and the userfaultfd selftests.
I haven't been able to hit the fuse_try_move_page() function, so I
haven't been able to test those changes. Since replace_page_cache_page()
is only called by fuse_try_move_page(), I haven't been able to test that
either. Testing and review of both would be beneficial.
Vishal Moola (Oracle) (5):
filemap: Convert replace_page_cache_page() to
replace_page_cache_folio()
fuse: Convert fuse_try_move_page() to use folios
userfaultfd: Replace lru_cache functions with folio_add functions
khugepaged: Replace lru_cache_add() with folio_add_lru()
folio-compat: Remove lru_cache_add()
fs/fuse/dev.c | 55 +++++++++++++++++++++--------------------
include/linux/pagemap.h | 2 +-
include/linux/swap.h | 1 -
mm/filemap.c | 52 +++++++++++++++++++-------------------
mm/folio-compat.c | 6 -----
mm/khugepaged.c | 11 ++++++---
mm/truncate.c | 2 +-
mm/userfaultfd.c | 6 +++--
mm/workingset.c | 2 +-
9 files changed, 67 insertions(+), 70 deletions(-)
--
2.38.1
Converts the function to try to move folios instead of pages. Also
converts fuse_check_page() to fuse_check_folio() since this is its only
caller. This change removes 15 calls to compound_head().
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
fs/fuse/dev.c | 55 ++++++++++++++++++++++++++-------------------------
1 file changed, 28 insertions(+), 27 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 26817a2db463..204c332cd343 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
return ncpy;
}
-static int fuse_check_page(struct page *page)
+static int fuse_check_folio(struct folio *folio)
{
- if (page_mapcount(page) ||
- page->mapping != NULL ||
- (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ if (folio_mapped(folio) ||
+ folio->mapping != NULL ||
+ (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
1 << PG_uptodate |
@@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
1 << PG_reclaim |
1 << PG_waiters |
LRU_GEN_MASK | LRU_REFS_MASK))) {
- dump_page(page, "fuse: trying to steal weird page");
+ dump_page(&folio->page, "fuse: trying to steal weird page");
return 1;
}
return 0;
@@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
int err;
- struct page *oldpage = *pagep;
- struct page *newpage;
+ struct folio *oldfolio = page_folio(*pagep);
+ struct folio *newfolio;
struct pipe_buffer *buf = cs->pipebufs;
- get_page(oldpage);
+ folio_get(oldfolio);
err = unlock_request(cs->req);
if (err)
goto out_put_old;
@@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (!pipe_buf_try_steal(cs->pipe, buf))
goto out_fallback;
- newpage = buf->page;
+ newfolio = page_folio(buf->page);
- if (!PageUptodate(newpage))
- SetPageUptodate(newpage);
+ if (!folio_test_uptodate(newfolio))
+ folio_mark_uptodate(newfolio);
- ClearPageMappedToDisk(newpage);
+ folio_clear_mappedtodisk(newfolio);
- if (fuse_check_page(newpage) != 0)
+ if (fuse_check_folio(newfolio) != 0)
goto out_fallback_unlock;
/*
* This is a new and locked page, it shouldn't be mapped or
* have any special flags on it
*/
- if (WARN_ON(page_mapped(oldpage)))
+ if (WARN_ON(folio_mapped(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(page_has_private(oldpage)))
+ if (WARN_ON(folio_has_private(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+ if (WARN_ON(folio_test_dirty(oldfolio) ||
+ folio_test_writeback(oldfolio)))
goto out_fallback_unlock;
- if (WARN_ON(PageMlocked(oldpage)))
+ if (WARN_ON(folio_test_mlocked(oldfolio)))
goto out_fallback_unlock;
- replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
+ replace_page_cache_folio(oldfolio, newfolio);
- get_page(newpage);
+ folio_get(newfolio);
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
- lru_cache_add(newpage);
+ folio_add_lru(newfolio);
/*
* Release while we have extra ref on stolen page. Otherwise
@@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (test_bit(FR_ABORTED, &cs->req->flags))
err = -ENOENT;
else
- *pagep = newpage;
+ *pagep = &newfolio->page;
spin_unlock(&cs->req->waitq.lock);
if (err) {
- unlock_page(newpage);
- put_page(newpage);
+ folio_unlock(newfolio);
+ folio_put(newfolio);
goto out_put_old;
}
- unlock_page(oldpage);
+ folio_unlock(oldfolio);
/* Drop ref for ap->pages[] array */
- put_page(oldpage);
+ folio_put(oldfolio);
cs->len = 0;
err = 0;
out_put_old:
/* Drop ref obtained in this function */
- put_page(oldpage);
+ folio_put(oldfolio);
return err;
out_fallback_unlock:
- unlock_page(newpage);
+ folio_unlock(newfolio);
out_fallback:
cs->pg = buf->page;
cs->offset = buf->offset;
--
2.38.1
Replaces some calls with their folio equivalents. This is in preparation
for the removal of lru_cache_add(). This replaces 3 calls to
compound_head() with 1.
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/khugepaged.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4734315f7940..e432d5279043 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1970,6 +1970,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
if (result == SCAN_SUCCEED) {
struct page *page, *tmp;
+ struct folio *folio;
/*
* Replacing old pages with new one has succeeded, now we
@@ -1997,11 +1998,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
index++;
}
- SetPageUptodate(hpage);
- page_ref_add(hpage, HPAGE_PMD_NR - 1);
+ folio = page_folio(hpage);
+ folio_mark_uptodate(folio);
+ folio_ref_add(folio, HPAGE_PMD_NR - 1);
+
if (is_shmem)
- set_page_dirty(hpage);
- lru_cache_add(hpage);
+ folio_mark_dirty(folio);
+ folio_add_lru(folio);
/*
* Remove pte page tables, so we can re-fault the page as huge.
--
2.38.1
Eliminates 7 calls to compound_head().
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
fs/fuse/dev.c | 2 +-
include/linux/pagemap.h | 2 +-
mm/filemap.c | 52 ++++++++++++++++++++---------------------
3 files changed, 27 insertions(+), 29 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b4a6e0a1b945..26817a2db463 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -837,7 +837,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (WARN_ON(PageMlocked(oldpage)))
goto out_fallback_unlock;
- replace_page_cache_page(oldpage, newpage);
+ replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
get_page(newpage);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bbccb4044222..275810697d71 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1104,7 +1104,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
void filemap_remove_folio(struct folio *folio);
void delete_from_page_cache(struct page *page);
void __filemap_remove_folio(struct folio *folio, void *shadow);
-void replace_page_cache_page(struct page *old, struct page *new);
+void replace_page_cache_folio(struct folio *old, struct folio *new);
void delete_from_page_cache_batch(struct address_space *mapping,
struct folio_batch *fbatch);
int try_to_release_page(struct page *page, gfp_t gfp);
diff --git a/mm/filemap.c b/mm/filemap.c
index 08341616ae7a..c61dfaa81fee 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -785,56 +785,54 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
EXPORT_SYMBOL(file_write_and_wait_range);
/**
- * replace_page_cache_page - replace a pagecache page with a new one
- * @old: page to be replaced
- * @new: page to replace with
- *
- * This function replaces a page in the pagecache with a new one. On
- * success it acquires the pagecache reference for the new page and
- * drops it for the old page. Both the old and new pages must be
- * locked. This function does not add the new page to the LRU, the
+ * replace_page_cache_folio - replace a pagecache folio with a new one
+ * @old: folio to be replaced
+ * @new: folio to replace with
+ *
+ * This function replaces a folio in the pagecache with a new one. On
+ * success it acquires the pagecache reference for the new folio and
+ * drops it for the old folio. Both the old and new folios must be
+ * locked. This function does not add the new folio to the LRU, the
* caller must do that.
*
* The remove + add is atomic. This function cannot fail.
*/
-void replace_page_cache_page(struct page *old, struct page *new)
+void replace_page_cache_folio(struct folio *old, struct folio *new)
{
- struct folio *fold = page_folio(old);
- struct folio *fnew = page_folio(new);
struct address_space *mapping = old->mapping;
void (*free_folio)(struct folio *) = mapping->a_ops->free_folio;
pgoff_t offset = old->index;
XA_STATE(xas, &mapping->i_pages, offset);
- VM_BUG_ON_PAGE(!PageLocked(old), old);
- VM_BUG_ON_PAGE(!PageLocked(new), new);
- VM_BUG_ON_PAGE(new->mapping, new);
+ VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
+ VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
+ VM_BUG_ON_FOLIO(new->mapping, new);
- get_page(new);
+ folio_get(new);
new->mapping = mapping;
new->index = offset;
- mem_cgroup_migrate(fold, fnew);
+ mem_cgroup_migrate(old, new);
xas_lock_irq(&xas);
xas_store(&xas, new);
old->mapping = NULL;
/* hugetlb pages do not participate in page cache accounting. */
- if (!PageHuge(old))
- __dec_lruvec_page_state(old, NR_FILE_PAGES);
- if (!PageHuge(new))
- __inc_lruvec_page_state(new, NR_FILE_PAGES);
- if (PageSwapBacked(old))
- __dec_lruvec_page_state(old, NR_SHMEM);
- if (PageSwapBacked(new))
- __inc_lruvec_page_state(new, NR_SHMEM);
+ if (!folio_test_hugetlb(old))
+ __lruvec_stat_sub_folio(old, NR_FILE_PAGES);
+ if (!folio_test_hugetlb(new))
+ __lruvec_stat_add_folio(new, NR_FILE_PAGES);
+ if (folio_test_swapbacked(old))
+ __lruvec_stat_sub_folio(old, NR_SHMEM);
+ if (folio_test_swapbacked(new))
+ __lruvec_stat_add_folio(new, NR_SHMEM);
xas_unlock_irq(&xas);
if (free_folio)
- free_folio(fold);
- folio_put(fold);
+ free_folio(old);
+ folio_put(old);
}
-EXPORT_SYMBOL_GPL(replace_page_cache_page);
+EXPORT_SYMBOL_GPL(replace_page_cache_folio);
noinline int __filemap_add_folio(struct address_space *mapping,
struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
--
2.38.1
There are no longer any callers of lru_cache_add(), so remove it. This
saves 107 bytes of kernel text. Also clean up some comments such that
they reference the new folio_add_lru() instead.
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
include/linux/swap.h | 1 -
mm/folio-compat.c | 6 ------
mm/truncate.c | 2 +-
mm/workingset.c | 2 +-
4 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a18cf4b7c724..c92ccff9b962 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -388,7 +388,6 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages);
void lru_note_cost_folio(struct folio *);
void folio_add_lru(struct folio *);
void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
-void lru_cache_add(struct page *);
void mark_page_accessed(struct page *);
void folio_mark_accessed(struct folio *);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index e1e23b4947d7..efd65b7f48bb 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -82,12 +82,6 @@ bool redirty_page_for_writepage(struct writeback_control *wbc,
}
EXPORT_SYMBOL(redirty_page_for_writepage);
-void lru_cache_add(struct page *page)
-{
- folio_add_lru(page_folio(page));
-}
-EXPORT_SYMBOL(lru_cache_add);
-
void lru_cache_add_inactive_or_unevictable(struct page *page,
struct vm_area_struct *vma)
{
diff --git a/mm/truncate.c b/mm/truncate.c
index c0be77e5c008..184fa17fce60 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -573,7 +573,7 @@ EXPORT_SYMBOL(invalidate_mapping_pages);
* refcount. We do this because invalidate_inode_pages2() needs stronger
* invalidation guarantees, and cannot afford to leave pages behind because
* shrink_page_list() has a temp ref on them, or because they're transiently
- * sitting in the lru_cache_add() pagevecs.
+ * sitting in the folio_add_lru() pagevecs.
*/
static int invalidate_complete_folio2(struct address_space *mapping,
struct folio *folio)
diff --git a/mm/workingset.c b/mm/workingset.c
index ae7e984b23c6..25844171b72d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -492,7 +492,7 @@ void workingset_refault(struct folio *folio, void *shadow)
/* Folio was active prior to eviction */
if (workingset) {
folio_set_workingset(folio);
- /* XXX: Move to lru_cache_add() when it supports new vs putback */
+ /* XXX: Move to folio_add_lru() when it supports new vs putback */
lru_note_cost_folio(folio);
mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr);
}
--
2.38.1
Replaces lru_cache_add() and lru_cache_add_inactive_or_unevictable()
with folio_add_lru() and folio_add_lru_vma(). This is in preparation for
the removal of lru_cache_add().
Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
mm/userfaultfd.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e24e8a47ce8a..2560973b00d8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -66,6 +66,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
bool vm_shared = dst_vma->vm_flags & VM_SHARED;
bool page_in_cache = page->mapping;
spinlock_t *ptl;
+ struct folio *folio;
struct inode *inode;
pgoff_t offset, max_off;
@@ -113,14 +114,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
if (!pte_none_mostly(*dst_pte))
goto out_unlock;
+ folio = page_folio(page);
if (page_in_cache) {
/* Usually, cache pages are already added to LRU */
if (newly_allocated)
- lru_cache_add(page);
+ folio_add_lru(folio);
page_add_file_rmap(page, dst_vma, false);
} else {
page_add_new_anon_rmap(page, dst_vma, dst_addr);
- lru_cache_add_inactive_or_unevictable(page, dst_vma);
+ folio_add_lru_vma(folio, dst_vma);
}
/*
--
2.38.1
On Tue, Nov 01, 2022 at 10:53:22AM -0700, Vishal Moola (Oracle) wrote:
> Eliminates 7 calls to compound_head().
>
> Signed-off-by: Vishal Moola (Oracle) <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
On Tue, Nov 01, 2022 at 10:53:23AM -0700, Vishal Moola (Oracle) wrote:
> Converts the function to try to move folios instead of pages. Also
> converts fuse_check_page() to fuse_check_folio() since this is its only
> caller. This change removes 15 calls to compound_head().
This all looks good. I wonder if we shouldn't add an assertion that the
page we're trying to steal is !large? It seems to me that there are
assumptions in this part of fuse that it's only dealing with order-0
pages, and if someone gives it a page that's part of a large folio,
it's going to be messy. Miklos, any thoughts?
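For instance (untested, purely to illustrate the idea), something along
these lines next to the existing checks, once we have newfolio:

	if (WARN_ON(folio_test_large(newfolio)))
		goto out_fallback_unlock;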
> Signed-off-by: Vishal Moola (Oracle) <[email protected]>
> ---
> fs/fuse/dev.c | 55 ++++++++++++++++++++++++++-------------------------
> 1 file changed, 28 insertions(+), 27 deletions(-)
>
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 26817a2db463..204c332cd343 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
> return ncpy;
> }
>
> -static int fuse_check_page(struct page *page)
> +static int fuse_check_folio(struct folio *folio)
> {
> - if (page_mapcount(page) ||
> - page->mapping != NULL ||
> - (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
> + if (folio_mapped(folio) ||
> + folio->mapping != NULL ||
> + (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
> ~(1 << PG_locked |
> 1 << PG_referenced |
> 1 << PG_uptodate |
> @@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
> 1 << PG_reclaim |
> 1 << PG_waiters |
> LRU_GEN_MASK | LRU_REFS_MASK))) {
> - dump_page(page, "fuse: trying to steal weird page");
> + dump_page(&folio->page, "fuse: trying to steal weird page");
> return 1;
> }
> return 0;
> @@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
> static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> {
> int err;
> - struct page *oldpage = *pagep;
> - struct page *newpage;
> + struct folio *oldfolio = page_folio(*pagep);
> + struct folio *newfolio;
> struct pipe_buffer *buf = cs->pipebufs;
>
> - get_page(oldpage);
> + folio_get(oldfolio);
> err = unlock_request(cs->req);
> if (err)
> goto out_put_old;
> @@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> if (!pipe_buf_try_steal(cs->pipe, buf))
> goto out_fallback;
>
> - newpage = buf->page;
> + newfolio = page_folio(buf->page);
>
> - if (!PageUptodate(newpage))
> - SetPageUptodate(newpage);
> + if (!folio_test_uptodate(newfolio))
> + folio_mark_uptodate(newfolio);
>
> - ClearPageMappedToDisk(newpage);
> + folio_clear_mappedtodisk(newfolio);
>
> - if (fuse_check_page(newpage) != 0)
> + if (fuse_check_folio(newfolio) != 0)
> goto out_fallback_unlock;
>
> /*
> * This is a new and locked page, it shouldn't be mapped or
> * have any special flags on it
> */
> - if (WARN_ON(page_mapped(oldpage)))
> + if (WARN_ON(folio_mapped(oldfolio)))
> goto out_fallback_unlock;
> - if (WARN_ON(page_has_private(oldpage)))
> + if (WARN_ON(folio_has_private(oldfolio)))
> goto out_fallback_unlock;
> - if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
> + if (WARN_ON(folio_test_dirty(oldfolio) ||
> + folio_test_writeback(oldfolio)))
> goto out_fallback_unlock;
> - if (WARN_ON(PageMlocked(oldpage)))
> + if (WARN_ON(folio_test_mlocked(oldfolio)))
> goto out_fallback_unlock;
>
> - replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
> + replace_page_cache_folio(oldfolio, newfolio);
>
> - get_page(newpage);
> + folio_get(newfolio);
>
> if (!(buf->flags & PIPE_BUF_FLAG_LRU))
> - lru_cache_add(newpage);
> + folio_add_lru(newfolio);
>
> /*
> * Release while we have extra ref on stolen page. Otherwise
> @@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> if (test_bit(FR_ABORTED, &cs->req->flags))
> err = -ENOENT;
> else
> - *pagep = newpage;
> + *pagep = &newfolio->page;
> spin_unlock(&cs->req->waitq.lock);
>
> if (err) {
> - unlock_page(newpage);
> - put_page(newpage);
> + folio_unlock(newfolio);
> + folio_put(newfolio);
> goto out_put_old;
> }
>
> - unlock_page(oldpage);
> + folio_unlock(oldfolio);
> /* Drop ref for ap->pages[] array */
> - put_page(oldpage);
> + folio_put(oldfolio);
> cs->len = 0;
>
> err = 0;
> out_put_old:
> /* Drop ref obtained in this function */
> - put_page(oldpage);
> + folio_put(oldfolio);
> return err;
>
> out_fallback_unlock:
> - unlock_page(newpage);
> + folio_unlock(newfolio);
> out_fallback:
> cs->pg = buf->page;
> cs->offset = buf->offset;
> --
> 2.38.1
>
>
On Tue, Nov 01, 2022 at 10:53:25AM -0700, Vishal Moola (Oracle) wrote:
> Replaces some calls with their folio equivalents. This is in preparation
> for the removal of lru_cache_add(). This replaces 3 calls to
> compound_head() with 1.
>
> Signed-off-by: Vishal Moola (Oracle) <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
On Tue, Nov 01, 2022 at 10:53:24AM -0700, Vishal Moola (Oracle) wrote:
> Replaces lru_cache_add() and lru_cache_add_inactive_or_unevictable()
> with folio_add_lru() and folio_add_lru_vma(). This is in preparation for
> the removal of lru_cache_add().
Ummmmm. Reviewing this patch reveals a bug (not introduced by your
patch). Look:
mfill_atomic_install_pte:
bool page_in_cache = page->mapping;
mcontinue_atomic_pte:
ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
...
page = folio_file_page(folio, pgoff);
ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
page, false, wp_copy);
That says pretty plainly that mfill_atomic_install_pte() can be passed
a tail page from shmem, and if it is ...
if (page_in_cache) {
...
} else {
page_add_new_anon_rmap(page, dst_vma, dst_addr);
lru_cache_add_inactive_or_unevictable(page, dst_vma);
}
it'll get put on the rmap as an anon page!
> Signed-off-by: Vishal Moola (Oracle) <[email protected]>
> ---
> mm/userfaultfd.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index e24e8a47ce8a..2560973b00d8 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -66,6 +66,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> bool page_in_cache = page->mapping;
> spinlock_t *ptl;
> + struct folio *folio;
> struct inode *inode;
> pgoff_t offset, max_off;
>
> @@ -113,14 +114,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> if (!pte_none_mostly(*dst_pte))
> goto out_unlock;
>
> + folio = page_folio(page);
> if (page_in_cache) {
> /* Usually, cache pages are already added to LRU */
> if (newly_allocated)
> - lru_cache_add(page);
> + folio_add_lru(folio);
> page_add_file_rmap(page, dst_vma, false);
> } else {
> page_add_new_anon_rmap(page, dst_vma, dst_addr);
> - lru_cache_add_inactive_or_unevictable(page, dst_vma);
> + folio_add_lru_vma(folio, dst_vma);
> }
>
> /*
> --
> 2.38.1
>
>
On Tue, Nov 01, 2022 at 10:53:26AM -0700, Vishal Moola (Oracle) wrote:
> There are no longer any callers of lru_cache_add(), so remove it. This
> saves 107 bytes of kernel text. Also clean up some comments such that
> they reference the new folio_add_lru() instead.
>
> Signed-off-by: Vishal Moola (Oracle) <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
On Tue, Nov 01, 2022 at 06:31:26PM +0000, Matthew Wilcox wrote:
> On Tue, Nov 01, 2022 at 10:53:24AM -0700, Vishal Moola (Oracle) wrote:
> > Replaces lru_cache_add() and lru_cache_add_inactive_or_unevictable()
> > with folio_add_lru() and folio_add_lru_vma(). This is in preparation for
> > the removal of lru_cache_add().
>
> Ummmmm. Reviewing this patch reveals a bug (not introduced by your
> patch). Look:
>
> mfill_atomic_install_pte:
> bool page_in_cache = page->mapping;
>
> mcontinue_atomic_pte:
> ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
> ...
> page = folio_file_page(folio, pgoff);
> ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
> page, false, wp_copy);
>
> That says pretty plainly that mfill_atomic_install_pte() can be passed
> a tail page from shmem, and if it is ...
>
> if (page_in_cache) {
> ...
> } else {
> page_add_new_anon_rmap(page, dst_vma, dst_addr);
> lru_cache_add_inactive_or_unevictable(page, dst_vma);
> }
>
> it'll get put on the rmap as an anon page!
Hmm yeah.. thanks Matthew!
Does the patch attached look reasonable to you?
Copying Axel too.
>
> > Signed-off-by: Vishal Moola (Oracle) <[email protected]>
> > ---
> > mm/userfaultfd.c | 6 ++++--
> > 1 file changed, 4 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > index e24e8a47ce8a..2560973b00d8 100644
> > --- a/mm/userfaultfd.c
> > +++ b/mm/userfaultfd.c
> > @@ -66,6 +66,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> > bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> > bool page_in_cache = page->mapping;
> > spinlock_t *ptl;
> > + struct folio *folio;
> > struct inode *inode;
> > pgoff_t offset, max_off;
> >
> > @@ -113,14 +114,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> > if (!pte_none_mostly(*dst_pte))
> > goto out_unlock;
> >
> > + folio = page_folio(page);
> > if (page_in_cache) {
> > /* Usually, cache pages are already added to LRU */
> > if (newly_allocated)
> > - lru_cache_add(page);
> > + folio_add_lru(folio);
> > page_add_file_rmap(page, dst_vma, false);
> > } else {
> > page_add_new_anon_rmap(page, dst_vma, dst_addr);
> > - lru_cache_add_inactive_or_unevictable(page, dst_vma);
> > + folio_add_lru_vma(folio, dst_vma);
> > }
> >
> > /*
> > --
> > 2.38.1
> >
> >
>
--
Peter Xu
On Wed, Nov 02, 2022 at 03:02:35PM -0400, Peter Xu wrote:
> Does the patch attached look reasonable to you?
Mmm, no. If the page is in the swap cache, this will be "true".
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 3d0fef3980b3..650ab6cfd5f4 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> pte_t _dst_pte, *dst_pte;
> bool writable = dst_vma->vm_flags & VM_WRITE;
> bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> - bool page_in_cache = page->mapping;
> + bool page_in_cache = page_mapping(page);
We could do:
struct page *head = compound_head(page);
bool page_in_cache = head->mapping && !PageMappingFlags(head);
On Wed, 2 Nov 2022 15:02:35 -0400 Peter Xu <[email protected]> wrote:
> mfill_atomic_install_pte() checks page->mapping to detect whether a page
> is used in the page cache. However, as pointed out by Matthew, the page can
> logically be a tail page rather than always the head page in the case of
> uffd minor mode with UFFDIO_CONTINUE. That means we could wrongly install a
> pte for a shmem THP tail page, assuming it's an anonymous page.
>
> It's not that clear even for anonymous pages, since anonymous pages normally
> also have page->mapping set up, pointing at the anon_vma. It's safe here only
> because the only such caller of mfill_atomic_install_pte(), mcopy_atomic_pte(),
> always passes in a newly allocated page, whose page->mapping is not yet set
> up. However, that's not especially obvious either.
>
> For either of the above, use page_mapping() instead.
>
> And this should be stable material.
I added
Fixes: 153132571f02 ("userfaultfd/shmem: support UFFDIO_CONTINUE for shmem")
On Wed, Nov 02, 2022 at 07:21:19PM +0000, Matthew Wilcox wrote:
> On Wed, Nov 02, 2022 at 03:02:35PM -0400, Peter Xu wrote:
> > Does the patch attached look reasonable to you?
>
> Mmm, no. If the page is in the swap cache, this will be "true".
It will not happen in practice, right?
I mean, shmem_get_folio() should have done the swap-in, and we should be
holding the page lock at that point.
For anon, mcopy_atomic_pte() is the only user and it's passing in a newly
allocated page here.
>
> > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > index 3d0fef3980b3..650ab6cfd5f4 100644
> > --- a/mm/userfaultfd.c
> > +++ b/mm/userfaultfd.c
> > @@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> > pte_t _dst_pte, *dst_pte;
> > bool writable = dst_vma->vm_flags & VM_WRITE;
> > bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> > - bool page_in_cache = page->mapping;
> > + bool page_in_cache = page_mapping(page);
>
> We could do:
>
> struct page *head = compound_head(page);
> bool page_in_cache = head->mapping && !PageMappingFlags(head);
Sounds good to me, but it just gets a bit complicated.
If page_mapping() doesn't sound good, how about we just pass that over from
callers? We only have three, so quite doable too.
--
Peter Xu
On Wed, Nov 2, 2022 at 1:44 PM Peter Xu <[email protected]> wrote:
>
> On Wed, Nov 02, 2022 at 07:21:19PM +0000, Matthew Wilcox wrote:
> > On Wed, Nov 02, 2022 at 03:02:35PM -0400, Peter Xu wrote:
> > > Does the patch attached look reasonable to you?
> >
> > Mmm, no. If the page is in the swap cache, this will be "true".
>
> It will not happen in practice, right?
>
> I mean, shmem_get_folio() should have done the swap-in, and we should be
> holding the page lock at that point.
>
> For anon, mcopy_atomic_pte() is the only user and it's passing in a newly
> allocated page here.
>
> >
> > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > index 3d0fef3980b3..650ab6cfd5f4 100644
> > > --- a/mm/userfaultfd.c
> > > +++ b/mm/userfaultfd.c
> > > @@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> > > pte_t _dst_pte, *dst_pte;
> > > bool writable = dst_vma->vm_flags & VM_WRITE;
> > > bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> > > - bool page_in_cache = page->mapping;
> > > + bool page_in_cache = page_mapping(page);
> >
> > We could do:
> >
> > struct page *head = compound_head(page);
> > bool page_in_cache = head->mapping && !PageMappingFlags(head);
>
> Sounds good to me, but it just gets a bit complicated.
>
> If page_mapping() doesn't sound good, how about we just pass that over from
> callers? We only have three, so quite doable too.
For what it's worth, I think I like Matthew's version better than the
original patch. This is because, although page_mapping() looks simpler
here, looking into the definition of page_mapping() I feel it's
handling several cases, not all of which are relevant here (or, as
Matthew points out, would actually be wrong if it were possible to
reach those cases here).
It's not clear to me what is meant by "pass that over from callers"?
Do you mean, have callers pass in true/false for page_in_cache
directly?
That could work, but I still think I prefer Matthew's version slightly
better, if only because this function already takes a lot of
arguments.
>
> --
> Peter Xu
>
On Thu, Nov 03, 2022 at 10:34:38AM -0700, Axel Rasmussen wrote:
> On Wed, Nov 2, 2022 at 1:44 PM Peter Xu <[email protected]> wrote:
> >
> > On Wed, Nov 02, 2022 at 07:21:19PM +0000, Matthew Wilcox wrote:
> > > On Wed, Nov 02, 2022 at 03:02:35PM -0400, Peter Xu wrote:
> > > > Does the patch attached look reasonable to you?
> > >
> > > Mmm, no. If the page is in the swap cache, this will be "true".
> >
> > It will not happen in practice, right?
> >
> > I mean, shmem_get_folio() should have done the swap-in, and we should be
> > holding the page lock at that point.
> >
> > For anon, mcopy_atomic_pte() is the only user and it's passing in a newly
> > allocated page here.
> >
> > >
> > > > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> > > > index 3d0fef3980b3..650ab6cfd5f4 100644
> > > > --- a/mm/userfaultfd.c
> > > > +++ b/mm/userfaultfd.c
> > > > @@ -64,7 +64,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
> > > > pte_t _dst_pte, *dst_pte;
> > > > bool writable = dst_vma->vm_flags & VM_WRITE;
> > > > bool vm_shared = dst_vma->vm_flags & VM_SHARED;
> > > > - bool page_in_cache = page->mapping;
> > > > + bool page_in_cache = page_mapping(page);
> > >
> > > We could do:
> > >
> > > struct page *head = compound_head(page);
> > > bool page_in_cache = head->mapping && !PageMappingFlags(head);
> >
> > Sounds good to me, but it just gets a bit complicated.
> >
> > If page_mapping() doesn't sound good, how about we just pass that over from
> > callers? We only have three, so quite doable too.
>
> For what it's worth, I think I like Matthew's version better than the
> original patch. This is because, although page_mapping() looks simpler
> here, looking into the definition of page_mapping() I feel it's
> handling several cases, not all of which are relevant here (or, as
> Matthew points out, would actually be wrong if it were possible to
> reach those cases here).
>
> It's not clear to me what is meant by "pass that over from callers"?
> Do you mean, have callers pass in true/false for page_in_cache
> directly?
Yes.
>
> That could work, but I still think I prefer Matthew's version slightly
> better, if only because this function already takes a lot of
> arguments.
IMHO that's not an issue; we can merge them into a single flags argument,
cleaning things up along the way.
To me the simplest option so far is still just to use page_mapping(), but no
strong opinion here.
If we go with Matthew's patch, it'd be great if we can add a comment
showing what we're doing (something like "Unwrapped page_mapping() but
avoid looking into swap cache" would be good enough for me).
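As a rough sketch (untested, just Matthew's suggested check plus such a
comment), it could look like:

	struct page *head = compound_head(page);
	/* Unwrapped page_mapping(), but avoid looking into the swap cache */
	bool page_in_cache = head->mapping && !PageMappingFlags(head);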
Thanks,
--
Peter Xu
On Tue, Nov 1, 2022 at 11:24 AM Matthew Wilcox <[email protected]> wrote:
>
> On Tue, Nov 01, 2022 at 10:53:23AM -0700, Vishal Moola (Oracle) wrote:
> > Converts the function to try to move folios instead of pages. Also
> > converts fuse_check_page() to fuse_check_folio() since this is its only
> > caller. This change removes 15 calls to compound_head().
>
> This all looks good. I wonder if we shouldn't add an assertion that the
> page we're trying to steal is !large? It seems to me that there are
> assumptions in this part of fuse that it's only dealing with order-0
> pages, and if someone gives it a page that's part of a large folio,
> it's going to be messy. Miklos, any thoughts?
Miklos, could you please look over this patch?
> > Signed-off-by: Vishal Moola (Oracle) <[email protected]>
> > ---
> > fs/fuse/dev.c | 55 ++++++++++++++++++++++++++-------------------------
> > 1 file changed, 28 insertions(+), 27 deletions(-)
> >
> > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> > index 26817a2db463..204c332cd343 100644
> > --- a/fs/fuse/dev.c
> > +++ b/fs/fuse/dev.c
> > @@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
> > return ncpy;
> > }
> >
> > -static int fuse_check_page(struct page *page)
> > +static int fuse_check_folio(struct folio *folio)
> > {
> > - if (page_mapcount(page) ||
> > - page->mapping != NULL ||
> > - (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
> > + if (folio_mapped(folio) ||
> > + folio->mapping != NULL ||
> > + (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
> > ~(1 << PG_locked |
> > 1 << PG_referenced |
> > 1 << PG_uptodate |
> > @@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
> > 1 << PG_reclaim |
> > 1 << PG_waiters |
> > LRU_GEN_MASK | LRU_REFS_MASK))) {
> > - dump_page(page, "fuse: trying to steal weird page");
> > + dump_page(&folio->page, "fuse: trying to steal weird page");
> > return 1;
> > }
> > return 0;
> > @@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
> > static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> > {
> > int err;
> > - struct page *oldpage = *pagep;
> > - struct page *newpage;
> > + struct folio *oldfolio = page_folio(*pagep);
> > + struct folio *newfolio;
> > struct pipe_buffer *buf = cs->pipebufs;
> >
> > - get_page(oldpage);
> > + folio_get(oldfolio);
> > err = unlock_request(cs->req);
> > if (err)
> > goto out_put_old;
> > @@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> > if (!pipe_buf_try_steal(cs->pipe, buf))
> > goto out_fallback;
> >
> > - newpage = buf->page;
> > + newfolio = page_folio(buf->page);
> >
> > - if (!PageUptodate(newpage))
> > - SetPageUptodate(newpage);
> > + if (!folio_test_uptodate(newfolio))
> > + folio_mark_uptodate(newfolio);
> >
> > - ClearPageMappedToDisk(newpage);
> > + folio_clear_mappedtodisk(newfolio);
> >
> > - if (fuse_check_page(newpage) != 0)
> > + if (fuse_check_folio(newfolio) != 0)
> > goto out_fallback_unlock;
> >
> > /*
> > * This is a new and locked page, it shouldn't be mapped or
> > * have any special flags on it
> > */
> > - if (WARN_ON(page_mapped(oldpage)))
> > + if (WARN_ON(folio_mapped(oldfolio)))
> > goto out_fallback_unlock;
> > - if (WARN_ON(page_has_private(oldpage)))
> > + if (WARN_ON(folio_has_private(oldfolio)))
> > goto out_fallback_unlock;
> > - if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
> > + if (WARN_ON(folio_test_dirty(oldfolio) ||
> > + folio_test_writeback(oldfolio)))
> > goto out_fallback_unlock;
> > - if (WARN_ON(PageMlocked(oldpage)))
> > + if (WARN_ON(folio_test_mlocked(oldfolio)))
> > goto out_fallback_unlock;
> >
> > - replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
> > + replace_page_cache_folio(oldfolio, newfolio);
> >
> > - get_page(newpage);
> > + folio_get(newfolio);
> >
> > if (!(buf->flags & PIPE_BUF_FLAG_LRU))
> > - lru_cache_add(newpage);
> > + folio_add_lru(newfolio);
> >
> > /*
> > * Release while we have extra ref on stolen page. Otherwise
> > @@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
> > if (test_bit(FR_ABORTED, &cs->req->flags))
> > err = -ENOENT;
> > else
> > - *pagep = newpage;
> > + *pagep = &newfolio->page;
> > spin_unlock(&cs->req->waitq.lock);
> >
> > if (err) {
> > - unlock_page(newpage);
> > - put_page(newpage);
> > + folio_unlock(newfolio);
> > + folio_put(newfolio);
> > goto out_put_old;
> > }
> >
> > - unlock_page(oldpage);
> > + folio_unlock(oldfolio);
> > /* Drop ref for ap->pages[] array */
> > - put_page(oldpage);
> > + folio_put(oldfolio);
> > cs->len = 0;
> >
> > err = 0;
> > out_put_old:
> > /* Drop ref obtained in this function */
> > - put_page(oldpage);
> > + folio_put(oldfolio);
> > return err;
> >
> > out_fallback_unlock:
> > - unlock_page(newpage);
> > + folio_unlock(newfolio);
> > out_fallback:
> > cs->pg = buf->page;
> > cs->offset = buf->offset;
> > --
> > 2.38.1
> >
> >
On Thu, 10 Nov 2022 at 19:36, Vishal Moola <[email protected]> wrote:
>
> On Tue, Nov 1, 2022 at 11:24 AM Matthew Wilcox <[email protected]> wrote:
> >
> > On Tue, Nov 01, 2022 at 10:53:23AM -0700, Vishal Moola (Oracle) wrote:
> > > Converts the function to try to move folios instead of pages. Also
> > > converts fuse_check_page() to fuse_check_folio() since this is its only
> > > caller. This change removes 15 calls to compound_head().
> >
> > This all looks good. I wonder if we shouldn't add an assertion that the
> > page we're trying to steal is !large? It seems to me that there are
> > assumptions in this part of fuse that it's only dealing with order-0
> > pages, and if someone gives it a page that's part of a large folio,
> > it's going to be messy. Miklos, any thoughts?
>
> Miklos, could you please look over this patch?
I think this should take care of the large folio case in fuse_try_move_page():
if (cs->len != PAGE_SIZE)
goto out_fallback;
The patch looks okay.
Acked-by: Miklos Szeredi <[email protected]>
Thanks,
Miklos
On Tue, Nov 1, 2022 at 10:53 AM Vishal Moola (Oracle)
<[email protected]> wrote:
>
> This patchset replaces all calls of lru_cache_add() with the folio
> equivalent: folio_add_lru(). This allows us to get rid of the lru_cache_add()
> wrapper.
> The series passes xfstests and the userfaultfd selftests.
All of these patches have been reviewed. Andrew, is there anything
you'd like to see changed, or will you take these?