This RFC patchset attempts to implement a listed filemap TODO: changing
hugetlb folios to have ->index in PAGE_SIZE units. This simplifies several
functions within filemap.c, as they no longer have to special-case hugetlb pages.
On the last RFC[1], Mike pointed out that hugetlb will still need to maintain
a huge page sized index, because it is used both for the reservation map and
for the hash function of the hugetlb fault mutex table.
This patchset adds new wrappers for hugetlb code to interact with the
page cache. These wrappers calculate a linear page index, as this is now
what the page cache expects for hugetlb pages as well.
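To illustrate the index relationship the wrappers rely on, here is a minimal
userspace sketch (not kernel code; the 4K base page and 2MB huge page sizes
are assumed only for the example numbers): the page cache index is kept in
PAGE_SIZE units, and the huge page sized index that hugetlb still needs
internally is derived by shifting by the huge page order.

#include <stdio.h>

#define PAGE_SHIFT	12			/* assumed 4K base pages */
#define HPAGE_SHIFT	21			/* assumed 2MB huge pages */
#define HPAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)

int main(void)
{
	unsigned long long off = 6ULL << 20;		/* byte offset 6MB into the file */

	/* page cache index, now always in PAGE_SIZE units (even for hugetlb) */
	unsigned long base_idx = off >> PAGE_SHIFT;	/* 1536 */

	/*
	 * Huge page sized index still used by the reservation map and the
	 * fault mutex hash, derived from the base page index.
	 */
	unsigned long huge_idx = base_idx >> HPAGE_ORDER;	/* 3 */

	printf("byte offset %llu -> base index %lu -> huge index %lu\n",
	       off, base_idx, huge_idx);
	return 0;
}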
This series passes the LTP hugetlb test cases. I will do more testing
and performance analysis, but wanted to get some early feedback on whether
this approach is acceptable.
[1]:https://lore.kernel.org/lkml/20230425012721.GA6696@monkey/T/
Sidhartha Kumar (2):
mm/filemap: remove hugetlb special casing in filemap.c
mm/hugetlb: add wrapper functions for interactions with page cache
fs/hugetlbfs/inode.c | 14 +++++++-------
include/linux/hugetlb.h | 21 +++++++++++++++++++--
include/linux/pagemap.h | 8 --------
mm/filemap.c | 36 +++++++++++-------------------------
mm/hugetlb.c | 22 +++++++++++++---------
5 files changed, 50 insertions(+), 51 deletions(-)
--
2.40.1
This patch removes the special-cased hugetlb handling code within the page
cache by changing the granularity of each index to the base page size
rather than the huge page size.
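As an illustration of the index change (a userspace sketch, not the kernel
code; the 4K base page and 2MB hugetlb folio sizes are assumed for the
numbers): with ->index in PAGE_SIZE units, the generic "advance past this
folio" arithmetic used by the batch lookup helpers is correct for hugetlb
folios too, so the old nr = 1 special case can go away.

#include <stdio.h>

int main(void)
{
	/* hypothetical 2MB hugetlb folio caching file bytes 4MB..6MB */
	unsigned long index = 1024;	/* ->index in PAGE_SIZE units (was 2 in huge page units) */
	unsigned long nr_pages = 512;	/* folio_nr_pages(): 2MB / 4K */

	/*
	 * The same "*start = folio->index + nr" computation the filemap
	 * batch helpers use now lands on the first index after the folio.
	 */
	unsigned long next_start = index + nr_pages;

	printf("next lookup starts at index %lu\n", next_start);	/* 1536 */
	return 0;
}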
Signed-off-by: Sidhartha Kumar <[email protected]>
---
include/linux/pagemap.h | 6 ------
mm/filemap.c | 36 +++++++++++-------------------------
2 files changed, 11 insertions(+), 31 deletions(-)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c1ae5ebc375fe..7ca967849c2cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -719,9 +719,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
*/
static inline bool folio_contains(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return folio->index == index;
return index - folio_index(folio) < folio_nr_pages(folio);
}
@@ -846,12 +843,9 @@ static inline loff_t folio_file_pos(struct folio *folio)
/*
* Get the offset in PAGE_SIZE (even for hugetlb folios).
- * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
*/
static inline pgoff_t folio_pgoff(struct folio *folio)
{
- if (unlikely(folio_test_hugetlb(folio)))
- return hugetlb_basepage_index(&folio->page);
return folio->index;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index 570bc8c3db878..12eee69240525 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -134,11 +134,8 @@ static void page_cache_delete(struct address_space *mapping,
mapping_set_update(&xas, mapping);
- /* hugetlb pages are represented by a single entry in the xarray */
- if (!folio_test_hugetlb(folio)) {
- xas_set_order(&xas, folio->index, folio_order(folio));
- nr = folio_nr_pages(folio);
- }
+ xas_set_order(&xas, folio->index, folio_order(folio));
+ nr = folio_nr_pages(folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -237,7 +234,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
if (free_folio)
free_folio(folio);
- if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ if (folio_test_large(folio))
refs = folio_nr_pages(folio);
folio_put_refs(folio, refs);
}
@@ -858,14 +855,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
if (!huge) {
int error = mem_cgroup_charge(folio, NULL, gfp);
- VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
return error;
charged = true;
- xas_set_order(&xas, index, folio_order(folio));
- nr = folio_nr_pages(folio);
}
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+ xas_set_order(&xas, index, folio_order(folio));
+ nr = folio_nr_pages(folio);
+
gfp &= GFP_RECLAIM_MASK;
folio_ref_add(folio, nr);
folio->mapping = mapping;
@@ -2042,7 +2040,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2106,7 +2104,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2147,9 +2145,6 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
continue;
if (!folio_batch_add(fbatch, folio)) {
unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -2175,7 +2170,7 @@ EXPORT_SYMBOL(filemap_get_folios);
static inline
bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
{
- if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+ if (!folio_test_large(folio))
return false;
if (index >= max)
return false;
@@ -2225,9 +2220,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (!folio_batch_add(fbatch, folio)) {
nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -2244,10 +2236,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (nr) {
folio = fbatch->folios[nr - 1];
- if (folio_test_hugetlb(folio))
- *start = folio->index + 1;
- else
- *start = folio->index + folio_nr_pages(folio);
+ *start = folio->index + folio_nr_pages(folio);
}
out:
rcu_read_unlock();
@@ -2285,9 +2274,6 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
continue;
if (!folio_batch_add(fbatch, folio)) {
unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
--
2.40.1
Add filemap_lock_hugetlb_folio(), which wraps __filemap_get_folio()
and passes in a linear page index. hugetlb_add_to_page_cache() is modified
to also compute a linear page index before calling into page cache code.
linear_page_index() is modified to perform the standard base page computation
for hugetlb VMAs as well, so it can be used by the page cache wrappers.
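For reference, the computation the wrappers now rely on is the plain
linear_page_index() one; below is a userspace model of it (the
vm_start/vm_pgoff/address values are made up for illustration), showing the
base page index handed to the page cache.

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed 4K base pages */

/* userspace model of linear_page_index() once the hugetlb branch is removed */
static unsigned long linear_index(unsigned long vm_start, unsigned long vm_pgoff,
				  unsigned long address)
{
	return ((address - vm_start) >> PAGE_SHIFT) + vm_pgoff;
}

int main(void)
{
	/* hypothetical mapping starting at 0x400000, mapping the file from page 0 */
	unsigned long vm_start = 0x400000, vm_pgoff = 0;
	unsigned long address = 0x600000;	/* fault 2MB into the mapping */

	/* base page index, as the page cache (and the new wrappers) now expect */
	printf("index = %lu\n", linear_index(vm_start, vm_pgoff, address));
	return 0;
}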
Signed-off-by: Sidhartha Kumar <[email protected]>
---
fs/hugetlbfs/inode.c | 14 +++++++-------
include/linux/hugetlb.h | 21 +++++++++++++++++++--
include/linux/pagemap.h | 2 --
mm/hugetlb.c | 22 +++++++++++++---------
4 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 90361a922cec7..204a9510510f1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -617,20 +617,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> huge_page_shift(h);
- const pgoff_t end = lend >> huge_page_shift(h);
struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
folio_batch_init(&fbatch);
- next = start;
- while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
+	next = lstart >> PAGE_SHIFT;
+	while (filemap_get_folios(mapping, &next, (lend - 1) >> PAGE_SHIFT, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index;
+			index = folio->index >> huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -693,10 +692,11 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
loff_t start,
loff_t end)
{
- pgoff_t idx = start >> huge_page_shift(h);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma = find_vma(mm, start);
struct folio *folio;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(vma, start);
if (IS_ERR(folio))
return;
@@ -868,7 +868,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
}
clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
__folio_mark_uptodate(folio);
- error = hugetlb_add_to_page_cache(folio, mapping, index);
+ error = hugetlb_add_to_page_cache(folio, &pseudo_vma, mapping, addr);
if (unlikely(error)) {
restore_reserve_on_error(h, &pseudo_vma, addr, folio);
folio_put(folio);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 21f942025fecd..55f90e051b7a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -727,8 +727,8 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
- pgoff_t idx);
+int hugetlb_add_to_page_cache(struct folio *folio, struct vm_area_struct *vma,
+ struct address_space *mapping, unsigned long address);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
unsigned long address, struct folio *folio);
@@ -755,6 +755,16 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return folio->_hugetlb_subpool;
}
+/* Wrapper function for __filemap_get_folio */
+static inline struct folio *filemap_lock_hugetlb_folio(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	pgoff_t idx = linear_page_index(vma, address);
+
+	return __filemap_get_folio(mapping, idx, FGP_LOCK, 0);
+}
+
static inline void hugetlb_set_folio_subpool(struct folio *folio,
struct hugepage_subpool *subpool)
{
@@ -1021,6 +1031,13 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return NULL;
}
+/* Wrapper function for __filemap_get_folio */
+static inline struct folio *filemap_lock_hugetlb_folio(struct vm_area_struct *vma,
+ unsigned long address)
+{
+ return NULL;
+}
+
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7ca967849c2cc..86f7b180d2521 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -856,8 +856,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
unsigned long address)
{
pgoff_t pgoff;
- if (unlikely(is_vm_hugetlb_page(vma)))
- return linear_hugepage_index(vma, address);
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
return pgoff;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea24718db4aff..5abab61af0ca5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -951,7 +951,7 @@ static long region_count(struct resv_map *resv, long f, long t)
/*
* Convert the address within this vma to the page offset within
- * the mapping, in pagecache page units; huge pages here.
+ * the mapping, huge page units here.
*/
static pgoff_t vma_hugecache_offset(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
@@ -5730,7 +5730,7 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
{
struct address_space *mapping = vma->vm_file->f_mapping;
- pgoff_t idx = vma_hugecache_offset(h, vma, address);
+ pgoff_t idx = linear_page_index(vma, address);
bool present;
rcu_read_lock();
@@ -5740,13 +5740,16 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
return present;
}
-int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
- pgoff_t idx)
+int hugetlb_add_to_page_cache(struct folio *folio,
+ struct vm_area_struct *vma,
+ struct address_space *mapping,
+ unsigned long address)
{
struct inode *inode = mapping->host;
struct hstate *h = hstate_inode(inode);
int err;
+ pgoff_t idx = linear_page_index(vma, address);
__folio_set_locked(folio);
err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL);
@@ -5854,7 +5857,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* before we get page_table_lock.
*/
new_folio = false;
- folio = filemap_lock_folio(mapping, idx);
+
+ folio = filemap_lock_hugetlb_folio(vma, address);
if (IS_ERR(folio)) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
@@ -5913,7 +5917,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
new_folio = true;
if (vma->vm_flags & VM_MAYSHARE) {
- int err = hugetlb_add_to_page_cache(folio, mapping, idx);
+ int err = hugetlb_add_to_page_cache(folio, vma, mapping, address);
if (err) {
/*
* err can't be -EEXIST which implies someone
@@ -6145,7 +6149,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- pagecache_folio = filemap_lock_folio(mapping, idx);
+ pagecache_folio = filemap_lock_hugetlb_folio(vma, address);
if (IS_ERR(pagecache_folio))
pagecache_folio = NULL;
}
@@ -6258,7 +6262,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
if (is_continue) {
ret = -EFAULT;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(dst_vma, dst_addr);
if (IS_ERR(folio))
goto out;
folio_in_pagecache = true;
@@ -6350,7 +6354,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
* hugetlb_fault_mutex_table that here must be hold by
* the caller.
*/
- ret = hugetlb_add_to_page_cache(folio, mapping, idx);
+ ret = hugetlb_add_to_page_cache(folio, dst_vma, mapping, dst_addr);
if (ret)
goto out_release_nounlock;
folio_in_pagecache = true;
--
2.40.1