============== OVERVIEW ===========================
This series continues the conversion of core hugetlb functions to use
folios. This series converts many helper functions in the hugetlb fault
path. This is in preparation for another series to convert the hugetlb
fault code paths to operate on folios.
============== TESTING ===========================
LTP:
Ran 10 back to back rounds of the LTP hugetlb test suite.
Gigantic Huge Pages:
Test allocation and freeing via hugeadm commands:
hugeadm --pool-pages-min 1GB:10
hugeadm --pool-pages-min 1GB:0
Demote:
Demote one 1GB hugepage to 512 2MB hugepages
echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/demote
cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# 512
cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
# 0
Rebased on 1/3/2023 mm-unstable
Sidhartha Kumar (8):
mm/hugetlb: convert isolate_hugetlb to folios
mm/hugetlb: convert __update_and_free_page() to folios
mm/hugetlb: convert dequeue_hugetlb_page functions to folios
mm/hugetlb: convert alloc_surplus_huge_page() to folios
mm/hugetlb: increase use of folios in alloc_huge_page()
mm/hugetlb: convert alloc_migrate_huge_page to folios
mm/hugetlb: convert restore_reserve_on_error() to folios
mm/hugetlb: convert demote_free_huge_page to folios
include/linux/hugetlb.h | 10 +-
include/linux/hugetlb_cgroup.h | 8 +-
include/linux/mm.h | 5 +
mm/gup.c | 2 +-
mm/hugetlb.c | 213 +++++++++++++++++----------------
mm/hugetlb_cgroup.c | 8 +-
mm/memory-failure.c | 2 +-
mm/memory_hotplug.c | 2 +-
mm/mempolicy.c | 2 +-
mm/migrate.c | 7 +-
10 files changed, 136 insertions(+), 123 deletions(-)
--
2.39.0
Convert isolate_hugetlb() to take in a folio and convert its callers to
pass a folio. Using page_folio() to convert the callers to use a folio is
safe as isolate_hugetlb() operates on a head page.
Also add a folio equivalent of get_page_unless_zero().
Signed-off-by: Sidhartha Kumar <[email protected]>
---
include/linux/hugetlb.h | 4 ++--
include/linux/mm.h | 5 +++++
mm/gup.c | 2 +-
mm/hugetlb.c | 16 ++++++++--------
mm/memory-failure.c | 2 +-
mm/memory_hotplug.c | 2 +-
mm/mempolicy.c | 2 +-
mm/migrate.c | 2 +-
8 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 551834cd5299..482929b2d044 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -169,7 +169,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-int isolate_hugetlb(struct page *page, struct list_head *list);
+int isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
@@ -374,7 +374,7 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return NULL;
}
-static inline int isolate_hugetlb(struct page *page, struct list_head *list)
+static inline int isolate_hugetlb(struct folio *folio, struct list_head *list)
{
return -EBUSY;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e2dd5a37d078..cd8508d728f1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -775,6 +775,11 @@ static inline bool get_page_unless_zero(struct page *page)
return page_ref_add_unless(page, 1, 0);
}
+static inline bool get_folio_unless_zero(struct folio *folio)
+{
+ return folio_ref_add_unless(folio, 1, 0);
+}
+
extern int page_is_ram(unsigned long pfn);
enum {
diff --git a/mm/gup.c b/mm/gup.c
index 5182abaaecde..bdb00b9df89e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1843,7 +1843,7 @@ static unsigned long collect_longterm_unpinnable_pages(
continue;
if (folio_test_hugetlb(folio)) {
- isolate_hugetlb(&folio->page, movable_page_list);
+ isolate_hugetlb(folio, movable_page_list);
continue;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0c58f6519b9a..90c6f0402c7b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2781,7 +2781,7 @@ static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
* Fail with -EBUSY if not possible.
*/
spin_unlock_irq(&hugetlb_lock);
- ret = isolate_hugetlb(&old_folio->page, list);
+ ret = isolate_hugetlb(old_folio, list);
spin_lock_irq(&hugetlb_lock);
goto free_new;
} else if (!folio_test_hugetlb_freed(old_folio)) {
@@ -2856,7 +2856,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
if (hstate_is_gigantic(h))
return -ENOMEM;
- if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
+ if (folio_ref_count(folio) && !isolate_hugetlb(folio, list))
ret = 0;
else if (!folio_ref_count(folio))
ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
@@ -7271,19 +7271,19 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
* These functions are overwritable if your architecture needs its own
* behavior.
*/
-int isolate_hugetlb(struct page *page, struct list_head *list)
+int isolate_hugetlb(struct folio *folio, struct list_head *list)
{
int ret = 0;
spin_lock_irq(&hugetlb_lock);
- if (!PageHeadHuge(page) ||
- !HPageMigratable(page) ||
- !get_page_unless_zero(page)) {
+ if (!folio_test_hugetlb(folio) ||
+ !folio_test_hugetlb_migratable(folio) ||
+ !get_folio_unless_zero(folio)) {
ret = -EBUSY;
goto unlock;
}
- ClearHPageMigratable(page);
- list_move_tail(&page->lru, list);
+ folio_clear_hugetlb_migratable(folio);
+ list_move_tail(&folio->lru, list);
unlock:
spin_unlock_irq(&hugetlb_lock);
return ret;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 63d8501001c6..cf60c0fa795c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2438,7 +2438,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
bool isolated = false;
if (PageHuge(page)) {
- isolated = !isolate_hugetlb(page, pagelist);
+ isolated = !isolate_hugetlb(page_folio(page), pagelist);
} else {
bool lru = !__PageMovable(page);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fd40f7e9f176..a1e8c3e9ab08 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1641,7 +1641,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
if (PageHuge(page)) {
pfn = page_to_pfn(head) + compound_nr(head) - 1;
- isolate_hugetlb(head, &source);
+ isolate_hugetlb(folio, &source);
continue;
} else if (PageTransHuge(page))
pfn = page_to_pfn(head) + thp_nr_pages(page) - 1;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 61aa9aedb728..4e62b26539c9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -601,7 +601,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
- if (isolate_hugetlb(page, qp->pagelist) &&
+ if (isolate_hugetlb(page_folio(page), qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
* Failed to isolate page but allow migrating pages
diff --git a/mm/migrate.c b/mm/migrate.c
index 4aea647a0180..6932b3d5a9dd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1732,7 +1732,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (PageHuge(page)) {
if (PageHead(page)) {
- err = isolate_hugetlb(page, pagelist);
+ err = isolate_hugetlb(page_folio(page), pagelist);
if (!err)
err = 1;
}
--
2.39.0
dequeue_huge_page_node_exact() is changed to
dequeue_hugetlb_folio_node_exact() and dequeue_huge_page_nodemask() is
changed to dequeue_hugetlb_folio_nodemask(). Both now return a folio;
update their callers to handle the returned folio.
Signed-off-by: Sidhartha Kumar <[email protected]>
---
mm/hugetlb.c | 55 +++++++++++++++++++++++++++++-----------------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b06ec8d60794..8dffb77d3510 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1140,32 +1140,36 @@ static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
folio_set_hugetlb_freed(folio);
}
-static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
+ int nid)
{
struct page *page;
+ struct folio *folio;
bool pin = !!(current->flags & PF_MEMALLOC_PIN);
lockdep_assert_held(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
- if (pin && !is_longterm_pinnable_page(page))
+ folio = page_folio(page);
+
+ if (pin && !folio_is_longterm_pinnable(folio))
continue;
- if (PageHWPoison(page))
+ if (folio_test_hwpoison(folio))
continue;
- list_move(&page->lru, &h->hugepage_activelist);
- set_page_refcounted(page);
- ClearHPageFreed(page);
+ list_move(&folio->lru, &h->hugepage_activelist);
+ folio_ref_unfreeze(folio, 1);
+ folio_clear_hugetlb_freed(folio);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
- return page;
+ return folio;
}
return NULL;
}
-static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
- nodemask_t *nmask)
+static struct folio *dequeue_hugetlb_folio_nodemask(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask)
{
unsigned int cpuset_mems_cookie;
struct zonelist *zonelist;
@@ -1178,7 +1182,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
- struct page *page;
+ struct folio *folio;
if (!cpuset_zone_allowed(zone, gfp_mask))
continue;
@@ -1190,9 +1194,9 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
continue;
node = zone_to_nid(zone);
- page = dequeue_huge_page_node_exact(h, node);
- if (page)
- return page;
+ folio = dequeue_hugetlb_folio_node_exact(h, node);
+ if (folio)
+ return folio;
}
if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie)))
goto retry_cpuset;
@@ -1210,7 +1214,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
unsigned long address, int avoid_reserve,
long chg)
{
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct mempolicy *mpol;
gfp_t gfp_mask;
nodemask_t *nodemask;
@@ -1232,22 +1236,24 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
if (mpol_is_preferred_many(mpol)) {
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ nid, nodemask);
/* Fallback to all nodes if page==NULL */
nodemask = NULL;
}
- if (!page)
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ if (!folio)
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ nid, nodemask);
- if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
- SetHPageRestoreReserve(page);
+ if (folio && !avoid_reserve && vma_has_reserves(vma, chg)) {
+ folio_set_hugetlb_restore_reserve(folio);
h->resv_huge_pages--;
}
mpol_cond_put(mpol);
- return page;
+ return &folio->page;
err:
return NULL;
@@ -2331,12 +2337,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
{
spin_lock_irq(&hugetlb_lock);
if (available_huge_pages(h)) {
- struct page *page;
+ struct folio *folio;
- page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
- if (page) {
+ folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask,
+ preferred_nid, nmask);
+ if (folio) {
spin_unlock_irq(&hugetlb_lock);
- return page;
+ return &folio->page;
}
}
spin_unlock_irq(&hugetlb_lock);
--
2.39.0
Use the hugetlb folio flag macros inside restore_reserve_on_error() and
update the comments to reflect the use of folios.
Signed-off-by: Sidhartha Kumar <[email protected]>
---
mm/hugetlb.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0db01718d1c3..2bb69b098117 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2678,22 +2678,23 @@ static long vma_del_reservation(struct hstate *h,
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
unsigned long address, struct page *page)
{
+ struct folio *folio = page_folio(page);
long rc = vma_needs_reservation(h, vma, address);
- if (HPageRestoreReserve(page)) {
+ if (folio_test_hugetlb_restore_reserve(folio)) {
if (unlikely(rc < 0))
/*
* Rare out of memory condition in reserve map
- * manipulation. Clear HPageRestoreReserve so that
- * global reserve count will not be incremented
+ * manipulation. Clear hugetlb_restore_reserve so
+ * that global reserve count will not be incremented
* by free_huge_page. This will make it appear
- * as though the reservation for this page was
+ * as though the reservation for this folio was
* consumed. This may prevent the task from
- * faulting in the page at a later time. This
+ * faulting in the folio at a later time. This
* is better than inconsistent global huge page
* accounting of reserve counts.
*/
- ClearHPageRestoreReserve(page);
+ folio_clear_hugetlb_restore_reserve(folio);
else if (rc)
(void)vma_add_reservation(h, vma, address);
else
@@ -2704,7 +2705,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
* This indicates there is an entry in the reserve map
* not added by alloc_huge_page. We know it was added
* before the alloc_huge_page call, otherwise
- * HPageRestoreReserve would be set on the page.
+ * hugetlb_restore_reserve would be set on the folio.
* Remove the entry so that a subsequent allocation
* does not consume a reservation.
*/
@@ -2713,12 +2714,12 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
/*
* VERY rare out of memory condition. Since
* we can not delete the entry, set
- * HPageRestoreReserve so that the reserve
- * count will be incremented when the page
+ * hugetlb_restore_reserve so that the reserve
+ * count will be incremented when the folio
* is freed. This reserve will be consumed
* on a subsequent allocation.
*/
- SetHPageRestoreReserve(page);
+ folio_set_hugetlb_restore_reserve(folio);
} else if (rc < 0) {
/*
* Rare out of memory condition from
@@ -2734,12 +2735,12 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
/*
* For private mappings, no entry indicates
* a reservation is present. Since we can
- * not add an entry, set SetHPageRestoreReserve
- * on the page so reserve count will be
+ * not add an entry, set hugetlb_restore_reserve
+ * on the folio so reserve count will be
* incremented when freed. This reserve will
* be consumed on a subsequent allocation.
*/
- SetHPageRestoreReserve(page);
+ folio_set_hugetlb_restore_reserve(folio);
} else
/*
* No reservation present, do nothing
--
2.39.0
Change hugetlb_cgroup_commit_charge{,_rsvd}(), dequeue_huge_page_vma()
and alloc_buddy_huge_page_with_mpol() to use folios so that
alloc_huge_page() can operate on a folio throughout and only convert
back to a page at its return.
Signed-off-by: Sidhartha Kumar <[email protected]>
---
include/linux/hugetlb_cgroup.h | 8 ++++----
mm/hugetlb.c | 33 ++++++++++++++++-----------------
mm/hugetlb_cgroup.c | 8 ++------
3 files changed, 22 insertions(+), 27 deletions(-)
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index f706626a8063..3d82d91f49ac 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -141,10 +141,10 @@ extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page);
+ struct folio *folio);
extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page);
+ struct folio *folio);
extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
struct folio *folio);
extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
@@ -230,14 +230,14 @@ static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx,
static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page)
+ struct folio *folio)
{
}
static inline void
hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page)
+ struct folio *folio)
{
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0b8bab52bc7e..640ca4eaccf2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1209,7 +1209,7 @@ static unsigned long available_huge_pages(struct hstate *h)
return h->free_huge_pages - h->resv_huge_pages;
}
-static struct page *dequeue_huge_page_vma(struct hstate *h,
+static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve,
long chg)
@@ -1253,7 +1253,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
}
mpol_cond_put(mpol);
- return &folio->page;
+ return folio;
err:
return NULL;
@@ -2305,7 +2305,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
* Use the VMA's mpolicy to allocate a huge page from the buddy.
*/
static
-struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
+struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr)
{
struct folio *folio = NULL;
@@ -2328,7 +2328,7 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
if (!folio)
folio = alloc_surplus_hugetlb_folio(h, gfp_mask, nid, nodemask);
mpol_cond_put(mpol);
- return &folio->page;
+ return folio;
}
/* page migration callback function */
@@ -2877,7 +2877,6 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
{
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
- struct page *page;
struct folio *folio;
long map_chg, map_commit;
long gbl_chg;
@@ -2941,34 +2940,34 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
* from the global free pool (global change). gbl_chg == 0 indicates
* a reservation exists for the allocation.
*/
- page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
- if (!page) {
+ folio = dequeue_hugetlb_folio_vma(h, vma, addr, avoid_reserve, gbl_chg);
+ if (!folio) {
spin_unlock_irq(&hugetlb_lock);
- page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
- if (!page)
+ folio = alloc_buddy_hugetlb_folio_with_mpol(h, vma, addr);
+ if (!folio)
goto out_uncharge_cgroup;
spin_lock_irq(&hugetlb_lock);
if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
- SetHPageRestoreReserve(page);
+ folio_set_hugetlb_restore_reserve(folio);
h->resv_huge_pages--;
}
- list_add(&page->lru, &h->hugepage_activelist);
- set_page_refcounted(page);
+ list_add(&folio->lru, &h->hugepage_activelist);
+ folio_ref_unfreeze(folio, 1);
/* Fall through */
}
- folio = page_folio(page);
- hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
+
+ hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, folio);
/* If allocation is not consuming a reservation, also store the
* hugetlb_cgroup pointer on the page.
*/
if (deferred_reserve) {
hugetlb_cgroup_commit_charge_rsvd(idx, pages_per_huge_page(h),
- h_cg, page);
+ h_cg, folio);
}
spin_unlock_irq(&hugetlb_lock);
- hugetlb_set_page_subpool(page, spool);
+ hugetlb_set_folio_subpool(folio, spool);
map_commit = vma_commit_reservation(h, vma, addr);
if (unlikely(map_chg > map_commit)) {
@@ -2989,7 +2988,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
}
- return page;
+ return &folio->page;
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index d9e4425d81ac..dedd2edb076e 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -331,19 +331,15 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page)
+ struct folio *folio)
{
- struct folio *folio = page_folio(page);
-
__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
}
void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
- struct page *page)
+ struct folio *folio)
{
- struct folio *folio = page_folio(page);
-
__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
}
--
2.39.0
On Tue, Jan 03, 2023 at 01:13:35PM -0600, Sidhartha Kumar wrote:
> +static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
> + int nid)
> {
> struct page *page;
> + struct folio *folio;
> bool pin = !!(current->flags & PF_MEMALLOC_PIN);
>
> lockdep_assert_held(&hugetlb_lock);
> list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> - if (pin && !is_longterm_pinnable_page(page))
> + folio = page_folio(page);
I'd argue that you can pull folios directly off the hugepage_freelists.
Since they're attached through the 'lru', you know they're not tail
pages, because lru.prev aliases with compound_head.
The rest looks good.
On Tue, Jan 03, 2023 at 01:13:33PM -0600, Sidhartha Kumar wrote:
> +++ b/include/linux/mm.h
> @@ -775,6 +775,11 @@ static inline bool get_page_unless_zero(struct page *page)
> return page_ref_add_unless(page, 1, 0);
> }
>
> +static inline bool get_folio_unless_zero(struct folio *folio)
> +{
> + return folio_ref_add_unless(folio, 1, 0);
> +}
> +
I think that's folio_try_get() in linux/page_ref.h.
The rest looks good though.
On 01/03/23 13:13, Sidhartha Kumar wrote:
> Convert isolate_hugetlb() to take in a folio and convert its callers to
> pass a folio. Using page_folio() to convert the callers to use a folio is
> safe as isolate_hugetlb() operates on a head page.
>
> Also add a folio equivalent of get_page_unless_zero().
>
> Signed-off-by: Sidhartha Kumar <[email protected]>
> ---
> include/linux/hugetlb.h | 4 ++--
> include/linux/mm.h | 5 +++++
> mm/gup.c | 2 +-
> mm/hugetlb.c | 16 ++++++++--------
> mm/memory-failure.c | 2 +-
> mm/memory_hotplug.c | 2 +-
> mm/mempolicy.c | 2 +-
> mm/migrate.c | 2 +-
> 8 files changed, 20 insertions(+), 15 deletions(-)
The hugetlb parts look fine to me. If you address the get_folio_unless_zero
issue pointed out by Matthew,
Reviewed-by: Mike Kravetz <[email protected]>
--
Mike Kravetz
On 01/03/23 21:00, Matthew Wilcox wrote:
> On Tue, Jan 03, 2023 at 01:13:35PM -0600, Sidhartha Kumar wrote:
> > +static struct folio *dequeue_hugetlb_folio_node_exact(struct hstate *h,
> > + int nid)
> > {
> > struct page *page;
> > + struct folio *folio;
> > bool pin = !!(current->flags & PF_MEMALLOC_PIN);
> >
> > lockdep_assert_held(&hugetlb_lock);
> > list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> > - if (pin && !is_longterm_pinnable_page(page))
> > + folio = page_folio(page);
>
> I'd argue that you can pull folios directly off the hugepage_freelists.
> Since they're attached through the 'lru', you know they're not tail
> pages, because lru.prev aliases with compound_head.
Yes, then we can get rid of the local variable *page.
A quick grep shows only the routine __mem_cgroup_uncharge_list() does
this today.
--
Mike Kravetz
On 01/03/23 13:13, Sidhartha Kumar wrote:
> Change hugetlb_cgroup_commit_charge{,_rsvd}(), dequeue_huge_page_vma()
> and alloc_buddy_huge_page_with_mpol() to use folios
Nice that the only 'conversion' was to eliminate the page to folio or
folio to page calls in those routines.
> so alloc_huge_page()
> is cleaned by operating on folios until its return.
>
> Signed-off-by: Sidhartha Kumar <[email protected]>
> ---
> include/linux/hugetlb_cgroup.h | 8 ++++----
> mm/hugetlb.c | 33 ++++++++++++++++-----------------
> mm/hugetlb_cgroup.c | 8 ++------
> 3 files changed, 22 insertions(+), 27 deletions(-)
Thanks,
Reviewed-by: Mike Kravetz <[email protected]>
--
Mike Kravetz
>
> diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
> index f706626a8063..3d82d91f49ac 100644
> --- a/include/linux/hugetlb_cgroup.h
> +++ b/include/linux/hugetlb_cgroup.h
> @@ -141,10 +141,10 @@ extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup **ptr);
> extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page);
> + struct folio *folio);
> extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page);
> + struct folio *folio);
> extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
> struct folio *folio);
> extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
> @@ -230,14 +230,14 @@ static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx,
>
> static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page)
> + struct folio *folio)
> {
> }
>
> static inline void
> hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page)
> + struct folio *folio)
> {
> }
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 0b8bab52bc7e..640ca4eaccf2 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1209,7 +1209,7 @@ static unsigned long available_huge_pages(struct hstate *h)
> return h->free_huge_pages - h->resv_huge_pages;
> }
>
> -static struct page *dequeue_huge_page_vma(struct hstate *h,
> +static struct folio *dequeue_hugetlb_folio_vma(struct hstate *h,
> struct vm_area_struct *vma,
> unsigned long address, int avoid_reserve,
> long chg)
> @@ -1253,7 +1253,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> }
>
> mpol_cond_put(mpol);
> - return &folio->page;
> + return folio;
>
> err:
> return NULL;
> @@ -2305,7 +2305,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> * Use the VMA's mpolicy to allocate a huge page from the buddy.
> */
> static
> -struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> +struct folio *alloc_buddy_hugetlb_folio_with_mpol(struct hstate *h,
> struct vm_area_struct *vma, unsigned long addr)
> {
> struct folio *folio = NULL;
> @@ -2328,7 +2328,7 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> if (!folio)
> folio = alloc_surplus_hugetlb_folio(h, gfp_mask, nid, nodemask);
> mpol_cond_put(mpol);
> - return &folio->page;
> + return folio;
> }
>
> /* page migration callback function */
> @@ -2877,7 +2877,6 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
> {
> struct hugepage_subpool *spool = subpool_vma(vma);
> struct hstate *h = hstate_vma(vma);
> - struct page *page;
> struct folio *folio;
> long map_chg, map_commit;
> long gbl_chg;
> @@ -2941,34 +2940,34 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
> * from the global free pool (global change). gbl_chg == 0 indicates
> * a reservation exists for the allocation.
> */
> - page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
> - if (!page) {
> + folio = dequeue_hugetlb_folio_vma(h, vma, addr, avoid_reserve, gbl_chg);
> + if (!folio) {
> spin_unlock_irq(&hugetlb_lock);
> - page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
> - if (!page)
> + folio = alloc_buddy_hugetlb_folio_with_mpol(h, vma, addr);
> + if (!folio)
> goto out_uncharge_cgroup;
> spin_lock_irq(&hugetlb_lock);
> if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
> - SetHPageRestoreReserve(page);
> + folio_set_hugetlb_restore_reserve(folio);
> h->resv_huge_pages--;
> }
> - list_add(&page->lru, &h->hugepage_activelist);
> - set_page_refcounted(page);
> + list_add(&folio->lru, &h->hugepage_activelist);
> + folio_ref_unfreeze(folio, 1);
> /* Fall through */
> }
> - folio = page_folio(page);
> - hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
> +
> + hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, folio);
> /* If allocation is not consuming a reservation, also store the
> * hugetlb_cgroup pointer on the page.
> */
> if (deferred_reserve) {
> hugetlb_cgroup_commit_charge_rsvd(idx, pages_per_huge_page(h),
> - h_cg, page);
> + h_cg, folio);
> }
>
> spin_unlock_irq(&hugetlb_lock);
>
> - hugetlb_set_page_subpool(page, spool);
> + hugetlb_set_folio_subpool(folio, spool);
>
> map_commit = vma_commit_reservation(h, vma, addr);
> if (unlikely(map_chg > map_commit)) {
> @@ -2989,7 +2988,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
> hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
> pages_per_huge_page(h), folio);
> }
> - return page;
> + return &folio->page;
>
> out_uncharge_cgroup:
> hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
> diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
> index d9e4425d81ac..dedd2edb076e 100644
> --- a/mm/hugetlb_cgroup.c
> +++ b/mm/hugetlb_cgroup.c
> @@ -331,19 +331,15 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
>
> void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page)
> + struct folio *folio)
> {
> - struct folio *folio = page_folio(page);
> -
> __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
> }
>
> void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
> struct hugetlb_cgroup *h_cg,
> - struct page *page)
> + struct folio *folio)
> {
> - struct folio *folio = page_folio(page);
> -
> __hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
> }
>
> --
> 2.39.0
>
On 01/03/23 13:13, Sidhartha Kumar wrote:
> Use the hugetlb folio flag macros inside restore_reserve_on_error() and
> update the comments to reflect the use of folios.
>
> Signed-off-by: Sidhartha Kumar <[email protected]>
> ---
> mm/hugetlb.c | 27 ++++++++++++++-------------
> 1 file changed, 14 insertions(+), 13 deletions(-)
Looks fine,
Reviewed-by: Mike Kravetz <[email protected]>
--
Mike Kravetz