Hi everyone,
This series contains a few patches to remove unneeded lock page and
PageMovable check, reduce the rcu lock duration. Also we fix potential
pte_unmap on an not mapped pte. More details can be found in the
respective changelogs. Thanks!
---
v3:
[PATCH 1/4] mm/migration: reduce the rcu lock duration
add the analysis that the original race condition isn't possible now
alsoreduce the rcu lock duration in kernel_migrate_pages
[PATCH 2/4] mm/migration: remove unneeded lock page and PageMovable check
let free_pages_prepare() clear PG_isolated for us
[PATCH 3/4] mm/migration: return errno when isolate_huge_page failed
rename isolate_huge_page to isolate_hugetlb
[PATCH 4/4] mm/migration: fix potential pte_unmap on an not mapped pte
collect Reviewed-by tag
v2:
collect Reviewed-by tag
make isolate_huge_page consistent with isolate_lru_page
add hugetlbfs variant of hugetlb_migration_entry_wait
v1:
rebase [1] on mainline.
[1] https://lore.kernel.org/lkml/[email protected]/T/
---
Miaohe Lin (4):
mm: reduce the rcu lock duration
mm/migration: remove unneeded lock page and PageMovable check
mm/migration: return errno when isolate_huge_page failed
mm/migration: fix potential pte_unmap on an not mapped pte
include/linux/hugetlb.h | 6 +++---
include/linux/swapops.h | 12 ++++++++----
mm/gup.c | 2 +-
mm/hugetlb.c | 15 +++++++--------
mm/memory-failure.c | 2 +-
mm/mempolicy.c | 5 ++---
mm/migrate.c | 40 +++++++++++++++++++++++++---------------
7 files changed, 47 insertions(+), 35 deletions(-)
--
2.23.0
__migration_entry_wait and migration_entry_wait_on_locked assume pte is
always mapped from caller. But this is not the case when it's called from
migration_entry_wait_huge and follow_huge_pmd. Add a hugetlbfs variant that
calls hugetlb_migration_entry_wait(ptep == NULL) to fix this issue.
Fixes: 30dad30922cc ("mm: migration: add migrate_entry_wait_huge()")
Suggested-by: David Hildenbrand <[email protected]>
Signed-off-by: Miaohe Lin <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
---
include/linux/swapops.h | 12 ++++++++----
mm/hugetlb.c | 4 ++--
mm/migrate.c | 23 +++++++++++++++++++----
3 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index f24775b41880..bb7afd03a324 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -244,8 +244,10 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl);
extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address);
-extern void migration_entry_wait_huge(struct vm_area_struct *vma,
- struct mm_struct *mm, pte_t *pte);
+#ifdef CONFIG_HUGETLB_PAGE
+extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl);
+extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte);
+#endif
#else
static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
{
@@ -271,8 +273,10 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
spinlock_t *ptl) { }
static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
unsigned long address) { }
-static inline void migration_entry_wait_huge(struct vm_area_struct *vma,
- struct mm_struct *mm, pte_t *pte) { }
+#ifdef CONFIG_HUGETLB_PAGE
+static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { }
+static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { }
+#endif
static inline int is_writable_migration_entry(swp_entry_t entry)
{
return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2026fcfc8886..5015270cca12 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5694,7 +5694,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait_huge(vma, mm, ptep);
+ migration_entry_wait_huge(vma, ptep);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
@@ -6912,7 +6912,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
} else {
if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
- __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+ __migration_entry_wait_huge((pte_t *)pmd, ptl);
goto retry;
}
/*
diff --git a/mm/migrate.c b/mm/migrate.c
index 97c31b87d1a2..581dd2b4dcbb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -315,13 +315,28 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
__migration_entry_wait(mm, ptep, ptl);
}
-void migration_entry_wait_huge(struct vm_area_struct *vma,
- struct mm_struct *mm, pte_t *pte)
+#ifdef CONFIG_HUGETLB_PAGE
+void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl)
{
- spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte);
- __migration_entry_wait(mm, pte, ptl);
+ pte_t pte;
+
+ spin_lock(ptl);
+ pte = huge_ptep_get(ptep);
+
+ if (unlikely(!is_hugetlb_entry_migration(pte)))
+ spin_unlock(ptl);
+ else
+ migration_entry_wait_on_locked(pte_to_swp_entry(pte), NULL, ptl);
}
+void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte)
+{
+ spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, pte);
+
+ __migration_entry_wait_huge(pte, ptl);
+}
+#endif
+
#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
{
--
2.23.0
We might fail to isolate huge page due to e.g. the page is under migration
which cleared HPageMigratable. We should return errno in this case rather
than always return 1 which could confuse the user, i.e. the caller might
think all of the memory is migrated while the hugetlb page is left behind.
We make the prototype of isolate_huge_page consistent with isolate_lru_page
as suggested by Huang Ying and rename isolate_huge_page to isolate_hugetlb
as suggested by Muchun to improve the readability.
Fixes: e8db67eb0ded ("mm: migrate: move_pages() supports thp migration")
Suggested-by: Huang Ying <[email protected]>
Signed-off-by: Miaohe Lin <[email protected]>
---
include/linux/hugetlb.h | 6 +++---
mm/gup.c | 2 +-
mm/hugetlb.c | 11 +++++------
mm/memory-failure.c | 2 +-
mm/mempolicy.c | 2 +-
mm/migrate.c | 5 +++--
6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e4cff27d1198..756b66ff025e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -170,7 +170,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
long freed);
-bool isolate_huge_page(struct page *page, struct list_head *list);
+int isolate_hugetlb(struct page *page, struct list_head *list);
int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
void putback_active_hugepage(struct page *page);
@@ -376,9 +376,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
return NULL;
}
-static inline bool isolate_huge_page(struct page *page, struct list_head *list)
+static inline int isolate_hugetlb(struct page *page, struct list_head *list)
{
- return false;
+ return -EBUSY;
}
static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
diff --git a/mm/gup.c b/mm/gup.c
index 551264407624..3899dcb288a6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1898,7 +1898,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
* Try to move out any movable page before pinning the range.
*/
if (folio_test_hugetlb(folio)) {
- if (!isolate_huge_page(&folio->page,
+ if (isolate_hugetlb(&folio->page,
&movable_page_list))
isolation_error_count++;
continue;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01f0e2e5ab48..2026fcfc8886 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2765,8 +2765,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
* Fail with -EBUSY if not possible.
*/
spin_unlock_irq(&hugetlb_lock);
- if (!isolate_huge_page(old_page, list))
- ret = -EBUSY;
+ ret = isolate_hugetlb(old_page, list);
spin_lock_irq(&hugetlb_lock);
goto free_new;
} else if (!HPageFreed(old_page)) {
@@ -2842,7 +2841,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
if (hstate_is_gigantic(h))
return -ENOMEM;
- if (page_count(head) && isolate_huge_page(head, list))
+ if (page_count(head) && !isolate_hugetlb(head, list))
ret = 0;
else if (!page_count(head))
ret = alloc_and_dissolve_huge_page(h, head, list);
@@ -6945,15 +6944,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
}
-bool isolate_huge_page(struct page *page, struct list_head *list)
+int isolate_hugetlb(struct page *page, struct list_head *list)
{
- bool ret = true;
+ int ret = 0;
spin_lock_irq(&hugetlb_lock);
if (!PageHeadHuge(page) ||
!HPageMigratable(page) ||
!get_page_unless_zero(page)) {
- ret = false;
+ ret = -EBUSY;
goto unlock;
}
ClearHPageMigratable(page);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b85661cbdc4a..5deb1b1cb2fd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2166,7 +2166,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
bool lru = PageLRU(page);
if (PageHuge(page)) {
- isolated = isolate_huge_page(page, pagelist);
+ isolated = !isolate_hugetlb(page, pagelist);
} else {
if (lru)
isolated = !isolate_lru_page(page);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2dad094177bf..f96d55131689 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -602,7 +602,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
- if (!isolate_huge_page(page, qp->pagelist) &&
+ if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
* Failed to isolate page but allow migrating pages
diff --git a/mm/migrate.c b/mm/migrate.c
index 337336115e43..97c31b87d1a2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1627,8 +1627,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (PageHuge(page)) {
if (PageHead(page)) {
- isolate_huge_page(page, pagelist);
- err = 1;
+ err = isolate_hugetlb(page, pagelist);
+ if (!err)
+ err = 1;
}
} else {
struct page *head;
--
2.23.0
On Wed, May 25, 2022 at 04:18:21PM +0800, Miaohe Lin wrote:
> We might fail to isolate huge page due to e.g. the page is under migration
> which cleared HPageMigratable. We should return errno in this case rather
> than always return 1 which could confuse the user, i.e. the caller might
> think all of the memory is migrated while the hugetlb page is left behind.
> We make the prototype of isolate_huge_page consistent with isolate_lru_page
> as suggested by Huang Ying and rename isolate_huge_page to isolate_hugetlb
> as suggested by Muchun to improve the readability.
>
> Fixes: e8db67eb0ded ("mm: migrate: move_pages() supports thp migration")
> Suggested-by: Huang Ying <[email protected]>
> Signed-off-by: Miaohe Lin <[email protected]>
Looks good to me, one thing below though:
> ---
> include/linux/hugetlb.h | 6 +++---
> mm/gup.c | 2 +-
> mm/hugetlb.c | 11 +++++------
> mm/memory-failure.c | 2 +-
> mm/mempolicy.c | 2 +-
> mm/migrate.c | 5 +++--
> 6 files changed, 14 insertions(+), 14 deletions(-)
>
...
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1627,8 +1627,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
>
> if (PageHuge(page)) {
> if (PageHead(page)) {
> - isolate_huge_page(page, pagelist);
> - err = 1;
> + err = isolate_hugetlb(page, pagelist);
> + if (!err)
> + err = 1;
> }
We used to always return 1 which means page has been queued for migration, as we
did not check isolate_huge_page() return value.
Now, we either return 1 or 0 depending on isolate_hugetlb().
I guess that was fine because in the end, if isolate_huge_page() failed,
the page just does not get added to 'pagelist', right? So, it is just
confusing for the user because he might not get an error so he thinks
the page will be migrated, and yet will not?
--
Oscar Salvador
SUSE Labs
On 2022/5/25 16:41, Oscar Salvador wrote:
> On Wed, May 25, 2022 at 04:18:21PM +0800, Miaohe Lin wrote:
>> We might fail to isolate huge page due to e.g. the page is under migration
>> which cleared HPageMigratable. We should return errno in this case rather
>> than always return 1 which could confuse the user, i.e. the caller might
>> think all of the memory is migrated while the hugetlb page is left behind.
>> We make the prototype of isolate_huge_page consistent with isolate_lru_page
>> as suggested by Huang Ying and rename isolate_huge_page to isolate_hugetlb
>> as suggested by Muchun to improve the readability.
>>
>> Fixes: e8db67eb0ded ("mm: migrate: move_pages() supports thp migration")
>> Suggested-by: Huang Ying <[email protected]>
>> Signed-off-by: Miaohe Lin <[email protected]>
>
> Looks good to me, one thing below though:
>
>> ---
>> include/linux/hugetlb.h | 6 +++---
>> mm/gup.c | 2 +-
>> mm/hugetlb.c | 11 +++++------
>> mm/memory-failure.c | 2 +-
>> mm/mempolicy.c | 2 +-
>> mm/migrate.c | 5 +++--
>> 6 files changed, 14 insertions(+), 14 deletions(-)
>>
> ...
>> --- a/mm/migrate.c
>> +++ b/mm/migrate.c
>> @@ -1627,8 +1627,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
>>
>> if (PageHuge(page)) {
>> if (PageHead(page)) {
>> - isolate_huge_page(page, pagelist);
>> - err = 1;
>> + err = isolate_hugetlb(page, pagelist);
>> + if (!err)
>> + err = 1;
>> }
>
> We used to always return 1 which means page has been queued for migration, as we
> did not check isolate_huge_page() return value.
> Now, we either return 1 or 0 depending on isolate_hugetlb().
Return 1 or -EBUSY just as normal page case.
> I guess that was fine because in the end, if isolate_huge_page() failed,
> the page just does not get added to 'pagelist', right? So, it is just
> confusing for the user because he might not get an error so he thinks
> the page will be migrated, and yet will not?
Yes, the hugetlb page might not be migrated due to error while it's not reported in the
__user *status. So the caller might think all of the memory is migrated and thus does
not retry to migrate the hugetlb page in the next round.
Many thanks for your review and comment! :)
>
>