move_page_tables() tries to move page tables by PMD or PTE.
The root reason is that, to move a PMD, both the old and new ranges must be
PMD aligned. But the current code calculates the old range and new range
separately. This leads to some redundant checks and calculations.
This cleanup consolidates the range check in one place to reduce some
extra range handling.
v4:
* remove redundant parentheses pointed out by Kirill
v3:
* merge patch 1 with 2 as suggested by Kirill
* add patch 4 to simplify the logic to calculate next and extent
v2:
* remove the 3rd patch, which doesn't work on the ARM platform. Thanks to
Dmitry Osipenko for the report and testing
Wei Yang (4):
mm/mremap: it is sure to have enough space when extent meets
requirement
mm/mremap: calculate extent in one place
mm/mremap: start addresses are properly aligned
mm/mremap: use pmd_addr_end to simplify the calculate of extent
include/linux/huge_mm.h | 2 +-
mm/huge_memory.c | 8 +-------
mm/mremap.c | 27 ++++++++++-----------------
3 files changed, 12 insertions(+), 25 deletions(-)
--
2.20.1 (Apple Git-117)
old_end is passed to move_huge_pmd() and move_normal_pmd() to check whether
there is enough space to do the move, but this check is already done before
invoking these functions.
These two functions are only invoked when extent meets the requirement,
and there is one check before invoking them:
if (extent > old_end - old_addr)
extent = old_end - old_addr;
This implies that (old_end - old_addr) can never fail the check in these two
functions, so the check there is redundant.
Signed-off-by: Wei Yang <[email protected]>
Tested-by: Dmitry Osipenko <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
---
include/linux/huge_mm.h | 2 +-
mm/huge_memory.c | 7 ++-----
mm/mremap.c | 10 ++++------
3 files changed, 7 insertions(+), 12 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 71f20776b06c..17c4c4975145 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -42,7 +42,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
unsigned char *vec);
extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, unsigned long old_end,
+ unsigned long new_addr,
pmd_t *old_pmd, pmd_t *new_pmd);
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 78c84bee7e29..1e580fdad4d0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1722,17 +1722,14 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
}
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd)
+ unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
struct mm_struct *mm = vma->vm_mm;
bool force_flush = false;
- if ((old_addr & ~HPAGE_PMD_MASK) ||
- (new_addr & ~HPAGE_PMD_MASK) ||
- old_end - old_addr < HPAGE_PMD_SIZE)
+ if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK))
return false;
/*
diff --git a/mm/mremap.c b/mm/mremap.c
index 5dd572d57ca9..de27b12c8a5a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -193,15 +193,13 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
#ifdef CONFIG_HAVE_MOVE_PMD
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd)
+ unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
struct mm_struct *mm = vma->vm_mm;
pmd_t pmd;
- if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
- || old_end - old_addr < PMD_SIZE)
+ if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK))
return false;
/*
@@ -273,7 +271,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
- old_end, old_pmd, new_pmd);
+ old_pmd, new_pmd);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (moved)
@@ -293,7 +291,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_normal_pmd(vma, old_addr, new_addr,
- old_end, old_pmd, new_pmd);
+ old_pmd, new_pmd);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (moved)
--
2.20.1 (Apple Git-117)
The purpose of this code is to calculate the smaller of the extents in the
old and new ranges. Let's leverage pmd_addr_end() to do the calculation.
Hopefully this makes the code easier to read.
Signed-off-by: Wei Yang <[email protected]>
---
v4: remove redundant parentheses pointed out by Kirill
---
mm/mremap.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index f5f17d050617..f6f56aa0b893 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -237,11 +237,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long len,
bool need_rmap_locks)
{
- unsigned long extent, next, old_end;
+ unsigned long extent, old_next, new_next, old_end, new_end;
struct mmu_notifier_range range;
pmd_t *old_pmd, *new_pmd;
old_end = old_addr + len;
+ new_end = new_addr + len;
flush_cache_range(vma, old_addr, old_end);
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
@@ -250,14 +251,11 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
cond_resched();
- next = (old_addr + PMD_SIZE) & PMD_MASK;
- /* even if next overflowed, extent below will be ok */
- extent = next - old_addr;
- if (extent > old_end - old_addr)
- extent = old_end - old_addr;
- next = (new_addr + PMD_SIZE) & PMD_MASK;
- if (extent > next - new_addr)
- extent = next - new_addr;
+
+ old_next = pmd_addr_end(old_addr, old_end);
+ new_next = pmd_addr_end(new_addr, new_end);
+ extent = min(old_next - old_addr, new_next - new_addr);
+
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
--
2.20.1 (Apple Git-117)
After the previous cleanup, extent is the minimal step for both the source
and the destination. This means that when extent is HPAGE_PMD_SIZE or
PMD_SIZE, old_addr and new_addr are properly aligned too.
Since move_huge_pmd() and move_normal_pmd() are only invoked in
move_page_tables(), it is safe to remove the alignment check now.
Signed-off-by: Wei Yang <[email protected]>
Tested-by: Dmitry Osipenko <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
---
mm/huge_memory.c | 3 ---
mm/mremap.c | 3 ---
2 files changed, 6 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1e580fdad4d0..462a7dbd6350 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1729,9 +1729,6 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
struct mm_struct *mm = vma->vm_mm;
bool force_flush = false;
- if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK))
- return false;
-
/*
* The destination pmd shouldn't be established, free_pgtables()
* should have release it.
diff --git a/mm/mremap.c b/mm/mremap.c
index a30b3e86cc99..f5f17d050617 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -199,9 +199,6 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
struct mm_struct *mm = vma->vm_mm;
pmd_t pmd;
- if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK))
- return false;
-
/*
* The destination pmd shouldn't be established, free_pgtables()
* should have release it.
--
2.20.1 (Apple Git-117)
Page tables are moved on a PMD basis. This requires that both the source
and destination ranges meet the alignment requirement.
The current code works well since move_huge_pmd() and move_normal_pmd()
check old_addr and new_addr again, and then fall back to move_ptes()
if either of them is not aligned.
Instead of calculating the extent separately, it is better to calculate it
in one place, so we know when it is not necessary to try moving a PMD. By
doing so, the logic becomes a little clearer.
Signed-off-by: Wei Yang <[email protected]>
Tested-by: Dmitry Osipenko <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
---
mm/mremap.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index de27b12c8a5a..a30b3e86cc99 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -258,6 +258,9 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
extent = next - old_addr;
if (extent > old_end - old_addr)
extent = old_end - old_addr;
+ next = (new_addr + PMD_SIZE) & PMD_MASK;
+ if (extent > next - new_addr)
+ extent = next - new_addr;
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
@@ -301,9 +304,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (pte_alloc(new_vma->vm_mm, new_pmd))
break;
- next = (new_addr + PMD_SIZE) & PMD_MASK;
- if (extent > next - new_addr)
- extent = next - new_addr;
move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
new_pmd, new_addr, need_rmap_locks);
}
--
2.20.1 (Apple Git-117)
08.07.2020 12:50, Wei Yang пишет:
> move_page_tables() tries to move page table by PMD or PTE.
>
> The root reason is if it tries to move PMD, both old and new range should be
> PMD aligned. But current code calculate old range and new range separately.
> This leads to some redundant check and calculation.
>
> This cleanup tries to consolidate the range check in one place to reduce some
> extra range handling.
>
> v4:
> * remove a redundant parentheses pointed by Kirill
>
> v3:
> * merge patch 1 with 2 as suggested by Kirill
> * add patch 4 to simplify the logic to calculate next and extent
Hello, Wei!
Unfortunately you re-introduced the offending change that was fixed in
v2 and today's next-20200709 on ARM32 is broken once again:
BUG: Bad rss-counter state mm:db85ec46 type:MM_ANONPAGES val:190
Please don't do it ;)
> v2:
> * remove 3rd patch which doesn't work on ARM platform. Thanks report and
> test from Dmitry Osipenko
>
> Wei Yang (4):
> mm/mremap: it is sure to have enough space when extent meets
> requirement
> mm/mremap: calculate extent in one place
> mm/mremap: start addresses are properly aligned
> mm/mremap: use pmd_addr_end to simplify the calculate of extent
>
> include/linux/huge_mm.h | 2 +-
> mm/huge_memory.c | 8 +-------
> mm/mremap.c | 27 ++++++++++-----------------
> 3 files changed, 12 insertions(+), 25 deletions(-)
>
On Thu, Jul 09, 2020 at 10:38:58PM +0300, Dmitry Osipenko wrote:
>08.07.2020 12:50, Wei Yang пишет:
>> move_page_tables() tries to move page table by PMD or PTE.
>>
>> The root reason is if it tries to move PMD, both old and new range should be
>> PMD aligned. But current code calculate old range and new range separately.
>> This leads to some redundant check and calculation.
>>
>> This cleanup tries to consolidate the range check in one place to reduce some
>> extra range handling.
>>
>> v4:
>> * remove a redundant parentheses pointed by Kirill
>>
>> v3:
>> * merge patch 1 with 2 as suggested by Kirill
>
>> * add patch 4 to simplify the logic to calculate next and extent
>
>Hello, Wei!
>
>Unfortunately you re-introduced the offending change that was fixed in
>v2 and today's next-20200709 on ARM32 is broken once again:
>
>BUG: Bad rss-counter state mm:db85ec46 type:MM_ANONPAGES val:190
>
Ah, my bad, I forgot the error we hit last time. It is caused by the different
form of pmd_addr_end.
Sorry for that.
@ Kirill
If you agree, I would leave the extent/next calculation as it is in patch 3.
>Please don't do it ;)
>
>> v2:
>> * remove 3rd patch which doesn't work on ARM platform. Thanks report and
>> test from Dmitry Osipenko
>>
>> Wei Yang (4):
>> mm/mremap: it is sure to have enough space when extent meets
>> requirement
>> mm/mremap: calculate extent in one place
>> mm/mremap: start addresses are properly aligned
>> mm/mremap: use pmd_addr_end to simplify the calculate of extent
>>
>> include/linux/huge_mm.h | 2 +-
>> mm/huge_memory.c | 8 +-------
>> mm/mremap.c | 27 ++++++++++-----------------
>> 3 files changed, 12 insertions(+), 25 deletions(-)
>>
--
Wei Yang
Help you, Help me
On Fri, Jul 10, 2020 at 09:14:10AM +0800, Wei Yang wrote:
> On Thu, Jul 09, 2020 at 10:38:58PM +0300, Dmitry Osipenko wrote:
> >08.07.2020 12:50, Wei Yang пишет:
> >> move_page_tables() tries to move page table by PMD or PTE.
> >>
> >> The root reason is if it tries to move PMD, both old and new range should be
> >> PMD aligned. But current code calculate old range and new range separately.
> >> This leads to some redundant check and calculation.
> >>
> >> This cleanup tries to consolidate the range check in one place to reduce some
> >> extra range handling.
> >>
> >> v4:
> >> * remove a redundant parentheses pointed by Kirill
> >>
> >> v3:
> >> * merge patch 1 with 2 as suggested by Kirill
> >
> >> * add patch 4 to simplify the logic to calculate next and extent
> >
> >Hello, Wei!
> >
> >Unfortunately you re-introduced the offending change that was fixed in
> >v2 and today's next-20200709 on ARM32 is broken once again:
> >
> >BUG: Bad rss-counter state mm:db85ec46 type:MM_ANONPAGES val:190
> >
>
> Ah, my bad, I forget the error we met last time. It is the different format of
> pmd_addr_end.
>
> Sorry for that.
>
> @ Kirill
>
> If you agree, I would leave the extent/next calculation as it is in patch 3.
Okay.
--
Kirill A. Shutemov