2022-11-09 05:45:56

by Pavan Kondeti

Subject: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

When MADV_PAGEOUT is called on a private file mapping VMA region,
we bail out early if the process is neither the owner nor write capable
of the file. However, this VMA may have both private/shared clean
pages and private dirty pages. The opportunity to page out the
private dirty pages (Anon pages) is missed. Fix this by caching
the file access check and using it later along with PageAnon() during
the page walk.

We observe a ~10% improvement in zram usage, leaving more memory
available on a 4GB RAM system running Android.

Signed-off-by: Pavankumar Kondeti <[email protected]>
---
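For reference, a minimal userspace sketch of the scenario this patch
targets (illustrative only; the file path is hypothetical and any file
the caller can read but not write would do):

	#include <fcntl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		/* a file we can read but do not own and cannot write */
		int fd = open("/product/etc/some_readonly_file", O_RDONLY);
		char *p;

		if (fd < 0)
			return 1;
		/* MAP_PRIVATE write access does not require a writable fd */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE,
			 fd, 0);
		if (p == MAP_FAILED)
			return 1;
		p[0] = 1;	/* COW fault: now a private dirty (anon) page */
		/*
		 * Without this patch, madvise_pageout() bails out early and
		 * the dirty anon page above is never considered for reclaim.
		 */
		madvise(p, 4096, MADV_PAGEOUT);
		munmap(p, 4096);
		close(fd);
		return 0;
	}
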
mm/madvise.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index c7105ec..b6b88e2 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -40,6 +40,7 @@
struct madvise_walk_private {
struct mmu_gather *tlb;
bool pageout;
+ bool can_pageout_file;
};

/*
@@ -328,6 +329,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
struct madvise_walk_private *private = walk->private;
struct mmu_gather *tlb = private->tlb;
bool pageout = private->pageout;
+ bool pageout_anon_only = pageout && !private->can_pageout_file;
struct mm_struct *mm = tlb->mm;
struct vm_area_struct *vma = walk->vma;
pte_t *orig_pte, *pte, ptent;
@@ -364,6 +366,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (page_mapcount(page) != 1)
goto huge_unlock;

+ if (pageout_anon_only && !PageAnon(page))
+ goto huge_unlock;
+
if (next - addr != HPAGE_PMD_SIZE) {
int err;

@@ -432,6 +437,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (PageTransCompound(page)) {
if (page_mapcount(page) != 1)
break;
+ if (pageout_anon_only && !PageAnon(page))
+ break;
get_page(page);
if (!trylock_page(page)) {
put_page(page);
@@ -459,6 +466,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
if (!PageLRU(page) || page_mapcount(page) != 1)
continue;

+ if (pageout_anon_only && !PageAnon(page))
+ continue;
+
VM_BUG_ON_PAGE(PageTransCompound(page), page);

if (pte_young(ptent)) {
@@ -541,11 +551,13 @@ static long madvise_cold(struct vm_area_struct *vma,

static void madvise_pageout_page_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
- unsigned long addr, unsigned long end)
+ unsigned long addr, unsigned long end,
+ bool can_pageout_file)
{
struct madvise_walk_private walk_private = {
.pageout = true,
.tlb = tlb,
+ .can_pageout_file = can_pageout_file,
};

tlb_start_vma(tlb, vma);
@@ -553,10 +565,8 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
tlb_end_vma(tlb, vma);
}

-static inline bool can_do_pageout(struct vm_area_struct *vma)
+static inline bool can_do_file_pageout(struct vm_area_struct *vma)
{
- if (vma_is_anonymous(vma))
- return true;
if (!vma->vm_file)
return false;
/*
@@ -576,17 +586,23 @@ static long madvise_pageout(struct vm_area_struct *vma,
{
struct mm_struct *mm = vma->vm_mm;
struct mmu_gather tlb;
+ bool can_pageout_file;

*prev = vma;
if (!can_madv_lru_vma(vma))
return -EINVAL;

- if (!can_do_pageout(vma))
- return 0;
+ /*
+ * If the VMA belongs to a private file mapping, there can be private
+ * dirty pages which can be paged out even if this process is neither
+ * the owner nor write capable of the file. Cache the file access check
+ * here and use it later during the page walk.
+ */
+ can_pageout_file = can_do_file_pageout(vma);

lru_add_drain();
tlb_gather_mmu(&tlb, mm);
- madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+ madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
tlb_finish_mmu(&tlb);

return 0;
--
2.7.4



2022-11-30 23:56:57

by Andrew Morton

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings


On Wed, 9 Nov 2022 10:48:36 +0530 Pavankumar Kondeti <[email protected]> wrote:

> When MADV_PAGEOUT is called on a private file mapping VMA region,
> we bail out early if the process is neither the owner nor write capable
> of the file. However, this VMA may have both private/shared clean
> pages and private dirty pages. The opportunity to page out the
> private dirty pages (Anon pages) is missed. Fix this by caching
> the file access check and using it later along with PageAnon() during
> the page walk.
>
> We observe a ~10% improvement in zram usage, leaving more memory
> available on a 4GB RAM system running Android.
>

Could we please have some reviewer input on this?

Thanks.

> ---
> mm/madvise.c | 30 +++++++++++++++++++++++-------
> 1 file changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index c7105ec..b6b88e2 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -40,6 +40,7 @@
> struct madvise_walk_private {
> struct mmu_gather *tlb;
> bool pageout;
> + bool can_pageout_file;
> };
>
> /*
> @@ -328,6 +329,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> struct madvise_walk_private *private = walk->private;
> struct mmu_gather *tlb = private->tlb;
> bool pageout = private->pageout;
> + bool pageout_anon_only = pageout && !private->can_pageout_file;
> struct mm_struct *mm = tlb->mm;
> struct vm_area_struct *vma = walk->vma;
> pte_t *orig_pte, *pte, ptent;
> @@ -364,6 +366,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (page_mapcount(page) != 1)
> goto huge_unlock;
>
> + if (pageout_anon_only && !PageAnon(page))
> + goto huge_unlock;
> +
> if (next - addr != HPAGE_PMD_SIZE) {
> int err;
>
> @@ -432,6 +437,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (PageTransCompound(page)) {
> if (page_mapcount(page) != 1)
> break;
> + if (pageout_anon_only && !PageAnon(page))
> + break;
> get_page(page);
> if (!trylock_page(page)) {
> put_page(page);
> @@ -459,6 +466,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (!PageLRU(page) || page_mapcount(page) != 1)
> continue;
>
> + if (pageout_anon_only && !PageAnon(page))
> + continue;
> +
> VM_BUG_ON_PAGE(PageTransCompound(page), page);
>
> if (pte_young(ptent)) {
> @@ -541,11 +551,13 @@ static long madvise_cold(struct vm_area_struct *vma,
>
> static void madvise_pageout_page_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma,
> - unsigned long addr, unsigned long end)
> + unsigned long addr, unsigned long end,
> + bool can_pageout_file)
> {
> struct madvise_walk_private walk_private = {
> .pageout = true,
> .tlb = tlb,
> + .can_pageout_file = can_pageout_file,
> };
>
> tlb_start_vma(tlb, vma);
> @@ -553,10 +565,8 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
> tlb_end_vma(tlb, vma);
> }
>
> -static inline bool can_do_pageout(struct vm_area_struct *vma)
> +static inline bool can_do_file_pageout(struct vm_area_struct *vma)
> {
> - if (vma_is_anonymous(vma))
> - return true;
> if (!vma->vm_file)
> return false;
> /*
> @@ -576,17 +586,23 @@ static long madvise_pageout(struct vm_area_struct *vma,
> {
> struct mm_struct *mm = vma->vm_mm;
> struct mmu_gather tlb;
> + bool can_pageout_file;
>
> *prev = vma;
> if (!can_madv_lru_vma(vma))
> return -EINVAL;
>
> - if (!can_do_pageout(vma))
> - return 0;
> + /*
> + * If the VMA belongs to a private file mapping, there can be private
> + * dirty pages which can be paged out even if this process is neither
> + * the owner nor write capable of the file. Cache the file access check
> + * here and use it later during the page walk.
> + */
> + can_pageout_file = can_do_file_pageout(vma);
>
> lru_add_drain();
> tlb_gather_mmu(&tlb, mm);
> - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
> + madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
> tlb_finish_mmu(&tlb);
>
> return 0;
> --
> 2.7.4
>

2022-12-01 03:07:55

by Pavan Kondeti

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

On Wed, Nov 30, 2022 at 03:17:39PM -0800, Andrew Morton wrote:
>
> On Wed, 9 Nov 2022 10:48:36 +0530 Pavankumar Kondeti <[email protected]> wrote:
>
> > When MADV_PAGEOUT is called on a private file mapping VMA region,
> > we bail out early if the process is neither the owner nor write capable
> > of the file. However, this VMA may have both private/shared clean
> > pages and private dirty pages. The opportunity to page out the
> > private dirty pages (Anon pages) is missed. Fix this by caching
> > the file access check and using it later along with PageAnon() during
> > the page walk.
> >
> > We observe a ~10% improvement in zram usage, leaving more memory
> > available on a 4GB RAM system running Android.
> >
>
> Could we please have some reviewer input on this?
>
> Thanks.
>

Thanks, Andrew, for the reminder. FYI, this patch has been included in the
Android Generic Kernel Image (5.10 and 5.15 kernels) as we have seen good
savings on Android. It would make a difference on low-memory Android
devices.

Suren/Minchan,

Can you please take a look?

Thanks,
Pavan

2022-12-01 14:06:40

by David Hildenbrand

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

> + * If the VMA belongs to a private file mapping, there can be private
> + * dirty pages which can be paged out even if this process is neither
> + * the owner nor write capable of the file. Cache the file access check
> + * here and use it later during the page walk.
> + */
> + can_pageout_file = can_do_file_pageout(vma);

Why not move that into madvise_pageout_page_range()? That avoids passing
this variable to that function.

In fact, why not even call that function directly instead of storing the
result in struct madvise_walk_private? The function is extremely
lightweight.
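Something like the following (untested sketch; can_do_file_pageout()
would have to be moved above the page walk or forward-declared):

	static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
				unsigned long addr, unsigned long end,
				struct mm_walk *walk)
	{
		struct madvise_walk_private *private = walk->private;
		struct mmu_gather *tlb = private->tlb;
		bool pageout = private->pageout;
		/* computed here; no new madvise_walk_private member needed */
		bool pageout_anon_only = pageout &&
					 !can_do_file_pageout(walk->vma);
		...
	}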

>
> lru_add_drain();
> tlb_gather_mmu(&tlb, mm);
> - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
> + madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
> tlb_finish_mmu(&tlb);
>
> return 0;

--
Thanks,

David / dhildenb

2022-12-01 14:24:01

by Pavan Kondeti

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

Hi David,

Thanks for your review.

On Thu, Dec 01, 2022 at 02:01:22PM +0100, David Hildenbrand wrote:
> >+ * If the VMA belongs to a private file mapping, there can be private
> >+ * dirty pages which can be paged out even if this process is neither
> >+ * the owner nor write capable of the file. Cache the file access check
> >+ * here and use it later during the page walk.
> >+ */
> >+ can_pageout_file = can_do_file_pageout(vma);
>
> Why not move that into madvise_pageout_page_range()? That avoids passing
> this variable to that function.
>
Silly me. I should have done that in the first place.

> In fact, why not even call that function directly instead of storing the
> result in struct madvise_walk_private? The function is extremely
> lightweight.

Agreed. I will incorporate your suggestion and send the patch after testing.

>
> > lru_add_drain();
> > tlb_gather_mmu(&tlb, mm);
> >- madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
> >+ madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
> > tlb_finish_mmu(&tlb);
> > return 0;
>
Thanks,
Pavan

2022-12-01 14:49:06

by Pavan Kondeti

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

Hi Mark,

On Thu, Dec 01, 2022 at 01:46:36PM +0000, Mark Hemment wrote:
> On Wed, 9 Nov 2022 at 05:19, Pavankumar Kondeti
> <[email protected]> wrote:
> >
> > When MADV_PAGEOUT is called on a private file mapping VMA region,
> > we bail out early if the process is neither the owner nor write capable
> > of the file. However, this VMA may have both private/shared clean
> > pages and private dirty pages. The opportunity to page out the
> > private dirty pages (Anon pages) is missed. Fix this by caching
> > the file access check and using it later along with PageAnon() during
> > the page walk.
> >
> > We observe a ~10% improvement in zram usage, leaving more memory
> > available on a 4GB RAM system running Android.
> >
> > Signed-off-by: Pavankumar Kondeti <[email protected]>
>
> I have only scan-reviewed the patch; the logic looks good (as does the
> reasoning), but I have a couple of minor comments:
>
Thanks for the review and nice suggestions on how the patch can be improved.

>
> > ---
> > mm/madvise.c | 30 +++++++++++++++++++++++-------
> > 1 file changed, 23 insertions(+), 7 deletions(-)
> >
> > diff --git a/mm/madvise.c b/mm/madvise.c
> > index c7105ec..b6b88e2 100644
> > --- a/mm/madvise.c
> > +++ b/mm/madvise.c
> > @@ -40,6 +40,7 @@
> > struct madvise_walk_private {
> > struct mmu_gather *tlb;
> > bool pageout;
> > + bool can_pageout_file;
> > };
> >
> > /*
> > @@ -328,6 +329,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> > struct madvise_walk_private *private = walk->private;
> > struct mmu_gather *tlb = private->tlb;
> > bool pageout = private->pageout;
> > + bool pageout_anon_only = pageout && !private->can_pageout_file;
> > struct mm_struct *mm = tlb->mm;
> > struct vm_area_struct *vma = walk->vma;
> > pte_t *orig_pte, *pte, ptent;
> > @@ -364,6 +366,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> > if (page_mapcount(page) != 1)
> > goto huge_unlock;
> >
> > + if (pageout_anon_only && !PageAnon(page))
> > + goto huge_unlock;
> > +
> > if (next - addr != HPAGE_PMD_SIZE) {
> > int err;
> >
> > @@ -432,6 +437,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> > if (PageTransCompound(page)) {
> > if (page_mapcount(page) != 1)
> > break;
> > + if (pageout_anon_only && !PageAnon(page))
> > + break;
> > get_page(page);
> > if (!trylock_page(page)) {
> > put_page(page);
> > @@ -459,6 +466,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> > if (!PageLRU(page) || page_mapcount(page) != 1)
> > continue;
> >
> > + if (pageout_anon_only && !PageAnon(page))
> > + continue;
> > +
>
> The added PageAnon()s probably do not have a measurable performance
> impact, but they are not ideal when walking a large anonymous mapping
> (as '->can_pageout_file' is zero for anon mappings).
> Could the code be re-structured so that PageAnon() is only tested when
> filtering is needed? Say:
>
>	if (pageout_anon_only_filter && !PageAnon(page)) {
>		continue;
>	}
>
> where 'pageout_anon_only_filter' is only set for a private named
> mapping when we do not have write perms on the backing object. It
> would not be set for anon mappings.
>
Understood. As you suggested, the PageAnon() check can be eliminated for
an anon mapping. I will make the necessary changes.
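Something along these lines (untested sketch):

	bool pageout_anon_only_filter;

	/*
	 * Engage the anon-only filter just for private file-backed VMAs
	 * where we lack write permission on the file; it stays false for
	 * anon mappings, which then never reach the PageAnon() test.
	 */
	pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
				   !can_do_file_pageout(vma);

with the page-walk checks becoming:

	if (pageout_anon_only_filter && !PageAnon(page))
		continue;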

>
> > VM_BUG_ON_PAGE(PageTransCompound(page), page);
> >
> > if (pte_young(ptent)) {
> > @@ -541,11 +551,13 @@ static long madvise_cold(struct vm_area_struct *vma,
> >
> > static void madvise_pageout_page_range(struct mmu_gather *tlb,
> > struct vm_area_struct *vma,
> > - unsigned long addr, unsigned long end)
> > + unsigned long addr, unsigned long end,
> > + bool can_pageout_file)
> > {
> > struct madvise_walk_private walk_private = {
> > .pageout = true,
> > .tlb = tlb,
> > + .can_pageout_file = can_pageout_file,
> > };
> >
> > tlb_start_vma(tlb, vma);
> > @@ -553,10 +565,8 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
> > tlb_end_vma(tlb, vma);
> > }
> >
> > -static inline bool can_do_pageout(struct vm_area_struct *vma)
> > +static inline bool can_do_file_pageout(struct vm_area_struct *vma)
> > {
> > - if (vma_is_anonymous(vma))
> > - return true;
> > if (!vma->vm_file)
> > return false;
> > /*
> > @@ -576,17 +586,23 @@ static long madvise_pageout(struct vm_area_struct *vma,
> > {
> > struct mm_struct *mm = vma->vm_mm;
> > struct mmu_gather tlb;
> > + bool can_pageout_file;
> >
> > *prev = vma;
> > if (!can_madv_lru_vma(vma))
> > return -EINVAL;
> >
> > - if (!can_do_pageout(vma))
> > - return 0;
>
> The removal of this test results in a process which cannot get write
> perms for a shared named mapping performing a 'walk'. As such a
> mapping cannot have anon pages, this walk will be a no-op. Not sure
> why a well-behaved program would do a MADV_PAGEOUT on such a mapping,
> but if one does, this could be considered a (minor performance)
> regression. As madvise_pageout() can easily filter this case, it might
> be worth adding a check.
>

Got it. We can take care of this edge case by rejecting shared mappings,
i.e. where !!(vma->vm_flags & VM_MAYSHARE) == 1 and the process has no
write permission.
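Something like this in madvise_pageout(), before starting the walk
(untested):

	/*
	 * A shared file mapping cannot have private anon pages, so if we
	 * also lack write permission on the file the walk would be a
	 * no-op; keep bailing out early in that case.
	 */
	if (!vma_is_anonymous(vma) && !can_do_file_pageout(vma) &&
	    (vma->vm_flags & VM_MAYSHARE))
		return 0;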

>
> > + /*
> > + * If the VMA belongs to a private file mapping, there can be private
> > + * dirty pages which can be paged out even if this process is neither
> > + * the owner nor write capable of the file. Cache the file access check
> > + * here and use it later during the page walk.
> > + */
> > + can_pageout_file = can_do_file_pageout(vma);
> >
> > lru_add_drain();
> > tlb_gather_mmu(&tlb, mm);
> > - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
> > + madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
> > tlb_finish_mmu(&tlb);
> >
> > return 0;
> > --
> > 2.7.4
> >
> >
>
Thanks,
Pavan

2022-12-01 15:21:23

by Mark Hemment

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

On Wed, 9 Nov 2022 at 05:19, Pavankumar Kondeti
<[email protected]> wrote:
>
> When MADV_PAGEOUT is called on a private file mapping VMA region,
> we bail out early if the process is neither the owner nor write capable
> of the file. However, this VMA may have both private/shared clean
> pages and private dirty pages. The opportunity to page out the
> private dirty pages (Anon pages) is missed. Fix this by caching
> the file access check and using it later along with PageAnon() during
> the page walk.
>
> We observe a ~10% improvement in zram usage, leaving more memory
> available on a 4GB RAM system running Android.
>
> Signed-off-by: Pavankumar Kondeti <[email protected]>

I have only scan-reviewed the patch; the logic looks good (as does the
reasoning), but I have a couple of minor comments:


> ---
> mm/madvise.c | 30 +++++++++++++++++++++++-------
> 1 file changed, 23 insertions(+), 7 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index c7105ec..b6b88e2 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -40,6 +40,7 @@
> struct madvise_walk_private {
> struct mmu_gather *tlb;
> bool pageout;
> + bool can_pageout_file;
> };
>
> /*
> @@ -328,6 +329,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> struct madvise_walk_private *private = walk->private;
> struct mmu_gather *tlb = private->tlb;
> bool pageout = private->pageout;
> + bool pageout_anon_only = pageout && !private->can_pageout_file;
> struct mm_struct *mm = tlb->mm;
> struct vm_area_struct *vma = walk->vma;
> pte_t *orig_pte, *pte, ptent;
> @@ -364,6 +366,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (page_mapcount(page) != 1)
> goto huge_unlock;
>
> + if (pageout_anon_only && !PageAnon(page))
> + goto huge_unlock;
> +
> if (next - addr != HPAGE_PMD_SIZE) {
> int err;
>
> @@ -432,6 +437,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (PageTransCompound(page)) {
> if (page_mapcount(page) != 1)
> break;
> + if (pageout_anon_only && !PageAnon(page))
> + break;
> get_page(page);
> if (!trylock_page(page)) {
> put_page(page);
> @@ -459,6 +466,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
> if (!PageLRU(page) || page_mapcount(page) != 1)
> continue;
>
> + if (pageout_anon_only && !PageAnon(page))
> + continue;
> +

The added PageAnon()s probably do not have a measurable performance
impact, but they are not ideal when walking a large anonymous mapping
(as '->can_pageout_file' is zero for anon mappings).
Could the code be re-structured so that PageAnon() is only tested when
filtering is needed? Say:

	if (pageout_anon_only_filter && !PageAnon(page)) {
		continue;
	}

where 'pageout_anon_only_filter' is only set for a private named
mapping when we do not have write perms on the backing object. It
would not be set for anon mappings.


> VM_BUG_ON_PAGE(PageTransCompound(page), page);
>
> if (pte_young(ptent)) {
> @@ -541,11 +551,13 @@ static long madvise_cold(struct vm_area_struct *vma,
>
> static void madvise_pageout_page_range(struct mmu_gather *tlb,
> struct vm_area_struct *vma,
> - unsigned long addr, unsigned long end)
> + unsigned long addr, unsigned long end,
> + bool can_pageout_file)
> {
> struct madvise_walk_private walk_private = {
> .pageout = true,
> .tlb = tlb,
> + .can_pageout_file = can_pageout_file,
> };
>
> tlb_start_vma(tlb, vma);
> @@ -553,10 +565,8 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
> tlb_end_vma(tlb, vma);
> }
>
> -static inline bool can_do_pageout(struct vm_area_struct *vma)
> +static inline bool can_do_file_pageout(struct vm_area_struct *vma)
> {
> - if (vma_is_anonymous(vma))
> - return true;
> if (!vma->vm_file)
> return false;
> /*
> @@ -576,17 +586,23 @@ static long madvise_pageout(struct vm_area_struct *vma,
> {
> struct mm_struct *mm = vma->vm_mm;
> struct mmu_gather tlb;
> + bool can_pageout_file;
>
> *prev = vma;
> if (!can_madv_lru_vma(vma))
> return -EINVAL;
>
> - if (!can_do_pageout(vma))
> - return 0;

The removal of this test results in a process which cannot get write
perms for a shared named mapping performing a 'walk'. As such a
mapping cannot have anon pages, this walk will be a no-op. Not sure
why a well-behaved program would do a MADV_PAGEOUT on such a mapping,
but if one does, this could be considered a (minor performance)
regression. As madvise_pageout() can easily filter this case, it might
be worth adding a check.


> + /*
> + * If the VMA belongs to a private file mapping, there can be private
> + * dirty pages which can be paged out even if this process is neither
> + * the owner nor write capable of the file. Cache the file access check
> + * here and use it later during the page walk.
> + */
> + can_pageout_file = can_do_file_pageout(vma);
>
> lru_add_drain();
> tlb_gather_mmu(&tlb, mm);
> - madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
> + madvise_pageout_page_range(&tlb, vma, start_addr, end_addr, can_pageout_file);
> tlb_finish_mmu(&tlb);
>
> return 0;
> --
> 2.7.4
>
>

Cheers,
Mark

2022-12-01 20:12:29

by Suren Baghdasaryan

Subject: Re: [PATCH] mm/madvise: fix madvise_pageout for private file mappings

On Wed, Nov 30, 2022 at 7:00 PM Pavan Kondeti <[email protected]> wrote:
>
> On Wed, Nov 30, 2022 at 03:17:39PM -0800, Andrew Morton wrote:
> >
> > On Wed, 9 Nov 2022 10:48:36 +0530 Pavankumar Kondeti <[email protected]> wrote:
> >
> > > When MADV_PAGEOUT is called on a private file mapping VMA region,
> > > we bail out early if the process is neither the owner nor write capable
> > > of the file. However, this VMA may have both private/shared clean
> > > pages and private dirty pages. The opportunity to page out the
> > > private dirty pages (Anon pages) is missed. Fix this by caching
> > > the file access check and using it later along with PageAnon() during
> > > the page walk.
> > >
> > > We observe a ~10% improvement in zram usage, leaving more memory
> > > available on a 4GB RAM system running Android.
> > >
> >
> > Could we please have some reviewer input on this?
> >
> > Thanks.
> >
>
> Thanks Andrew for the reminder. Fyi, this patch has been included in Android
> Generic Kernel Image (5.10 and 5.15 kernels) as we have seen good savings on
> Android. It would make a difference on a low memory android devices.
>
> Suren/Minchan,
>
> Can you please do the needful?

Yeah, I saw this patch before and it makes sense to me.
When discussing it with Minchan, one concern was that if the vma has no
private dirty pages, then we will be wasting CPU cycles scanning it.
However, I guess it's the choice of the userspace process to call
madvise() on such mappings and risk wasting some cycles...

>
> Thanks,
> Pavan