2024-02-19 14:17:57

by 李培锋

Subject: [PATCH 1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk

From: lipeifeng <[email protected]>

This patch lets callers of folio_referenced() control the behavior of
rmap_walk(), so that some threads can hold the rmap lock in rmap_walk()
instead of using try_lock when calling folio_referenced().
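
For example (a sketch based on the hunks below, not an additional change):
an existing caller keeps today's behavior by passing rw_try_lock = 1, while
a caller that prefers to wait on the rmap lock passes 0:

	/* current behavior: rmap_walk() uses try_lock and may bail out */
	referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
					   &vm_flags, /* rw_try_lock = */ 1);

	/* wait for the rmap lock instead of skipping the folio */
	referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
					   &vm_flags, /* rw_try_lock = */ 0);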

Signed-off-by: lipeifeng <[email protected]>
---
include/linux/rmap.h | 5 +++--
mm/rmap.c | 5 +++--
mm/vmscan.c | 16 ++++++++++++++--
3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b7944a8..846b261 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -623,7 +623,8 @@ static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
* Called from mm/vmscan.c to handle paging out
*/
int folio_referenced(struct folio *, int is_locked,
- struct mem_cgroup *memcg, unsigned long *vm_flags);
+ struct mem_cgroup *memcg, unsigned long *vm_flags,
+ unsigned int rw_try_lock);

void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);
@@ -739,7 +740,7 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,

static inline int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg,
- unsigned long *vm_flags)
+ unsigned long *vm_flags, unsigned int rw_try_lock)
{
*vm_flags = 0;
return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index f5d43ed..15d1fba 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -952,6 +952,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
* @is_locked: Caller holds lock on the folio.
* @memcg: target memory cgroup
* @vm_flags: A combination of all the vma->vm_flags which referenced the folio.
+ * @rw_try_lock: whether to use try_lock when walking the rmap
*
* Quick test_and_clear_referenced for all mappings of a folio,
*
@@ -959,7 +960,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
* the function bailed out due to rmap lock contention.
*/
int folio_referenced(struct folio *folio, int is_locked,
- struct mem_cgroup *memcg, unsigned long *vm_flags)
+ struct mem_cgroup *memcg, unsigned long *vm_flags, unsigned int rw_try_lock)
{
int we_locked = 0;
struct folio_referenced_arg pra = {
@@ -970,7 +971,7 @@ int folio_referenced(struct folio *folio, int is_locked,
.rmap_one = folio_referenced_one,
.arg = (void *)&pra,
.anon_lock = folio_lock_anon_vma_read,
- .try_lock = true,
+ .try_lock = rw_try_lock ? true : false,
.invalid_vma = invalid_folio_referenced_vma,
};

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f9c854..0296d48 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -136,6 +136,9 @@ struct scan_control {
/* Always discard instead of demoting to lower tier memory */
unsigned int no_demotion:1;

+ /* whether to use try_lock in rmap_walk */
+ unsigned int rw_try_lock:1;
+
/* Allocation order */
s8 order;

@@ -827,7 +830,7 @@ static enum folio_references folio_check_references(struct folio *folio,
unsigned long vm_flags;

referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
- &vm_flags);
+ &vm_flags, sc->rw_try_lock);
referenced_folio = folio_test_clear_referenced(folio);

/*
@@ -1501,6 +1504,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
struct scan_control sc = {
.gfp_mask = GFP_KERNEL,
.may_unmap = 1,
+ .rw_try_lock = 1,
};
struct reclaim_stat stat;
unsigned int nr_reclaimed;
@@ -2038,7 +2042,7 @@ static void shrink_active_list(unsigned long nr_to_scan,

/* Referenced or rmap lock contention: rotate */
if (folio_referenced(folio, 0, sc->target_mem_cgroup,
- &vm_flags) != 0) {
+ &vm_flags, sc->rw_try_lock) != 0) {
/*
* Identify referenced, file-backed active folios and
* give them one more trip around the active list. So
@@ -2096,6 +2100,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
.may_unmap = 1,
.may_swap = 1,
.no_demotion = 1,
+ .rw_try_lock = 1,
};

nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, false);
@@ -5442,6 +5447,7 @@ static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
.may_swap = true,
.reclaim_idx = MAX_NR_ZONES - 1,
.gfp_mask = GFP_KERNEL,
+ .rw_try_lock = 1,
};

buf = kvmalloc(len + 1, GFP_KERNEL);
@@ -6414,6 +6420,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
+ .rw_try_lock = 1,
};

/*
@@ -6459,6 +6466,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
.may_unmap = 1,
.reclaim_idx = MAX_NR_ZONES - 1,
.may_swap = !noswap,
+ .rw_try_lock = 1,
};

WARN_ON_ONCE(!current->reclaim_state);
@@ -6503,6 +6511,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+ .rw_try_lock = 1,
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
@@ -6764,6 +6773,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
.gfp_mask = GFP_KERNEL,
.order = order,
.may_unmap = 1,
+ .rw_try_lock = 1,
};

set_task_reclaim_state(current, &sc.reclaim_state);
@@ -7223,6 +7233,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
.may_unmap = 1,
.may_swap = 1,
.hibernation_mode = 1,
+ .rw_try_lock = 1,
};
struct zonelist *zonelist = node_zonelist(numa_node_id(), sc.gfp_mask);
unsigned long nr_reclaimed;
@@ -7381,6 +7392,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
.may_swap = 1,
.reclaim_idx = gfp_zone(gfp_mask),
+ .rw_try_lock = 1,
};
unsigned long pflags;

--
2.7.4



2024-02-20 01:43:21

by 李培锋

Subject: Re: [PATCH 1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk

add more experts from Linux and Google.


On 2024/2/19 22:17, [email protected] wrote:
> From: lipeifeng <[email protected]>
>
> This patch lets callers of folio_referenced() control the behavior of
> rmap_walk(), so that some threads can hold the rmap lock in rmap_walk()
> instead of using try_lock when calling folio_referenced().
>
> Signed-off-by: lipeifeng <[email protected]>

2024-02-20 03:02:12

by Barry Song

Subject: Re: [PATCH 1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk

Hi peifeng,

On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <[email protected]> wrote:
>
> add more experts from Linux and Google.
>
>
> On 2024/2/19 22:17, [email protected] wrote:
> > From: lipeifeng <[email protected]>
> >
> > This patch lets callers of folio_referenced() control the behavior of
> > rmap_walk(), so that some threads can hold the rmap lock in rmap_walk()
> > instead of using try_lock when calling folio_referenced().

please describe what problem the patch is trying to address,
and why this modification is needed, in the commit message.

btw, who sets rw_try_lock to 0, and what is the benefit?

Thanks
Barry

2024-02-20 04:00:24

by 李培锋

Subject: Re: [PATCH 1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk


On 2024/2/20 11:01, Barry Song wrote:
> Hi peifeng,
>
> On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <[email protected]> wrote:
>> add more experts from Linux and Google.
>>
>>
>> On 2024/2/19 22:17, [email protected] wrote:
>>> From: lipeifeng <[email protected]>
>>>
>>> This patch lets callers of folio_referenced() control the behavior of
>>> rmap_walk(), so that some threads can hold the rmap lock in rmap_walk()
>>> instead of using try_lock when calling folio_referenced().
> please describe what problem the patch is trying to address,
> and why this modification is needed in commit message.

Hi Barry:

1. The patch is one of the kshrinkd series patches.

2. It lets folio_referenced() control the behavior of rmap_walk():
kshrinkd calls folio_referenced() through shrink_folio_list(), but it
does not want to use try_lock in rmap_walk() during folio_referenced().


> btw, who sets rw_try_lock to 0, and what is the benefit?

Actually, in the current situation only shrink_folio_list() will set
try_lock to 1, while the others will set it to 0 so that they wait for
the rwsem lock if it is contended in rmap_walk.
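
Roughly, a caller that wants to wait would clear the new bit in its
scan_control (a minimal sketch, not part of this patch; the kshrinkd
usage is an assumption about patch 2/2, which is not shown here):

	unsigned long vm_flags;
	int referenced;
	struct scan_control sc = {
		.gfp_mask	= GFP_KERNEL,
		.may_unmap	= 1,
		.may_swap	= 1,
		.rw_try_lock	= 0,	/* hold the rmap lock instead of try_lock */
	};

	/*
	 * With rw_try_lock == 0, folio_referenced() runs rmap_walk() with
	 * .try_lock == false, so this caller no longer gets -1 back on rmap
	 * lock contention; it waits for the lock instead.
	 * (folio here is one taken from the LRU scan.)
	 */
	referenced = folio_referenced(folio, 0, sc.target_mem_cgroup,
				      &vm_flags, sc.rw_try_lock);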



2024-02-20 07:17:02

by Barry Song

Subject: Re: [PATCH 1/2] mm/rmap: support folio_referenced to control if try_lock in rmap_walk

On Tue, Feb 20, 2024 at 5:00 PM 李培锋 <[email protected]> wrote:
>
>
> On 2024/2/20 11:01, Barry Song wrote:
> > Hi peifeng,
> >
> > On Tue, Feb 20, 2024 at 2:43 PM 李培锋 <[email protected]> wrote:
> >> add more experts from Linux and Google.
> >>
> >>
> >> On 2024/2/19 22:17, [email protected] wrote:
> >>> From: lipeifeng <[email protected]>
> >>>
> >>> This patch lets callers of folio_referenced() control the behavior of
> >>> rmap_walk(), so that some threads can hold the rmap lock in rmap_walk()
> >>> instead of using try_lock when calling folio_referenced().
> > please describe what problem the patch is trying to address,
> > and why this modification is needed, in the commit message.
>
> Hi Barry:
>
> 1. The patch is one of the kshrinkd series patches.

this seems like a bad name for the patchset, as nobody knows what
kshrinkd is. Maybe something like "asynchronously reclaim contended
folios rather than aging them"?

>
> 2. It lets folio_referenced() control the behavior of rmap_walk():
> kshrinkd calls folio_referenced() through shrink_folio_list(), but it
> does not want to use try_lock in rmap_walk() during folio_referenced().
>
>
> > btw, who sets rw_try_lock to 0, and what is the benefit?
>
> Actually, in the current situation only shrink_folio_list() will set
> try_lock to 1,

understood, as you don't want contended folios to be skipped
by the scanner anymore.

>
> while the others will set it to 0 so that they wait for the rwsem lock
> if it is contended in rmap_walk.

ok. other reclamation threads will still skip contended folios.
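
For reference, that "skip" shows up on the caller side roughly like this
(paraphrased from folio_check_references(); the -1 return on contention
comes from the try_lock path in folio_referenced()):

	referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
					   &vm_flags, sc->rw_try_lock);

	/* rmap lock contention: rotate the folio instead of reclaiming it */
	if (referenced_ptes == -1)
		return FOLIOREF_KEEP;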

As discussed, the patchset really needs detailed data to back it up.

Thanks
Barry