2023-12-20 04:01:12

by Kinsey Ho

[permalink] [raw]
Subject: [PATCH mm-unstable v3 0/4] mm/mglru: Kconfig cleanup

This series is the result of the following discussion:
https://lore.kernel.org/[email protected]/

It mainly avoids building the code that walks page tables on CPUs that
cannot use it, i.e., those that don't support the hardware accessed bit.
Specifically, it introduces a new Kconfig to guard some of the functions added by
commit bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
on CPUs like POWER9, on which the series was tested.


Kinsey Ho (4):
mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG
mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
mm/mglru: remove CONFIG_MEMCG
mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE

arch/Kconfig | 8 +
arch/arm64/Kconfig | 1 +
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 6 -
include/linux/memcontrol.h | 2 +-
include/linux/mm_types.h | 16 +-
include/linux/mmzone.h | 28 +---
include/linux/pgtable.h | 2 +-
kernel/fork.c | 2 +-
mm/Kconfig | 4 +
mm/vmscan.c | 271 ++++++++++++++++++---------------
11 files changed, 174 insertions(+), 167 deletions(-)

--
2.43.0.472.g3155946c3a-goog



2023-12-20 04:01:20

by Kinsey Ho

[permalink] [raw]
Subject: [PATCH mm-unstable v1 1/4] mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG

Some architectures are able to set the accessed bit in PTEs when PTEs
are used as part of linear address translations.

Add CONFIG_ARCH_HAS_HW_PTE_YOUNG for such architectures to be able to
override arch_has_hw_pte_young().

Signed-off-by: Kinsey Ho <[email protected]>
Co-developed-by: Aneesh Kumar K V <[email protected]>
Signed-off-by: Aneesh Kumar K V <[email protected]>
---
arch/Kconfig | 8 ++++++++
arch/arm64/Kconfig | 1 +
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 6 ------
include/linux/pgtable.h | 2 +-
5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index f4b210ab0612..8c8901f80586 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1470,6 +1470,14 @@ config DYNAMIC_SIGFRAME
config HAVE_ARCH_NODE_DEV_GROUP
bool

+config ARCH_HAS_HW_PTE_YOUNG
+ bool
+ help
+ Architectures that select this option are capable of setting the
+ accessed bit in PTE entries when using them as part of linear address
+ translations. Architectures that require runtime check should select
+ this option and override arch_has_hw_pte_young().
+
config ARCH_HAS_NONLEAF_PMD_YOUNG
bool
help
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7b071a00425d..12d611f3da5d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -36,6 +36,7 @@ config ARM64
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_HW_PTE_YOUNG
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1566748f16c4..04941a1ffc0a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_HW_PTE_YOUNG
select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_COPY_MC if X86_64
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 57bab91bbf50..08b5cb22d9a6 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1679,12 +1679,6 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}

-#define arch_has_hw_pte_young arch_has_hw_pte_young
-static inline bool arch_has_hw_pte_young(void)
-{
- return true;
-}
-
#define arch_check_zapped_pte arch_check_zapped_pte
void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index af7639c3b0a3..9ecc20fa6269 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -375,7 +375,7 @@ static inline bool arch_has_hw_nonleaf_pmd_young(void)
*/
static inline bool arch_has_hw_pte_young(void)
{
- return false;
+ return IS_ENABLED(CONFIG_ARCH_HAS_HW_PTE_YOUNG);
}
#endif

--
2.43.0.472.g3155946c3a-goog


2023-12-20 04:01:40

by Kinsey Ho

[permalink] [raw]
Subject: [PATCH mm-unstable v1 3/4] mm/mglru: remove CONFIG_MEMCG

Remove CONFIG_MEMCG in a refactoring to improve code readability at
the cost of a few bytes in struct lru_gen_folio per node when
CONFIG_MEMCG=n.

Signed-off-by: Kinsey Ho <[email protected]>
Co-developed-by: Aneesh Kumar K V <[email protected]>
Signed-off-by: Aneesh Kumar K V <[email protected]>
---
include/linux/mm_types.h | 4 ---
include/linux/mmzone.h | 26 ++--------------
mm/vmscan.c | 67 +++++++++++++---------------------------
3 files changed, 23 insertions(+), 74 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 552fa2d11c57..55b7121809ff 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1017,9 +1017,7 @@ struct lru_gen_mm_list {

void lru_gen_add_mm(struct mm_struct *mm);
void lru_gen_del_mm(struct mm_struct *mm);
-#ifdef CONFIG_MEMCG
void lru_gen_migrate_mm(struct mm_struct *mm);
-#endif

static inline void lru_gen_init_mm(struct mm_struct *mm)
{
@@ -1050,11 +1048,9 @@ static inline void lru_gen_del_mm(struct mm_struct *mm)
{
}

-#ifdef CONFIG_MEMCG
static inline void lru_gen_migrate_mm(struct mm_struct *mm)
{
}
-#endif

static inline void lru_gen_init_mm(struct mm_struct *mm)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bc3f63ec4291..28665e1b8475 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -440,14 +440,12 @@ struct lru_gen_folio {
atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS];
/* whether the multi-gen LRU is enabled */
bool enabled;
-#ifdef CONFIG_MEMCG
/* the memcg generation this lru_gen_folio belongs to */
u8 gen;
/* the list segment this lru_gen_folio belongs to */
u8 seg;
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_node list;
-#endif
};

enum {
@@ -493,11 +491,6 @@ struct lru_gen_mm_walk {
bool force_scan;
};

-void lru_gen_init_lruvec(struct lruvec *lruvec);
-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
-
-#ifdef CONFIG_MEMCG
-
/*
* For each node, memcgs are divided into two generations: the old and the
* young. For each generation, memcgs are randomly sharded into multiple bins
@@ -555,6 +548,8 @@ struct lru_gen_memcg {
};

void lru_gen_init_pgdat(struct pglist_data *pgdat);
+void lru_gen_init_lruvec(struct lruvec *lruvec);
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);

void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@@ -563,19 +558,6 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid);

-#else /* !CONFIG_MEMCG */
-
-#define MEMCG_NR_GENS 1
-
-struct lru_gen_memcg {
-};
-
-static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
-{
-}
-
-#endif /* CONFIG_MEMCG */
-
#else /* !CONFIG_LRU_GEN */

static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
@@ -590,8 +572,6 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
}

-#ifdef CONFIG_MEMCG
-
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
}
@@ -616,8 +596,6 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
{
}

-#endif /* CONFIG_MEMCG */
-
#endif /* CONFIG_LRU_GEN */

struct lruvec {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa7ea09ffb4c..351a0b5043c0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4097,13 +4097,6 @@ enum {
MEMCG_LRU_YOUNG,
};

-#ifdef CONFIG_MEMCG
-
-static int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return READ_ONCE(lruvec->lrugen.seg);
-}
-
static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
{
int seg;
@@ -4150,6 +4143,8 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
spin_unlock_irqrestore(&pgdat->memcg_lru.lock, flags);
}

+#ifdef CONFIG_MEMCG
+
void lru_gen_online_memcg(struct mem_cgroup *memcg)
{
int gen;
@@ -4217,18 +4212,11 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
struct lruvec *lruvec = get_lruvec(memcg, nid);

/* see the comment on MEMCG_NR_GENS */
- if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_HEAD)
+ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_HEAD)
lru_gen_rotate_memcg(lruvec, MEMCG_LRU_HEAD);
}

-#else /* !CONFIG_MEMCG */
-
-static int lru_gen_memcg_seg(struct lruvec *lruvec)
-{
- return 0;
-}
-
-#endif
+#endif /* CONFIG_MEMCG */

/******************************************************************************
* the eviction
@@ -4776,7 +4764,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)

if (mem_cgroup_below_low(NULL, memcg)) {
/* see the comment on MEMCG_NR_GENS */
- if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL)
+ if (READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL)
return MEMCG_LRU_TAIL;

memcg_memory_event(memcg, MEMCG_LOW);
@@ -4799,12 +4787,10 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
return 0;

/* one retry if offlined or too small */
- return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ?
+ return READ_ONCE(lruvec->lrugen.seg) != MEMCG_LRU_TAIL ?
MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
}

-#ifdef CONFIG_MEMCG
-
static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
{
int op;
@@ -4896,20 +4882,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
blk_finish_plug(&plug);
}

-#else /* !CONFIG_MEMCG */
-
-static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
-{
- BUILD_BUG();
-}
-
-static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
-{
- BUILD_BUG();
-}
-
-#endif
-
static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
{
int priority;
@@ -5560,6 +5532,18 @@ static const struct file_operations lru_gen_ro_fops = {
* initialization
******************************************************************************/

+void lru_gen_init_pgdat(struct pglist_data *pgdat)
+{
+ int i, j;
+
+ spin_lock_init(&pgdat->memcg_lru.lock);
+
+ for (i = 0; i < MEMCG_NR_GENS; i++) {
+ for (j = 0; j < MEMCG_NR_BINS; j++)
+ INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+ }
+}
+
void lru_gen_init_lruvec(struct lruvec *lruvec)
{
int i;
@@ -5582,18 +5566,6 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)

#ifdef CONFIG_MEMCG

-void lru_gen_init_pgdat(struct pglist_data *pgdat)
-{
- int i, j;
-
- spin_lock_init(&pgdat->memcg_lru.lock);
-
- for (i = 0; i < MEMCG_NR_GENS; i++) {
- for (j = 0; j < MEMCG_NR_BINS; j++)
- INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
- }
-}
-
void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
@@ -5653,14 +5625,17 @@ late_initcall(init_lru_gen);

static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
{
+ BUILD_BUG();
}

static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
+ BUILD_BUG();
}

static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
{
+ BUILD_BUG();
}

#endif /* CONFIG_LRU_GEN */
--
2.43.0.472.g3155946c3a-goog


2023-12-20 04:01:55

by Kinsey Ho

[permalink] [raw]
Subject: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE

Improve code readability by removing CONFIG_TRANSPARENT_HUGEPAGE,
since the compiler should be able to automatically optimize out the
code that promotes THPs during page table walks.

No functional changes.

Signed-off-by: Kinsey Ho <[email protected]>
Co-developed-by: Aneesh Kumar K V <[email protected]>
Signed-off-by: Aneesh Kumar K V <[email protected]>
---
mm/vmscan.c | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 351a0b5043c0..ceba905e5630 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3273,7 +3273,6 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
return pfn;
}

-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
{
unsigned long pfn = pmd_pfn(pmd);
@@ -3291,7 +3290,6 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned

return pfn;
}
-#endif

static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
struct pglist_data *pgdat, bool can_swap)
@@ -3394,7 +3392,6 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
return suitable_to_scan(total, young);
}

-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
{
@@ -3472,12 +3469,6 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
done:
*first = -1;
}
-#else
-static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
- struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
-{
-}
-#endif

static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
struct mm_walk *args)
@@ -3513,7 +3504,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
continue;
}

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge(val)) {
unsigned long pfn = pmd_pfn(val);
struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
@@ -3532,7 +3522,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
continue;
}
-#endif
+
walk->mm_stats[MM_NONLEAF_TOTAL]++;

if (should_clear_pmd_young()) {
--
2.43.0.472.g3155946c3a-goog


2023-12-20 04:01:55

by Kinsey Ho

[permalink] [raw]
Subject: [PATCH mm-unstable v1 2/4] mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU

Add CONFIG_LRU_GEN_WALKS_MMU such that if disabled, the code that
walks page tables to promote pages into the youngest generation will
not be built.

Also improve code readability by adding two helper functions,
get_mm_state() and get_next_mm().

Signed-off-by: Kinsey Ho <[email protected]>
Co-developed-by: Aneesh Kumar K V <[email protected]>
Signed-off-by: Aneesh Kumar K V <[email protected]>
---
include/linux/memcontrol.h | 2 +-
include/linux/mm_types.h | 12 ++-
include/linux/mmzone.h | 2 +
kernel/fork.c | 2 +-
mm/Kconfig | 4 +
mm/vmscan.c | 192 ++++++++++++++++++++++++-------------
6 files changed, 139 insertions(+), 75 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5de775e6cdd9..20ff87f8e001 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -330,7 +330,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif

-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
/* per-memcg mm_struct list */
struct lru_gen_mm_list mm_list;
#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a66534c78c4d..552fa2d11c57 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -958,7 +958,7 @@ struct mm_struct {
*/
unsigned long ksm_zero_pages;
#endif /* CONFIG_KSM */
-#ifdef CONFIG_LRU_GEN
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
struct {
/* this mm_struct is on lru_gen_mm_list */
struct list_head list;
@@ -973,7 +973,7 @@ struct mm_struct {
struct mem_cgroup *memcg;
#endif
} lru_gen;
-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_GEN_WALKS_MMU */
} __randomize_layout;

/*
@@ -1011,6 +1011,10 @@ struct lru_gen_mm_list {
spinlock_t lock;
};

+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
+
void lru_gen_add_mm(struct mm_struct *mm);
void lru_gen_del_mm(struct mm_struct *mm);
#ifdef CONFIG_MEMCG
@@ -1036,7 +1040,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
WRITE_ONCE(mm->lru_gen.bitmap, -1);
}

-#else /* !CONFIG_LRU_GEN */
+#else /* !CONFIG_LRU_GEN_WALKS_MMU */

static inline void lru_gen_add_mm(struct mm_struct *mm)
{
@@ -1060,7 +1064,7 @@ static inline void lru_gen_use_mm(struct mm_struct *mm)
{
}

-#endif /* CONFIG_LRU_GEN */
+#endif /* CONFIG_LRU_GEN_WALKS_MMU */

struct vma_iterator {
struct ma_state mas;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2efd3be484fd..bc3f63ec4291 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -640,9 +640,11 @@ struct lruvec {
#ifdef CONFIG_LRU_GEN
/* evictable pages divided into generations */
struct lru_gen_folio lrugen;
+#ifdef CONFIG_LRU_GEN_WALKS_MMU
/* to concurrently iterate lru_gen_mm_list */
struct lru_gen_mm_state mm_state;
#endif
+#endif /* CONFIG_LRU_GEN */
#ifdef CONFIG_MEMCG
struct pglist_data *pgdat;
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 93924392a5c3..56cf276432c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2946,7 +2946,7 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}

- if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ if (IS_ENABLED(CONFIG_LRU_GEN_WALKS_MMU) && !(clone_flags & CLONE_VM)) {
/* lock the task to synchronize with memcg migration */
task_lock(p);
lru_gen_add_mm(p->mm);
diff --git a/mm/Kconfig b/mm/Kconfig
index 8f8b02e9c136..c98076dec5fb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1274,6 +1274,10 @@ config LRU_GEN_STATS
from evicted generations for debugging purpose.

This option has a per-memcg and per-node memory overhead.
+
+config LRU_GEN_WALKS_MMU
+ def_bool y
+ depends on LRU_GEN && ARCH_HAS_HW_PTE_YOUNG
# }

config ARCH_SUPPORTS_PER_VMA_LOCK
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b4ca3563bcf4..aa7ea09ffb4c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2671,13 +2671,14 @@ static void get_item_key(void *item, int *key)
key[1] = hash >> BLOOM_FILTER_SHIFT;
}

-static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+static bool test_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq,
+ void *item)
{
int key[2];
unsigned long *filter;
int gen = filter_gen_from_seq(seq);

- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ filter = READ_ONCE(mm_state->filters[gen]);
if (!filter)
return true;

@@ -2686,13 +2687,14 @@ static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *it
return test_bit(key[0], filter) && test_bit(key[1], filter);
}

-static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item)
+static void update_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq,
+ void *item)
{
int key[2];
unsigned long *filter;
int gen = filter_gen_from_seq(seq);

- filter = READ_ONCE(lruvec->mm_state.filters[gen]);
+ filter = READ_ONCE(mm_state->filters[gen]);
if (!filter)
return;

@@ -2704,12 +2706,12 @@ static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *
set_bit(key[1], filter);
}

-static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)
+static void reset_bloom_filter(struct lru_gen_mm_state *mm_state, unsigned long seq)
{
unsigned long *filter;
int gen = filter_gen_from_seq(seq);

- filter = lruvec->mm_state.filters[gen];
+ filter = mm_state->filters[gen];
if (filter) {
bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT));
return;
@@ -2717,13 +2719,15 @@ static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq)

filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT),
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN);
- WRITE_ONCE(lruvec->mm_state.filters[gen], filter);
+ WRITE_ONCE(mm_state->filters[gen], filter);
}

/******************************************************************************
* mm_struct list
******************************************************************************/

+#ifdef CONFIG_LRU_GEN_WALKS_MMU
+
static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
{
static struct lru_gen_mm_list mm_list = {
@@ -2740,6 +2744,29 @@ static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
return &mm_list;
}

+static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec)
+{
+ return &lruvec->mm_state;
+}
+
+static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
+{
+ int key;
+ struct mm_struct *mm;
+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
+ struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec);
+
+ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
+ key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
+
+ if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap))
+ return NULL;
+
+ clear_bit(key, &mm->lru_gen.bitmap);
+
+ return mmget_not_zero(mm) ? mm : NULL;
+}
+
void lru_gen_add_mm(struct mm_struct *mm)
{
int nid;
@@ -2755,10 +2782,11 @@ void lru_gen_add_mm(struct mm_struct *mm)

for_each_node_state(nid, N_MEMORY) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

/* the first addition since the last iteration */
- if (lruvec->mm_state.tail == &mm_list->fifo)
- lruvec->mm_state.tail = &mm->lru_gen.list;
+ if (mm_state->tail == &mm_list->fifo)
+ mm_state->tail = &mm->lru_gen.list;
}

list_add_tail(&mm->lru_gen.list, &mm_list->fifo);
@@ -2784,14 +2812,15 @@ void lru_gen_del_mm(struct mm_struct *mm)

for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

/* where the current iteration continues after */
- if (lruvec->mm_state.head == &mm->lru_gen.list)
- lruvec->mm_state.head = lruvec->mm_state.head->prev;
+ if (mm_state->head == &mm->lru_gen.list)
+ mm_state->head = mm_state->head->prev;

/* where the last iteration ended before */
- if (lruvec->mm_state.tail == &mm->lru_gen.list)
- lruvec->mm_state.tail = lruvec->mm_state.tail->next;
+ if (mm_state->tail == &mm->lru_gen.list)
+ mm_state->tail = mm_state->tail->next;
}

list_del_init(&mm->lru_gen.list);
@@ -2834,10 +2863,30 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
}
#endif

+#else /* !CONFIG_LRU_GEN_WALKS_MMU */
+
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
+{
+ return NULL;
+}
+
+static struct lru_gen_mm_state *get_mm_state(struct lruvec *lruvec)
+{
+ return NULL;
+}
+
+static struct mm_struct *get_next_mm(struct lru_gen_mm_walk *walk)
+{
+ return NULL;
+}
+
+#endif
+
static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last)
{
int i;
int hist;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock);

@@ -2845,44 +2894,20 @@ static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
hist = lru_hist_from_seq(walk->max_seq);

for (i = 0; i < NR_MM_STATS; i++) {
- WRITE_ONCE(lruvec->mm_state.stats[hist][i],
- lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]);
+ WRITE_ONCE(mm_state->stats[hist][i],
+ mm_state->stats[hist][i] + walk->mm_stats[i]);
walk->mm_stats[i] = 0;
}
}

if (NR_HIST_GENS > 1 && last) {
- hist = lru_hist_from_seq(lruvec->mm_state.seq + 1);
+ hist = lru_hist_from_seq(mm_state->seq + 1);

for (i = 0; i < NR_MM_STATS; i++)
- WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0);
+ WRITE_ONCE(mm_state->stats[hist][i], 0);
}
}

-static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk)
-{
- int type;
- unsigned long size = 0;
- struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
- int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap);
-
- if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap))
- return true;
-
- clear_bit(key, &mm->lru_gen.bitmap);
-
- for (type = !walk->can_swap; type < ANON_AND_FILE; type++) {
- size += type ? get_mm_counter(mm, MM_FILEPAGES) :
- get_mm_counter(mm, MM_ANONPAGES) +
- get_mm_counter(mm, MM_SHMEMPAGES);
- }
-
- if (size < MIN_LRU_BATCH)
- return true;
-
- return !mmget_not_zero(mm);
-}
-
static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
struct mm_struct **iter)
{
@@ -2891,7 +2916,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
struct mm_struct *mm = NULL;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

/*
* mm_state->seq is incremented after each iteration of mm_list. There
@@ -2929,11 +2954,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
mm_state->tail = mm_state->head->next;
walk->force_scan = true;
}
-
- mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list);
- if (should_skip_mm(mm, walk))
- mm = NULL;
- } while (!mm);
+ } while (!(mm = get_next_mm(walk)));
done:
if (*iter || last)
reset_mm_stats(lruvec, walk, last);
@@ -2941,7 +2962,7 @@ static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk,
spin_unlock(&mm_list->lock);

if (mm && first)
- reset_bloom_filter(lruvec, walk->max_seq + 1);
+ reset_bloom_filter(mm_state, walk->max_seq + 1);

if (*iter)
mmput_async(*iter);
@@ -2956,7 +2977,7 @@ static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq)
bool success = false;
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
- struct lru_gen_mm_state *mm_state = &lruvec->mm_state;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

spin_lock(&mm_list->lock);

@@ -3469,6 +3490,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
DECLARE_BITMAP(bitmap, MIN_LRU_BATCH);
unsigned long first = -1;
struct lru_gen_mm_walk *walk = args->private;
+ struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec);

VM_WARN_ON_ONCE(pud_leaf(*pud));

@@ -3520,7 +3542,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
}

- if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i))
+ if (!walk->force_scan && !test_bloom_filter(mm_state, walk->max_seq, pmd + i))
continue;

walk->mm_stats[MM_NONLEAF_FOUND]++;
@@ -3531,7 +3553,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
walk->mm_stats[MM_NONLEAF_ADDED]++;

/* carry over to the next generation */
- update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i);
+ update_bloom_filter(mm_state, walk->max_seq + 1, pmd + i);
}

walk_pmd_range_locked(pud, -1, vma, args, bitmap, &first);
@@ -3738,16 +3760,25 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap)
return success;
}

-static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
+static bool inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
+ bool can_swap, bool force_scan)
{
+ bool success;
int prev, next;
int type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
restart:
+ if (max_seq < READ_ONCE(lrugen->max_seq))
+ return false;
+
spin_lock_irq(&lruvec->lru_lock);

VM_WARN_ON_ONCE(!seq_is_valid(lruvec));

+ success = max_seq == lrugen->max_seq;
+ if (!success)
+ goto unlock;
+
for (type = ANON_AND_FILE - 1; type >= 0; type--) {
if (get_nr_gens(lruvec, type) != MAX_NR_GENS)
continue;
@@ -3791,8 +3822,10 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan)
WRITE_ONCE(lrugen->timestamps[next], jiffies);
/* make sure preceding modifications appear */
smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
-
+unlock:
spin_unlock_irq(&lruvec->lru_lock);
+
+ return success;
}

static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
@@ -3802,14 +3835,16 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
struct lru_gen_mm_walk *walk;
struct mm_struct *mm = NULL;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq));

+ if (!mm_state)
+ return inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+
/* see the comment in iterate_mm_list() */
- if (max_seq <= READ_ONCE(lruvec->mm_state.seq)) {
- success = false;
- goto done;
- }
+ if (max_seq <= READ_ONCE(mm_state->seq))
+ return false;

/*
* If the hardware doesn't automatically set the accessed bit, fallback
@@ -3839,8 +3874,10 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq,
walk_mm(lruvec, mm, walk);
} while (mm);
done:
- if (success)
- inc_max_seq(lruvec, can_swap, force_scan);
+ if (success) {
+ success = inc_max_seq(lruvec, max_seq, can_swap, force_scan);
+ WARN_ON_ONCE(!success);
+ }

return success;
}
@@ -3964,6 +4001,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
struct mem_cgroup *memcg = folio_memcg(folio);
struct pglist_data *pgdat = folio_pgdat(folio);
struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);
DEFINE_MAX_SEQ(lruvec);
int old_gen, new_gen = lru_gen_from_seq(max_seq);

@@ -4042,8 +4080,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
mem_cgroup_unlock_pages();

/* feedback from rmap walkers to page table walkers */
- if (suitable_to_scan(i, young))
- update_bloom_filter(lruvec, max_seq, pvmw->pmd);
+ if (mm_state && suitable_to_scan(i, young))
+ update_bloom_filter(mm_state, max_seq, pvmw->pmd);
}

/******************************************************************************
@@ -5219,6 +5257,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
int type, tier;
int hist = lru_hist_from_seq(seq);
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

for (tier = 0; tier < MAX_NR_TIERS; tier++) {
seq_printf(m, " %10d", tier);
@@ -5244,6 +5283,9 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
seq_putc(m, '\n');
}

+ if (!mm_state)
+ return;
+
seq_puts(m, " ");
for (i = 0; i < NR_MM_STATS; i++) {
const char *s = " ";
@@ -5251,10 +5293,10 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,

if (seq == max_seq && NR_HIST_GENS == 1) {
s = "LOYNFA";
- n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ n = READ_ONCE(mm_state->stats[hist][i]);
} else if (seq != max_seq && NR_HIST_GENS > 1) {
s = "loynfa";
- n = READ_ONCE(lruvec->mm_state.stats[hist][i]);
+ n = READ_ONCE(mm_state->stats[hist][i]);
}

seq_printf(m, " %10lu%c", n, s[i]);
@@ -5523,6 +5565,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
int i;
int gen, type, zone;
struct lru_gen_folio *lrugen = &lruvec->lrugen;
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

lrugen->max_seq = MIN_NR_GENS + 1;
lrugen->enabled = lru_gen_enabled();
@@ -5533,7 +5576,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
for_each_gen_type_zone(gen, type, zone)
INIT_LIST_HEAD(&lrugen->folios[gen][type][zone]);

- lruvec->mm_state.seq = MIN_NR_GENS;
+ if (mm_state)
+ mm_state->seq = MIN_NR_GENS;
}

#ifdef CONFIG_MEMCG
@@ -5552,28 +5596,38 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)

void lru_gen_init_memcg(struct mem_cgroup *memcg)
{
- INIT_LIST_HEAD(&memcg->mm_list.fifo);
- spin_lock_init(&memcg->mm_list.lock);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ if (!mm_list)
+ return;
+
+ INIT_LIST_HEAD(&mm_list->fifo);
+ spin_lock_init(&mm_list->lock);
}

void lru_gen_exit_memcg(struct mem_cgroup *memcg)
{
int i;
int nid;
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);

- VM_WARN_ON_ONCE(!list_empty(&memcg->mm_list.fifo));
+ VM_WARN_ON_ONCE(mm_list && !list_empty(&mm_list->fifo));

for_each_node(nid) {
struct lruvec *lruvec = get_lruvec(memcg, nid);
+ struct lru_gen_mm_state *mm_state = get_mm_state(lruvec);

VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0,
sizeof(lruvec->lrugen.nr_pages)));

lruvec->lrugen.list.next = LIST_POISON1;

+ if (!mm_state)
+ continue;
+
for (i = 0; i < NR_BLOOM_FILTERS; i++) {
- bitmap_free(lruvec->mm_state.filters[i]);
- lruvec->mm_state.filters[i] = NULL;
+ bitmap_free(mm_state->filters[i]);
+ mm_state->filters[i] = NULL;
}
}
}
--
2.43.0.472.g3155946c3a-goog


2023-12-20 04:16:57

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v3 0/4] mm/mglru: Kconfig cleanup

On Tue, Dec 19, 2023 at 9:01 PM Kinsey Ho <[email protected]> wrote:
>
> This series is the result of the following discussion:
> https://lore.kernel.org/[email protected]/
>
> It mainly avoids building the code that walks page tables on CPUs that
> use it, i.e., those don't support hardware accessed bit. Specifically,
> it introduces a new Kconfig to guard some of functions added by
> commit bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
> on CPUs like POWER9, on which the series was tested.
>
>
> Kinsey Ho (4):
> mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG
> mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
> mm/mglru: remove CONFIG_MEMCG
> mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
>
> arch/Kconfig | 8 +
> arch/arm64/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> arch/x86/include/asm/pgtable.h | 6 -
> include/linux/memcontrol.h | 2 +-
> include/linux/mm_types.h | 16 +-
> include/linux/mmzone.h | 28 +---
> include/linux/pgtable.h | 2 +-
> kernel/fork.c | 2 +-
> mm/Kconfig | 4 +
> mm/vmscan.c | 271 ++++++++++++++++++---------------
> 11 files changed, 174 insertions(+), 167 deletions(-)

+Donet Tom <[email protected]>
who is also working on this.

Donet, could you try this latest version instead? If it works as well as the
old one you've been using, can you please provide your Tested-by tag?
Thanks.

2023-12-20 13:46:03

by Donet Tom

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v3 0/4] mm/mglru: Kconfig cleanup


On 12/20/23 09:46, Yu Zhao wrote:
> On Tue, Dec 19, 2023 at 9:01 PM Kinsey Ho <[email protected]> wrote:
>> This series is the result of the following discussion:
>> https://lore.kernel.org/[email protected]/
>>
>> It mainly avoids building the code that walks page tables on CPUs that
>> use it, i.e., those don't support hardware accessed bit. Specifically,
>> it introduces a new Kconfig to guard some of functions added by
>> commit bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
>> on CPUs like POWER9, on which the series was tested.
>>
>>
>> Kinsey Ho (4):
>> mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG
>> mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
>> mm/mglru: remove CONFIG_MEMCG
>> mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
>>
>> arch/Kconfig | 8 +
>> arch/arm64/Kconfig | 1 +
>> arch/x86/Kconfig | 1 +
>> arch/x86/include/asm/pgtable.h | 6 -
>> include/linux/memcontrol.h | 2 +-
>> include/linux/mm_types.h | 16 +-
>> include/linux/mmzone.h | 28 +---
>> include/linux/pgtable.h | 2 +-
>> kernel/fork.c | 2 +-
>> mm/Kconfig | 4 +
>> mm/vmscan.c | 271 ++++++++++++++++++---------------
>> 11 files changed, 174 insertions(+), 167 deletions(-)
> +Donet Tom <[email protected]>
> who is also working on this.
>
> Donet, could try this latest version instead? If it works well as the
> old one you've been using, can you please provide your Tested-by tag?
> Thanks.

Hi Yu Zhao,

This patch set looks promising.

I have conducted tests on PowerPC and x86.

In the old patch set there is a cleanup patch which removes the
struct scan_control *sc argument from try_to_inc_max_seq() and
run_aging(). Do we need to include that patch?

=>Here are some test results from PowerPC.

# ls -l vmscan.o
-rw-r--r--. 1 root root 3600080 Dec 19 22:35 vmscan.o

# size vmscan.o
  text       data           bss      dec         hex filename
  95086      27412          0        122498      1de82 vmscan.o

# ./scripts/bloat-o-meter vmscan.o.old vmscan.o
add/remove: 4/8 grow/shrink: 7/9 up/down: 860/-2524 (-1664)
Function                              old       new     delta
should_abort_scan                      -        472     +472
inc_max_seq.isra                      1472      1612    +140
shrink_one                            680       760     +80
lru_gen_release_memcg                 508       556     +48
lru_gen_init_pgdat                    92        132     +40
shrink_node                           4040      4064    +24
lru_gen_online_memcg                  680       696     +16
lru_gen_change_state                  3968      3984    +16
------
shrink_lruvec                         2168      2152    -16
lru_gen_seq_write                     1980      1964    -16
isolate_folios                        6904      6888    -16
lru_gen_init_memcg                    32        12      -20
mm_list                               24        -       -24
lru_gen_exit_memcg                    388       344     -44
try_to_shrink_lruvec                  904       816     -88
lru_gen_rotate_memcg                  832       700     -132
lru_gen_migrate_mm                    132       -       -132
lru_gen_seq_show                      1484      1308    -176
iterate_mm_list_nowalk                288       -       -288
lru_gen_look_around                   2284      1984    -300
lru_gen_add_mm                        528       -       -528
lru_gen_del_mm                        720       -       -720
Total: Before=116213, After=114549, chg -1.43%

=>Here are some test results from x86.

$ ls -l vmscan.o
-rw-r--r--. 1 donettom donettom 2545792 Dec 20 15:16 vmscan.o

$ size vmscan.o
  text          data          bss    dec        hex filename
  109751        32189         0      141940     22a74 vmscan.o
$

$ ./scripts/bloat-o-meter vmscan.o.old vmscan.o
add/remove: 7/3 grow/shrink: 14/4 up/down: 2307/-1534 (773)
Function                                old       new      delta
inc_max_seq                             -         1470     +1470
should_abort_scan                       -         229      +229
isolate_folios                          4469      4562     +93
lru_gen_rotate_memcg                    641       731      +90
lru_gen_init_memcg                      41        99       +58
lru_gen_release_memcg                   282       336      +54
lru_gen_exit_memcg                      306       350      +44
walk_pud_range                          2502      2543     +41
shrink_node                             2912      2951     +39
lru_gen_online_memcg                    402       434      +32
lru_gen_seq_show                        1112      1140     +28
lru_gen_add_folio                       740       757      +17
lru_gen_look_around                     1217      1233     +16
__pfx_should_abort_scan                 -         16       +16
__pfx_inc_max_seq                       -         16       +16
iterate_mm_list_nowalk                  277       292      +15
shrink_one                              413       426      +13
lru_gen_init_lruvec                     190       202      +12
-----
try_to_shrink_lruvec                    717       643      -74
lru_gen_init_pgdat                      196       82       -114
try_to_inc_max_seq.isra                 2897      1578     -1319
Total: Before=101095, After=101868, chg +0.76%
$


Tested-by: Donet Tom <[email protected]>

Thanks
Donet Tom


2023-12-20 15:23:03

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v3 0/4] mm/mglru: Kconfig cleanup

On Wed, Dec 20, 2023 at 6:45 AM Donet Tom <[email protected]> wrote:
>
>
> On 12/20/23 09:46, Yu Zhao wrote:
> > On Tue, Dec 19, 2023 at 9:01 PM Kinsey Ho <[email protected]> wrote:
> >> This series is the result of the following discussion:
> >> https://lore.kernel.org/[email protected]/
> >>
> >> It mainly avoids building the code that walks page tables on CPUs that
> >> use it, i.e., those don't support hardware accessed bit. Specifically,
> >> it introduces a new Kconfig to guard some of functions added by
> >> commit bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
> >> on CPUs like POWER9, on which the series was tested.
> >>
> >>
> >> Kinsey Ho (4):
> >> mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG
> >> mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
> >> mm/mglru: remove CONFIG_MEMCG
> >> mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
> >>
> >> arch/Kconfig | 8 +
> >> arch/arm64/Kconfig | 1 +
> >> arch/x86/Kconfig | 1 +
> >> arch/x86/include/asm/pgtable.h | 6 -
> >> include/linux/memcontrol.h | 2 +-
> >> include/linux/mm_types.h | 16 +-
> >> include/linux/mmzone.h | 28 +---
> >> include/linux/pgtable.h | 2 +-
> >> kernel/fork.c | 2 +-
> >> mm/Kconfig | 4 +
> >> mm/vmscan.c | 271 ++++++++++++++++++---------------
> >> 11 files changed, 174 insertions(+), 167 deletions(-)
> > +Donet Tom <[email protected]>
> > who is also working on this.
> >
> > Donet, could try this latest version instead? If it works well as the
> > old one you've been using, can you please provide your Tested-by tag?
> > Thanks.
>
> Hi Yu Zhao,
>
> This patch set looks promising.
>
> I have conducted tests on PowerPC and x86.
>
> In old patch set there is a cleanup patch which removes
> struct scan_control *sc argument from try_to_inc_max_seq() and
> run_aging(), Do we need to include that patch?

Sorry for not including that patch in this series.

It's the first patch in the next cleanup series, which we haven't
fully tested yet. It'll be the first order of business after the
holiday season (mid Jan). Does that schedule work for you?

> =>Here are some test results from PowerPC.
>
> # ls -l vmscan.o
> -rw-r--r--. 1 root root 3600080 Dec 19 22:35 vmscan.o
>
> # size vmscan.o
> text data bss dec hex filename
> 95086 27412 0 122498 1de82 vmscan.o
>
> # ./scripts/bloat-o-meter vmscan.o.old vmscan.o
> add/remove: 4/8 grow/shrink: 7/9 up/down: 860/-2524 (-1664)
> Function old new delta
> should_abort_scan - 472 +472
> inc_max_seq.isra 1472 1612 +140
> shrink_one 680 760 +80
> lru_gen_release_memcg 508 556 +48
> lru_gen_init_pgdat 92 132 +40
> shrink_node 4040 4064 +24
> lru_gen_online_memcg 680 696 +16
> lru_gen_change_state 3968 3984 +16
> ------
> shrink_lruvec 2168 2152 -16
> lru_gen_seq_write 1980 1964 -16
> isolate_folios 6904 6888 -16
> lru_gen_init_memcg 32 12 -20
> mm_list 24 - -24
> lru_gen_exit_memcg 388 344 -44
> try_to_shrink_lruvec 904 816 -88
> lru_gen_rotate_memcg 832 700 -132
> lru_gen_migrate_mm 132 - -132
> lru_gen_seq_show 1484 1308 -176
> iterate_mm_list_nowalk 288 - -288
> lru_gen_look_around 2284 1984 -300
> lru_gen_add_mm 528 - -528
> lru_gen_del_mm 720 - -720
> Total: Before=116213, After=114549, chg -1.43%
>
> =>Here are some test results from x86.
>
> $ ls -l vmscan.o
> -rw-r--r--. 1 donettom donettom 2545792 Dec 20 15:16 vmscan.o
>
> $ size vmscan.o
> text data bss dec hex filename
> 109751 32189 0 141940 22a74 vmscan.o
> $
>
> $ ./scripts/bloat-o-meter vmscan.o.old vmscan.o
> add/remove: 7/3 grow/shrink: 14/4 up/down: 2307/-1534 (773)
> Function old new delta
> inc_max_seq - 1470 +1470
> should_abort_scan - 229 +229
> isolate_folios 4469 4562 +93
> lru_gen_rotate_memcg 641 731 +90
> lru_gen_init_memcg 41 99 +58
> lru_gen_release_memcg 282 336 +54
> lru_gen_exit_memcg 306 350 +44
> walk_pud_range 2502 2543 +41
> shrink_node 2912 2951 +39
> lru_gen_online_memcg 402 434 +32
> lru_gen_seq_show 1112 1140 +28
> lru_gen_add_folio 740 757 +17
> lru_gen_look_around 1217 1233 +16
> __pfx_should_abort_scan - 16 +16
> __pfx_inc_max_seq - 16 +16
> iterate_mm_list_nowalk 277 292 +15
> shrink_one 413 426 +13
> lru_gen_init_lruvec 190 202 +12
> -----
> try_to_shrink_lruvec 717 643 -74
> lru_gen_init_pgdat 196 82 -114
> try_to_inc_max_seq.isra 2897 1578 -1319
> Total: Before=101095, After=101868, chg +0.76%
> $
>
>
> Tested-by: Donet Tom <[email protected]>

Thanks!

Acked-by: Yu Zhao <[email protected]>

2023-12-20 16:48:33

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE

Hi Kinsey,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Kinsey-Ho/mm-mglru-add-CONFIG_ARCH_HAS_HW_PTE_YOUNG/20231220-120318
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231220040037.883811-5-kinseyho%40google.com
patch subject: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
config: arm-randconfig-002-20231220 (https://download.01.org/0day-ci/archive/20231221/[email protected]/config)
compiler: arm-linux-gnueabi-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231221/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

mm/vmscan.c: In function 'walk_pmd_range_locked':
>> mm/vmscan.c:3455:21: error: implicit declaration of function 'pmd_dirty'; did you mean 'pte_dirty'? [-Werror=implicit-function-declaration]
3455 | if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
| ^~~~~~~~~
| pte_dirty
cc1: some warnings being treated as errors


vim +3455 mm/vmscan.c

bd74fdaea146029 Yu Zhao 2022-09-18 3394
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3395 static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3396 struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
bd74fdaea146029 Yu Zhao 2022-09-18 3397 {
bd74fdaea146029 Yu Zhao 2022-09-18 3398 int i;
bd74fdaea146029 Yu Zhao 2022-09-18 3399 pmd_t *pmd;
bd74fdaea146029 Yu Zhao 2022-09-18 3400 spinlock_t *ptl;
bd74fdaea146029 Yu Zhao 2022-09-18 3401 struct lru_gen_mm_walk *walk = args->private;
bd74fdaea146029 Yu Zhao 2022-09-18 3402 struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
bd74fdaea146029 Yu Zhao 2022-09-18 3403 struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
bd74fdaea146029 Yu Zhao 2022-09-18 3404 int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
bd74fdaea146029 Yu Zhao 2022-09-18 3405
bd74fdaea146029 Yu Zhao 2022-09-18 3406 VM_WARN_ON_ONCE(pud_leaf(*pud));
bd74fdaea146029 Yu Zhao 2022-09-18 3407
bd74fdaea146029 Yu Zhao 2022-09-18 3408 /* try to batch at most 1+MIN_LRU_BATCH+1 entries */
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3409 if (*first == -1) {
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3410 *first = addr;
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3411 bitmap_zero(bitmap, MIN_LRU_BATCH);
bd74fdaea146029 Yu Zhao 2022-09-18 3412 return;
bd74fdaea146029 Yu Zhao 2022-09-18 3413 }
bd74fdaea146029 Yu Zhao 2022-09-18 3414
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3415 i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first);
bd74fdaea146029 Yu Zhao 2022-09-18 3416 if (i && i <= MIN_LRU_BATCH) {
bd74fdaea146029 Yu Zhao 2022-09-18 3417 __set_bit(i - 1, bitmap);
bd74fdaea146029 Yu Zhao 2022-09-18 3418 return;
bd74fdaea146029 Yu Zhao 2022-09-18 3419 }
bd74fdaea146029 Yu Zhao 2022-09-18 3420
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3421 pmd = pmd_offset(pud, *first);
bd74fdaea146029 Yu Zhao 2022-09-18 3422
bd74fdaea146029 Yu Zhao 2022-09-18 3423 ptl = pmd_lockptr(args->mm, pmd);
bd74fdaea146029 Yu Zhao 2022-09-18 3424 if (!spin_trylock(ptl))
bd74fdaea146029 Yu Zhao 2022-09-18 3425 goto done;
bd74fdaea146029 Yu Zhao 2022-09-18 3426
bd74fdaea146029 Yu Zhao 2022-09-18 3427 arch_enter_lazy_mmu_mode();
bd74fdaea146029 Yu Zhao 2022-09-18 3428
bd74fdaea146029 Yu Zhao 2022-09-18 3429 do {
bd74fdaea146029 Yu Zhao 2022-09-18 3430 unsigned long pfn;
bd74fdaea146029 Yu Zhao 2022-09-18 3431 struct folio *folio;
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3432
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3433 /* don't round down the first address */
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3434 addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
bd74fdaea146029 Yu Zhao 2022-09-18 3435
bd74fdaea146029 Yu Zhao 2022-09-18 3436 pfn = get_pmd_pfn(pmd[i], vma, addr);
bd74fdaea146029 Yu Zhao 2022-09-18 3437 if (pfn == -1)
bd74fdaea146029 Yu Zhao 2022-09-18 3438 goto next;
bd74fdaea146029 Yu Zhao 2022-09-18 3439
bd74fdaea146029 Yu Zhao 2022-09-18 3440 if (!pmd_trans_huge(pmd[i])) {
bd02df412cbb9a6 T.J. Alumbaugh 2023-05-22 3441 if (should_clear_pmd_young())
bd74fdaea146029 Yu Zhao 2022-09-18 3442 pmdp_test_and_clear_young(vma, addr, pmd + i);
bd74fdaea146029 Yu Zhao 2022-09-18 3443 goto next;
bd74fdaea146029 Yu Zhao 2022-09-18 3444 }
bd74fdaea146029 Yu Zhao 2022-09-18 3445
bd74fdaea146029 Yu Zhao 2022-09-18 3446 folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
bd74fdaea146029 Yu Zhao 2022-09-18 3447 if (!folio)
bd74fdaea146029 Yu Zhao 2022-09-18 3448 goto next;
bd74fdaea146029 Yu Zhao 2022-09-18 3449
bd74fdaea146029 Yu Zhao 2022-09-18 3450 if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
bd74fdaea146029 Yu Zhao 2022-09-18 3451 goto next;
bd74fdaea146029 Yu Zhao 2022-09-18 3452
bd74fdaea146029 Yu Zhao 2022-09-18 3453 walk->mm_stats[MM_LEAF_YOUNG]++;
bd74fdaea146029 Yu Zhao 2022-09-18 3454
bd74fdaea146029 Yu Zhao 2022-09-18 @3455 if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
bd74fdaea146029 Yu Zhao 2022-09-18 3456 !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
bd74fdaea146029 Yu Zhao 2022-09-18 3457 !folio_test_swapcache(folio)))
bd74fdaea146029 Yu Zhao 2022-09-18 3458 folio_mark_dirty(folio);
bd74fdaea146029 Yu Zhao 2022-09-18 3459
bd74fdaea146029 Yu Zhao 2022-09-18 3460 old_gen = folio_update_gen(folio, new_gen);
bd74fdaea146029 Yu Zhao 2022-09-18 3461 if (old_gen >= 0 && old_gen != new_gen)
bd74fdaea146029 Yu Zhao 2022-09-18 3462 update_batch_size(walk, folio, old_gen, new_gen);
bd74fdaea146029 Yu Zhao 2022-09-18 3463 next:
bd74fdaea146029 Yu Zhao 2022-09-18 3464 i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1;
bd74fdaea146029 Yu Zhao 2022-09-18 3465 } while (i <= MIN_LRU_BATCH);
bd74fdaea146029 Yu Zhao 2022-09-18 3466
bd74fdaea146029 Yu Zhao 2022-09-18 3467 arch_leave_lazy_mmu_mode();
bd74fdaea146029 Yu Zhao 2022-09-18 3468 spin_unlock(ptl);
bd74fdaea146029 Yu Zhao 2022-09-18 3469 done:
b5ff4133617d0ec T.J. Alumbaugh 2023-01-18 3470 *first = -1;
bd74fdaea146029 Yu Zhao 2022-09-18 3471 }
bd74fdaea146029 Yu Zhao 2022-09-18 3472

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-20 22:39:35

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE

Hi Kinsey,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Kinsey-Ho/mm-mglru-add-CONFIG_ARCH_HAS_HW_PTE_YOUNG/20231220-120318
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20231220040037.883811-5-kinseyho%40google.com
patch subject: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
config: hexagon-allmodconfig (https://download.01.org/0day-ci/archive/20231221/[email protected]/config)
compiler: clang version 18.0.0git (https://github.com/llvm/llvm-project 7022a24771c8404f847abb226735a3ae21794426)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231221/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:337:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
547 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
560 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
| ^
In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:337:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
573 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
| ^
In file included from mm/vmscan.c:19:
In file included from include/linux/kernel_stat.h:9:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:337:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
584 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
594 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
604 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
>> mm/vmscan.c:3455:7: error: call to undeclared function 'pmd_dirty'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
3455 | if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
| ^
mm/vmscan.c:3455:7: note: did you mean 'pte_dirty'?
arch/hexagon/include/asm/pgtable.h:282:19: note: 'pte_dirty' declared here
282 | static inline int pte_dirty(pte_t pte)
| ^
6 warnings and 1 error generated.


vim +/pmd_dirty +3455 mm/vmscan.c

bd74fdaea14602 Yu Zhao 2022-09-18 3394
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3395 static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area_struct *vma,
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3396 struct mm_walk *args, unsigned long *bitmap, unsigned long *first)
bd74fdaea14602 Yu Zhao 2022-09-18 3397 {
bd74fdaea14602 Yu Zhao 2022-09-18 3398 int i;
bd74fdaea14602 Yu Zhao 2022-09-18 3399 pmd_t *pmd;
bd74fdaea14602 Yu Zhao 2022-09-18 3400 spinlock_t *ptl;
bd74fdaea14602 Yu Zhao 2022-09-18 3401 struct lru_gen_mm_walk *walk = args->private;
bd74fdaea14602 Yu Zhao 2022-09-18 3402 struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec);
bd74fdaea14602 Yu Zhao 2022-09-18 3403 struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
bd74fdaea14602 Yu Zhao 2022-09-18 3404 int old_gen, new_gen = lru_gen_from_seq(walk->max_seq);
bd74fdaea14602 Yu Zhao 2022-09-18 3405
bd74fdaea14602 Yu Zhao 2022-09-18 3406 VM_WARN_ON_ONCE(pud_leaf(*pud));
bd74fdaea14602 Yu Zhao 2022-09-18 3407
bd74fdaea14602 Yu Zhao 2022-09-18 3408 /* try to batch at most 1+MIN_LRU_BATCH+1 entries */
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3409 if (*first == -1) {
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3410 *first = addr;
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3411 bitmap_zero(bitmap, MIN_LRU_BATCH);
bd74fdaea14602 Yu Zhao 2022-09-18 3412 return;
bd74fdaea14602 Yu Zhao 2022-09-18 3413 }
bd74fdaea14602 Yu Zhao 2022-09-18 3414
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3415 i = addr == -1 ? 0 : pmd_index(addr) - pmd_index(*first);
bd74fdaea14602 Yu Zhao 2022-09-18 3416 if (i && i <= MIN_LRU_BATCH) {
bd74fdaea14602 Yu Zhao 2022-09-18 3417 __set_bit(i - 1, bitmap);
bd74fdaea14602 Yu Zhao 2022-09-18 3418 return;
bd74fdaea14602 Yu Zhao 2022-09-18 3419 }
bd74fdaea14602 Yu Zhao 2022-09-18 3420
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3421 pmd = pmd_offset(pud, *first);
bd74fdaea14602 Yu Zhao 2022-09-18 3422
bd74fdaea14602 Yu Zhao 2022-09-18 3423 ptl = pmd_lockptr(args->mm, pmd);
bd74fdaea14602 Yu Zhao 2022-09-18 3424 if (!spin_trylock(ptl))
bd74fdaea14602 Yu Zhao 2022-09-18 3425 goto done;
bd74fdaea14602 Yu Zhao 2022-09-18 3426
bd74fdaea14602 Yu Zhao 2022-09-18 3427 arch_enter_lazy_mmu_mode();
bd74fdaea14602 Yu Zhao 2022-09-18 3428
bd74fdaea14602 Yu Zhao 2022-09-18 3429 do {
bd74fdaea14602 Yu Zhao 2022-09-18 3430 unsigned long pfn;
bd74fdaea14602 Yu Zhao 2022-09-18 3431 struct folio *folio;
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3432
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3433 /* don't round down the first address */
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3434 addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
bd74fdaea14602 Yu Zhao 2022-09-18 3435
bd74fdaea14602 Yu Zhao 2022-09-18 3436 pfn = get_pmd_pfn(pmd[i], vma, addr);
bd74fdaea14602 Yu Zhao 2022-09-18 3437 if (pfn == -1)
bd74fdaea14602 Yu Zhao 2022-09-18 3438 goto next;
bd74fdaea14602 Yu Zhao 2022-09-18 3439
bd74fdaea14602 Yu Zhao 2022-09-18 3440 if (!pmd_trans_huge(pmd[i])) {
bd02df412cbb9a T.J. Alumbaugh 2023-05-22 3441 if (should_clear_pmd_young())
bd74fdaea14602 Yu Zhao 2022-09-18 3442 pmdp_test_and_clear_young(vma, addr, pmd + i);
bd74fdaea14602 Yu Zhao 2022-09-18 3443 goto next;
bd74fdaea14602 Yu Zhao 2022-09-18 3444 }
bd74fdaea14602 Yu Zhao 2022-09-18 3445
bd74fdaea14602 Yu Zhao 2022-09-18 3446 folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
bd74fdaea14602 Yu Zhao 2022-09-18 3447 if (!folio)
bd74fdaea14602 Yu Zhao 2022-09-18 3448 goto next;
bd74fdaea14602 Yu Zhao 2022-09-18 3449
bd74fdaea14602 Yu Zhao 2022-09-18 3450 if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
bd74fdaea14602 Yu Zhao 2022-09-18 3451 goto next;
bd74fdaea14602 Yu Zhao 2022-09-18 3452
bd74fdaea14602 Yu Zhao 2022-09-18 3453 walk->mm_stats[MM_LEAF_YOUNG]++;
bd74fdaea14602 Yu Zhao 2022-09-18 3454
bd74fdaea14602 Yu Zhao 2022-09-18 @3455 if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
bd74fdaea14602 Yu Zhao 2022-09-18 3456 !(folio_test_anon(folio) && folio_test_swapbacked(folio) &&
bd74fdaea14602 Yu Zhao 2022-09-18 3457 !folio_test_swapcache(folio)))
bd74fdaea14602 Yu Zhao 2022-09-18 3458 folio_mark_dirty(folio);
bd74fdaea14602 Yu Zhao 2022-09-18 3459
bd74fdaea14602 Yu Zhao 2022-09-18 3460 old_gen = folio_update_gen(folio, new_gen);
bd74fdaea14602 Yu Zhao 2022-09-18 3461 if (old_gen >= 0 && old_gen != new_gen)
bd74fdaea14602 Yu Zhao 2022-09-18 3462 update_batch_size(walk, folio, old_gen, new_gen);
bd74fdaea14602 Yu Zhao 2022-09-18 3463 next:
bd74fdaea14602 Yu Zhao 2022-09-18 3464 i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1;
bd74fdaea14602 Yu Zhao 2022-09-18 3465 } while (i <= MIN_LRU_BATCH);
bd74fdaea14602 Yu Zhao 2022-09-18 3466
bd74fdaea14602 Yu Zhao 2022-09-18 3467 arch_leave_lazy_mmu_mode();
bd74fdaea14602 Yu Zhao 2022-09-18 3468 spin_unlock(ptl);
bd74fdaea14602 Yu Zhao 2022-09-18 3469 done:
b5ff4133617d0e T.J. Alumbaugh 2023-01-18 3470 *first = -1;
bd74fdaea14602 Yu Zhao 2022-09-18 3471 }
bd74fdaea14602 Yu Zhao 2022-09-18 3472

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-21 05:08:32

by Donet Tom

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v3 0/4] mm/mglru: Kconfig cleanup


On 12/20/23 20:46, Yu Zhao wrote:
> On Wed, Dec 20, 2023 at 6:45 AM Donet Tom <[email protected]> wrote:
>>
>> On 12/20/23 09:46, Yu Zhao wrote:
>>> On Tue, Dec 19, 2023 at 9:01 PM Kinsey Ho <[email protected]> wrote:
>>>> This series is the result of the following discussion:
>>>> https://lore.kernel.org/[email protected]/
>>>>
>>>> It mainly avoids building the code that walks page tables on CPUs that
>>>> use it, i.e., those don't support hardware accessed bit. Specifically,
>>>> it introduces a new Kconfig to guard some of functions added by
>>>> commit bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
>>>> on CPUs like POWER9, on which the series was tested.
>>>>
>>>>
>>>> Kinsey Ho (4):
>>>> mm/mglru: add CONFIG_ARCH_HAS_HW_PTE_YOUNG
>>>> mm/mglru: add CONFIG_LRU_GEN_WALKS_MMU
>>>> mm/mglru: remove CONFIG_MEMCG
>>>> mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
>>>>
>>>> arch/Kconfig | 8 +
>>>> arch/arm64/Kconfig | 1 +
>>>> arch/x86/Kconfig | 1 +
>>>> arch/x86/include/asm/pgtable.h | 6 -
>>>> include/linux/memcontrol.h | 2 +-
>>>> include/linux/mm_types.h | 16 +-
>>>> include/linux/mmzone.h | 28 +---
>>>> include/linux/pgtable.h | 2 +-
>>>> kernel/fork.c | 2 +-
>>>> mm/Kconfig | 4 +
>>>> mm/vmscan.c | 271 ++++++++++++++++++---------------
>>>> 11 files changed, 174 insertions(+), 167 deletions(-)
>>> +Donet Tom <[email protected]>
>>> who is also working on this.
>>>
>>> Donet, could try this latest version instead? If it works well as the
>>> old one you've been using, can you please provide your Tested-by tag?
>>> Thanks.
>> Hi Yu Zhao,
>>
>> This patch set looks promising.
>>
>> I have conducted tests on PowerPC and x86.
>>
>> In old patch set there is a cleanup patch which removes
>> struct scan_control *sc argument from try_to_inc_max_seq() and
>> run_aging(), Do we need to include that patch?
> Sorry not for including that patch in this series.
>
> It's the first patch in the next cleanup series, which we haven't
> fully tested yet. It'll be the first order of business after the
> holiday season (mid Jan), does the schedule work for you?
>
Yes. No Problem.

Thank you very much.

Donet Tom


>> =>Here are some test results from PowerPC.
>>
>> # ls -l vmscan.o
>> -rw-r--r--. 1 root root 3600080 Dec 19 22:35 vmscan.o
>>
>> # size vmscan.o
>> text data bss dec hex filename
>> 95086 27412 0 122498 1de82 vmscan.o
>>
>> # ./scripts/bloat-o-meter vmscan.o.old vmscan.o
>> add/remove: 4/8 grow/shrink: 7/9 up/down: 860/-2524 (-1664)
>> Function old new delta
>> should_abort_scan - 472 +472
>> inc_max_seq.isra 1472 1612 +140
>> shrink_one 680 760 +80
>> lru_gen_release_memcg 508 556 +48
>> lru_gen_init_pgdat 92 132 +40
>> shrink_node 4040 4064 +24
>> lru_gen_online_memcg 680 696 +16
>> lru_gen_change_state 3968 3984 +16
>> ------
>> shrink_lruvec 2168 2152 -16
>> lru_gen_seq_write 1980 1964 -16
>> isolate_folios 6904 6888 -16
>> lru_gen_init_memcg 32 12 -20
>> mm_list 24 - -24
>> lru_gen_exit_memcg 388 344 -44
>> try_to_shrink_lruvec 904 816 -88
>> lru_gen_rotate_memcg 832 700 -132
>> lru_gen_migrate_mm 132 - -132
>> lru_gen_seq_show 1484 1308 -176
>> iterate_mm_list_nowalk 288 - -288
>> lru_gen_look_around 2284 1984 -300
>> lru_gen_add_mm 528 - -528
>> lru_gen_del_mm 720 - -720
>> Total: Before=116213, After=114549, chg -1.43%
>>
>> =>Here are some test results from x86.
>>
>> $ ls -l vmscan.o
>> -rw-r--r--. 1 donettom donettom 2545792 Dec 20 15:16 vmscan.o
>>
>> $ size vmscan.o
>> text data bss dec hex filename
>> 109751 32189 0 141940 22a74 vmscan.o
>> $
>>
>> $ ./scripts/bloat-o-meter vmscan.o.old vmscan.o
>> add/remove: 7/3 grow/shrink: 14/4 up/down: 2307/-1534 (773)
>> Function old new delta
>> inc_max_seq - 1470 +1470
>> should_abort_scan - 229 +229
>> isolate_folios 4469 4562 +93
>> lru_gen_rotate_memcg 641 731 +90
>> lru_gen_init_memcg 41 99 +58
>> lru_gen_release_memcg 282 336 +54
>> lru_gen_exit_memcg 306 350 +44
>> walk_pud_range 2502 2543 +41
>> shrink_node 2912 2951 +39
>> lru_gen_online_memcg 402 434 +32
>> lru_gen_seq_show 1112 1140 +28
>> lru_gen_add_folio 740 757 +17
>> lru_gen_look_around 1217 1233 +16
>> __pfx_should_abort_scan - 16 +16
>> __pfx_inc_max_seq - 16 +16
>> iterate_mm_list_nowalk 277 292 +15
>> shrink_one 413 426 +13
>> lru_gen_init_lruvec 190 202 +12
>> -----
>> try_to_shrink_lruvec 717 643 -74
>> lru_gen_init_pgdat 196 82 -114
>> try_to_inc_max_seq.isra 2897 1578 -1319
>> Total: Before=101095, After=101868, chg +0.76%
>> $
>>
>>
>> Tested-by: Donet Tom <[email protected]>
> Thanks!
>
> Acked-by: Yu Zhao <[email protected]>

2023-12-21 05:27:03

by Yu Zhao

[permalink] [raw]
Subject: Re: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE

On Thu, Dec 21, 2023 at 12:47:51AM +0800, kernel test robot wrote:
> Hi Kinsey,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on akpm-mm/mm-everything]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Kinsey-Ho/mm-mglru-add-CONFIG_ARCH_HAS_HW_PTE_YOUNG/20231220-120318
> base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link: https://lore.kernel.org/r/20231220040037.883811-5-kinseyho%40google.com
> patch subject: [PATCH mm-unstable v1 4/4] mm/mglru: remove CONFIG_TRANSPARENT_HUGEPAGE
> config: arm-randconfig-002-20231220 (https://download.01.org/0day-ci/archive/20231221/[email protected]/config)
> compiler: arm-linux-gnueabi-gcc (GCC) 13.2.0
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231221/[email protected]/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <[email protected]>
> | Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

Thanks for the report.

Kinsey, please make sure you include the above tags (as well as the
Closes tag from the second report in this thread) in the v2.

> All errors (new ones prefixed by >>):
>
> mm/vmscan.c: In function 'walk_pmd_range_locked':
> >> mm/vmscan.c:3455:21: error: implicit declaration of function 'pmd_dirty'; did you mean 'pte_dirty'? [-Werror=implicit-function-declaration]
> 3455 | if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) &&
> | ^~~~~~~~~
> | pte_dirty
> cc1: some warnings being treated as errors

Apparently we need the following, similar to
commit 6617da8fb565 ("mm: add dummy pmd_young() for architectures not having it")

diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 29d9b12298bc..8b5df1bbf9e9 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -523,6 +523,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd)
return pmd;
}

+#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
return !!(pmd_val(pmd) & (_PAGE_DIRTY | _PAGE_MODIFIED));
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 430b208c0130..e27a4c83c548 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -655,6 +655,7 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
return pmd;
}

+#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_MODIFIED);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ab00235b018f..7b4287f36054 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -673,6 +673,7 @@ static inline int pmd_write(pmd_t pmd)
return pte_write(pmd_pte(pmd));
}

+#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
return pte_dirty(pmd_pte(pmd));
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 601e87fa8a9a..1299b56e43f6 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -770,6 +770,7 @@ static inline int pud_write(pud_t pud)
return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
}

+#define pmd_dirty pmd_dirty
static inline int pmd_dirty(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 5e41033bf4ca..a8c871b7d786 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -706,6 +706,7 @@ static inline unsigned long pmd_write(pmd_t pmd)
#define pud_write(pud) pte_write(__pte(pud_val(pud)))

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pmd_dirty pmd_dirty
static inline unsigned long pmd_dirty(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 57bab91bbf50..ee83a238ac13 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -141,6 +141,7 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}

+#define pmd_dirty pmd_dirty
static inline bool pmd_dirty(pmd_t pmd)
{
return pmd_flags(pmd) & _PAGE_DIRTY_BITS;
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index af7639c3b0a3..b646c84cc592 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -184,6 +184,13 @@ static inline int pmd_young(pmd_t pmd)
}
#endif

+#ifndef pmd_dirty
+static inline int pmd_dirty(pmd_t pmd)
+{
+ return 0;
+}
+#endif
+
/*
* A facility to provide lazy MMU batching. This allows PTE updates and
* page invalidations to be delayed until a call to leave lazy MMU mode