2022-08-29 14:52:20

by Qi Zheng

Subject: [PATCH 0/7] add common struct mm_slot and use it in THP and KSM

Hi all,

At present, both the THP and KSM modules have a similar mm_slot
structure for organizing and recording the information required for
scanning an mm, and each defines the following identical operation
functions:

- alloc_mm_slot
- free_mm_slot
- get_mm_slot
- insert_to_mm_slots_hash

In order to de-duplicate this code, this patchset introduces a common
struct mm_slot and converts THP and KSM to use it.
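
To make the shape concrete, here is a rough sketch of the pattern
(struct and helper names as they appear in the patches below):

struct mm_slot {
        struct hlist_node hash;         /* link into the mm_slots hash list */
        struct list_head mm_node;       /* link into the mm_slots scan list */
        struct mm_struct *mm;           /* the mm this information is valid for */
};

/* Each user embeds it and keeps only its private fields, e.g.: */
struct ksm_mm_slot {
        struct mm_slot slot;
        struct ksm_rmap_item *rmap_list;
};

/* and recovers its own type from a generic slot with: */
#define mm_slot_entry(ptr, type, member) container_of(ptr, type, member)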

This series is based on next-20220829.

Comments and suggestions are welcome.

Thanks,
Qi.

Qi Zheng (7):
mm: introduce common struct mm_slot
mm: thp: convert to use common struct mm_slot
ksm: remove redundant declarations in ksm.h
ksm: add the ksm prefix to the names of the ksm private structures
ksm: convert ksm_mm_slot.mm_list to ksm_mm_slot.mm_node
ksm: convert ksm_mm_slot.link to ksm_mm_slot.hash
ksm: convert to use common struct mm_slot

Documentation/mm/ksm.rst | 2 +-
include/linux/ksm.h | 3 -
mm/khugepaged.c | 121 ++++++---------
mm/ksm.c | 326 ++++++++++++++++++---------------------
mm/mm_slot.h | 55 +++++++
5 files changed, 260 insertions(+), 247 deletions(-)
create mode 100644 mm/mm_slot.h

--
2.20.1


2022-08-29 14:52:20

by Qi Zheng

Subject: [PATCH 6/7] ksm: convert ksm_mm_slot.link to ksm_mm_slot.hash

In order to use the common struct mm_slot, convert ksm_mm_slot.link
to ksm_mm_slot.hash in advance. No functional change.

Signed-off-by: Qi Zheng <[email protected]>
---
mm/ksm.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 8c52aa7e0a02..667efca75b0d 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -113,13 +113,13 @@

/**
* struct ksm_mm_slot - ksm information per mm that is being scanned
- * @link: link to the mm_slots hash list
+ * @hash: link to the mm_slots hash list
* @mm_node: link into the mm_slots list, rooted in ksm_mm_head
* @rmap_list: head for this mm_slot's singly-linked list of rmap_items
* @mm: the mm that this information is valid for
*/
struct ksm_mm_slot {
- struct hlist_node link;
+ struct hlist_node hash;
struct list_head mm_node;
struct ksm_rmap_item *rmap_list;
struct mm_struct *mm;
@@ -424,7 +424,7 @@ static struct ksm_mm_slot *get_mm_slot(struct mm_struct *mm)
{
struct ksm_mm_slot *slot;

- hash_for_each_possible(mm_slots_hash, slot, link, (unsigned long)mm)
+ hash_for_each_possible(mm_slots_hash, slot, hash, (unsigned long)mm)
if (slot->mm == mm)
return slot;

@@ -435,7 +435,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
struct ksm_mm_slot *mm_slot)
{
mm_slot->mm = mm;
- hash_add(mm_slots_hash, &mm_slot->link, (unsigned long)mm);
+ hash_add(mm_slots_hash, &mm_slot->hash, (unsigned long)mm);
}

/*
@@ -1008,7 +1008,7 @@ static int unmerge_and_remove_all_rmap_items(void)
ksm_scan.mm_slot = list_entry(mm_slot->mm_node.next,
struct ksm_mm_slot, mm_node);
if (ksm_test_exit(mm)) {
- hash_del(&mm_slot->link);
+ hash_del(&mm_slot->hash);
list_del(&mm_slot->mm_node);
spin_unlock(&ksm_mmlist_lock);

@@ -2376,7 +2376,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
* or when all VM_MERGEABLE areas have been unmapped (and
* mmap_lock then protects against race with MADV_MERGEABLE).
*/
- hash_del(&slot->link);
+ hash_del(&slot->hash);
list_del(&slot->mm_node);
spin_unlock(&ksm_mmlist_lock);

@@ -2570,7 +2570,7 @@ void __ksm_exit(struct mm_struct *mm)
mm_slot = get_mm_slot(mm);
if (mm_slot && ksm_scan.mm_slot != mm_slot) {
if (!mm_slot->rmap_list) {
- hash_del(&mm_slot->link);
+ hash_del(&mm_slot->hash);
list_del(&mm_slot->mm_node);
easy_to_free = 1;
} else {
--
2.20.1

2022-08-29 14:54:10

by Qi Zheng

Subject: [PATCH 7/7] ksm: convert to use common struct mm_slot

Convert KSM to use the common struct mm_slot. No functional change.
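
In short, the conversion below changes the access pattern like this
(an illustrative sketch only, mirroring the diff):

/* before: ksm_mm_slot carried its own mm/hash/mm_node fields */
mm = mm_slot->mm;
hash_del(&mm_slot->hash);

/* after: those fields live in the embedded struct mm_slot */
mm = mm_slot->slot.mm;
hash_del(&mm_slot->slot.hash);

/* the common helpers return a struct mm_slot *, so the KSM-specific
 * container is recovered with mm_slot_entry():
 */
slot = get_mm_slot(mm_slots_hash, mm);
mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);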

Signed-off-by: Qi Zheng <[email protected]>
---
mm/ksm.c | 132 +++++++++++++++++++++++--------------------------------
1 file changed, 56 insertions(+), 76 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 667efca75b0d..051a09d24d54 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -42,6 +42,7 @@

#include <asm/tlbflush.h>
#include "internal.h"
+#include "mm_slot.h"

#ifdef CONFIG_NUMA
#define NUMA(x) (x)
@@ -113,16 +114,12 @@

/**
* struct ksm_mm_slot - ksm information per mm that is being scanned
- * @hash: link to the mm_slots hash list
- * @mm_node: link into the mm_slots list, rooted in ksm_mm_head
+ * @slot: hash lookup from mm to mm_slot
* @rmap_list: head for this mm_slot's singly-linked list of rmap_items
- * @mm: the mm that this information is valid for
*/
struct ksm_mm_slot {
- struct hlist_node hash;
- struct list_head mm_node;
+ struct mm_slot slot;
struct ksm_rmap_item *rmap_list;
- struct mm_struct *mm;
};

/**
@@ -231,7 +228,7 @@ static LIST_HEAD(migrate_nodes);
static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);

static struct ksm_mm_slot ksm_mm_head = {
- .mm_node = LIST_HEAD_INIT(ksm_mm_head.mm_node),
+ .slot.mm_node = LIST_HEAD_INIT(ksm_mm_head.slot.mm_node),
};
static struct ksm_scan ksm_scan = {
.mm_slot = &ksm_mm_head,
@@ -408,36 +405,6 @@ static inline void free_stable_node(struct ksm_stable_node *stable_node)
kmem_cache_free(stable_node_cache, stable_node);
}

-static inline struct ksm_mm_slot *alloc_mm_slot(void)
-{
- if (!mm_slot_cache) /* initialization failed */
- return NULL;
- return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
-}
-
-static inline void free_mm_slot(struct ksm_mm_slot *mm_slot)
-{
- kmem_cache_free(mm_slot_cache, mm_slot);
-}
-
-static struct ksm_mm_slot *get_mm_slot(struct mm_struct *mm)
-{
- struct ksm_mm_slot *slot;
-
- hash_for_each_possible(mm_slots_hash, slot, hash, (unsigned long)mm)
- if (slot->mm == mm)
- return slot;
-
- return NULL;
-}
-
-static void insert_to_mm_slots_hash(struct mm_struct *mm,
- struct ksm_mm_slot *mm_slot)
-{
- mm_slot->mm = mm;
- hash_add(mm_slots_hash, &mm_slot->hash, (unsigned long)mm);
-}
-
/*
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
* page tables after it has passed through ksm_exit() - which, if necessary,
@@ -975,20 +942,22 @@ static int remove_all_stable_nodes(void)
static int unmerge_and_remove_all_rmap_items(void)
{
struct ksm_mm_slot *mm_slot;
+ struct mm_slot *slot;
struct mm_struct *mm;
struct vm_area_struct *vma;
int err = 0;

spin_lock(&ksm_mmlist_lock);
- ksm_scan.mm_slot = list_entry(ksm_mm_head.mm_node.next,
- struct ksm_mm_slot, mm_node);
+ slot = list_entry(ksm_mm_head.slot.mm_node.next,
+ struct mm_slot, mm_node);
+ ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
spin_unlock(&ksm_mmlist_lock);

for (mm_slot = ksm_scan.mm_slot; mm_slot != &ksm_mm_head;
mm_slot = ksm_scan.mm_slot) {
- VMA_ITERATOR(vmi, mm_slot->mm, 0);
+ VMA_ITERATOR(vmi, mm_slot->slot.mm, 0);

- mm = mm_slot->mm;
+ mm = mm_slot->slot.mm;
mmap_read_lock(mm);
for_each_vma(vmi, vma) {
if (ksm_test_exit(mm))
@@ -1005,14 +974,15 @@ static int unmerge_and_remove_all_rmap_items(void)
mmap_read_unlock(mm);

spin_lock(&ksm_mmlist_lock);
- ksm_scan.mm_slot = list_entry(mm_slot->mm_node.next,
- struct ksm_mm_slot, mm_node);
+ slot = list_entry(mm_slot->slot.mm_node.next,
+ struct mm_slot, mm_node);
+ ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
if (ksm_test_exit(mm)) {
- hash_del(&mm_slot->hash);
- list_del(&mm_slot->mm_node);
+ hash_del(&mm_slot->slot.hash);
+ list_del(&mm_slot->slot.mm_node);
spin_unlock(&ksm_mmlist_lock);

- free_mm_slot(mm_slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
mmdrop(mm);
} else
@@ -2233,7 +2203,7 @@ static struct ksm_rmap_item *get_next_rmap_item(struct ksm_mm_slot *mm_slot,
rmap_item = alloc_rmap_item();
if (rmap_item) {
/* It has already been zeroed */
- rmap_item->mm = mm_slot->mm;
+ rmap_item->mm = mm_slot->slot.mm;
rmap_item->address = addr;
rmap_item->rmap_list = *rmap_list;
*rmap_list = rmap_item;
@@ -2244,17 +2214,18 @@ static struct ksm_rmap_item *get_next_rmap_item(struct ksm_mm_slot *mm_slot,
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
- struct ksm_mm_slot *slot;
+ struct ksm_mm_slot *mm_slot;
+ struct mm_slot *slot;
struct vm_area_struct *vma;
struct ksm_rmap_item *rmap_item;
struct vma_iterator vmi;
int nid;

- if (list_empty(&ksm_mm_head.mm_node))
+ if (list_empty(&ksm_mm_head.slot.mm_node))
return NULL;

- slot = ksm_scan.mm_slot;
- if (slot == &ksm_mm_head) {
+ mm_slot = ksm_scan.mm_slot;
+ if (mm_slot == &ksm_mm_head) {
/*
* A number of pages can hang around indefinitely on per-cpu
* pagevecs, raised page count preventing write_protect_page
@@ -2291,20 +2262,23 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
root_unstable_tree[nid] = RB_ROOT;

spin_lock(&ksm_mmlist_lock);
- slot = list_entry(slot->mm_node.next, struct ksm_mm_slot, mm_node);
- ksm_scan.mm_slot = slot;
+ slot = list_entry(mm_slot->slot.mm_node.next,
+ struct mm_slot, mm_node);
+ mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
+ ksm_scan.mm_slot = mm_slot;
spin_unlock(&ksm_mmlist_lock);
/*
* Although we tested list_empty() above, a racing __ksm_exit
* of the last mm on the list may have removed it since then.
*/
- if (slot == &ksm_mm_head)
+ if (mm_slot == &ksm_mm_head)
return NULL;
next_mm:
ksm_scan.address = 0;
- ksm_scan.rmap_list = &slot->rmap_list;
+ ksm_scan.rmap_list = &mm_slot->rmap_list;
}

+ slot = &mm_slot->slot;
mm = slot->mm;
vma_iter_init(&vmi, mm, ksm_scan.address);

@@ -2334,7 +2308,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
if (PageAnon(*page)) {
flush_anon_page(vma, *page, ksm_scan.address);
flush_dcache_page(*page);
- rmap_item = get_next_rmap_item(slot,
+ rmap_item = get_next_rmap_item(mm_slot,
ksm_scan.rmap_list, ksm_scan.address);
if (rmap_item) {
ksm_scan.rmap_list =
@@ -2355,7 +2329,7 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
if (ksm_test_exit(mm)) {
no_vmas:
ksm_scan.address = 0;
- ksm_scan.rmap_list = &slot->rmap_list;
+ ksm_scan.rmap_list = &mm_slot->rmap_list;
}
/*
* Nuke all the rmap_items that are above this current rmap:
@@ -2364,8 +2338,9 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
remove_trailing_rmap_items(ksm_scan.rmap_list);

spin_lock(&ksm_mmlist_lock);
- ksm_scan.mm_slot = list_entry(slot->mm_node.next,
- struct ksm_mm_slot, mm_node);
+ slot = list_entry(mm_slot->slot.mm_node.next,
+ struct mm_slot, mm_node);
+ ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
if (ksm_scan.address == 0) {
/*
* We've completed a full scan of all vmas, holding mmap_lock
@@ -2376,11 +2351,11 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
* or when all VM_MERGEABLE areas have been unmapped (and
* mmap_lock then protects against race with MADV_MERGEABLE).
*/
- hash_del(&slot->hash);
- list_del(&slot->mm_node);
+ hash_del(&mm_slot->slot.hash);
+ list_del(&mm_slot->slot.mm_node);
spin_unlock(&ksm_mmlist_lock);

- free_mm_slot(slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
mmap_read_unlock(mm);
mmdrop(mm);
@@ -2397,8 +2372,8 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
}

/* Repeat until we've completed scanning the whole list */
- slot = ksm_scan.mm_slot;
- if (slot != &ksm_mm_head)
+ mm_slot = ksm_scan.mm_slot;
+ if (mm_slot != &ksm_mm_head)
goto next_mm;

ksm_scan.seqnr++;
@@ -2426,7 +2401,7 @@ static void ksm_do_scan(unsigned int scan_npages)

static int ksmd_should_run(void)
{
- return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_node);
+ return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.slot.mm_node);
}

static int ksm_scan_thread(void *nothing)
@@ -2516,17 +2491,20 @@ EXPORT_SYMBOL_GPL(ksm_madvise);
int __ksm_enter(struct mm_struct *mm)
{
struct ksm_mm_slot *mm_slot;
+ struct mm_slot *slot;
int needs_wakeup;

- mm_slot = alloc_mm_slot();
+ mm_slot = alloc_mm_slot(mm_slot_cache);
if (!mm_slot)
return -ENOMEM;

+ slot = &mm_slot->slot;
+
/* Check ksm_run too? Would need tighter locking */
- needs_wakeup = list_empty(&ksm_mm_head.mm_node);
+ needs_wakeup = list_empty(&ksm_mm_head.slot.mm_node);

spin_lock(&ksm_mmlist_lock);
- insert_to_mm_slots_hash(mm, mm_slot);
+ insert_to_mm_slots_hash(mm_slots_hash, mm, slot);
/*
* When KSM_RUN_MERGE (or KSM_RUN_STOP),
* insert just behind the scanning cursor, to let the area settle
@@ -2538,9 +2516,9 @@ int __ksm_enter(struct mm_struct *mm)
* missed: then we might as well insert at the end of the list.
*/
if (ksm_run & KSM_RUN_UNMERGE)
- list_add_tail(&mm_slot->mm_node, &ksm_mm_head.mm_node);
+ list_add_tail(&slot->mm_node, &ksm_mm_head.slot.mm_node);
else
- list_add_tail(&mm_slot->mm_node, &ksm_scan.mm_slot->mm_node);
+ list_add_tail(&slot->mm_node, &ksm_scan.mm_slot->slot.mm_node);
spin_unlock(&ksm_mmlist_lock);

set_bit(MMF_VM_MERGEABLE, &mm->flags);
@@ -2555,6 +2533,7 @@ int __ksm_enter(struct mm_struct *mm)
void __ksm_exit(struct mm_struct *mm)
{
struct ksm_mm_slot *mm_slot;
+ struct mm_slot *slot;
int easy_to_free = 0;

/*
@@ -2567,21 +2546,22 @@ void __ksm_exit(struct mm_struct *mm)
*/

spin_lock(&ksm_mmlist_lock);
- mm_slot = get_mm_slot(mm);
+ slot = get_mm_slot(mm_slots_hash, mm);
+ mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot);
if (mm_slot && ksm_scan.mm_slot != mm_slot) {
if (!mm_slot->rmap_list) {
- hash_del(&mm_slot->hash);
- list_del(&mm_slot->mm_node);
+ hash_del(&slot->hash);
+ list_del(&slot->mm_node);
easy_to_free = 1;
} else {
- list_move(&mm_slot->mm_node,
- &ksm_scan.mm_slot->mm_node);
+ list_move(&slot->mm_node,
+ &ksm_scan.mm_slot->slot.mm_node);
}
}
spin_unlock(&ksm_mmlist_lock);

if (easy_to_free) {
- free_mm_slot(mm_slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
mmdrop(mm);
} else if (mm_slot) {
--
2.20.1

2022-08-29 14:57:54

by Qi Zheng

Subject: [PATCH 2/7] mm: thp: convert to use common struct mm_slot

Rename the private struct mm_slot to struct khugepaged_mm_slot and
convert it to use the common struct mm_slot. No functional change.

Signed-off-by: Qi Zheng <[email protected]>
---
mm/khugepaged.c | 121 ++++++++++++++++++++----------------------------
1 file changed, 51 insertions(+), 70 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d8e388106322..c7f40f43e0f3 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -23,6 +23,7 @@
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
+#include "mm_slot.h"

enum scan_result {
SCAN_FAIL,
@@ -104,17 +105,13 @@ struct collapse_control {
};

/**
- * struct mm_slot - hash lookup from mm to mm_slot
- * @hash: hash collision list
- * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
- * @mm: the mm that this information is valid for
+ * struct khugepaged_mm_slot - khugepaged information per mm that is being scanned
+ * @slot: hash lookup from mm to mm_slot
* @nr_pte_mapped_thp: number of pte mapped THP
* @pte_mapped_thp: address array corresponding pte mapped THP
*/
-struct mm_slot {
- struct hlist_node hash;
- struct list_head mm_node;
- struct mm_struct *mm;
+struct khugepaged_mm_slot {
+ struct mm_slot slot;

/* pte-mapped THP in this mm */
int nr_pte_mapped_thp;
@@ -131,7 +128,7 @@ struct mm_slot {
*/
struct khugepaged_scan {
struct list_head mm_head;
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;
unsigned long address;
};

@@ -395,8 +392,9 @@ int hugepage_madvise(struct vm_area_struct *vma,
int __init khugepaged_init(void)
{
mm_slot_cache = kmem_cache_create("khugepaged_mm_slot",
- sizeof(struct mm_slot),
- __alignof__(struct mm_slot), 0, NULL);
+ sizeof(struct khugepaged_mm_slot),
+ __alignof__(struct khugepaged_mm_slot),
+ 0, NULL);
if (!mm_slot_cache)
return -ENOMEM;

@@ -413,36 +411,6 @@ void __init khugepaged_destroy(void)
kmem_cache_destroy(mm_slot_cache);
}

-static inline struct mm_slot *alloc_mm_slot(void)
-{
- if (!mm_slot_cache) /* initialization failed */
- return NULL;
- return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
-}
-
-static inline void free_mm_slot(struct mm_slot *mm_slot)
-{
- kmem_cache_free(mm_slot_cache, mm_slot);
-}
-
-static struct mm_slot *get_mm_slot(struct mm_struct *mm)
-{
- struct mm_slot *mm_slot;
-
- hash_for_each_possible(mm_slots_hash, mm_slot, hash, (unsigned long)mm)
- if (mm == mm_slot->mm)
- return mm_slot;
-
- return NULL;
-}
-
-static void insert_to_mm_slots_hash(struct mm_struct *mm,
- struct mm_slot *mm_slot)
-{
- mm_slot->mm = mm;
- hash_add(mm_slots_hash, &mm_slot->hash, (long)mm);
-}
-
static inline int hpage_collapse_test_exit(struct mm_struct *mm)
{
return atomic_read(&mm->mm_users) == 0;
@@ -450,28 +418,31 @@ static inline int hpage_collapse_test_exit(struct mm_struct *mm)

void __khugepaged_enter(struct mm_struct *mm)
{
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;
+ struct mm_slot *slot;
int wakeup;

- mm_slot = alloc_mm_slot();
+ mm_slot = alloc_mm_slot(mm_slot_cache);
if (!mm_slot)
return;

+ slot = &mm_slot->slot;
+
/* __khugepaged_exit() must not run from under us */
VM_BUG_ON_MM(hpage_collapse_test_exit(mm), mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
- free_mm_slot(mm_slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
return;
}

spin_lock(&khugepaged_mm_lock);
- insert_to_mm_slots_hash(mm, mm_slot);
+ insert_to_mm_slots_hash(mm_slots_hash, mm, slot);
/*
* Insert just behind the scanning cursor, to let the area settle
* down a little.
*/
wakeup = list_empty(&khugepaged_scan.mm_head);
- list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
+ list_add_tail(&slot->mm_node, &khugepaged_scan.mm_head);
spin_unlock(&khugepaged_mm_lock);

mmgrab(mm);
@@ -491,21 +462,23 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,

void __khugepaged_exit(struct mm_struct *mm)
{
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;
+ struct mm_slot *slot;
int free = 0;

spin_lock(&khugepaged_mm_lock);
- mm_slot = get_mm_slot(mm);
+ slot = get_mm_slot(mm_slots_hash, mm);
+ mm_slot = mm_slot_entry(slot, struct khugepaged_mm_slot, slot);
if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
- hash_del(&mm_slot->hash);
- list_del(&mm_slot->mm_node);
+ hash_del(&slot->hash);
+ list_del(&slot->mm_node);
free = 1;
}
spin_unlock(&khugepaged_mm_lock);

if (free) {
clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
- free_mm_slot(mm_slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
mmdrop(mm);
} else if (mm_slot) {
/*
@@ -1321,16 +1294,17 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
return result;
}

-static void collect_mm_slot(struct mm_slot *mm_slot)
+static void collect_mm_slot(struct khugepaged_mm_slot *mm_slot)
{
- struct mm_struct *mm = mm_slot->mm;
+ struct mm_slot *slot = &mm_slot->slot;
+ struct mm_struct *mm = slot->mm;

lockdep_assert_held(&khugepaged_mm_lock);

if (hpage_collapse_test_exit(mm)) {
/* free mm_slot */
- hash_del(&mm_slot->hash);
- list_del(&mm_slot->mm_node);
+ hash_del(&slot->hash);
+ list_del(&slot->mm_node);

/*
* Not strictly needed because the mm exited already.
@@ -1339,7 +1313,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
*/

/* khugepaged_mm_lock actually not necessary for the below */
- free_mm_slot(mm_slot);
+ free_mm_slot(mm_slot_cache, mm_slot);
mmdrop(mm);
}
}
@@ -1352,12 +1326,14 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
static void khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
unsigned long addr)
{
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;
+ struct mm_slot *slot;

VM_BUG_ON(addr & ~HPAGE_PMD_MASK);

spin_lock(&khugepaged_mm_lock);
- mm_slot = get_mm_slot(mm);
+ slot = get_mm_slot(mm_slots_hash, mm);
+ mm_slot = mm_slot_entry(slot, struct khugepaged_mm_slot, slot);
if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
spin_unlock(&khugepaged_mm_lock);
@@ -1489,9 +1465,10 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
goto drop_hpage;
}

-static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+static void khugepaged_collapse_pte_mapped_thps(struct khugepaged_mm_slot *mm_slot)
{
- struct mm_struct *mm = mm_slot->mm;
+ struct mm_slot *slot = &mm_slot->slot;
+ struct mm_struct *mm = slot->mm;
int i;

if (likely(mm_slot->nr_pte_mapped_thp == 0))
@@ -2054,7 +2031,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
__acquires(&khugepaged_mm_lock)
{
struct vma_iterator vmi;
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;
+ struct mm_slot *slot;
struct mm_struct *mm;
struct vm_area_struct *vma;
int progress = 0;
@@ -2064,18 +2042,20 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
lockdep_assert_held(&khugepaged_mm_lock);
*result = SCAN_FAIL;

- if (khugepaged_scan.mm_slot)
+ if (khugepaged_scan.mm_slot) {
mm_slot = khugepaged_scan.mm_slot;
- else {
- mm_slot = list_entry(khugepaged_scan.mm_head.next,
+ slot = &mm_slot->slot;
+ } else {
+ slot = list_entry(khugepaged_scan.mm_head.next,
struct mm_slot, mm_node);
+ mm_slot = mm_slot_entry(slot, struct khugepaged_mm_slot, slot);
khugepaged_scan.address = 0;
khugepaged_scan.mm_slot = mm_slot;
}
spin_unlock(&khugepaged_mm_lock);
khugepaged_collapse_pte_mapped_thps(mm_slot);

- mm = mm_slot->mm;
+ mm = slot->mm;
/*
* Don't wait for semaphore (to avoid long wait times). Just move to
* the next mm on the list.
@@ -2171,10 +2151,11 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
* khugepaged runs here, khugepaged_exit will find
* mm_slot not pointing to the exiting mm.
*/
- if (mm_slot->mm_node.next != &khugepaged_scan.mm_head) {
- khugepaged_scan.mm_slot = list_entry(
- mm_slot->mm_node.next,
- struct mm_slot, mm_node);
+ if (slot->mm_node.next != &khugepaged_scan.mm_head) {
+ slot = list_entry(slot->mm_node.next,
+ struct mm_slot, mm_node);
+ khugepaged_scan.mm_slot =
+ mm_slot_entry(slot, struct khugepaged_mm_slot, slot);
khugepaged_scan.address = 0;
} else {
khugepaged_scan.mm_slot = NULL;
@@ -2269,7 +2250,7 @@ static void khugepaged_wait_work(void)

static int khugepaged(void *none)
{
- struct mm_slot *mm_slot;
+ struct khugepaged_mm_slot *mm_slot;

set_freezable();
set_user_nice(current, MAX_NICE);
--
2.20.1

2022-08-29 14:59:25

by Qi Zheng

[permalink] [raw]
Subject: [PATCH 3/7] ksm: remove redundant declarations in ksm.h

Currently, the forward declaration of struct stable_node in
include/linux/ksm.h is not used, either in ksm.h itself or in the
files that include it. The forward declaration of struct mem_cgroup
is likewise unused in ksm.h. Both are redundant, so remove them.

Signed-off-by: Qi Zheng <[email protected]>
---
include/linux/ksm.h | 3 ---
1 file changed, 3 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 0b4f17418f64..7e232ba59b86 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -15,9 +15,6 @@
#include <linux/sched.h>
#include <linux/sched/coredump.h>

-struct stable_node;
-struct mem_cgroup;
-
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
--
2.20.1

2022-08-29 15:10:51

by Qi Zheng

Subject: [PATCH 1/7] mm: introduce common struct mm_slot

At present, both the THP and KSM modules have a similar mm_slot
structure for organizing and recording the information required
for scanning an mm, and each defines the following identical
operation functions:

- alloc_mm_slot
- free_mm_slot
- get_mm_slot
- insert_to_mm_slots_hash

In order to de-duplicate this code, this patch introduces a
common struct mm_slot; subsequent patches will convert THP and
KSM to use it.

Signed-off-by: Qi Zheng <[email protected]>
---
mm/mm_slot.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
create mode 100644 mm/mm_slot.h

diff --git a/mm/mm_slot.h b/mm/mm_slot.h
new file mode 100644
index 000000000000..c8f0d26ef7b0
--- /dev/null
+++ b/mm/mm_slot.h
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _LINUX_MM_SLOT_H
+#define _LINUX_MM_SLOT_H
+
+#include <linux/hashtable.h>
+#include <linux/slab.h>
+
+/*
+ * struct mm_slot - hash lookup from mm to mm_slot
+ * @hash: link to the mm_slots hash list
+ * @mm_node: link into the mm_slots list
+ * @mm: the mm that this information is valid for
+ */
+struct mm_slot {
+ struct hlist_node hash;
+ struct list_head mm_node;
+ struct mm_struct *mm;
+};
+
+#define mm_slot_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+static inline void *alloc_mm_slot(struct kmem_cache *cache)
+{
+ if (!cache) /* initialization failed */
+ return NULL;
+ return kmem_cache_zalloc(cache, GFP_KERNEL);
+}
+
+static inline void free_mm_slot(struct kmem_cache *cache, void *objp)
+{
+ kmem_cache_free(cache, objp);
+}
+
+#define get_mm_slot(_hashtable, _mm) \
+({ \
+ struct mm_slot *tmp_slot, *mm_slot = NULL; \
+ \
+ hash_for_each_possible(_hashtable, tmp_slot, hash, (unsigned long)_mm) \
+ if (_mm == tmp_slot->mm) { \
+ mm_slot = tmp_slot; \
+ break; \
+ } \
+ \
+ mm_slot; \
+})
+
+#define insert_to_mm_slots_hash(_hashtable, _mm, _mm_slot) \
+({ \
+ _mm_slot->mm = _mm; \
+ hash_add(_hashtable, &_mm_slot->hash, (unsigned long)_mm); \
+})
+
+#endif /* _LINUX_MM_SLOT_H */
--
2.20.1

2022-08-29 20:31:57

by Andrew Morton

Subject: Re: [PATCH 1/7] mm: introduce common struct mm_slot

On Mon, 29 Aug 2022 22:30:49 +0800 Qi Zheng <[email protected]> wrote:

> At present, both THP and KSM module have similar structures
> mm_slot for organizing and recording the information required
> for scanning mm, and each defines the following exactly the
> same operation functions:
>
> - alloc_mm_slot
> - free_mm_slot
> - get_mm_slot
> - insert_to_mm_slots_hash
>
> In order to de-duplicate these codes, this patch introduces a
> common struct mm_slot, and subsequent patches will let THP and
> KSM to use it.

Seems like a good idea.

> --- /dev/null
> +++ b/mm/mm_slot.h
> @@ -0,0 +1,55 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef _LINUX_MM_SLOT_H
> +#define _LINUX_MM_SLOT_H
> +
> +#include <linux/hashtable.h>
> +#include <linux/slab.h>
> +
> +/*
> + * struct mm_slot - hash lookup from mm to mm_slot
> + * @hash: link to the mm_slots hash list
> + * @mm_node: link into the mm_slots list
> + * @mm: the mm that this information is valid for
> + */
> +struct mm_slot {
> + struct hlist_node hash;
> + struct list_head mm_node;
> + struct mm_struct *mm;
> +};

It appears that the presence of an mm_struct in the hash list does not
contribute to the mm_struct's refcount? That's somewhat unexpected.

It would be helpful to add some words here describing the means by
which a user of mm_slot would prevent the mm_struct from getting freed
while on the list. I assume "caller must maintain a reference on the
mm_struct while it remains on an mm_slot hash list"?

> +#define mm_slot_entry(ptr, type, member) \
> + container_of(ptr, type, member)
> +
> +static inline void *alloc_mm_slot(struct kmem_cache *cache)
> +{
> + if (!cache) /* initialization failed */
> + return NULL;
> + return kmem_cache_zalloc(cache, GFP_KERNEL);
> +}
> +
> +static inline void free_mm_slot(struct kmem_cache *cache, void *objp)
> +{
> + kmem_cache_free(cache, objp);
> +}
> +
> +#define get_mm_slot(_hashtable, _mm) \
> +({ \
> + struct mm_slot *tmp_slot, *mm_slot = NULL; \
> + \
> + hash_for_each_possible(_hashtable, tmp_slot, hash, (unsigned long)_mm) \
> + if (_mm == tmp_slot->mm) { \
> + mm_slot = tmp_slot; \
> + break; \
> + } \
> + \
> + mm_slot; \
> +})

Is there a reason why this must be implemented as a macro? An inline
C function would be preferable, although this may be overly large for
inlining. mm/util.c might suit.

> +#define insert_to_mm_slots_hash(_hashtable, _mm, _mm_slot) \
> +({ \
> + _mm_slot->mm = _mm; \
> + hash_add(_hashtable, &_mm_slot->hash, (unsigned long)_mm); \
> +})

Does this need to be a macro?


And the naming. Can we please have

mm_slot_entry
mm_slot_alloc
mm_slot_free
mm_slot_get
mm_slot_insert

Also, "get" usually implies that a refcout is taken on the obtained
object, so mm_slot_lookup() would be more appropriate.

2022-08-30 05:05:46

by Qi Zheng

Subject: Re: [PATCH 1/7] mm: introduce common struct mm_slot



On 2022/8/30 03:51, Andrew Morton wrote:
> On Mon, 29 Aug 2022 22:30:49 +0800 Qi Zheng <[email protected]> wrote:
>
>> At present, both THP and KSM module have similar structures
>> mm_slot for organizing and recording the information required
>> for scanning mm, and each defines the following exactly the
>> same operation functions:
>>
>> - alloc_mm_slot
>> - free_mm_slot
>> - get_mm_slot
>> - insert_to_mm_slots_hash
>>
>> In order to de-duplicate these codes, this patch introduces a
>> common struct mm_slot, and subsequent patches will let THP and
>> KSM to use it.
>
> Seems like a good idea.
>
>> --- /dev/null
>> +++ b/mm/mm_slot.h
>> @@ -0,0 +1,55 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#ifndef _LINUX_MM_SLOT_H
>> +#define _LINUX_MM_SLOT_H
>> +
>> +#include <linux/hashtable.h>
>> +#include <linux/slab.h>
>> +
>> +/*
>> + * struct mm_slot - hash lookup from mm to mm_slot
>> + * @hash: link to the mm_slots hash list
>> + * @mm_node: link into the mm_slots list
>> + * @mm: the mm that this information is valid for
>> + */
>> +struct mm_slot {
>> + struct hlist_node hash;
>> + struct list_head mm_node;
>> + struct mm_struct *mm;
>> +};
>
> It appears that the presence of an mm_struct in the hash list does not
> contribute to the mm_struct's refcount? That's somewhat unexpected.

Hi,

The reason is that khugepaged_exit()/ksm_exit() are called first in
__mmput() to remove the mm from the linked list, which prevents the
mm_struct from being freed while it is still on the list.
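
Roughly, the ordering being relied on looks like this (a heavily
simplified sketch of __mmput() in kernel/fork.c, details omitted):

static inline void __mmput(struct mm_struct *mm)
{
        /* ... */
        ksm_exit(mm);           /* unhook the mm from KSM, or hand it off to ksmd */
        khugepaged_exit(mm);    /* likewise for khugepaged; runs before exit_mmap() */
        exit_mmap(mm);
        /* ... */
        mmdrop(mm);
}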

>
> It would be helpful to add some words here describing the means by
> which a user of mm_slot would prevent the mm_struct from getting freed
> while on the list. I assume "caller must maintain a reference on the
> mm_struct while it remains on an mm_slot hash list"?
>
>> +#define mm_slot_entry(ptr, type, member) \
>> + container_of(ptr, type, member)
>> +
>> +static inline void *alloc_mm_slot(struct kmem_cache *cache)
>> +{
>> + if (!cache) /* initialization failed */
>> + return NULL;
>> + return kmem_cache_zalloc(cache, GFP_KERNEL);
>> +}
>> +
>> +static inline void free_mm_slot(struct kmem_cache *cache, void *objp)
>> +{
>> + kmem_cache_free(cache, objp);
>> +}
>> +
>> +#define get_mm_slot(_hashtable, _mm) \
>> +({ \
>> + struct mm_slot *tmp_slot, *mm_slot = NULL; \
>> + \
>> + hash_for_each_possible(_hashtable, tmp_slot, hash, (unsigned long)_mm) \
>> + if (_mm == tmp_slot->mm) { \
>> + mm_slot = tmp_slot; \
>> + break; \
>> + } \
>> + \
>> + mm_slot; \
>> +})
>
> Is there a reason why this must be implemented as a macro? That's

Since _hashtable is an array name, IIUC it cannot be passed as a
function parameter, so I chose to implement this as a macro.
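
For reference, the relevant bits of include/linux/hashtable.h look
roughly like this (simplified sketch):

#define DEFINE_HASHTABLE(name, bits)                                    \
        struct hlist_head name[1 << (bits)] = /* ... */

#define HASH_BITS(name) ilog2(ARRAY_SIZE(name))

#define hash_add(hashtable, node, key)                                  \
        hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])

Passed to a real function, the table would decay to a plain
'struct hlist_head *', ARRAY_SIZE() would no longer work and the
bucket count would be lost -- hence the macros.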

> preferable, although this may be overly large for inlining. mm/util.c
> might suit.
>
>> +#define insert_to_mm_slots_hash(_hashtable, _mm, _mm_slot) \
>> +({ \
>> + _mm_slot->mm = _mm; \
>> + hash_add(_hashtable, &_mm_slot->hash, (unsigned long)_mm); \
>> +})
>
> Does this need to be a macro?

Ditto.

>
>
> And the naming. Can we please have
>
> mm_slot_entry
> mm_slot_alloc
> mm_slot_free
> mm_slot_get
> mm_slot_insert
>
> Also, "get" usually implies that a refcout is taken on the obtained
> object, so mm_slot_lookup() would be more appropriate.

These names are better; I will switch to them in the next version.
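
For what it's worth, a rough sketch of how the renamed helpers might
look (hypothetical; the actual next version may differ):

static inline void *mm_slot_alloc(struct kmem_cache *cache)
{
        if (!cache)     /* initialization failed */
                return NULL;
        return kmem_cache_zalloc(cache, GFP_KERNEL);
}

static inline void mm_slot_free(struct kmem_cache *cache, void *objp)
{
        kmem_cache_free(cache, objp);
}

/* get_mm_slot()             -> mm_slot_lookup() (no refcount is taken)
 * insert_to_mm_slots_hash() -> mm_slot_insert()
 * mm_slot_entry() keeps its name.
 */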

Thanks,
Qi

--
Thanks,
Qi

2022-08-30 17:59:50

by Yang Shi

Subject: Re: [PATCH 1/7] mm: introduce common struct mm_slot

On Mon, Aug 29, 2022 at 12:51 PM Andrew Morton
<[email protected]> wrote:
>
> On Mon, 29 Aug 2022 22:30:49 +0800 Qi Zheng <[email protected]> wrote:
>
> > At present, both THP and KSM module have similar structures
> > mm_slot for organizing and recording the information required
> > for scanning mm, and each defines the following exactly the
> > same operation functions:
> >
> > - alloc_mm_slot
> > - free_mm_slot
> > - get_mm_slot
> > - insert_to_mm_slots_hash
> >
> > In order to de-duplicate these codes, this patch introduces a
> > common struct mm_slot, and subsequent patches will let THP and
> > KSM to use it.
>
> Seems like a good idea.
>
> > --- /dev/null
> > +++ b/mm/mm_slot.h
> > @@ -0,0 +1,55 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#ifndef _LINUX_MM_SLOT_H
> > +#define _LINUX_MM_SLOT_H
> > +
> > +#include <linux/hashtable.h>
> > +#include <linux/slab.h>
> > +
> > +/*
> > + * struct mm_slot - hash lookup from mm to mm_slot
> > + * @hash: link to the mm_slots hash list
> > + * @mm_node: link into the mm_slots list
> > + * @mm: the mm that this information is valid for
> > + */
> > +struct mm_slot {
> > + struct hlist_node hash;
> > + struct list_head mm_node;
> > + struct mm_struct *mm;
> > +};
>
> It appears that the presence of an mm_struct in the hash list does not
> contribute to the mm_struct's refcount? That's somewhat unexpected.

I haven't found time to look into the series yet, but when the
mm/mm_slot was added to the list, mmgrab() was definitely called,
unless that was changed by this series.
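
For reference, the pairing as it appears in the enter/exit paths
(partially visible in the diffs above, and unchanged by this series):

/* __ksm_enter() / __khugepaged_enter(): take a reference when the
 * mm_slot is inserted:
 */
mmgrab(mm);

/* ... and drop it only once the slot is removed and freed, e.g. in
 * __ksm_exit() / collect_mm_slot():
 */
free_mm_slot(mm_slot_cache, mm_slot);
mmdrop(mm);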

>
> It would be helpful to add some words here describing the means by
> which a user of mm_slot would prevent the mm_struct from getting freed
> while on the list. I assume "caller must maintain a reference on the
> mm_struct while it remains on an mm_slot hash list"?
>
> > +#define mm_slot_entry(ptr, type, member) \
> > + container_of(ptr, type, member)
> > +
> > +static inline void *alloc_mm_slot(struct kmem_cache *cache)
> > +{
> > + if (!cache) /* initialization failed */
> > + return NULL;
> > + return kmem_cache_zalloc(cache, GFP_KERNEL);
> > +}
> > +
> > +static inline void free_mm_slot(struct kmem_cache *cache, void *objp)
> > +{
> > + kmem_cache_free(cache, objp);
> > +}
> > +
> > +#define get_mm_slot(_hashtable, _mm) \
> > +({ \
> > + struct mm_slot *tmp_slot, *mm_slot = NULL; \
> > + \
> > + hash_for_each_possible(_hashtable, tmp_slot, hash, (unsigned long)_mm) \
> > + if (_mm == tmp_slot->mm) { \
> > + mm_slot = tmp_slot; \
> > + break; \
> > + } \
> > + \
> > + mm_slot; \
> > +})
>
> Is there a reason why this must be implemented as a macro? That's
> preferable, although this may be overly large for inlining. mm/util.c
> might suit.
>
> > +#define insert_to_mm_slots_hash(_hashtable, _mm, _mm_slot) \
> > +({ \
> > + _mm_slot->mm = _mm; \
> > + hash_add(_hashtable, &_mm_slot->hash, (unsigned long)_mm); \
> > +})
>
> Does this need to be a macro?
>
>
> And the naming. Can we please have
>
> mm_slot_entry
> mm_slot_alloc
> mm_slot_free
> mm_slot_get
> mm_slot_insert
>
> Also, "get" usually implies that a refcout is taken on the obtained
> object, so mm_slot_lookup() would be more appropriate.
>

2022-08-31 04:19:46

by Qi Zheng

Subject: Re: [PATCH 1/7] mm: introduce common struct mm_slot



On 2022/8/31 01:03, Yang Shi wrote:
> On Mon, Aug 29, 2022 at 12:51 PM Andrew Morton
> <[email protected]> wrote:
>>
>> On Mon, 29 Aug 2022 22:30:49 +0800 Qi Zheng <[email protected]> wrote:
>>
>>> At present, both THP and KSM module have similar structures
>>> mm_slot for organizing and recording the information required
>>> for scanning mm, and each defines the following exactly the
>>> same operation functions:
>>>
>>> - alloc_mm_slot
>>> - free_mm_slot
>>> - get_mm_slot
>>> - insert_to_mm_slots_hash
>>>
>>> In order to de-duplicate these codes, this patch introduces a
>>> common struct mm_slot, and subsequent patches will let THP and
>>> KSM to use it.
>>
>> Seems like a good idea.
>>
>>> --- /dev/null
>>> +++ b/mm/mm_slot.h
>>> @@ -0,0 +1,55 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +
>>> +#ifndef _LINUX_MM_SLOT_H
>>> +#define _LINUX_MM_SLOT_H
>>> +
>>> +#include <linux/hashtable.h>
>>> +#include <linux/slab.h>
>>> +
>>> +/*
>>> + * struct mm_slot - hash lookup from mm to mm_slot
>>> + * @hash: link to the mm_slots hash list
>>> + * @mm_node: link into the mm_slots list
>>> + * @mm: the mm that this information is valid for
>>> + */
>>> +struct mm_slot {
>>> + struct hlist_node hash;
>>> + struct list_head mm_node;
>>> + struct mm_struct *mm;
>>> +};
>>
>> It appears that the presence of an mm_struct in the hash list does not
>> contribute to the mm_struct's refcount? That's somewhat unexpected.
>
> I didn't find time to look into the series yet, but when the
> mm/mm_slot was added to the list, mmgrab() was definitely called if
> this was not changed by the series.

Yeah, and this series does not change that.

>
>>
>> It would be helpful to add some words here describing the means by
>> which a user of mm_slot would prevent the mm_struct from getting freed
>> while on the list. I assume "caller must maintain a reference on the
>> mm_struct while it remains on an mm_slot hash list"?
>>
>>> +#define mm_slot_entry(ptr, type, member) \
>>> + container_of(ptr, type, member)
>>> +
>>> +static inline void *alloc_mm_slot(struct kmem_cache *cache)
>>> +{
>>> + if (!cache) /* initialization failed */
>>> + return NULL;
>>> + return kmem_cache_zalloc(cache, GFP_KERNEL);
>>> +}
>>> +
>>> +static inline void free_mm_slot(struct kmem_cache *cache, void *objp)
>>> +{
>>> + kmem_cache_free(cache, objp);
>>> +}
>>> +
>>> +#define get_mm_slot(_hashtable, _mm) \
>>> +({ \
>>> + struct mm_slot *tmp_slot, *mm_slot = NULL; \
>>> + \
>>> + hash_for_each_possible(_hashtable, tmp_slot, hash, (unsigned long)_mm) \
>>> + if (_mm == tmp_slot->mm) { \
>>> + mm_slot = tmp_slot; \
>>> + break; \
>>> + } \
>>> + \
>>> + mm_slot; \
>>> +})
>>
>> Is there a reason why this must be implemented as a macro? That's
>> preferable, although this may be overly large for inlining. mm/util.c
>> might suit.
>>
>>> +#define insert_to_mm_slots_hash(_hashtable, _mm, _mm_slot) \
>>> +({ \
>>> + _mm_slot->mm = _mm; \
>>> + hash_add(_hashtable, &_mm_slot->hash, (unsigned long)_mm); \
>>> +})
>>
>> Does this need to be a macro?
>>
>>
>> And the naming. Can we please have
>>
>> mm_slot_entry
>> mm_slot_alloc
>> mm_slot_free
>> mm_slot_get
>> mm_slot_insert
>>
>> Also, "get" usually implies that a refcout is taken on the obtained
>> object, so mm_slot_lookup() would be more appropriate.
>>

--
Thanks,
Qi