2023-04-14 02:19:45

by Longlong Xia

[permalink] [raw]
Subject: [PATCH v2 0/2] mm: ksm: support hwpoison for ksm page

Currently, ksm does not support hwpoison. As ksm is being used more widely
for deduplication at the system level, container level, and process level,
supporting hwpoison for ksm has become increasingly important. However, ksm
pages were not processed by hwpoison in 2009 [1].

The main method of implementation:
1. Refactor add_to_kill() and add new add_to_kill_*() to better accommodate
the handling of different types of pages.
2. Add collect_procs_ksm() to collect processes when the error hit an ksm
page.
3. Add task_in_to_kill_list() to avoid duplicate addition of tsk to the
to_kill list.
4. Try_to_unmap ksm page (already supported).
5. Handle related processes such as sending SIGBUS.

Tested with poisoning to ksm page from
1) different process
2) one process

and with/without memory_failure_early_kill set, the processes
are killed as expected with the patchset.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/
commit/?h=01e00f880ca700376e1845cf7a2524ebe68e47d6

v2:
- Rename the "addr" argument to "ksm_addr" in both the
"__add__add_to_kill()" and "add_to_kill_ksm()"
- Collect RB/TB

Longlong Xia (2):
mm: memory-failure: Refactor add_to_kill()
mm: ksm: Support hwpoison for ksm page

include/linux/ksm.h | 11 ++++++++
include/linux/mm.h | 7 +++++
mm/ksm.c | 45 ++++++++++++++++++++++++++++++++
mm/memory-failure.c | 63 +++++++++++++++++++++++++++++++++------------
4 files changed, 109 insertions(+), 17 deletions(-)

--
2.25.1


2023-04-14 02:19:47

by Longlong Xia

[permalink] [raw]
Subject: [PATCH v2 1/2] mm: memory-failure: Refactor add_to_kill()

The page_address_in_vma() is used to find the user virtual address of page
in add_to_kill(), but it doesn't support ksm due to the ksm page->index
unusable, add an ksm_addr as parameter to add_to_kill(), let's the caller
to pass it, also rename the function to __add_to_kill(), and adding
add_to_kill_anon_file() for handling anonymous pages and file pages,
adding add_to_kill_fsdax() for handling fsdax pages.

Signed-off-by: Longlong Xia <[email protected]>
Tested-by: Naoya Horiguchi <[email protected]>
Reviewed-by: Kefeng Wang <[email protected]>
---
mm/memory-failure.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fae9baf3be162..f5e1d80a82738 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -405,9 +405,9 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
* page->mapping are sufficient for mapping the page back to its
* corresponding user virtual address.
*/
-static void add_to_kill(struct task_struct *tsk, struct page *p,
- pgoff_t fsdax_pgoff, struct vm_area_struct *vma,
- struct list_head *to_kill)
+static void __add_to_kill(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma, struct list_head *to_kill,
+ unsigned long ksm_addr, pgoff_t fsdax_pgoff)
{
struct to_kill *tk;

@@ -417,7 +417,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
return;
}

- tk->addr = page_address_in_vma(p, vma);
+ tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
if (is_zone_device_page(p)) {
if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
@@ -448,6 +448,13 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
list_add_tail(&tk->nd, to_kill);
}

+static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma,
+ struct list_head *to_kill)
+{
+ __add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+}
+
/*
* Kill the processes that have been collected earlier.
*
@@ -573,7 +580,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
continue;
if (!page_mapped_in_vma(page, vma))
continue;
- add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma, to_kill);
+ add_to_kill_anon_file(t, page, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
@@ -609,8 +616,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
* to be informed of all such data corruptions.
*/
if (vma->vm_mm == t->mm)
- add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma,
- to_kill);
+ add_to_kill_anon_file(t, page, vma, to_kill);
}
}
read_unlock(&tasklist_lock);
@@ -618,6 +624,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
}

#ifdef CONFIG_FS_DAX
+static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma,
+ struct list_head *to_kill, pgoff_t pgoff)
+{
+ __add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+}
+
/*
* Collect processes when the error hit a fsdax page.
*/
@@ -637,7 +650,7 @@ static void collect_procs_fsdax(struct page *page,
continue;
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
if (vma->vm_mm == t->mm)
- add_to_kill(t, page, pgoff, vma, to_kill);
+ add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
}
}
read_unlock(&tasklist_lock);
--
2.25.1

2023-04-14 02:19:49

by Longlong Xia

[permalink] [raw]
Subject: [PATCH v2 2/2] mm: ksm: Support hwpoison for ksm page

hwpoison_user_mappings() is updated to support ksm pages, and add
collect_procs_ksm() to collect processes when the error hit an ksm
page. The difference from collect_procs_anon() is that it also needs
to traverse the rmap-item list on the stable node of the ksm page.
At the same time, add_to_kill_ksm() is added to handle ksm pages. And
task_in_to_kill_list() is added to avoid duplicate addition of tsk to
the to_kill list. This is because when scanning the list, if the pages
that make up the ksm page all come from the same process, they may be
added repeatedly.

Signed-off-by: Longlong Xia <[email protected]>
Tested-by: Naoya Horiguchi <[email protected]>
Reviewed-by: Kefeng Wang <[email protected]>
---
include/linux/ksm.h | 11 +++++++++++
include/linux/mm.h | 7 +++++++
mm/ksm.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
mm/memory-failure.c | 34 +++++++++++++++++++++++++---------
4 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 7e232ba59b865..d9e701326a882 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -51,6 +51,10 @@ struct page *ksm_might_need_to_copy(struct page *page,
void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc);
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);

+#ifdef CONFIG_MEMORY_FAILURE
+void collect_procs_ksm(struct page *page, struct list_head *to_kill,
+ int force_early);
+#endif
#else /* !CONFIG_KSM */

static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
@@ -62,6 +66,13 @@ static inline void ksm_exit(struct mm_struct *mm)
{
}

+#ifdef CONFIG_MEMORY_FAILURE
+static inline void collect_procs_ksm(struct page *page,
+ struct list_head *to_kill, int force_early)
+{
+}
+#endif
+
#ifdef CONFIG_MMU
static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f79667824eb6..342c0d1c31f92 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3453,6 +3453,7 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
bool *migratable_cleared);
void num_poisoned_pages_inc(unsigned long pfn);
void num_poisoned_pages_sub(unsigned long pfn, long i);
+struct task_struct *task_early_kill(struct task_struct *tsk, int force_early);
#else
static inline void memory_failure_queue(unsigned long pfn, int flags)
{
@@ -3473,6 +3474,12 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
}
#endif

+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM)
+void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma, struct list_head *to_kill,
+ unsigned long ksm_addr);
+#endif
+
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
extern void memblk_nr_poison_inc(unsigned long pfn);
extern void memblk_nr_poison_sub(unsigned long pfn, long i);
diff --git a/mm/ksm.c b/mm/ksm.c
index 2b8d30068cbbd..29e4be80b6c57 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2721,6 +2721,51 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
goto again;
}

+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Collect processes when the error hit an ksm page.
+ */
+void collect_procs_ksm(struct page *page, struct list_head *to_kill,
+ int force_early)
+{
+ struct ksm_stable_node *stable_node;
+ struct ksm_rmap_item *rmap_item;
+ struct folio *folio = page_folio(page);
+ struct vm_area_struct *vma;
+ struct task_struct *tsk;
+
+ stable_node = folio_stable_node(folio);
+ if (!stable_node)
+ return;
+ hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) {
+ struct anon_vma *av = rmap_item->anon_vma;
+
+ anon_vma_lock_read(av);
+ read_lock(&tasklist_lock);
+ for_each_process(tsk) {
+ struct anon_vma_chain *vmac;
+ unsigned long addr;
+ struct task_struct *t =
+ task_early_kill(tsk, force_early);
+ if (!t)
+ continue;
+ anon_vma_interval_tree_foreach(vmac, &av->rb_root, 0,
+ ULONG_MAX)
+ {
+ vma = vmac->vma;
+ if (vma->vm_mm == t->mm) {
+ addr = rmap_item->address & PAGE_MASK;
+ add_to_kill_ksm(t, page, vma, to_kill,
+ addr);
+ }
+ }
+ }
+ read_unlock(&tasklist_lock);
+ anon_vma_unlock_read(av);
+ }
+}
+#endif
+
#ifdef CONFIG_MIGRATION
void folio_migrate_ksm(struct folio *newfolio, struct folio *folio)
{
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f5e1d80a82738..6ed97e78b5b8a 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -455,6 +455,27 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
}

+#ifdef CONFIG_KSM
+static bool task_in_to_kill_list(struct list_head *to_kill,
+ struct task_struct *tsk)
+{
+ struct to_kill *tk, *next;
+
+ list_for_each_entry_safe(tk, next, to_kill, nd) {
+ if (tk->tsk == tsk)
+ return true;
+ }
+
+ return false;
+}
+void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
+ struct vm_area_struct *vma, struct list_head *to_kill,
+ unsigned long ksm_addr)
+{
+ if (!task_in_to_kill_list(to_kill, tsk))
+ __add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+}
+#endif
/*
* Kill the processes that have been collected earlier.
*
@@ -534,8 +555,7 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
* processes sharing the same error page,if the process is "early kill", the
* task_struct of the dedicated thread will also be returned.
*/
-static struct task_struct *task_early_kill(struct task_struct *tsk,
- int force_early)
+struct task_struct *task_early_kill(struct task_struct *tsk, int force_early)
{
if (!tsk->mm)
return NULL;
@@ -666,8 +686,9 @@ static void collect_procs(struct page *page, struct list_head *tokill,
{
if (!page->mapping)
return;
-
- if (PageAnon(page))
+ if (unlikely(PageKsm(page)))
+ collect_procs_ksm(page, tokill, force_early);
+ else if (PageAnon(page))
collect_procs_anon(page, tokill, force_early);
else
collect_procs_file(page, tokill, force_early);
@@ -1522,11 +1543,6 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
if (!page_mapped(hpage))
return true;

- if (PageKsm(p)) {
- pr_err("%#lx: can't handle KSM pages.\n", pfn);
- return false;
- }
-
if (PageSwapCache(p)) {
pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
ttu &= ~TTU_HWPOISON;
--
2.25.1