From: "Huang, Ying" <[email protected]>
While reviewing the code of migrate_pages() and building a test program
for it, several bugs in the error paths were identified. They are fixed
in this series.
Most patches are tested via
- Apply error-inject.patch in Linux kernel
- Compile test-migrate.c (with -lnuma)
- Test with test-migrate.sh
error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
It turns out that error injection is an important tool to fix bugs in
error path.
Changes:
v3:
- Rebased on mm-unstable (20220816)
- Added Baolin's patch to avoid retrying 10 times for the subpages of fail-to-migrate THPs
v2:
- Rebased on v5.19-rc5
- Addressed some comments from Baolin, Thanks!
- Added reviewed-by tags
Best Regards,
Huang, Ying
------------------------- error-inject.patch -------------------------
From 295ea21204f3f025a041fe39c68a2eaec8313c68 Mon Sep 17 00:00:00 2001
From: Huang Ying <[email protected]>
Date: Tue, 21 Jun 2022 11:08:30 +0800
Subject: [PATCH] migrate_pages: error inject
---
mm/migrate.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 55 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 399904015d23..87d47064ec6c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -337,6 +337,42 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
}
#endif
+#define EI_MP_ENOSYS 0x0001
+#define EI_MP_THP_ENOMEM 0x0002
+#define EI_MP_NP_ENOMEM 0x0004
+#define EI_MP_EAGAIN 0x0008
+#define EI_MP_EOTHER 0x0010
+#define EI_MP_NOSPLIT 0x0020
+#define EI_MP_SPLIT_FAIL 0x0040
+#define EI_MP_EAGAIN_PERM 0x0080
+#define EI_MP_EBUSY 0x0100
+
+static unsigned int ei_migrate_pages;
+
+module_param(ei_migrate_pages, uint, 0644);
+
+static bool ei_thp_migration_supported(void)
+{
+ if (ei_migrate_pages & EI_MP_ENOSYS)
+ return false;
+ else
+ return thp_migration_supported();
+}
+
+static int ei_trylock_page(struct page *page)
+{
+ if (ei_migrate_pages & EI_MP_EAGAIN)
+ return 0;
+ return trylock_page(page);
+}
+
+static int ei_split_huge_page_to_list(struct page *page, struct list_head *list)
+{
+ if (ei_migrate_pages & EI_MP_SPLIT_FAIL)
+ return -EBUSY;
+ return split_huge_page_to_list(page, list);
+}
+
static int expected_page_refs(struct address_space *mapping, struct page *page)
{
int expected_count = 1;
@@ -368,6 +404,9 @@ int folio_migrate_mapping(struct address_space *mapping,
if (folio_ref_count(folio) != expected_count)
return -EAGAIN;
+ if (ei_migrate_pages & EI_MP_EAGAIN_PERM)
+ return -EAGAIN;
+
/* No turning back from here */
newfolio->index = folio->index;
newfolio->mapping = folio->mapping;
@@ -929,7 +968,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
struct anon_vma *anon_vma = NULL;
bool is_lru = !__PageMovable(page);
- if (!trylock_page(page)) {
+ if (!ei_trylock_page(page)) {
if (!force || mode == MIGRATE_ASYNC)
goto out;
@@ -952,6 +991,11 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
lock_page(page);
}
+ if (ei_migrate_pages & EI_MP_EBUSY) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+
if (PageWriteback(page)) {
/*
* Only in the case of a full synchronous migration is it
@@ -1086,7 +1130,7 @@ static int unmap_and_move(new_page_t get_new_page,
int rc = MIGRATEPAGE_SUCCESS;
struct page *newpage = NULL;
- if (!thp_migration_supported() && PageTransHuge(page))
+ if (!ei_thp_migration_supported() && PageTransHuge(page))
return -ENOSYS;
if (page_count(page) == 1) {
@@ -1102,6 +1146,11 @@ static int unmap_and_move(new_page_t get_new_page,
goto out;
}
+ if ((ei_migrate_pages & EI_MP_THP_ENOMEM) && PageTransHuge(page))
+ return -ENOMEM;
+ if ((ei_migrate_pages & EI_MP_NP_ENOMEM) && !PageTransHuge(page))
+ return -ENOMEM;
+
newpage = get_new_page(page, private);
if (!newpage)
return -ENOMEM;
@@ -1305,7 +1354,7 @@ static inline int try_split_thp(struct page *page, struct list_head *split_pages
int rc;
lock_page(page);
- rc = split_huge_page_to_list(page, split_pages);
+ rc = ei_split_huge_page_to_list(page, split_pages);
unlock_page(page);
return rc;
@@ -1358,6 +1407,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
bool nosplit = (reason == MR_NUMA_MISPLACED);
bool no_subpage_counting = false;
+ if (ei_migrate_pages & EI_MP_NOSPLIT)
+ nosplit = true;
+
trace_mm_migrate_pages_start(mode, reason);
thp_subpage_migration:
--
2.30.2
------------------------- test-migrate.c -------------------------------------
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include <numaif.h>
#include <numa.h>
#ifndef MADV_FREE
#define MADV_FREE 8 /* free pages only if memory pressure */
#endif
#define ONE_MB (1024 * 1024)
#define MAP_SIZE (16 * ONE_MB)
#define THP_SIZE (2 * ONE_MB)
#define THP_MASK (THP_SIZE - 1)
/*
 * Evaluate cond exactly once; when the result is non-zero, pass it together
 * with msg to error_exit(), which reports the failure and exits.
 */
#define ERR_EXIT_ON(cond, msg) \
do { \
int __cond_in_macro = (cond); \
if (__cond_in_macro) \
error_exit(__cond_in_macro, (msg)); \
} while (0)
/*
 * Write a report to stderr: the caller's message, the return value and the
 * text for the current errno. When nr is non-zero, the first nr entries of
 * status follow on a second line.
 */
void error_msg(int ret, int nr, int *status, const char *msg)
{
	int idx = 0;

	fprintf(stderr, "Error: %s, ret : %d, error: %s\n",
		msg, ret, strerror(errno));
	if (nr == 0)
		return;

	fputs("status: ", stderr);
	while (idx < nr) {
		fprintf(stderr, "%d ", status[idx]);
		idx++;
	}
	fputc('\n', stderr);
}
/*
 * Report msg and ret through error_msg() without a status array, then
 * terminate the process with exit status 1.
 */
void error_exit(int ret, const char *msg)
{
	int *const no_status = NULL;

	error_msg(ret, 0, no_status, msg);
	exit(1);
}
/* Mapping set up by create_map(true), i.e. the one madvise()d with MADV_HUGEPAGE. */
void *addr_thp;
/* Mapping set up by create_map(false). */
void *addr;
/* THP_SIZE-aligned address computed from addr_thp in prepare(). */
char *pn;
/* pn + THP_SIZE. */
char *pn1;
/* THP_SIZE-aligned address computed from addr in prepare(). */
char *pn2;
/* pn2 + THP_SIZE. */
char *pn3;
/* Addresses handed to move_pages(): pn, pn1, pn2, pn3. */
void *pages[4];
/* Per-page status filled in by move_pages(); preset to 1024 by prepare(). */
int status[4];
/*
 * Create one MAP_SIZE anonymous private read/write mapping and publish it:
 * with thp set, the range is first madvise()d with MADV_HUGEPAGE and stored
 * in addr_thp, otherwise it is stored in addr. Any failure exits the program
 * via ERR_EXIT_ON().
 */
void create_map(bool thp)
{
	void *region = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	ERR_EXIT_ON(region == MAP_FAILED, "mmap");

	if (!thp) {
		addr = region;
		return;
	}

	ERR_EXIT_ON(madvise(region, MAP_SIZE, MADV_HUGEPAGE), "advise hugepage");
	addr_thp = region;
}
/*
 * (Re)create both test mappings and reset the arguments for move_pages().
 *
 * Mappings left over from a previous call are unmapped first. Afterwards
 * pn/pn1 point into the MADV_HUGEPAGE mapping and pn2/pn3 into the plain
 * mapping, pages[] holds those four addresses, and every status[] slot is
 * set to 1024. Only the page at pn is written here; callers touch the other
 * pages themselves when they want them populated.
 */
void prepare(void)
{
	/* addr is only non-NULL once a previous call created the mappings. */
	if (addr) {
		munmap(addr_thp, MAP_SIZE);
		munmap(addr, MAP_SIZE);
	}
	create_map(true);
	create_map(false);

	/* Round up to a THP_SIZE boundary inside the huge-page mapping. */
	pn = (char *)(((unsigned long)addr_thp + THP_SIZE) & ~THP_MASK);
	pn1 = pn + THP_SIZE;
	pages[0] = pn;
	pages[1] = pn1;
	*pn = 1;

	/* Same layout for the mapping without MADV_HUGEPAGE. */
	pn2 = (char *)(((unsigned long)addr + THP_SIZE) & ~THP_MASK);
	pn3 = pn2 + THP_SIZE;
	pages[2] = pn2;
	pages[3] = pn3;

	status[0] = status[1] = status[2] = status[3] = 1024;
}
/*
 * Populate all four test pages, ask move_pages() to move them to node 1 and
 * print the return value together with the four status entries.
 */
void test_migrate()
{
	int target_nodes[4] = { 1, 1, 1, 1 };
	pid_t self = getpid();
	int rc;

	prepare();

	/* prepare() wrote to pn only; touch the remaining three pages. */
	*pn1 = 1;
	*pn2 = 1;
	*pn3 = 1;

	rc = move_pages(self, 4, pages, target_nodes, status, MPOL_MF_MOVE_ALL);
	error_msg(rc, 4, status, "move 4 pages");
}
/*
 * Entry point: bind the process to NUMA node 0, then run the migration test.
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	/*
	 * libnuma requires numa_available() to be called, and to succeed,
	 * before any other libnuma function is used.
	 */
	ERR_EXIT_ON(numa_available() < 0, "numa_available");
	/* Do not carry on silently when the process could not be bound. */
	ERR_EXIT_ON(numa_run_on_node(0) < 0, "numa_run_on_node");

	test_migrate();
	return 0;
}
--------------------- test-migrate.sh ----------------------------
#!/bin/bash
PARAM=/sys/module/migrate/parameters/ei_migrate_pages
# Print a banner containing the given label words, followed by every line of
# /proc/vmstat that contains "pgmigrate" or "thp_migration".
get_vmstat()
{
	# Quoted so the label is not subject to word splitting or globbing.
	echo "================= $* ================"
	grep -e '\(pgmigrate\|thp_migration\)' /proc/vmstat
}
# Write the error-injection mask given as $1 to $PARAM, then run
# ./test-migrate with the vmstat counters dumped before and after.
# The remaining arguments are used as the label in the banners.
simple_test()
{
	echo "$1" > "$PARAM"
	shift
	get_vmstat before "$@"
	./test-migrate
	get_vmstat after "$@"
}
#define EI_MP_ENOSYS 0x0001
#define EI_MP_THP_ENOMEM 0x0002
#define EI_MP_NP_ENOMEM 0x0004
#define EI_MP_EAGAIN 0x0008
#define EI_MP_EOTHER 0x0010
#define EI_MP_NOSPLIT 0x0020
#define EI_MP_SPLIT_FAIL 0x0040
#define EI_MP_EAGAIN_PERM 0x0080
#define EI_MP_EBUSY 0x0100
simple_test 0x26 ENOMEM
simple_test 0x81 retry THP subpages
simple_test 0xc1 ENOSYS
simple_test 0x101 ENOSYS
After 10 retries, we will give up and the remaining pages will be
counted as failure in nr_failed and nr_thp_failed. We should count
the failure in nr_failed_pages too. This is done in this patch.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Baolin Wang <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 81daa4dd3bb6..55fbf9669431 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1413,6 +1413,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
int thp_retry = 1;
int nr_failed = 0;
int nr_failed_pages = 0;
+ int nr_retry_pages = 0;
int nr_succeeded = 0;
int nr_thp_succeeded = 0;
int nr_thp_failed = 0;
@@ -1433,6 +1434,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
retry = 0;
thp_retry = 0;
+ nr_retry_pages = 0;
list_for_each_entry_safe(page, page2, from, lru) {
/*
@@ -1506,7 +1508,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_failed++;
}
- nr_failed_pages += nr_subpages;
+ nr_failed_pages += nr_subpages + nr_retry_pages;
/*
* There might be some subpages of fail-to-migrate THPs
* left in thp_split_pages list. Move them back to migration
@@ -1522,6 +1524,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
thp_retry++;
else
retry++;
+ nr_retry_pages += nr_subpages;
break;
case MIGRATEPAGE_SUCCESS:
nr_succeeded += nr_subpages;
@@ -1548,6 +1551,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
if (!no_subpage_counting)
nr_failed += retry;
nr_thp_failed += thp_retry;
+ nr_failed_pages += nr_retry_pages;
/*
* Try to migrate subpages of fail-to-migrate THPs, no nr_failed
* counting in this round, since all subpages of a THP is counted
--
2.30.2
Before commit b5bade978e9b ("mm: migrate: fix the return value of
migrate_pages()"), the tail pages of THP will be put in the "from"
list directly. So one of the loop cursors (page2) needs to be reset,
as is done in try_split_thp() via list_safe_reset_next(). But after
the commit, the tail pages of THP will be put in a dedicated
list (thp_split_pages). That is, the "from" list will not be changed
during splitting. So, it's unnecessary to call list_safe_reset_next()
anymore.
This is a code cleanup, no functionality changes are expected.
Signed-off-by: "Huang, Ying" <[email protected]>
Reviewed-by: Baolin Wang <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1758fd215c0a..19a9b26af7e2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1369,16 +1369,13 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return rc;
}
-static inline int try_split_thp(struct page *page, struct page **page2,
- struct list_head *from)
+static inline int try_split_thp(struct page *page, struct list_head *split_pages)
{
- int rc = 0;
+ int rc;
lock_page(page);
- rc = split_huge_page_to_list(page, from);
+ rc = split_huge_page_to_list(page, split_pages);
unlock_page(page);
- if (!rc)
- list_safe_reset_next(page, *page2, lru);
return rc;
}
@@ -1482,7 +1479,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
/* THP migration is unsupported */
if (is_thp) {
nr_thp_failed++;
- if (!try_split_thp(page, &page2, &thp_split_pages)) {
+ if (!try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
@@ -1501,7 +1498,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
*/
if (is_thp && !nosplit) {
nr_thp_failed++;
- if (!try_split_thp(page, &page2, &thp_split_pages)) {
+ if (!try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
--
2.30.2
The return value of move_pages() syscall is incorrect when counting
the remaining pages to be migrated. For example, for the following
test program,
"
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
#include <numaif.h>
#include <numa.h>
#ifndef MADV_FREE
#define MADV_FREE 8 /* free pages only if memory pressure */
#endif
#define ONE_MB (1024 * 1024)
#define MAP_SIZE (16 * ONE_MB)
#define THP_SIZE (2 * ONE_MB)
#define THP_MASK (THP_SIZE - 1)
/*
 * Evaluate cond exactly once; when the result is non-zero, pass it together
 * with msg to error_exit(), which reports the failure and exits.
 */
#define ERR_EXIT_ON(cond, msg) \
do { \
int __cond_in_macro = (cond); \
if (__cond_in_macro) \
error_exit(__cond_in_macro, (msg)); \
} while (0)
/*
 * Write a report to stderr: the caller's message, the return value and the
 * text for the current errno. When nr is non-zero, the first nr entries of
 * status follow on a second line.
 */
void error_msg(int ret, int nr, int *status, const char *msg)
{
	int idx = 0;

	fprintf(stderr, "Error: %s, ret : %d, error: %s\n",
		msg, ret, strerror(errno));
	if (nr == 0)
		return;

	fputs("status: ", stderr);
	while (idx < nr) {
		fprintf(stderr, "%d ", status[idx]);
		idx++;
	}
	fputc('\n', stderr);
}
/*
 * Report msg and ret through error_msg() without a status array, then
 * terminate the process with exit status 1.
 */
void error_exit(int ret, const char *msg)
{
	int *const no_status = NULL;

	error_msg(ret, 0, no_status, msg);
	exit(1);
}
/* Set from getpagesize() in main(); used as the vmsplice() length in prepare(). */
int page_size;
/* When true, prepare() vmsplice()s the page at pn into the pipe. */
bool do_vmsplice;
/* When true, prepare() madvise()s the mapping with MADV_HUGEPAGE. */
bool do_thp;
/* Pipe recreated by every prepare() call; the write end is the vmsplice() target. */
static int pipe_fds[2];
/* Current test mapping, created by prepare(). */
void *addr;
/* THP_SIZE-aligned address computed from addr in prepare(). */
char *pn;
/* pn + THP_SIZE. */
char *pn1;
/* Addresses handed to move_pages(): pn and pn1. */
void *pages[2];
/* Per-page status filled in by move_pages(); preset to 1024 by prepare(). */
int status[2];
/*
 * Tear down the state of a previous call (mapping and pipe), then build a
 * fresh pipe and MAP_SIZE anonymous mapping. The mapping is madvise()d with
 * MADV_HUGEPAGE when do_thp is set. pn/pn1 and pages[] are pointed at two
 * THP_SIZE-aligned addresses in the mapping, the page at pn is written, and
 * with do_vmsplice set that page is additionally vmsplice()d into the pipe.
 * Both status[] slots are reset to 1024.
 */
void prepare()
{
	if (addr) {
		munmap(addr, MAP_SIZE);
		close(pipe_fds[0]);
		close(pipe_fds[1]);
	}

	ERR_EXIT_ON(pipe(pipe_fds), "pipe");

	addr = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	ERR_EXIT_ON(addr == MAP_FAILED, "mmap");

	if (do_thp)
		ERR_EXIT_ON(madvise(addr, MAP_SIZE, MADV_HUGEPAGE),
			    "advise hugepage");

	/* Round up to a THP_SIZE boundary inside the mapping. */
	pn = (char *)(((unsigned long)addr + THP_SIZE) & ~THP_MASK);
	pn1 = pn + THP_SIZE;
	pages[0] = pn;
	pages[1] = pn1;
	*pn = 1;

	if (do_vmsplice) {
		struct iovec first_page = {
			.iov_base = pn,
			.iov_len = page_size,
		};

		ERR_EXIT_ON(vmsplice(pipe_fds[1], &first_page, 1, 0) < 0,
			    "vmsplice");
	}

	status[1] = 1024;
	status[0] = 1024;
}
/*
 * Run four move_pages() scenarios, each on a fresh prepare()d mapping, and
 * print the return value and status array after every call.
 */
void test_migrate()
{
	int target_nodes[2] = { 1, 1 };
	pid_t self = getpid();
	int rc;

	/* Only the page at pn, which prepare() has written to. */
	prepare();
	rc = move_pages(self, 1, pages, target_nodes, status, MPOL_MF_MOVE_ALL);
	error_msg(rc, 1, status, "move 1 page");

	/* Two addresses, but the page at pn1 has not been written. */
	prepare();
	rc = move_pages(self, 2, pages, target_nodes, status, MPOL_MF_MOVE_ALL);
	error_msg(rc, 2, status, "move 2 pages, page 1 not mapped");

	/* Both pages written, both requested on node 1. */
	prepare();
	*pn1 = 1;
	rc = move_pages(self, 2, pages, target_nodes, status, MPOL_MF_MOVE_ALL);
	error_msg(rc, 2, status, "move 2 pages");

	/* Both pages written, second one requested on node 0. */
	prepare();
	*pn1 = 1;
	target_nodes[1] = 0;
	rc = move_pages(self, 2, pages, target_nodes, status, MPOL_MF_MOVE_ALL);
	error_msg(rc, 2, status, "move 2 pages, page 1 to node 0");
}
/*
 * Entry point: bind the process to NUMA node 0 and run test_migrate() four
 * times, once for each combination of do_thp and do_vmsplice.
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[])
{
	(void)argc;
	(void)argv;

	/*
	 * libnuma requires numa_available() to be called, and to succeed,
	 * before any other libnuma function is used.
	 */
	ERR_EXIT_ON(numa_available() < 0, "numa_available");
	/* Do not carry on silently when the process could not be bound. */
	ERR_EXIT_ON(numa_run_on_node(0) < 0, "numa_run_on_node");

	page_size = getpagesize();

	/* Neither MADV_HUGEPAGE nor vmsplice. */
	test_migrate();

	fprintf(stderr, "\nMake page 0 cannot be migrated:\n");
	do_vmsplice = true;
	test_migrate();

	fprintf(stderr, "\nTest THP:\n");
	do_thp = true;
	do_vmsplice = false;
	test_migrate();

	fprintf(stderr, "\nTHP: make page 0 cannot be migrated:\n");
	do_vmsplice = true;
	test_migrate();

	return 0;
}
"
The output of the current kernel is,
"
Error: move 1 page, ret : 0, error: Success
status: 1
Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
status: 1 -14
Error: move 2 pages, ret : 0, error: Success
status: 1 1
Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
status: 1 0
Make page 0 cannot be migrated:
Error: move 1 page, ret : 0, error: Success
status: 1024
Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
status: 1024 -14
Error: move 2 pages, ret : 0, error: Success
status: 1024 1024
Error: move 2 pages, page 1 to node 0, ret : 1, error: Success
status: 1024 1024
"
While the expected output is,
"
Error: move 1 page, ret : 0, error: Success
status: 1
Error: move 2 pages, page 1 not mapped, ret : 0, error: Success
status: 1 -14
Error: move 2 pages, ret : 0, error: Success
status: 1 1
Error: move 2 pages, page 1 to node 0, ret : 0, error: Success
status: 1 0
Make page 0 cannot be migrated:
Error: move 1 page, ret : 1, error: Success
status: 1024
Error: move 2 pages, page 1 not mapped, ret : 1, error: Success
status: 1024 -14
Error: move 2 pages, ret : 1, error: Success
status: 1024 1024
Error: move 2 pages, page 1 to node 0, ret : 2, error: Success
status: 1024 1024
"
Fix this via correcting the remaining pages counting. With the fix,
the output for the test program as above is expected.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Baolin Wang <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index a35eba462e61..1758fd215c0a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1751,7 +1751,7 @@ static int move_pages_and_store_status(struct mm_struct *mm, int node,
* well.
*/
if (err > 0)
- err += nr_pages - i - 1;
+ err += nr_pages - i;
return err;
}
return store_status(status, start, node, i - start);
@@ -1837,8 +1837,12 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
err = move_pages_and_store_status(mm, current_node, &pagelist,
status, start, i, nr_pages);
- if (err)
+ if (err) {
+ /* We have accounted for page i */
+ if (err > 0)
+ err--;
goto out;
+ }
current_node = NUMA_NO_NODE;
}
out_flush:
--
2.30.2
If a THP fails to be migrated because of -ENOSYS or -ENOMEM, the THP will
be split into thp_split_pages, and after the other pages are migrated,
the pages in thp_split_pages will be migrated with no_subpage_counting ==
true, because their failure has been counted already. If some pages in
thp_split_pages are retried during migration, we should not count
their failure either when no_subpage_counting == true. This patch
fixes the failure counting for retried THP subpages accordingly.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Baolin Wang <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index ae55f08e72ce..0018b5191799 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1545,7 +1545,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
}
}
}
- nr_failed += retry;
+ if (!no_subpage_counting)
+ nr_failed += retry;
nr_thp_failed += thp_retry;
/*
* Try to migrate subpages of fail-to-migrate THPs, no nr_failed
--
2.30.2
From: Baolin Wang <[email protected]>
If a THP fails to migrate due to -ENOSYS or -ENOMEM, the THP will
be split, and migration of the subpages of the fail-to-migrate THP will be
tried again. We should not account the retry counter in this second loop, since
we already accounted 'nr_thp_failed' in the first loop.
Moreover, we do not need to retry 10 times in the -EAGAIN case for the subpages
of a fail-to-migrate THP in the second loop, since we already regarded the
THP as a migration failure. This also saves some migration time (in the worst
case, 512 * 10 tries) according to the previous discussion [1].
[1] https://lore.kernel.org/linux-mm/[email protected]/
Tested-by: "Huang, Ying" <[email protected]>
Signed-off-by: Baolin Wang <[email protected]>
Signed-off-by: "Huang, Ying" <[email protected]>
Cc: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 55fbf9669431..06a653977835 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1522,7 +1522,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
case -EAGAIN:
if (is_thp)
thp_retry++;
- else
+ else if (!no_subpage_counting)
retry++;
nr_retry_pages += nr_subpages;
break;
@@ -1548,8 +1548,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
}
}
}
- if (!no_subpage_counting)
- nr_failed += retry;
+ nr_failed += retry;
nr_thp_failed += thp_retry;
nr_failed_pages += nr_retry_pages;
/*
--
2.30.2
If a THP fails to be migrated, it may be split and retried. But after
splitting, the head page will be left in the "from" list, although the THP
migration failure has been counted already. If the head page
fails to be migrated too, the failure will be counted twice
incorrectly. This patch fixes that by also moving the head page
of the THP to "thp_split_pages" after splitting.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Baolin Wang <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 0223673e42d1..81daa4dd3bb6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1374,6 +1374,8 @@ static inline int try_split_thp(struct page *page, struct list_head *split_pages
lock_page(page);
rc = split_huge_page_to_list(page, split_pages);
unlock_page(page);
+ if (!rc)
+ list_move_tail(&page->lru, split_pages);
return rc;
}
@@ -1433,7 +1435,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
thp_retry = 0;
list_for_each_entry_safe(page, page2, from, lru) {
-retry:
/*
* THP statistics is based on the source huge page.
* Capture required information that might get lost
@@ -1469,10 +1470,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
* retry on the same page with the THP split
* to base pages.
*
- * Head page is retried immediately and tail
- * pages are added to the tail of the list so
- * we encounter them after the rest of the list
- * is processed.
+ * Sub-pages are put in thp_split_pages, and
+ * we will migrate them after the rest of the
+ * list is processed.
*/
case -ENOSYS:
/* THP migration is unsupported */
@@ -1480,7 +1480,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
nr_thp_failed++;
if (!try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
- goto retry;
+ break;
}
/* Hugetlb migration is unsupported */
} else if (!no_subpage_counting) {
@@ -1500,7 +1500,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
/* THP NUMA faulting doesn't split THP to retry. */
if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
- goto retry;
+ break;
}
} else if (!no_subpage_counting) {
nr_failed++;
--
2.30.2
If THP or hugetlbfs page migration isn't supported, unmap_and_move()
or unmap_and_move_huge_page() will return -ENOSYS. For THP, splitting
will be tried, but if splitting doesn't succeed, the THP will be left
in the "from" list wrongly. If some other pages are retried, the THP
migration failure will be counted again. This is fixed by moving the
failed THP from "from" to "ret_pages".
Another issue with the original code is that the handling of the
unsupported case isn't consistent between THP and hugetlbfs pages. This
patch also makes them consistent, so that the code is easier to understand.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Baolin Wang <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 0018b5191799..0223673e42d1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1260,10 +1260,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
* tables or check whether the hugepage is pmd-based or not before
* kicking migration.
*/
- if (!hugepage_migration_supported(page_hstate(hpage))) {
- list_move_tail(&hpage->lru, ret);
+ if (!hugepage_migration_supported(page_hstate(hpage)))
return -ENOSYS;
- }
if (page_count(hpage) == 1) {
/* page was freed from under us. So we are done. */
@@ -1460,6 +1458,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
* page will be put back
* -EAGAIN: stay on the from list
* -ENOMEM: stay on the from list
+ * -ENOSYS: stay on the from list
* Other errno: put on ret_pages list then splice to
* from list
*/
@@ -1489,6 +1488,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
}
nr_failed_pages += nr_subpages;
+ list_move_tail(&page->lru, &ret_pages);
break;
case -ENOMEM:
/*
--
2.30.2
In unmap_and_move(), if the new THP cannot be allocated, -ENOMEM will
be returned, and migrate_pages() will try to split the THP unless
"reason" is MR_NUMA_MISPLACED (that is, nosplit == true). But when
nosplit == true, the THP migration failure will not be counted. This
is incorrect. So in this patch, the THP migration failure will be
counted for -ENOMEM regardless of whether nosplit is true or false. The
nr_failed counting isn't fixed because it's not used. Some comments
are added for it per Baolin's suggestion.
Signed-off-by: "Huang, Ying" <[email protected]>
Fixes: 5984fabb6e82 ("mm: move_pages: report the number of non-attempted pages")
Reviewed-by: Baolin Wang <[email protected]>
Reviewed-by: Oscar Salvador <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Yang Shi <[email protected]>
---
mm/migrate.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 19a9b26af7e2..ae55f08e72ce 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1494,11 +1494,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
/*
* When memory is low, don't bother to try to migrate
* other pages, just exit.
- * THP NUMA faulting doesn't split THP to retry.
*/
- if (is_thp && !nosplit) {
+ if (is_thp) {
nr_thp_failed++;
- if (!try_split_thp(page, &thp_split_pages)) {
+ /* THP NUMA faulting doesn't split THP to retry. */
+ if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
nr_thp_split++;
goto retry;
}
@@ -1514,6 +1514,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
* the caller otherwise the page refcnt will be leaked.
*/
list_splice_init(&thp_split_pages, from);
+ /* nr_failed isn't updated for not used */
nr_thp_failed += thp_retry;
goto out;
case -EAGAIN:
--
2.30.2
On Wed, 17 Aug 2022 16:14:00 +0800 Huang Ying <[email protected]> wrote:
> error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
> It turns out that error injection is an important tool to fix bugs in
> error path.
Indeed, and thanks for doing this.
Did you consider lib/*inject*.c? If so, was it unsuitable?
Andrew Morton <[email protected]> writes:
> On Wed, 17 Aug 2022 16:14:00 +0800 Huang Ying <[email protected]> wrote:
>
>> error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
>> It turns out that error injection is an important tool to fix bugs in
>> error path.
>
> Indeed, and thanks for doing this.
>
> Did you consider lib/*inject*.c? If so, was it unsuitable?
I haven't taken a deep look at that yet. Will do.
Best Regards,
Huang, Ying
Andrew Morton <[email protected]> writes:
> On Wed, 17 Aug 2022 16:14:00 +0800 Huang Ying <[email protected]> wrote:
>
>> error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
>> It turns out that error injection is an important tool to fix bugs in
>> error path.
>
> Indeed, and thanks for doing this.
>
> Did you consider lib/*inject*.c? If so, was it unsuitable?
I have done some experiments to use some existing error injection
mechanisms in kernel to test the error path of migrate_pages(). After
some googling, I found that the BPF based error injection described in
the following URL is most suitable for my purpose.
https://lwn.net/Articles/740146/
Because the BPF seems quite flexible to satisfy various requirements of
error injection. With it, the execution of some functions can be
skipped and some specified error code can be returned instead.
Works out of box
================
Some error injection functionality just works out of box. For example,
inject some page allocation error in some path. Firstly,
CONFIG_BPF_KPROBE_OVERRIDE needs to be enabled in kernel config. Then,
a simple bpftrace script as follows can be used to inject page
allocation error during migrate_pages().
--------------------ENOMEM-----------------------
kprobe:migrate_pages { @in_migrate_pages++; }
kretprobe:migrate_pages { @in_migrate_pages--; }
kprobe:should_fail_alloc_page / @in_migrate_pages > 0 / {
override(1);
}
-------------------------------------------------
The call chain of error injection is specified via the first 2 lines. I
copied the methods used in BCC inject script. Is there any better
method to specify the call chain?
We can inject error only for THP page allocation too.
--------------------ENOMEM THP-------------------
kprobe:migrate_pages { @in_migrate_pages++; }
kretprobe:migrate_pages { @in_migrate_pages--; }
kprobe:should_fail_alloc_page / @in_migrate_pages > 0 && arg1 == 9 / {
override(1);
}
-------------------------------------------------
Use some hack to override any function
======================================
The in-kernel BPF based error injection mechanism can only override
function return value for the functions in the whitelist, that is,
functions marked with ALLOW_ERROR_INJECTION(). That is quite limited.
The thorough error path testing needs to override the return value of
arbitrary function. So, I use a simple hack patch as follows for that.
-----------------------8<---------------------------------
From 3bcd401a3731bc7316d222501070a2a71fdf7170 Mon Sep 17 00:00:00 2001
From: Huang Ying <[email protected]>
Date: Tue, 20 Sep 2022 09:08:25 +0800
Subject: [PATCH] dbg: allow override any function with bpf_error_injection
---
lib/error-inject.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/lib/error-inject.c b/lib/error-inject.c
index 1afca1b1cdea..82a402e0f15c 100644
--- a/lib/error-inject.c
+++ b/lib/error-inject.c
@@ -21,6 +21,7 @@ struct ei_entry {
void *priv;
};
+#if 0
bool within_error_injection_list(unsigned long addr)
{
struct ei_entry *ent;
@@ -36,6 +37,12 @@ bool within_error_injection_list(unsigned long addr)
mutex_unlock(&ei_mutex);
return ret;
}
+#else
+bool within_error_injection_list(unsigned long addr)
+{
+ return true;
+}
+#endif
int get_injectable_error_type(unsigned long addr)
{
--
2.35.1
----------------------------------------------------------
With this debug patch, most error paths can be tested. For example,
--------------------ENOSYS THP + EAGAIN----------
#include <linux/mm.h>
kprobe:migrate_pages { @in_migrate_pages++; }
kretprobe:migrate_pages { @in_migrate_pages--; }
kprobe:unmap_and_move / @in_migrate_pages > 0 / {
if (((struct page *)arg3)->flags & (1 << PG_head)) {
override(-38);
} else {
override(-11);
}
}
-------------------------------------------------
With this, unmap_and_move() will return -ENOSYS (-38) for THP, and
-EAGAIN (-11) for normal page. This can be used to test the
corresponding error path in migrate_pages().
I think that it's quite common for developers to inject error for
arbitrary function to test the error path. Is it a good idea to turn on
the arbitrary error injection if a special kernel configuration
(e.g. CONFIG_BPF_KPROBE_OVERRIDE_ANY_FUNCTION) is enabled for debugging
purpose only?
Some hacks are still necessary for complete coverage
====================================================
Even if we can override the return value of any function, some hacks
are still necessary for complete coverage. For example, some functions
may be inlined; if we want to override their return value, we need to mark
them with "noinline". And some errors cannot be injected by overriding a
return value directly. For example, to test the case in migrate_pages()
where THP split isn't allowed, a hack patch needs to be used. The patch
below does that.
-----------------------8<---------------------------------
From ca371806dc7f96148cbdf03fdf8f92309306a325 Mon Sep 17 00:00:00 2001
From: Huang Ying <[email protected]>
Date: Tue, 20 Sep 2022 09:37:53 +0800
Subject: [PATCH] dbg: inject nosplit
---
mm/migrate.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/mm/migrate.c b/mm/migrate.c
index 571d8c9fd5bc..d4ee76c285b2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -57,6 +57,11 @@
#include "internal.h"
+static noinline bool error_inject_nosplit(void)
+{
+ return false;
+}
+
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
const struct movable_operations *mops;
@@ -1412,6 +1417,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
bool nosplit = (reason == MR_NUMA_MISPLACED);
bool no_subpage_counting = false;
+ if (error_inject_nosplit())
+ nosplit = true;
+
trace_mm_migrate_pages_start(mode, reason);
thp_subpage_migration:
--
2.35.1
----------------------------------------------------------
With the help of the above patch, the following bpftrace script can
inject the expected error,
--------------------ENOMEM THP + nosplit---------
kprobe:migrate_pages { @in_migrate_pages++; }
kretprobe:migrate_pages { @in_migrate_pages--; }
kprobe:should_fail_alloc_page / @in_migrate_pages > 0 && arg1 == 9 / {
override(1);
}
kprobe:error_inject_nosplit / @in_migrate_pages > 0 / {
override(1);
}
-------------------------------------------------
Although some hack patches are needed, this is still simpler than my
original hand-made error injection solution. So I recommend that
developers use it for error path testing in the future.
Best Regards,
Huang, Ying
On Mon, Sep 19, 2022 at 7:44 PM Huang, Ying <[email protected]> wrote:
>
> Andrew Morton <[email protected]> writes:
>
> > On Wed, 17 Aug 2022 16:14:00 +0800 Huang Ying <[email protected]> wrote:
> >
> >> error-inject.patch, test-migrate.c, and test-migrate.sh are as below.
> >> It turns out that error injection is an important tool to fix bugs in
> >> error path.
> >
> > Indeed, and thanks for doing this.
> >
> > Did you consider lib/*inject*.c? If so, was it unsuitable?
>
Thanks, Ying. Great tips!
> I have done some experiments to use some existing error injection
> mechanisms in kernel to test the error path of migrate_pages(). After
> some googling, I found that the BPF based error injection described in
> the following URL is most suitable for my purpose.
>
> https://lwn.net/Articles/740146/
>
> Because the BPF seems quite flexible to satisfy various requirements of
> error injection. With it, the execution of some functions can be
> skipped and some specified error code can be returned instead.
>
> Works out of box
> ================
>
> Some error injection functionality just works out of box. For example,
> inject some page allocation error in some path. Firstly,
> CONFIG_BPF_KPROBE_OVERRIDE needs to be enabled in kernel config. Then,
> a simple bpftrace script as follows can be used to inject page
> allocation error during migrate_pages().
>
> --------------------ENOMEM-----------------------
> kprobe:migrate_pages { @in_migrate_pages++; }
> kretprobe:migrate_pages { @in_migrate_pages--; }
> kprobe:should_fail_alloc_page / @in_migrate_pages > 0 / {
> override(1);
> }
> -------------------------------------------------
>
> The call chain of error injection is specified via the first 2 lines. I
> copied the methods used in BCC inject script. Is there any better
> method to specify the call chain?
>
> We can inject error only for THP page allocation too.
>
> --------------------ENOMEM THP-------------------
> kprobe:migrate_pages { @in_migrate_pages++; }
> kretprobe:migrate_pages { @in_migrate_pages--; }
> kprobe:should_fail_alloc_page / @in_migrate_pages > 0 && arg1 == 9 / {
> override(1);
> }
> -------------------------------------------------
>
> Use some hack to override any function
> ======================================
>
> The in-kernel BPF based error injection mechanism can only override
> function return value for the functions in the whitelist, that is,
> functions marked with ALLOW_ERROR_INJECTION(). That is quite limited.
> The thorough error path testing needs to override the return value of
> arbitrary function. So, I use a simple hack patch as follows for that.
>
> -----------------------8<---------------------------------
> From 3bcd401a3731bc7316d222501070a2a71fdf7170 Mon Sep 17 00:00:00 2001
> From: Huang Ying <[email protected]>
> Date: Tue, 20 Sep 2022 09:08:25 +0800
> Subject: [PATCH] dbg: allow override any function with bpf_error_injection
>
> ---
> lib/error-inject.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/lib/error-inject.c b/lib/error-inject.c
> index 1afca1b1cdea..82a402e0f15c 100644
> --- a/lib/error-inject.c
> +++ b/lib/error-inject.c
> @@ -21,6 +21,7 @@ struct ei_entry {
> void *priv;
> };
>
> +#if 0
> bool within_error_injection_list(unsigned long addr)
> {
> struct ei_entry *ent;
> @@ -36,6 +37,12 @@ bool within_error_injection_list(unsigned long addr)
> mutex_unlock(&ei_mutex);
> return ret;
> }
> +#else
> +bool within_error_injection_list(unsigned long addr)
> +{
> + return true;
> +}
> +#endif
>
> int get_injectable_error_type(unsigned long addr)
> {
> --
> 2.35.1
> ----------------------------------------------------------
>
> With this debug patch, most error path can be tested. For example,
>
> --------------------ENOSYS THP + EAGAIN----------
> #include <linux/mm.h>
> kprobe:migrate_pages { @in_migrate_pages++; }
> kretprobe:migrate_pages { @in_migrate_pages--; }
> kprobe:unmap_and_move / @in_migrate_pages > 0 / {
> if (((struct page *)arg3)->flags & (1 << PG_head)) {
> override(-38);
> } else {
> override(-11);
> }
> }
> -------------------------------------------------
>
> With this, unmap_and_move() will return -ENOSYS (-38) for THP, and
> -EAGAIN (-11) for normal page. This can be used to test the
> corresponding error path in migrate_pages().
>
> I think that it's quite common for developers to inject error for
> arbitrary function to test the error path. Is it a good idea to turn on
> the arbitrary error injection if a special kernel configuration
> (e.g. CONFIG_BPF_KPROBE_OVERRIDE_ANY_FUNCTION) is enabled for debugging
> purpose only?
>
> Some hacks are still necessary for complete coverage
> ====================================================
>
> Even if we can override the return value of any function. Some hacks
> are still necessary for complete coverage. For example, some functions
> may be inlined, if we want to override its return value, we need to mark
> it with "noinline". And some error cannot be injected with return value
> overridden directly. For example, if we want to test when THP split
> isn't allowed condition in migrate_pages(). Then, some hack patch need
> to be used to do that. For example, the below patch can do that.
>
> -----------------------8<---------------------------------
> From ca371806dc7f96148cbdf03fdf8f92309306a325 Mon Sep 17 00:00:00 2001
> From: Huang Ying <[email protected]>
> Date: Tue, 20 Sep 2022 09:37:53 +0800
> Subject: [PATCH] dbg: inject nosplit
>
> ---
> mm/migrate.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 571d8c9fd5bc..d4ee76c285b2 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -57,6 +57,11 @@
>
> #include "internal.h"
>
> +static noinline bool error_inject_nosplit(void)
> +{
> + return false;
> +}
> +
> int isolate_movable_page(struct page *page, isolate_mode_t mode)
> {
> const struct movable_operations *mops;
> @@ -1412,6 +1417,9 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> bool nosplit = (reason == MR_NUMA_MISPLACED);
> bool no_subpage_counting = false;
>
> + if (error_inject_nosplit())
> + nosplit = true;
> +
> trace_mm_migrate_pages_start(mode, reason);
>
> thp_subpage_migration:
> --
> 2.35.1
> ----------------------------------------------------------
>
> With the help of the above patch, the following bpftrace script can
> inject the expected error,
>
> --------------------ENOMEM THP + nosplit---------
> kprobe:migrate_pages { @in_migrate_pages++; }
> kretprobe:migrate_pages { @in_migrate_pages--; }
> kprobe:should_fail_alloc_page / @in_migrate_pages > 0 && arg1 == 9 / {
> override(1);
> }
> kprobe:error_inject_nosplit / @in_migrate_pages > 0 / {
> override(1);
> }
> -------------------------------------------------
>
> Although some hack patches are needed. This is still simpler than my
> original hand-made error injection solution. So I will recommend
> developers to use it in the error path testing in the future.
>
> Best Regards,
> Huang, Ying
On 17/08/2022 09:14, Huang Ying wrote:
> The return value of move_pages() syscall is incorrect when counting
> the remaining pages to be migrated.
Hi Huang, Alistair,
I've noticed that this patch from Huang has caused the mm/migration selftest (authored by Alistair) to start failing (see bisection log below).
Of the 3 tests, migration.private_anon and migration.private_anon_thp continue to pass, but migration.shared_anon fails:
# RUN migration.shared_anon ...
Didn't migrate 1 pages
# migration.c:167:shared_anon:Expected migrate(ptr, self->n1, self->n2) (-2) == 0 (0)
# shared_anon: Test terminated by assertion
# FAIL migration.shared_anon
not ok 2 migration.shared_anon
The failure occurs due to the return code of move_pages() and this patch has changed the return code handling in the kernel, so it makes sense:
int migrate(uint64_t *ptr, int n1, int n2)
{
int ret, tmp;
int status = 0;
struct timespec ts1, ts2;
if (clock_gettime(CLOCK_MONOTONIC, &ts1))
return -1;
while (1) {
if (clock_gettime(CLOCK_MONOTONIC, &ts2))
return -1;
if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
return 0;
ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
MPOL_MF_MOVE_ALL);
if (ret) {
if (ret > 0)
printf("Didn't migrate %d pages\n", ret); <<<< HERE
else
perror("Couldn't migrate pages");
return -2;
}
tmp = n2;
n2 = n1;
n1 = tmp;
}
return 0;
}
I haven't looked any further and am not sure what the correct fix is. I wondered if either of you might be able to offer a solution?
Thanks,
Ryan
git bisect start
# bad: [6eaae198076080886b9e7d57f4ae06fa782f90ef] Linux 6.5-rc3
git bisect bad 6eaae198076080886b9e7d57f4ae06fa782f90ef
# good: [2c85ebc57b3e1817b6ce1a6b703928e113a90442] Linux 5.10
git bisect good 2c85ebc57b3e1817b6ce1a6b703928e113a90442
# good: [d710d370c4911e83da5d2bc43d4a2c3b56bd27e7] Merge tag 's390-5.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
git bisect good d710d370c4911e83da5d2bc43d4a2c3b56bd27e7
# bad: [8715c6d3100fc7c6edddf29af4a399a1c12d028c] Merge tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
git bisect bad 8715c6d3100fc7c6edddf29af4a399a1c12d028c
# good: [12b68040a5e468068fd7f4af1150eab8f6e96235] Merge tag 'media/v5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
git bisect good 12b68040a5e468068fd7f4af1150eab8f6e96235
# good: [7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf] Merge tag 'drm-next-2022-10-05' of git://anongit.freedesktop.org/drm/drm
git bisect good 7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf
# bad: [524d0c68826bc1adf9d1946e540eb4f7b16699a7] Merge tag 'ceph-for-6.1-rc1' of https://github.com/ceph/ceph-client
git bisect bad 524d0c68826bc1adf9d1946e540eb4f7b16699a7
# good: [29926f1cd3535f565f200430d5b6a794543fe130] fbdev: mb862xx: Fix check of return value from irq_of_parse_and_map()
git bisect good 29926f1cd3535f565f200430d5b6a794543fe130
# good: [d4013bc4d49f6da8178a340348369bb9920225c9] Merge tag 'bitmap-6.1-rc1' of https://github.com/norov/linux
git bisect good d4013bc4d49f6da8178a340348369bb9920225c9
# bad: [ac1e8c6c95bf805c699656046aef0a05205edfbd] Merge tag '6.1-rc-smb3-client-fixes-part1' of git://git.samba.org/sfrench/cifs-2.6
git bisect bad ac1e8c6c95bf805c699656046aef0a05205edfbd
# bad: [f80be4571b19b9fd8dd1528cd2a2f123aff51f70] kmsan: add KMSAN runtime core
git bisect bad f80be4571b19b9fd8dd1528cd2a2f123aff51f70
# bad: [c4c84f06285e48f80e9843d0775ad92714ffc35a] fs/proc/task_mmu: stop using linked list and highest_vm_end
git bisect bad c4c84f06285e48f80e9843d0775ad92714ffc35a
# good: [8c004d1fc1497d9a6d92ea968bd58230af59a492] jbd2: replace ll_rw_block()
git bisect good 8c004d1fc1497d9a6d92ea968bd58230af59a492
# bad: [f76c83378851f8e70f032848c4e61203f39480e4] mm: multi-gen LRU: optimize multiple memcgs
git bisect bad f76c83378851f8e70f032848c4e61203f39480e4
# good: [36537a67d3561bfe2b3654161d6c9008fff84d43] mm, hwpoison: avoid unneeded page_mapped_in_vma() overhead in collect_procs_anon()
git bisect good 36537a67d3561bfe2b3654161d6c9008fff84d43
# bad: [5fc30916b5cda697a7eb8f1167c38c27100a793a] migrate_pages(): fix failure counting for THP subpages retrying
git bisect bad 5fc30916b5cda697a7eb8f1167c38c27100a793a
# good: [2e3468778dbe3ec389a10c21a703bb8e5be5cfbc] mm: remember young/dirty bit for page migrations
git bisect good 2e3468778dbe3ec389a10c21a703bb8e5be5cfbc
# good: [f347c9d2697fcbbb64e077f7113a3887a181b8c0] filemap: make the accounting of thrashing more consistent
git bisect good f347c9d2697fcbbb64e077f7113a3887a181b8c0
# bad: [9c62ff005fc774fb2ba14223b0d865a8aca48fb5] migrate_pages(): remove unnecessary list_safe_reset_next()
git bisect bad 9c62ff005fc774fb2ba14223b0d865a8aca48fb5
# bad: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
git bisect bad a7504ed14f9b5e873599b2487eb95062dd0b65f8
# first bad commit: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
Thanks Ryan.
If I'm understanding Huang's patch correctly then kernel versions
without it would have returned a return code indicating that all pages
were migrated (ie. none failed to migrate) even if they hadn't.
Given I would have written and tested the test against the old buggy
version, it's probable that this test was always failing but the failure
was undetected.
The failure to migrate could be valid (although I'd expect at least some
success). One improvement to the test would be to check the status code
for the page as well and make sure it matches the return code. We would
likely have caught the bug Huang fixed earlier then.
Will take a look and see if I can improve the test.
- Alistair
Ryan Roberts <[email protected]> writes:
> On 17/08/2022 09:14, Huang Ying wrote:
>> The return value of move_pages() syscall is incorrect when counting
>> the remaining pages to be migrated.
>
> Hi Huang, Alistair,
>
> I've noticed that this patch from Huang has caused the mm/migration selftest (authored by Alistair) to start failing (see bisection log below).
>
> Of the 3 tests, migration.private_anon and migration.private_anon_thp continue to pass, but migration.shared_anon fails:
>
>
> # RUN migration.shared_anon ...
> Didn't migrate 1 pages
> # migration.c:167:shared_anon:Expected migrate(ptr, self->n1, self->n2) (-2) == 0 (0)
> # shared_anon: Test terminated by assertion
> # FAIL migration.shared_anon
> not ok 2 migration.shared_anon
>
>
> The failure occurs due to the return code of move_pages() and this patch has changed the return code handling in the kernel, so it makes sense:
>
>
> int migrate(uint64_t *ptr, int n1, int n2)
> {
> int ret, tmp;
> int status = 0;
> struct timespec ts1, ts2;
>
> if (clock_gettime(CLOCK_MONOTONIC, &ts1))
> return -1;
>
> while (1) {
> if (clock_gettime(CLOCK_MONOTONIC, &ts2))
> return -1;
>
> if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
> return 0;
>
> ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
> MPOL_MF_MOVE_ALL);
> if (ret) {
> if (ret > 0)
> printf("Didn't migrate %d pages\n", ret); <<<< HERE
> else
> perror("Couldn't migrate pages");
> return -2;
> }
>
> tmp = n2;
> n2 = n1;
> n1 = tmp;
> }
>
> return 0;
> }
>
>
> I haven't looked any further and am not sure what the correct fix is. I wondered if either you might be able to offer a solution?
>
> Thanks,
> Ryan
>
>
> git bisect start
> # bad: [6eaae198076080886b9e7d57f4ae06fa782f90ef] Linux 6.5-rc3
> git bisect bad 6eaae198076080886b9e7d57f4ae06fa782f90ef
> # good: [2c85ebc57b3e1817b6ce1a6b703928e113a90442] Linux 5.10
> git bisect good 2c85ebc57b3e1817b6ce1a6b703928e113a90442
> # good: [d710d370c4911e83da5d2bc43d4a2c3b56bd27e7] Merge tag 's390-5.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
> git bisect good d710d370c4911e83da5d2bc43d4a2c3b56bd27e7
> # bad: [8715c6d3100fc7c6edddf29af4a399a1c12d028c] Merge tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
> git bisect bad 8715c6d3100fc7c6edddf29af4a399a1c12d028c
> # good: [12b68040a5e468068fd7f4af1150eab8f6e96235] Merge tag 'media/v5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
> git bisect good 12b68040a5e468068fd7f4af1150eab8f6e96235
> # good: [7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf] Merge tag 'drm-next-2022-10-05' of git://anongit.freedesktop.org/drm/drm
> git bisect good 7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf
> # bad: [524d0c68826bc1adf9d1946e540eb4f7b16699a7] Merge tag 'ceph-for-6.1-rc1' of https://github.com/ceph/ceph-client
> git bisect bad 524d0c68826bc1adf9d1946e540eb4f7b16699a7
> # good: [29926f1cd3535f565f200430d5b6a794543fe130] fbdev: mb862xx: Fix check of return value from irq_of_parse_and_map()
> git bisect good 29926f1cd3535f565f200430d5b6a794543fe130
> # good: [d4013bc4d49f6da8178a340348369bb9920225c9] Merge tag 'bitmap-6.1-rc1' of https://github.com/norov/linux
> git bisect good d4013bc4d49f6da8178a340348369bb9920225c9
> # bad: [ac1e8c6c95bf805c699656046aef0a05205edfbd] Merge tag '6.1-rc-smb3-client-fixes-part1' of git://git.samba.org/sfrench/cifs-2.6
> git bisect bad ac1e8c6c95bf805c699656046aef0a05205edfbd
> # bad: [f80be4571b19b9fd8dd1528cd2a2f123aff51f70] kmsan: add KMSAN runtime core
> git bisect bad f80be4571b19b9fd8dd1528cd2a2f123aff51f70
> # bad: [c4c84f06285e48f80e9843d0775ad92714ffc35a] fs/proc/task_mmu: stop using linked list and highest_vm_end
> git bisect bad c4c84f06285e48f80e9843d0775ad92714ffc35a
> # good: [8c004d1fc1497d9a6d92ea968bd58230af59a492] jbd2: replace ll_rw_block()
> git bisect good 8c004d1fc1497d9a6d92ea968bd58230af59a492
> # bad: [f76c83378851f8e70f032848c4e61203f39480e4] mm: multi-gen LRU: optimize multiple memcgs
> git bisect bad f76c83378851f8e70f032848c4e61203f39480e4
> # good: [36537a67d3561bfe2b3654161d6c9008fff84d43] mm, hwpoison: avoid unneeded page_mapped_in_vma() overhead in collect_procs_anon()
> git bisect good 36537a67d3561bfe2b3654161d6c9008fff84d43
> # bad: [5fc30916b5cda697a7eb8f1167c38c27100a793a] migrate_pages(): fix failure counting for THP subpages retrying
> git bisect bad 5fc30916b5cda697a7eb8f1167c38c27100a793a
> # good: [2e3468778dbe3ec389a10c21a703bb8e5be5cfbc] mm: remember young/dirty bit for page migrations
> git bisect good 2e3468778dbe3ec389a10c21a703bb8e5be5cfbc
> # good: [f347c9d2697fcbbb64e077f7113a3887a181b8c0] filemap: make the accounting of thrashing more consistent
> git bisect good f347c9d2697fcbbb64e077f7113a3887a181b8c0
> # bad: [9c62ff005fc774fb2ba14223b0d865a8aca48fb5] migrate_pages(): remove unnecessary list_safe_reset_next()
> git bisect bad 9c62ff005fc774fb2ba14223b0d865a8aca48fb5
> # bad: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
> git bisect bad a7504ed14f9b5e873599b2487eb95062dd0b65f8
> # first bad commit: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
On 28/07/2023 01:57, Alistair Popple wrote:
>
> Thanks Ryan.
>
> If I'm understanding Huang's patch correctly then kernel versions
> without it would have returned a return code indicating that all pages
> were migrated (ie. none failed to migrate) even if they hadn't.
>
> Given I would have wrote and tested the test against the old buggy
> version it's probable that this test was always failing but the failure
> was undetected.
>
> The failure to migrate could be valid (although I'd expect at least some
> success).
Yes, when I looked, I think an initial (variable) number of iterations succeed
before failure.
> One improvement to the test would be to check that status code
> for the page as well and make sure it matches the return code. We would
> likely have caught the bug Huang fixed earlier then.
>
> Will take a look and see if I can improve the test.
Thanks. This is not urgent from my perspective - I just wanted to point it out.
So don't feel like you need to prioritize it on my account. Although it would be
good to get to the point where all the tests pass on mainline...
>
> - Alistair
>
> Ryan Roberts <[email protected]> writes:
>
>> On 17/08/2022 09:14, Huang Ying wrote:
>>> The return value of move_pages() syscall is incorrect when counting
>>> the remaining pages to be migrated.
>>
>> Hi Huang, Alistair,
>>
>> I've noticed that this patch from Huang has caused the mm/migration selftest (authored by Alistair) to start failing (see bisection log below).
>>
>> Of the 3 tests, migration.private_anon and migration.private_anon_thp continue to pass, but migration.shared_anon fails:
>>
>>
>> # RUN migration.shared_anon ...
>> Didn't migrate 1 pages
>> # migration.c:167:shared_anon:Expected migrate(ptr, self->n1, self->n2) (-2) == 0 (0)
>> # shared_anon: Test terminated by assertion
>> # FAIL migration.shared_anon
>> not ok 2 migration.shared_anon
>>
>>
>> The failure occurs due to the return code of move_pages() and this patch has changed the return code handling in the kernel, so it makes sense:
>>
>>
>> int migrate(uint64_t *ptr, int n1, int n2)
>> {
>> int ret, tmp;
>> int status = 0;
>> struct timespec ts1, ts2;
>>
>> if (clock_gettime(CLOCK_MONOTONIC, &ts1))
>> return -1;
>>
>> while (1) {
>> if (clock_gettime(CLOCK_MONOTONIC, &ts2))
>> return -1;
>>
>> if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
>> return 0;
>>
>> ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
>> MPOL_MF_MOVE_ALL);
>> if (ret) {
>> if (ret > 0)
>> printf("Didn't migrate %d pages\n", ret); <<<< HERE
>> else
>> perror("Couldn't migrate pages");
>> return -2;
>> }
>>
>> tmp = n2;
>> n2 = n1;
>> n1 = tmp;
>> }
>>
>> return 0;
>> }
>>
>>
>> I haven't looked any further and am not sure what the correct fix is. I wondered if either you might be able to offer a solution?
>>
>> Thanks,
>> Ryan
>>
>>
>> git bisect start
>> # bad: [6eaae198076080886b9e7d57f4ae06fa782f90ef] Linux 6.5-rc3
>> git bisect bad 6eaae198076080886b9e7d57f4ae06fa782f90ef
>> # good: [2c85ebc57b3e1817b6ce1a6b703928e113a90442] Linux 5.10
>> git bisect good 2c85ebc57b3e1817b6ce1a6b703928e113a90442
>> # good: [d710d370c4911e83da5d2bc43d4a2c3b56bd27e7] Merge tag 's390-5.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
>> git bisect good d710d370c4911e83da5d2bc43d4a2c3b56bd27e7
>> # bad: [8715c6d3100fc7c6edddf29af4a399a1c12d028c] Merge tag 'for-6.2/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
>> git bisect bad 8715c6d3100fc7c6edddf29af4a399a1c12d028c
>> # good: [12b68040a5e468068fd7f4af1150eab8f6e96235] Merge tag 'media/v5.20-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
>> git bisect good 12b68040a5e468068fd7f4af1150eab8f6e96235
>> # good: [7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf] Merge tag 'drm-next-2022-10-05' of git://anongit.freedesktop.org/drm/drm
>> git bisect good 7e6739b9336e61fe23ca4e2c8d1fda8f19f979bf
>> # bad: [524d0c68826bc1adf9d1946e540eb4f7b16699a7] Merge tag 'ceph-for-6.1-rc1' of https://github.com/ceph/ceph-client
>> git bisect bad 524d0c68826bc1adf9d1946e540eb4f7b16699a7
>> # good: [29926f1cd3535f565f200430d5b6a794543fe130] fbdev: mb862xx: Fix check of return value from irq_of_parse_and_map()
>> git bisect good 29926f1cd3535f565f200430d5b6a794543fe130
>> # good: [d4013bc4d49f6da8178a340348369bb9920225c9] Merge tag 'bitmap-6.1-rc1' of https://github.com/norov/linux
>> git bisect good d4013bc4d49f6da8178a340348369bb9920225c9
>> # bad: [ac1e8c6c95bf805c699656046aef0a05205edfbd] Merge tag '6.1-rc-smb3-client-fixes-part1' of git://git.samba.org/sfrench/cifs-2.6
>> git bisect bad ac1e8c6c95bf805c699656046aef0a05205edfbd
>> # bad: [f80be4571b19b9fd8dd1528cd2a2f123aff51f70] kmsan: add KMSAN runtime core
>> git bisect bad f80be4571b19b9fd8dd1528cd2a2f123aff51f70
>> # bad: [c4c84f06285e48f80e9843d0775ad92714ffc35a] fs/proc/task_mmu: stop using linked list and highest_vm_end
>> git bisect bad c4c84f06285e48f80e9843d0775ad92714ffc35a
>> # good: [8c004d1fc1497d9a6d92ea968bd58230af59a492] jbd2: replace ll_rw_block()
>> git bisect good 8c004d1fc1497d9a6d92ea968bd58230af59a492
>> # bad: [f76c83378851f8e70f032848c4e61203f39480e4] mm: multi-gen LRU: optimize multiple memcgs
>> git bisect bad f76c83378851f8e70f032848c4e61203f39480e4
>> # good: [36537a67d3561bfe2b3654161d6c9008fff84d43] mm, hwpoison: avoid unneeded page_mapped_in_vma() overhead in collect_procs_anon()
>> git bisect good 36537a67d3561bfe2b3654161d6c9008fff84d43
>> # bad: [5fc30916b5cda697a7eb8f1167c38c27100a793a] migrate_pages(): fix failure counting for THP subpages retrying
>> git bisect bad 5fc30916b5cda697a7eb8f1167c38c27100a793a
>> # good: [2e3468778dbe3ec389a10c21a703bb8e5be5cfbc] mm: remember young/dirty bit for page migrations
>> git bisect good 2e3468778dbe3ec389a10c21a703bb8e5be5cfbc
>> # good: [f347c9d2697fcbbb64e077f7113a3887a181b8c0] filemap: make the accounting of thrashing more consistent
>> git bisect good f347c9d2697fcbbb64e077f7113a3887a181b8c0
>> # bad: [9c62ff005fc774fb2ba14223b0d865a8aca48fb5] migrate_pages(): remove unnecessary list_safe_reset_next()
>> git bisect bad 9c62ff005fc774fb2ba14223b0d865a8aca48fb5
>> # bad: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
>> git bisect bad a7504ed14f9b5e873599b2487eb95062dd0b65f8
>> # first bad commit: [a7504ed14f9b5e873599b2487eb95062dd0b65f8] migrate: fix syscall move_pages() return value for failure
>