2021-03-29 15:41:42

by Zi Yan

[permalink] [raw]
Subject: [PATCH v7 2/2] mm: huge_memory: debugfs for file-backed THP split.

From: Zi Yan <[email protected]>

Further extend <debugfs>/split_huge_pages to accept
"<path>,<off_start>,<off_end>" for file-backed THP split tests, since
tmpfs may have files backed by THPs that are not mapped anywhere.

Update selftest program to test file-backed THP split too.

Suggested-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Zi Yan <[email protected]>
Reviewed-by: Yang Shi <[email protected]>
---
mm/huge_memory.c | 91 ++++++++++++++++++-
.../selftests/vm/split_huge_page_test.c | 81 ++++++++++++++++-
2 files changed, 166 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1bcab247aea8..ca47f5a317f3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3062,6 +3062,66 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
return ret;
}

+static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
+ pgoff_t off_end)
+{
+ struct filename *file;
+ struct file *candidate;
+ struct address_space *mapping;
+ int ret = -EINVAL;
+ pgoff_t off_cur;
+ unsigned long total = 0, split = 0;
+
+ file = getname_kernel(file_path);
+ if (IS_ERR(file))
+ return ret;
+
+ candidate = file_open_name(file, O_RDONLY, 0);
+ if (IS_ERR(candidate))
+ goto out;
+
+ pr_debug("split file-backed THPs in file: %s, offset: [0x%lx - 0x%lx]\n",
+ file_path, off_start, off_end);
+
+ mapping = candidate->f_mapping;
+
+ for (off_cur = off_start; off_cur < off_end;) {
+ struct page *fpage = pagecache_get_page(mapping, off_cur,
+ FGP_ENTRY | FGP_HEAD, 0);
+
+ if (xa_is_value(fpage) || !fpage) {
+ off_cur += PAGE_SIZE;
+ continue;
+ }
+
+ if (!is_transparent_hugepage(fpage)) {
+ off_cur += PAGE_SIZE;
+ goto next;
+ }
+ total++;
+ off_cur = fpage->index + thp_size(fpage);
+
+ if (!trylock_page(fpage))
+ goto next;
+
+ if (!split_huge_page(fpage))
+ split++;
+
+ unlock_page(fpage);
+next:
+ put_page(fpage);
+ cond_resched();
+ }
+
+ filp_close(candidate, NULL);
+ ret = 0;
+
+ pr_debug("%lu of %lu file-backed THP split\n", split, total);
+out:
+ putname(file);
+ return ret;
+}
+
#define MAX_INPUT_BUF_SZ 255

static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
@@ -3069,7 +3129,8 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
{
static DEFINE_MUTEX(split_debug_mutex);
ssize_t ret;
- char input_buf[MAX_INPUT_BUF_SZ]; /* hold pid, start_vaddr, end_vaddr */
+ /* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+ char input_buf[MAX_INPUT_BUF_SZ];
int pid;
unsigned long vaddr_start, vaddr_end;

@@ -3084,6 +3145,34 @@ static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
goto out;

input_buf[MAX_INPUT_BUF_SZ - 1] = '\0';
+
+ if (input_buf[0] == '/') {
+ char *tok;
+ char *buf = input_buf;
+ char file_path[MAX_INPUT_BUF_SZ];
+ pgoff_t off_start = 0, off_end = 0;
+ size_t input_len = strlen(input_buf);
+
+ tok = strsep(&buf, ",");
+ if (tok) {
+ strncpy(file_path, tok, MAX_INPUT_BUF_SZ);
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
+ if (ret != 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = split_huge_pages_in_file(file_path, off_start, off_end);
+ if (!ret)
+ ret = input_len;
+
+ goto out;
+ }
+
ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
if (ret == 1 && pid == 1) {
split_huge_pages_all();
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 2c0c18e60c57..845a63cdb052 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -7,11 +7,13 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
+#include <stdarg.h>
#include <unistd.h>
#include <inttypes.h>
#include <string.h>
#include <fcntl.h>
#include <sys/mman.h>
+#include <sys/mount.h>
#include <malloc.h>
#include <stdbool.h>

@@ -24,6 +26,9 @@ uint64_t pmd_pagesize;
#define SMAP_PATH "/proc/self/smaps"
#define INPUT_MAX 80

+#define PID_FMT "%d,0x%lx,0x%lx"
+#define PATH_FMT "%s,0x%lx,0x%lx"
+
#define PFN_MASK ((1UL<<55)-1)
#define KPF_THP (1UL<<22)

@@ -87,13 +92,16 @@ static int write_file(const char *path, const char *buf, size_t buflen)
return (unsigned int) numwritten;
}

-static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end)
+static void write_debugfs(const char *fmt, ...)
{
char input[INPUT_MAX];
int ret;
+ va_list argp;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(input, INPUT_MAX, fmt, argp);
+ va_end(argp);

- ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start,
- vaddr_end);
if (ret >= INPUT_MAX) {
printf("%s: Debugfs input is too long\n", __func__);
exit(EXIT_FAILURE);
@@ -183,7 +191,8 @@ void split_pmd_thp(void)
}

/* split all THPs */
- write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len);
+ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
+ (uint64_t)one_page + len);

for (i = 0; i < len; i++)
if (one_page[i] != (char)i) {
@@ -274,7 +283,7 @@ void split_pte_mapped_thp(void)
}

/* split all remapped THPs */
- write_debugfs(getpid(), (uint64_t)pte_mapped,
+ write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
(uint64_t)pte_mapped + pagesize * 4);

/* smap does not show THPs after mremap, use kpageflags instead */
@@ -300,6 +309,67 @@ void split_pte_mapped_thp(void)
close(kpageflags_fd);
}

+void split_file_backed_thp(void)
+{
+ int status;
+ int fd;
+ ssize_t num_written;
+ char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
+ const char *tmpfs_loc = mkdtemp(tmpfs_template);
+ char testfile[INPUT_MAX];
+
+ printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n");
+
+ status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
+
+ if (status) {
+ printf("Unable to create a tmpfs for testing\n");
+ exit(EXIT_FAILURE);
+ }
+
+ status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
+ if (status >= INPUT_MAX) {
+ printf("Fail to create file-backed THP split testing file\n");
+ goto cleanup;
+ }
+
+ fd = open(testfile, O_CREAT|O_WRONLY);
+ if (fd == -1) {
+ perror("Cannot open testing file\n");
+ goto cleanup;
+ }
+
+ /* write something to the file, so a file-backed THP can be allocated */
+ num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
+ close(fd);
+
+ if (num_written < 1) {
+ printf("Fail to write data to testing file\n");
+ goto cleanup;
+ }
+
+ /* split the file-backed THP */
+ write_debugfs(PATH_FMT, testfile, 0, 1024);
+
+ status = unlink(testfile);
+ if (status)
+ perror("Cannot remove testing file\n");
+
+cleanup:
+ status = umount(tmpfs_loc);
+ if (status) {
+ printf("Unable to umount %s\n", tmpfs_loc);
+ exit(EXIT_FAILURE);
+ }
+ status = rmdir(tmpfs_loc);
+ if (status) {
+ perror("cannot remove tmp dir");
+ exit(EXIT_FAILURE);
+ }
+
+ printf("file-backed THP split test done, please check dmesg for more information\n");
+}
+
int main(int argc, char **argv)
{
if (geteuid() != 0) {
@@ -313,6 +383,7 @@ int main(int argc, char **argv)

split_pmd_thp();
split_pte_mapped_thp();
+ split_file_backed_thp();

return 0;
}
--
2.30.2


2021-03-31 16:48:44

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v7 2/2] mm: huge_memory: debugfs for file-backed THP split.

On Mon, Mar 29, 2021 at 11:39:32AM -0400, Zi Yan wrote:
> + for (off_cur = off_start; off_cur < off_end;) {
> + struct page *fpage = pagecache_get_page(mapping, off_cur,
> + FGP_ENTRY | FGP_HEAD, 0);
> +
> + if (xa_is_value(fpage) || !fpage) {
> + off_cur += PAGE_SIZE;
> + continue;
> + }
> +
> + if (!is_transparent_hugepage(fpage)) {
> + off_cur += PAGE_SIZE;
> + goto next;
> + }
> + total++;
> + off_cur = fpage->index + thp_size(fpage);

That can't be right. fpage->index is in units of pages and thp_size is
in units of bytes. I wish C had a better type system.
I think you meant:

off_cur = fpage->index + thp_nr_pages(fpage);

Also, I think this loop would read better as ...

for (index = off_start; index < off_end; index += nr_pages) {
struct page *fpage = pagecache_get_page(mapping, index,
FGP_ENTRY | FGP_HEAD, 0);
nr_pages = 1;
if (xa_is_value(fpage) || !fpage)
continue;
if (!is_transparent_hugepage(fpage))
goto next;
total++;
nr_pages = thp_nr_pages(fpage);
...

2021-03-31 17:03:27

by Zi Yan

[permalink] [raw]
Subject: Re: [PATCH v7 2/2] mm: huge_memory: debugfs for file-backed THP split.

On 31 Mar 2021, at 12:44, Matthew Wilcox wrote:

> On Mon, Mar 29, 2021 at 11:39:32AM -0400, Zi Yan wrote:
>> + for (off_cur = off_start; off_cur < off_end;) {
>> + struct page *fpage = pagecache_get_page(mapping, off_cur,
>> + FGP_ENTRY | FGP_HEAD, 0);
>> +
>> + if (xa_is_value(fpage) || !fpage) {
>> + off_cur += PAGE_SIZE;
>> + continue;
>> + }
>> +
>> + if (!is_transparent_hugepage(fpage)) {
>> + off_cur += PAGE_SIZE;
>> + goto next;
>> + }
>> + total++;
>> + off_cur = fpage->index + thp_size(fpage);
>
> That can't be right. fpage->index is in units of pages and thp_size is
> in units of bytes. I wish C had a better type system.
> I think you meant:
>
> off_cur = fpage->index + thp_nr_pages(fpage);
>
> Also, I think this loop would read better as ...
>
> for (index = off_start; index < off_end; index += nr_pages) {
> struct page *fpage = pagecache_get_page(mapping, index,
> FGP_ENTRY | FGP_HEAD, 0);
> nr_pages = 1;
> if (xa_is_value(fpage) || !fpage)
> continue;
> if (!is_transparent_hugepage(fpage))
> goto next;
> total++;
> nr_pages = thp_nr_pages(fpage);
> ...

Thanks for catching this! I mixed this up with looping through a VMA, which
is in units of bytes. I will fix this and use your suggested loop code.


Best Regards,
Yan Zi


Attachments:
signature.asc (871.00 B)
OpenPGP digital signature