2021-08-06 10:11:42

by Zhansaya Bagdauletkyzy

[permalink] [raw]
Subject: [PATCH v2 0/2] add KSM performance tests

Extend KSM self tests with a performance benchmark. These tests are not
part of regular regression testing, as they are mainly intended to be
used by developers making changes to the memory management subsystem.
This patchset is a respin of the previous series:
https://lore.kernel.org/lkml/[email protected]/

Zhansaya Bagdauletkyzy (2):
selftests: vm: add KSM merging time test
selftests: vm: add COW time test for KSM pages

v1 -> v2:
- replace MB with MiB
- address COW test review comments

tools/testing/selftests/vm/ksm_tests.c | 152 ++++++++++++++++++++++++-
1 file changed, 148 insertions(+), 4 deletions(-)

--
2.25.1


2021-08-06 10:12:51

by Zhansaya Bagdauletkyzy

[permalink] [raw]
Subject: [PATCH v2 2/2] selftests: vm: add COW time test for KSM pages

Since merged pages are copied every time they need to be modified,
the write access time is different between shared and non-shared pages.
Add ksm_cow_time() function which evaluates latency of these COW
breaks. First, 4000 pages are allocated and the time required to modify
1 byte in every other page is measured. After this, the pages are
merged into 2000 pairs and in each pair, 1 page is modified (i.e. they
are decoupled) to detect COW breaks. The time needed to break COW of
merged pages is then compared with performance of non-shared pages.

The test is run as follows: ./ksm_tests -C
The output:
Total size: 15 MiB

Not merged pages:
Total time: 0.001903112 s
Average speed: 3678.186 MiB/s

Merged pages:
Total time: 0.006215680 s
Average speed: 1126.184 MiB/s

Signed-off-by: Zhansaya Bagdauletkyzy <[email protected]>
---
v1 -> v2:
As suggested by Pavel,
- add baseline figures with non-shared pages
- instead of having all pages merged together, create pairs of
duplicated pages

Pavel's review comments:
https://lore.kernel.org/lkml/CA+CK2bDYZBBaU3pC369o01tCgydaJ6y91GZ0_MWONMMCajZOUw@mail.gmail.com/

tools/testing/selftests/vm/ksm_tests.c | 84 +++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 432dfe615e50..382ee6ccd13a 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -33,7 +33,8 @@ enum ksm_test_name {
CHECK_KSM_UNMERGE,
CHECK_KSM_ZERO_PAGE_MERGE,
CHECK_KSM_NUMA_MERGE,
- KSM_MERGE_TIME
+ KSM_MERGE_TIME,
+ KSM_COW_TIME
};

static int ksm_write_sysfs(const char *file_path, unsigned long val)
@@ -98,7 +99,8 @@ static void print_help(void)
" -U (page unmerging)\n"
" -P evaluate merging time and speed.\n"
" For this test, the size of duplicated memory area (in MiB)\n"
- " must be provided using -s option\n\n");
+ " must be provided using -s option\n"
+ " -C evaluate the time required to break COW of merged pages.\n\n");

printf(" -a: specify the access protections of pages.\n"
" <prot> must be of the form [rwx].\n"
@@ -455,6 +457,75 @@ static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
return KSFT_FAIL;
}

+static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long cow_time_ns;
+ int page_count = 4000;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+ for (size_t i = 0; i < page_count; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ printf("Not merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ /* Create 2000 pairs of duplicated pages */
+ for (size_t i = 0; i < page_count; i = i + 2) {
+ memset(map_ptr + page_size * i, '+', i + 1);
+ memset(map_ptr + page_size * (i + 1), '+', i + 1);
+ }
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ for (size_t i = 0; i < page_count; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
int main(int argc, char *argv[])
{
int ret, opt;
@@ -468,7 +539,7 @@ int main(int argc, char *argv[])
bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
long size_MB = 0;

- while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) {
+ while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) {
switch (opt) {
case 'a':
prot = str_to_prot(optarg);
@@ -522,6 +593,9 @@ int main(int argc, char *argv[])
case 'P':
test_name = KSM_MERGE_TIME;
break;
+ case 'C':
+ test_name = KSM_COW_TIME;
+ break;
default:
return KSFT_FAIL;
}
@@ -571,6 +645,10 @@ int main(int argc, char *argv[])
ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
size_MB);
break;
+ case KSM_COW_TIME:
+ ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ page_size);
+ break;
}

if (ksm_restore(&ksm_sysfs_old)) {
--
2.25.1

2021-08-06 10:13:34

by Zhansaya Bagdauletkyzy

[permalink] [raw]
Subject: [PATCH v2 1/2] selftests: vm: add KSM merging time test

Add ksm_merge_time() function to determine speed and time needed for
merging. The total time spent is shown in seconds while speed is
in MiB/s. User must specify the size of duplicated memory area (in MiB)
before running the test.

The test is run as follows: ./ksm_tests -P -s 100
The output:
Total size: 100 MiB
Total time: 0.201106786 s
Average speed: 497.248 MiB/s

Signed-off-by: Zhansaya Bagdauletkyzy <[email protected]>
---
v1 -> v2:
As suggested by Pavel,
- replace MB with MiB
- measure speed more accurately

Pavel's review comments:
https://lore.kernel.org/lkml/CA+CK2bBpzdWMYoJdR2EQNNCrRn+Pg1Gs2oBqLR65JW3UUnWt0w@mail.gmail.com/

tools/testing/selftests/vm/ksm_tests.c | 74 ++++++++++++++++++++++++--
1 file changed, 70 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index cdeb4a028538..432dfe615e50 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -7,6 +7,7 @@
#include <numa.h>

#include "../kselftest.h"
+#include "../../../../include/vdso/time64.h"

#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
#define KSM_FP(s) (KSM_SYSFS_PATH s)
@@ -15,6 +16,7 @@
#define KSM_PROT_STR_DEFAULT "rw"
#define KSM_USE_ZERO_PAGES_DEFAULT false
#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define MB (1ul << 20)

struct ksm_sysfs {
unsigned long max_page_sharing;
@@ -30,7 +32,8 @@ enum ksm_test_name {
CHECK_KSM_MERGE,
CHECK_KSM_UNMERGE,
CHECK_KSM_ZERO_PAGE_MERGE,
- CHECK_KSM_NUMA_MERGE
+ CHECK_KSM_NUMA_MERGE,
+ KSM_MERGE_TIME
};

static int ksm_write_sysfs(const char *file_path, unsigned long val)
@@ -86,13 +89,16 @@ static int str_to_prot(char *prot_str)
static void print_help(void)
{
printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
- "[-z use_zero_pages] [-m merge_across_nodes]\n");
+ "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");

printf("Supported <test type>:\n"
" -M (page merging)\n"
" -Z (zero pages merging)\n"
" -N (merging of pages in different NUMA nodes)\n"
- " -U (page unmerging)\n\n");
+ " -U (page unmerging)\n"
+ " -P evaluate merging time and speed.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n\n");

printf(" -a: specify the access protections of pages.\n"
" <prot> must be of the form [rwx].\n"
@@ -105,6 +111,7 @@ static void print_help(void)
" Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
printf(" -m: change merge_across_nodes tunable\n"
" Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+ printf(" -s: the size of duplicated memory area (in MiB)\n");

exit(0);
}
@@ -407,6 +414,47 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a
return KSFT_FAIL;
}

+static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
int main(int argc, char *argv[])
{
int ret, opt;
@@ -418,8 +466,9 @@ int main(int argc, char *argv[])
int test_name = CHECK_KSM_MERGE;
bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+ long size_MB = 0;

- while ((opt = getopt(argc, argv, "ha:p:l:z:m:MUZN")) != -1) {
+ while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) {
switch (opt) {
case 'a':
prot = str_to_prot(optarg);
@@ -453,6 +502,12 @@ int main(int argc, char *argv[])
else
merge_across_nodes = 1;
break;
+ case 's':
+ size_MB = atol(optarg);
+ if (size_MB <= 0) {
+ printf("Size must be greater than 0\n");
+ return KSFT_FAIL;
+ }
case 'M':
break;
case 'U':
@@ -464,6 +519,9 @@ int main(int argc, char *argv[])
case 'N':
test_name = CHECK_KSM_NUMA_MERGE;
break;
+ case 'P':
+ test_name = KSM_MERGE_TIME;
+ break;
default:
return KSFT_FAIL;
}
@@ -505,6 +563,14 @@ int main(int argc, char *argv[])
ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
merge_across_nodes, page_size);
break;
+ case KSM_MERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ size_MB);
+ break;
}

if (ksm_restore(&ksm_sysfs_old)) {
--
2.25.1

2021-08-16 14:55:08

by Tyler Hicks

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] selftests: vm: add KSM merging time test

On 2021-08-06 16:10:27, Zhansaya Bagdauletkyzy wrote:
> Add ksm_merge_time() function to determine speed and time needed for
> merging. The total time spent is shown in seconds while speed is
> in MiB/s. User must specify the size of duplicated memory area (in MiB)
> before running the test.
>
> The test is run as follows: ./ksm_tests -P -s 100
> The output:
> Total size: 100 MiB
> Total time: 0.201106786 s
> Average speed: 497.248 MiB/s
>
> Signed-off-by: Zhansaya Bagdauletkyzy <[email protected]>

Thanks for addressing all of Pavel's feedback. This looks good to me.

Reviewed-by: Tyler Hicks <[email protected]>

Tyler

> ---
> v1 -> v2:
> As suggested by Pavel,
> - replace MB with MiB
> - measure speed more accurately
>
> Pavel's review comments:
> https://lore.kernel.org/lkml/CA+CK2bBpzdWMYoJdR2EQNNCrRn+Pg1Gs2oBqLR65JW3UUnWt0w@mail.gmail.com/
>
> tools/testing/selftests/vm/ksm_tests.c | 74 ++++++++++++++++++++++++--
> 1 file changed, 70 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
> index cdeb4a028538..432dfe615e50 100644
> --- a/tools/testing/selftests/vm/ksm_tests.c
> +++ b/tools/testing/selftests/vm/ksm_tests.c
> @@ -7,6 +7,7 @@
> #include <numa.h>
>
> #include "../kselftest.h"
> +#include "../../../../include/vdso/time64.h"
>
> #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
> #define KSM_FP(s) (KSM_SYSFS_PATH s)
> @@ -15,6 +16,7 @@
> #define KSM_PROT_STR_DEFAULT "rw"
> #define KSM_USE_ZERO_PAGES_DEFAULT false
> #define KSM_MERGE_ACROSS_NODES_DEFAULT true
> +#define MB (1ul << 20)
>
> struct ksm_sysfs {
> unsigned long max_page_sharing;
> @@ -30,7 +32,8 @@ enum ksm_test_name {
> CHECK_KSM_MERGE,
> CHECK_KSM_UNMERGE,
> CHECK_KSM_ZERO_PAGE_MERGE,
> - CHECK_KSM_NUMA_MERGE
> + CHECK_KSM_NUMA_MERGE,
> + KSM_MERGE_TIME
> };
>
> static int ksm_write_sysfs(const char *file_path, unsigned long val)
> @@ -86,13 +89,16 @@ static int str_to_prot(char *prot_str)
> static void print_help(void)
> {
> printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
> - "[-z use_zero_pages] [-m merge_across_nodes]\n");
> + "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
>
> printf("Supported <test type>:\n"
> " -M (page merging)\n"
> " -Z (zero pages merging)\n"
> " -N (merging of pages in different NUMA nodes)\n"
> - " -U (page unmerging)\n\n");
> + " -U (page unmerging)\n"
> + " -P evaluate merging time and speed.\n"
> + " For this test, the size of duplicated memory area (in MiB)\n"
> + " must be provided using -s option\n\n");
>
> printf(" -a: specify the access protections of pages.\n"
> " <prot> must be of the form [rwx].\n"
> @@ -105,6 +111,7 @@ static void print_help(void)
> " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
> printf(" -m: change merge_across_nodes tunable\n"
> " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
> + printf(" -s: the size of duplicated memory area (in MiB)\n");
>
> exit(0);
> }
> @@ -407,6 +414,47 @@ static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_a
> return KSFT_FAIL;
> }
>
> +static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
> +{
> + void *map_ptr;
> + struct timespec start_time, end_time;
> + unsigned long scan_time_ns;
> +
> + map_size *= MB;
> +
> + map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
> + if (!map_ptr)
> + return KSFT_FAIL;
> +
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
> + perror("clock_gettime");
> + goto err_out;
> + }
> + if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
> + goto err_out;
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
> + perror("clock_gettime");
> + goto err_out;
> + }
> +
> + scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
> + (end_time.tv_nsec - start_time.tv_nsec);
> +
> + printf("Total size: %lu MiB\n", map_size / MB);
> + printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
> + scan_time_ns % NSEC_PER_SEC);
> + printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
> + ((double)scan_time_ns / NSEC_PER_SEC));
> +
> + munmap(map_ptr, map_size);
> + return KSFT_PASS;
> +
> +err_out:
> + printf("Not OK\n");
> + munmap(map_ptr, map_size);
> + return KSFT_FAIL;
> +}
> +
> int main(int argc, char *argv[])
> {
> int ret, opt;
> @@ -418,8 +466,9 @@ int main(int argc, char *argv[])
> int test_name = CHECK_KSM_MERGE;
> bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
> bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
> + long size_MB = 0;
>
> - while ((opt = getopt(argc, argv, "ha:p:l:z:m:MUZN")) != -1) {
> + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) {
> switch (opt) {
> case 'a':
> prot = str_to_prot(optarg);
> @@ -453,6 +502,12 @@ int main(int argc, char *argv[])
> else
> merge_across_nodes = 1;
> break;
> + case 's':
> + size_MB = atol(optarg);
> + if (size_MB <= 0) {
> + printf("Size must be greater than 0\n");
> + return KSFT_FAIL;
> + }
> case 'M':
> break;
> case 'U':
> @@ -464,6 +519,9 @@ int main(int argc, char *argv[])
> case 'N':
> test_name = CHECK_KSM_NUMA_MERGE;
> break;
> + case 'P':
> + test_name = KSM_MERGE_TIME;
> + break;
> default:
> return KSFT_FAIL;
> }
> @@ -505,6 +563,14 @@ int main(int argc, char *argv[])
> ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
> merge_across_nodes, page_size);
> break;
> + case KSM_MERGE_TIME:
> + if (size_MB == 0) {
> + printf("Option '-s' is required.\n");
> + return KSFT_FAIL;
> + }
> + ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
> + size_MB);
> + break;
> }
>
> if (ksm_restore(&ksm_sysfs_old)) {
> --
> 2.25.1
>

2021-08-16 15:51:53

by Tyler Hicks

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] selftests: vm: add COW time test for KSM pages

On 2021-08-06 16:10:29, Zhansaya Bagdauletkyzy wrote:
> Since merged pages are copied every time they need to be modified,
> the write access time is different between shared and non-shared pages.
> Add ksm_cow_time() function which evaluates latency of these COW
> breaks. First, 4000 pages are allocated and the time required to modify
> 1 byte in every other page is measured. After this, the pages are
> merged into 2000 pairs and in each pair, 1 page is modified (i.e. they
> are decoupled) to detect COW breaks. The time needed to break COW of
> merged pages is then compared with performance of non-shared pages.
>
> The test is run as follows: ./ksm_tests -C
> The output:
> Total size: 15 MiB
>
> Not merged pages:
> Total time: 0.001903112 s
> Average speed: 3678.186 MiB/s
>
> Merged pages:
> Total time: 0.006215680 s
> Average speed: 1126.184 MiB/s
>
> Signed-off-by: Zhansaya Bagdauletkyzy <[email protected]>
> ---
> v1 -> v2:
> As suggested by Pavel,
> - add baseline figures with non-shared pages
> - instead of having all pages merged together, create pairs of
> duplicated pages
>
> Pavel's review comments:
> https://lore.kernel.org/lkml/CA+CK2bDYZBBaU3pC369o01tCgydaJ6y91GZ0_MWONMMCajZOUw@mail.gmail.com/
>
> tools/testing/selftests/vm/ksm_tests.c | 84 +++++++++++++++++++++++++-
> 1 file changed, 81 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
> index 432dfe615e50..382ee6ccd13a 100644
> --- a/tools/testing/selftests/vm/ksm_tests.c
> +++ b/tools/testing/selftests/vm/ksm_tests.c
> @@ -33,7 +33,8 @@ enum ksm_test_name {
> CHECK_KSM_UNMERGE,
> CHECK_KSM_ZERO_PAGE_MERGE,
> CHECK_KSM_NUMA_MERGE,
> - KSM_MERGE_TIME
> + KSM_MERGE_TIME,
> + KSM_COW_TIME
> };
>
> static int ksm_write_sysfs(const char *file_path, unsigned long val)
> @@ -98,7 +99,8 @@ static void print_help(void)
> " -U (page unmerging)\n"
> " -P evaluate merging time and speed.\n"
> " For this test, the size of duplicated memory area (in MiB)\n"
> - " must be provided using -s option\n\n");
> + " must be provided using -s option\n"
> + " -C evaluate the time required to break COW of merged pages.\n\n");
>
> printf(" -a: specify the access protections of pages.\n"
> " <prot> must be of the form [rwx].\n"
> @@ -455,6 +457,75 @@ static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
> return KSFT_FAIL;
> }
>
> +static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
> +{
> + void *map_ptr;
> + struct timespec start_time, end_time;
> + unsigned long cow_time_ns;
> + int page_count = 4000;

size_t is more correct.

> +
> + map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
> + if (!map_ptr)
> + return KSFT_FAIL;
> +
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
> + perror("clock_gettime");
> + return KSFT_FAIL;
> + }
> + for (size_t i = 0; i < page_count; i = i + 2)
^
I think it is best to use the correct test here even though you're
stepping by 2 and won't have any problems in practice:

i < (page_count - 1)

This should be changed in all for loops in this function.

> + memset(map_ptr + page_size * i, '-', 1);
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
> + perror("clock_gettime");
> + return KSFT_FAIL;
> + }
> +
> + cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
> + (end_time.tv_nsec - start_time.tv_nsec);
> +
> + printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
> + printf("Not merged pages:\n");
> + printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
> + cow_time_ns % NSEC_PER_SEC);
> + printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
> + ((double)cow_time_ns / NSEC_PER_SEC));
> +
> + /* Create 2000 pairs of duplicated pages */
> + for (size_t i = 0; i < page_count; i = i + 2) {
> + memset(map_ptr + page_size * i, '+', i + 1);
> + memset(map_ptr + page_size * (i + 1), '+', i + 1);
> + }

Since i is being used as the third argument to memset(), this loop only
works if page_count is less than page_size. I could see someone quickly
bumping page_count higher and then unknowingly getting bad test results.

Let's gain a little headroom in page_count. We are creating page_count/2
pairs of duplicate pages so we can use these memset() calls:

memset(map_ptr + page_size * i, '+', i / 2 + 1);
memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);

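Then add a short comment to the page_count declaration mentioning that
page_count must be <= (page_size * 2), since the largest memset() length
used above is page_count / 2 and it must fit within a single page.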


Everything else looks good to me.

Tyler

> + if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
> + goto err_out;
> +
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
> + perror("clock_gettime");
> + goto err_out;
> + }
> + for (size_t i = 0; i < page_count; i = i + 2)
> + memset(map_ptr + page_size * i, '-', 1);
> + if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
> + perror("clock_gettime");
> + goto err_out;
> + }
> +
> + cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
> + (end_time.tv_nsec - start_time.tv_nsec);
> +
> + printf("Merged pages:\n");
> + printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
> + cow_time_ns % NSEC_PER_SEC);
> + printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
> + ((double)cow_time_ns / NSEC_PER_SEC));
> +
> + munmap(map_ptr, page_size * page_count);
> + return KSFT_PASS;
> +
> +err_out:
> + printf("Not OK\n");
> + munmap(map_ptr, page_size * page_count);
> + return KSFT_FAIL;
> +}
> +
> int main(int argc, char *argv[])
> {
> int ret, opt;
> @@ -468,7 +539,7 @@ int main(int argc, char *argv[])
> bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
> long size_MB = 0;
>
> - while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNP")) != -1) {
> + while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) {
> switch (opt) {
> case 'a':
> prot = str_to_prot(optarg);
> @@ -522,6 +593,9 @@ int main(int argc, char *argv[])
> case 'P':
> test_name = KSM_MERGE_TIME;
> break;
> + case 'C':
> + test_name = KSM_COW_TIME;
> + break;
> default:
> return KSFT_FAIL;
> }
> @@ -571,6 +645,10 @@ int main(int argc, char *argv[])
> ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
> size_MB);
> break;
> + case KSM_COW_TIME:
> + ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
> + page_size);
> + break;
> }
>
> if (ksm_restore(&ksm_sysfs_old)) {
> --
> 2.25.1
>