If the MBM or CMT test fails while Intel Sub-NUMA Clustering (SNC)
is enabled, print a possible cause of the failure.
When the SNC feature is enabled,
the CMT and MBM counters may not be accurate.
Signed-off-by: Shaopeng Tan <[email protected]>
---
Hello,
According to the Intel RDT reference manual,
when the Sub-NUMA Clustering (SNC) feature is enabled,
the CMT and MBM counters may not be accurate.
When running the CMT and MBM tests on a 2nd Generation
Intel Xeon Scalable Processor, the result may be "not ok".
In this version, if the MBM or CMT test fails while SNC is enabled,
a possible cause of the failure is printed,
instead of skipping these tests as v1 did.
Thanks,
tools/testing/selftests/resctrl/Makefile | 1 +
tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
tools/testing/selftests/resctrl/resctrl.h | 2 ++
.../testing/selftests/resctrl/resctrl_tests.c | 36 +++++++++++++++++++
tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
6 files changed, 73 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index adfd92145e12..6d257f40e6ac 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,6 +1,7 @@
#SPDX-License-Identifier: GPL-2.0
CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
+LDLIBS += -lnuma
TEST_GEN_PROGS := resctrl_tests
EXTRA_SOURCES := $(wildcard *.c)
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 8968e36db99d..c5a49444c5a0 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
return ret;
ret = check_results(¶m, n);
- if (ret)
+ if (ret) {
+ if (sub_numa_cluster_enable)
+ ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the CMT counters may not be accurate.\n");
return ret;
+ }
cmt_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 8392e5c55ed0..7dc1bdf2d0b8 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
return ret;
ret = check_results(span);
- if (ret)
+ if (ret) {
+ if (sub_numa_cluster_enable)
+ ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the MBM counters may not be accurate.\n");
return ret;
+ }
mbm_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 1ad10c47e31d..4b8ad4fbd016 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
extern char llc_occup_path[1024];
extern bool is_amd;
+extern bool sub_numa_cluster_enable;
bool check_resctrlfs_support(void);
int filter_dmesg(void);
@@ -85,6 +86,7 @@ int umount_resctrlfs(void);
int validate_bw_report_request(char *bw_report);
bool validate_resctrl_feature_request(const char *resctrl_val);
char *fgrep(FILE *inf, const char *str);
+char *fgrep_last_match_line(FILE *inf, const char *str);
int taskset_benchmark(pid_t bm_pid, int cpu_no);
void run_benchmark(int signum, siginfo_t *info, void *ucontext);
int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 3be0895c492b..bbab4a7f37ed 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -8,12 +8,15 @@
* Sai Praneeth Prakhya <[email protected]>,
* Fenghua Yu <[email protected]>
*/
+#include <numa.h>
+#include <string.h>
#include "resctrl.h"
#define BENCHMARK_ARGS 64
#define BENCHMARK_ARG_SIZE 64
bool is_amd;
+bool sub_numa_cluster_enable;
void detect_amd(void)
{
@@ -34,6 +37,35 @@ void detect_amd(void)
fclose(inf);
}
+void check_sub_numa_cluster(void)
+{
+ FILE *inf = fopen("/proc/cpuinfo", "r");
+ char *res, *s;
+ int socket_num = 0;
+ int numa_nodes = 0;
+
+ if (!inf)
+ return;
+
+ res = fgrep_last_match_line(inf, "physical id");
+
+ if (res) {
+ s = strpbrk(res, "1234567890");
+ socket_num = atoi(s) + 1;
+ free(res);
+ }
+ fclose(inf);
+
+ numa_nodes = numa_max_node() + 1;
+
+ /*
+ * when the Sub-NUMA Clustering(SNC) feature is enabled,
+ * the number of numa nodes is twice the number of sockets.
+ */
+ if (numa_nodes == (2 * socket_num))
+ sub_numa_cluster_enable = true;
+}
+
static void cmd_help(void)
{
printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
@@ -210,6 +242,10 @@ int main(int argc, char **argv)
/* Detect AMD vendor */
detect_amd();
+ /* check whether sub numa clustering is enabled or not */
+ if (!is_amd)
+ check_sub_numa_cluster();
+
if (has_ben) {
/* Extract benchmark command from command line. */
for (i = ben_ind; i < argc; i++) {
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 5f5a166ade60..1908ecb14b70 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -605,6 +605,32 @@ char *fgrep(FILE *inf, const char *str)
return NULL;
}
+/*
+ * Find the last matched line.
+ * Return a pointer to the string of the matched line,
+ * else return NULL if no matched line
+ */
+char *fgrep_last_match_line(FILE *inf, const char *str)
+{
+ char line[256];
+ char result_line[256];
+ int slen = strlen(str);
+
+ while (!feof(inf)) {
+ if (!fgets(line, 256, inf))
+ break;
+ if (strncmp(line, str, slen))
+ continue;
+
+ strcpy(result_line, line);
+ }
+
+ if (strlen(result_line) >= slen)
+ return strdup(result_line);
+
+ return NULL;
+}
+
/*
* validate_resctrl_feature_request - Check if requested feature is valid.
* @resctrl_val: Requested feature
--
2.27.0
Hi,
Friendly ping for a review.
>
> If the result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
> print a possible causes of failure.
> Since when the Intel Sub-NUMA Clustering(SNC) feature is enabled, the CMT
> and MBM counters may not be accurate.
>
> Signed-off-by: Shaopeng Tan <[email protected]>
> ---
> Hello,
>
> According to the Intel RDT reference Manual, when the sub-numa clustering
> feature is enabled, the CMT and MBM counters may not be accurate.
> When running CMT tests and MBM tests on 2nd Generation Intel Xeon
> Scalable Processor, the result may be "not ok".
> If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled, fix it to
> print a possible cause of failure, instead of SKIP these tests in v1.
>
> Thanks,
>
> tools/testing/selftests/resctrl/Makefile | 1 +
> tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
> tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
> tools/testing/selftests/resctrl/resctrl.h | 2 ++
> .../testing/selftests/resctrl/resctrl_tests.c | 36
> +++++++++++++++++++
> tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
> 6 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/resctrl/Makefile
> b/tools/testing/selftests/resctrl/Makefile
> index adfd92145e12..6d257f40e6ac 100644
> --- a/tools/testing/selftests/resctrl/Makefile
> +++ b/tools/testing/selftests/resctrl/Makefile
> @@ -1,6 +1,7 @@
> #SPDX-License-Identifier: GPL-2.0
>
> CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
> +LDLIBS += -lnuma
>
> TEST_GEN_PROGS := resctrl_tests
> EXTRA_SOURCES := $(wildcard *.c)
> diff --git a/tools/testing/selftests/resctrl/cmt_test.c
> b/tools/testing/selftests/resctrl/cmt_test.c
> index 8968e36db99d..c5a49444c5a0 100644
> --- a/tools/testing/selftests/resctrl/cmt_test.c
> +++ b/tools/testing/selftests/resctrl/cmt_test.c
> @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char
> **benchmark_cmd)
> return ret;
>
> ret = check_results(¶m, n);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> is enabled, the CMT
> +counters may not be accurate.\n");
> return ret;
> + }
>
> cmt_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/mbm_test.c
> b/tools/testing/selftests/resctrl/mbm_test.c
> index 8392e5c55ed0..7dc1bdf2d0b8 100644
> --- a/tools/testing/selftests/resctrl/mbm_test.c
> +++ b/tools/testing/selftests/resctrl/mbm_test.c
> @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char
> *bw_report, char **benchmark_cmd)
> return ret;
>
> ret = check_results(span);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> is enabled, the MBM
> +counters may not be accurate.\n");
> return ret;
> + }
>
> mbm_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/resctrl.h
> b/tools/testing/selftests/resctrl/resctrl.h
> index 1ad10c47e31d..4b8ad4fbd016 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
>
> extern char llc_occup_path[1024];
> extern bool is_amd;
> +extern bool sub_numa_cluster_enable;
>
> bool check_resctrlfs_support(void);
> int filter_dmesg(void);
> @@ -85,6 +86,7 @@ int umount_resctrlfs(void); int
> validate_bw_report_request(char *bw_report); bool
> validate_resctrl_feature_request(const char *resctrl_val); char *fgrep(FILE
> *inf, const char *str);
> +char *fgrep_last_match_line(FILE *inf, const char *str);
> int taskset_benchmark(pid_t bm_pid, int cpu_no); void run_benchmark(int
> signum, siginfo_t *info, void *ucontext); int write_schemata(char *ctrlgrp,
> char *schemata, int cpu_no, diff --git
> a/tools/testing/selftests/resctrl/resctrl_tests.c
> b/tools/testing/selftests/resctrl/resctrl_tests.c
> index 3be0895c492b..bbab4a7f37ed 100644
> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> @@ -8,12 +8,15 @@
> * Sai Praneeth Prakhya <[email protected]>,
> * Fenghua Yu <[email protected]>
> */
> +#include <numa.h>
> +#include <string.h>
> #include "resctrl.h"
>
> #define BENCHMARK_ARGS 64
> #define BENCHMARK_ARG_SIZE 64
>
> bool is_amd;
> +bool sub_numa_cluster_enable;
>
> void detect_amd(void)
> {
> @@ -34,6 +37,35 @@ void detect_amd(void)
> fclose(inf);
> }
>
> +void check_sub_numa_cluster(void)
> +{
> + FILE *inf = fopen("/proc/cpuinfo", "r");
> + char *res, *s;
> + int socket_num = 0;
> + int numa_nodes = 0;
> +
> + if (!inf)
> + return;
> +
> + res = fgrep_last_match_line(inf, "physical id");
> +
> + if (res) {
> + s = strpbrk(res, "1234567890");
> + socket_num = atoi(s) + 1;
> + free(res);
> + }
> + fclose(inf);
> +
> + numa_nodes = numa_max_node() + 1;
> +
> + /*
> + * when the Sub-NUMA Clustering(SNC) feature is enabled,
> + * the number of numa nodes is twice the number of sockets.
> + */
> + if (numa_nodes == (2 * socket_num))
> + sub_numa_cluster_enable = true;
> +}
> +
> static void cmd_help(void)
> {
> printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"]
> [-t test list] [-n no_of_bits]\n"); @@ -210,6 +242,10 @@ int main(int argc, char
> **argv)
> /* Detect AMD vendor */
> detect_amd();
>
> + /* check whether sub numa clustering is enable or not */
> + if (!is_amd)
> + check_sub_numa_cluster();
> +
> if (has_ben) {
> /* Extract benchmark command from command line. */
> for (i = ben_ind; i < argc; i++) {
> diff --git a/tools/testing/selftests/resctrl/resctrlfs.c
> b/tools/testing/selftests/resctrl/resctrlfs.c
> index 5f5a166ade60..1908ecb14b70 100644
> --- a/tools/testing/selftests/resctrl/resctrlfs.c
> +++ b/tools/testing/selftests/resctrl/resctrlfs.c
> @@ -605,6 +605,32 @@ char *fgrep(FILE *inf, const char *str)
> return NULL;
> }
>
> +/*
> + * Find the last matched line.
> + * Return a pointer to the string of the matched line,
> + * else retuen NULL if no matched line
> + */
> +char *fgrep_last_match_line(FILE *inf, const char *str) {
> + char line[256];
> + char result_line[256];
> + int slen = strlen(str);
> +
> + while (!feof(inf)) {
> + if (!fgets(line, 256, inf))
> + break;
> + if (strncmp(line, str, slen))
> + continue;
> +
> + strcpy(result_line, line);
> + }
> +
> + if (strlen(result_line) >= slen)
> + return strdup(result_line);
> +
> + return NULL;
> +}
> +
> /*
> * validate_resctrl_feature_request - Check if requested feature is valid.
> * @resctrl_val: Requested feature
> --
> 2.27.0
Hi Shaopeng Tan,
On 12/13/2021 2:03 AM, Shaopeng Tan wrote:
> If the result of MBM&CMT tests is failed when Intel
> Sub-NUMA is enabled, print a possible causes of failure.
> Since when the Intel Sub-NUMA Clustering(SNC) feature is enabled,
> the CMT and MBM counters may not be accurate.
>
> Signed-off-by: Shaopeng Tan <[email protected]>
> ---
> Hello,
>
> According to the Intel RDT reference Manual,
> when the sub-numa clustering feature is enabled,
> the CMT and MBM counters may not be accurate.
> When running CMT tests and MBM tests on 2nd Generation
> Intel Xeon Scalable Processor, the result may be "not ok".
> If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
> fix it to print a possible cause of failure,
> instead of SKIP these tests in v1.
>
> Thanks,
>
> tools/testing/selftests/resctrl/Makefile | 1 +
> tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
> tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
> tools/testing/selftests/resctrl/resctrl.h | 2 ++
> .../testing/selftests/resctrl/resctrl_tests.c | 36 +++++++++++++++++++
> tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
> 6 files changed, 73 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
> index adfd92145e12..6d257f40e6ac 100644
> --- a/tools/testing/selftests/resctrl/Makefile
> +++ b/tools/testing/selftests/resctrl/Makefile
> @@ -1,6 +1,7 @@
> #SPDX-License-Identifier: GPL-2.0
>
> CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
> +LDLIBS += -lnuma
>
> TEST_GEN_PROGS := resctrl_tests
> EXTRA_SOURCES := $(wildcard *.c)
> diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
> index 8968e36db99d..c5a49444c5a0 100644
> --- a/tools/testing/selftests/resctrl/cmt_test.c
> +++ b/tools/testing/selftests/resctrl/cmt_test.c
> @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
> return ret;
>
> ret = check_results(¶m, n);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the CMT counters may not be accurate.\n");
> return ret;
> + }
>
> cmt_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
> index 8392e5c55ed0..7dc1bdf2d0b8 100644
> --- a/tools/testing/selftests/resctrl/mbm_test.c
> +++ b/tools/testing/selftests/resctrl/mbm_test.c
> @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
> return ret;
>
> ret = check_results(span);
> - if (ret)
> + if (ret) {
> + if (sub_numa_cluster_enable)
> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature is enabled, the MBM counters may not be accurate.\n");
> return ret;
> + }
>
> mbm_test_cleanup();
>
> diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
> index 1ad10c47e31d..4b8ad4fbd016 100644
> --- a/tools/testing/selftests/resctrl/resctrl.h
> +++ b/tools/testing/selftests/resctrl/resctrl.h
> @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
>
> extern char llc_occup_path[1024];
> extern bool is_amd;
> +extern bool sub_numa_cluster_enable;
>
> bool check_resctrlfs_support(void);
> int filter_dmesg(void);
> @@ -85,6 +86,7 @@ int umount_resctrlfs(void);
> int validate_bw_report_request(char *bw_report);
> bool validate_resctrl_feature_request(const char *resctrl_val);
> char *fgrep(FILE *inf, const char *str);
> +char *fgrep_last_match_line(FILE *inf, const char *str);
> int taskset_benchmark(pid_t bm_pid, int cpu_no);
> void run_benchmark(int signum, siginfo_t *info, void *ucontext);
> int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
> diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
> index 3be0895c492b..bbab4a7f37ed 100644
> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> @@ -8,12 +8,15 @@
> * Sai Praneeth Prakhya <[email protected]>,
> * Fenghua Yu <[email protected]>
> */
> +#include <numa.h>
> +#include <string.h>
> #include "resctrl.h"
>
> #define BENCHMARK_ARGS 64
> #define BENCHMARK_ARG_SIZE 64
>
> bool is_amd;
> +bool sub_numa_cluster_enable;
>
> void detect_amd(void)
> {
> @@ -34,6 +37,35 @@ void detect_amd(void)
> fclose(inf);
> }
>
> +void check_sub_numa_cluster(void)
> +{
> + FILE *inf = fopen("/proc/cpuinfo", "r");
> + char *res, *s;
> + int socket_num = 0;
> + int numa_nodes = 0;
> +
> + if (!inf)
> + return;
> +
> + res = fgrep_last_match_line(inf, "physical id");
> +
> + if (res) {
> + s = strpbrk(res, "1234567890");
> + socket_num = atoi(s) + 1;
> + free(res);
> + }
> + fclose(inf);
> +
> + numa_nodes = numa_max_node() + 1;
> +
> + /*
> + * when the Sub-NUMA Clustering(SNC) feature is enabled,
> + * the number of numa nodes is twice the number of sockets.
> + */
> + if (numa_nodes == (2 * socket_num))
> + sub_numa_cluster_enable = true;
> +}
Unfortunately there does not seem to be an architectural way to detect if
SNC has been enabled and the above test is fragile wrt the assumptions
about the topology of the system. What we need is a reliable and
future-proof test but I do not know what that should be.
Reinette
Hi Reinette,
> On 12/13/2021 2:03 AM, Shaopeng Tan wrote:
> > If the result of MBM&CMT tests is failed when Intel Sub-NUMA is
> > enabled, print a possible causes of failure.
> > Since when the Intel Sub-NUMA Clustering(SNC) feature is enabled, the
> > CMT and MBM counters may not be accurate.
> >
> > Signed-off-by: Shaopeng Tan <[email protected]>
> > ---
> > Hello,
> >
> > According to the Intel RDT reference Manual, when the sub-numa
> > clustering feature is enabled, the CMT and MBM counters may not be
> > accurate.
> > When running CMT tests and MBM tests on 2nd Generation Intel Xeon
> > Scalable Processor, the result may be "not ok".
> > If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
> > fix it to print a possible cause of failure, instead of SKIP these
> > tests in v1.
> >
> > Thanks,
> >
> > tools/testing/selftests/resctrl/Makefile | 1 +
> > tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
> > tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
> > tools/testing/selftests/resctrl/resctrl.h | 2 ++
> > .../testing/selftests/resctrl/resctrl_tests.c | 36
> +++++++++++++++++++
> > tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
> > 6 files changed, 73 insertions(+), 2 deletions(-)
> >
> > diff --git a/tools/testing/selftests/resctrl/Makefile
> > b/tools/testing/selftests/resctrl/Makefile
> > index adfd92145e12..6d257f40e6ac 100644
> > --- a/tools/testing/selftests/resctrl/Makefile
> > +++ b/tools/testing/selftests/resctrl/Makefile
> > @@ -1,6 +1,7 @@
> > #SPDX-License-Identifier: GPL-2.0
> >
> > CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
> > +LDLIBS += -lnuma
> >
> > TEST_GEN_PROGS := resctrl_tests
> > EXTRA_SOURCES := $(wildcard *.c)
> > diff --git a/tools/testing/selftests/resctrl/cmt_test.c
> > b/tools/testing/selftests/resctrl/cmt_test.c
> > index 8968e36db99d..c5a49444c5a0 100644
> > --- a/tools/testing/selftests/resctrl/cmt_test.c
> > +++ b/tools/testing/selftests/resctrl/cmt_test.c
> > @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char
> **benchmark_cmd)
> > return ret;
> >
> > ret = check_results(¶m, n);
> > - if (ret)
> > + if (ret) {
> > + if (sub_numa_cluster_enable)
> > + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> is enabled, the
> > +CMT counters may not be accurate.\n");
> > return ret;
> > + }
> >
> > cmt_test_cleanup();
> >
> > diff --git a/tools/testing/selftests/resctrl/mbm_test.c
> > b/tools/testing/selftests/resctrl/mbm_test.c
> > index 8392e5c55ed0..7dc1bdf2d0b8 100644
> > --- a/tools/testing/selftests/resctrl/mbm_test.c
> > +++ b/tools/testing/selftests/resctrl/mbm_test.c
> > @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char
> *bw_report, char **benchmark_cmd)
> > return ret;
> >
> > ret = check_results(span);
> > - if (ret)
> > + if (ret) {
> > + if (sub_numa_cluster_enable)
> > + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> is enabled, the
> > +MBM counters may not be accurate.\n");
> > return ret;
> > + }
> >
> > mbm_test_cleanup();
> >
> > diff --git a/tools/testing/selftests/resctrl/resctrl.h
> > b/tools/testing/selftests/resctrl/resctrl.h
> > index 1ad10c47e31d..4b8ad4fbd016 100644
> > --- a/tools/testing/selftests/resctrl/resctrl.h
> > +++ b/tools/testing/selftests/resctrl/resctrl.h
> > @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
> >
> > extern char llc_occup_path[1024];
> > extern bool is_amd;
> > +extern bool sub_numa_cluster_enable;
> >
> > bool check_resctrlfs_support(void);
> > int filter_dmesg(void);
> > @@ -85,6 +86,7 @@ int umount_resctrlfs(void); int
> > validate_bw_report_request(char *bw_report); bool
> > validate_resctrl_feature_request(const char *resctrl_val); char
> > *fgrep(FILE *inf, const char *str);
> > +char *fgrep_last_match_line(FILE *inf, const char *str);
> > int taskset_benchmark(pid_t bm_pid, int cpu_no); void
> > run_benchmark(int signum, siginfo_t *info, void *ucontext); int
> > write_schemata(char *ctrlgrp, char *schemata, int cpu_no, diff --git
> > a/tools/testing/selftests/resctrl/resctrl_tests.c
> > b/tools/testing/selftests/resctrl/resctrl_tests.c
> > index 3be0895c492b..bbab4a7f37ed 100644
> > --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> > +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> > @@ -8,12 +8,15 @@
> > * Sai Praneeth Prakhya <[email protected]>,
> > * Fenghua Yu <[email protected]>
> > */
> > +#include <numa.h>
> > +#include <string.h>
> > #include "resctrl.h"
> >
> > #define BENCHMARK_ARGS 64
> > #define BENCHMARK_ARG_SIZE 64
> >
> > bool is_amd;
> > +bool sub_numa_cluster_enable;
> >
> > void detect_amd(void)
> > {
> > @@ -34,6 +37,35 @@ void detect_amd(void)
> > fclose(inf);
> > }
> >
> > +void check_sub_numa_cluster(void)
> > +{
> > + FILE *inf = fopen("/proc/cpuinfo", "r");
> > + char *res, *s;
> > + int socket_num = 0;
> > + int numa_nodes = 0;
> > +
> > + if (!inf)
> > + return;
> > +
> > + res = fgrep_last_match_line(inf, "physical id");
> > +
> > + if (res) {
> > + s = strpbrk(res, "1234567890");
> > + socket_num = atoi(s) + 1;
> > + free(res);
> > + }
> > + fclose(inf);
> > +
> > + numa_nodes = numa_max_node() + 1;
> > +
> > + /*
> > + * when the Sub-NUMA Clustering(SNC) feature is enabled,
> > + * the number of numa nodes is twice the number of sockets.
> > + */
> > + if (numa_nodes == (2 * socket_num))
> > + sub_numa_cluster_enable = true;
> > +}
>
>
> Unfortunately there does not seem to be an architectural way to detect if SNC
> has been enabled and the above test is fragile wrt the assumptions about the
> topology of the system. What we need is a reliable and future-proof test but I do
> not know what that should be.
I understand your concerns.
At least I know that SNC affects the results on 2nd Generation Intel Xeon processors.
So, how about printing the information only when the test is running on a 2nd Generation Intel Xeon?
That is, when the result of the MBM or CMT test is "not ok"
and the CPU model is a 2nd Generation Intel Xeon,
print a message about the possible cause of the failure (SNC may be enabled).
What do you think of this idea?
Or do you think we should drop this patch altogether?
Best regards,
Tan Shaopeng
Hi Tan Shaopeng,
On 1/21/2022 12:00 AM, [email protected] wrote:
> Hi Reinette,
>
>> On 12/13/2021 2:03 AM, Shaopeng Tan wrote:
>>> If the result of MBM&CMT tests is failed when Intel Sub-NUMA is
>>> enabled, print a possible causes of failure.
>>> Since when the Intel Sub-NUMA Clustering(SNC) feature is enabled, the
>>> CMT and MBM counters may not be accurate.
>>>
>>> Signed-off-by: Shaopeng Tan <[email protected]>
>>> ---
>>> Hello,
>>>
>>> According to the Intel RDT reference Manual, when the sub-numa
>>> clustering feature is enabled, the CMT and MBM counters may not be
>>> accurate.
>>> When running CMT tests and MBM tests on 2nd Generation Intel Xeon
>>> Scalable Processor, the result may be "not ok".
>>> If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
>>> fix it to print a possible cause of failure, instead of SKIP these
>>> tests in v1.
>>>
>>> Thanks,
>>>
>>> tools/testing/selftests/resctrl/Makefile | 1 +
>>> tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
>>> tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
>>> tools/testing/selftests/resctrl/resctrl.h | 2 ++
>>> .../testing/selftests/resctrl/resctrl_tests.c | 36
>> +++++++++++++++++++
>>> tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
>>> 6 files changed, 73 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/testing/selftests/resctrl/Makefile
>>> b/tools/testing/selftests/resctrl/Makefile
>>> index adfd92145e12..6d257f40e6ac 100644
>>> --- a/tools/testing/selftests/resctrl/Makefile
>>> +++ b/tools/testing/selftests/resctrl/Makefile
>>> @@ -1,6 +1,7 @@
>>> #SPDX-License-Identifier: GPL-2.0
>>>
>>> CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
>>> +LDLIBS += -lnuma
>>>
>>> TEST_GEN_PROGS := resctrl_tests
>>> EXTRA_SOURCES := $(wildcard *.c)
>>> diff --git a/tools/testing/selftests/resctrl/cmt_test.c
>>> b/tools/testing/selftests/resctrl/cmt_test.c
>>> index 8968e36db99d..c5a49444c5a0 100644
>>> --- a/tools/testing/selftests/resctrl/cmt_test.c
>>> +++ b/tools/testing/selftests/resctrl/cmt_test.c
>>> @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char
>> **benchmark_cmd)
>>> return ret;
>>>
>>> ret = check_results(¶m, n);
>>> - if (ret)
>>> + if (ret) {
>>> + if (sub_numa_cluster_enable)
>>> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
>> is enabled, the
>>> +CMT counters may not be accurate.\n");
>>> return ret;
>>> + }
>>>
>>> cmt_test_cleanup();
>>>
>>> diff --git a/tools/testing/selftests/resctrl/mbm_test.c
>>> b/tools/testing/selftests/resctrl/mbm_test.c
>>> index 8392e5c55ed0..7dc1bdf2d0b8 100644
>>> --- a/tools/testing/selftests/resctrl/mbm_test.c
>>> +++ b/tools/testing/selftests/resctrl/mbm_test.c
>>> @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char
>> *bw_report, char **benchmark_cmd)
>>> return ret;
>>>
>>> ret = check_results(span);
>>> - if (ret)
>>> + if (ret) {
>>> + if (sub_numa_cluster_enable)
>>> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
>> is enabled, the
>>> +MBM counters may not be accurate.\n");
>>> return ret;
>>> + }
>>>
>>> mbm_test_cleanup();
>>>
>>> diff --git a/tools/testing/selftests/resctrl/resctrl.h
>>> b/tools/testing/selftests/resctrl/resctrl.h
>>> index 1ad10c47e31d..4b8ad4fbd016 100644
>>> --- a/tools/testing/selftests/resctrl/resctrl.h
>>> +++ b/tools/testing/selftests/resctrl/resctrl.h
>>> @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
>>>
>>> extern char llc_occup_path[1024];
>>> extern bool is_amd;
>>> +extern bool sub_numa_cluster_enable;
>>>
>>> bool check_resctrlfs_support(void);
>>> int filter_dmesg(void);
>>> @@ -85,6 +86,7 @@ int umount_resctrlfs(void); int
>>> validate_bw_report_request(char *bw_report); bool
>>> validate_resctrl_feature_request(const char *resctrl_val); char
>>> *fgrep(FILE *inf, const char *str);
>>> +char *fgrep_last_match_line(FILE *inf, const char *str);
>>> int taskset_benchmark(pid_t bm_pid, int cpu_no); void
>>> run_benchmark(int signum, siginfo_t *info, void *ucontext); int
>>> write_schemata(char *ctrlgrp, char *schemata, int cpu_no, diff --git
>>> a/tools/testing/selftests/resctrl/resctrl_tests.c
>>> b/tools/testing/selftests/resctrl/resctrl_tests.c
>>> index 3be0895c492b..bbab4a7f37ed 100644
>>> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
>>> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
>>> @@ -8,12 +8,15 @@
>>> * Sai Praneeth Prakhya <[email protected]>,
>>> * Fenghua Yu <[email protected]>
>>> */
>>> +#include <numa.h>
>>> +#include <string.h>
>>> #include "resctrl.h"
>>>
>>> #define BENCHMARK_ARGS 64
>>> #define BENCHMARK_ARG_SIZE 64
>>>
>>> bool is_amd;
>>> +bool sub_numa_cluster_enable;
>>>
>>> void detect_amd(void)
>>> {
>>> @@ -34,6 +37,35 @@ void detect_amd(void)
>>> fclose(inf);
>>> }
>>>
>>> +void check_sub_numa_cluster(void)
>>> +{
>>> + FILE *inf = fopen("/proc/cpuinfo", "r");
>>> + char *res, *s;
>>> + int socket_num = 0;
>>> + int numa_nodes = 0;
>>> +
>>> + if (!inf)
>>> + return;
>>> +
>>> + res = fgrep_last_match_line(inf, "physical id");
>>> +
>>> + if (res) {
>>> + s = strpbrk(res, "1234567890");
>>> + socket_num = atoi(s) + 1;
>>> + free(res);
>>> + }
>>> + fclose(inf);
>>> +
>>> + numa_nodes = numa_max_node() + 1;
>>> +
>>> + /*
>>> + * when the Sub-NUMA Clustering(SNC) feature is enabled,
>>> + * the number of numa nodes is twice the number of sockets.
>>> + */
>>> + if (numa_nodes == (2 * socket_num))
>>> + sub_numa_cluster_enable = true;
>>> +}
>>
>>
>> Unfortunately there does not seem to be an architectural way to detect if SNC
>> has been enabled and the above test is fragile wrt the assumptions about the
>> topology of the system. What we need is a reliable and future-proof test but I do
>> not know what that should be.
>
> I understand your concerns.
> At least I know SNC affects on the 2nd Generation Intel Xeon.
> So, how about just printing information when the test is running on 2nd Generation Intel Xeon?
> That is, when the result of MBM&CMT test is "not ok"
> and if running cpu model is 2nd Generation Intel Xeon,
> then print information about the possibility of failure (SNC may be enabled).
> How about this idea?
I think that making this model specific would be hard to get right for all systems
and hard to maintain. Perhaps we could just print a generic message on failure? Something
like: "Intel CMT and MBM counters may be inaccurate when Sub-NUMA Clustering (SNC) is
enabled. Ensure SNC is disabled in the BIOS if this system supports SNC." I'd be the first
to admit that this is not ideal and would appreciate suggestions for improvement. Unfortunately
we seem to lack a reliable and future proof way to detect if SNC is enabled but I do look
forward to being corrected.
Reinette
> Or, do you think we should drop this patch at all?
>
> Best regards,
> Tan Shaopeng
Hi Reinette,
> On 1/21/2022 12:00 AM, [email protected] wrote:
> > Hi Reinette,
> >
> >> On 12/13/2021 2:03 AM, Shaopeng Tan wrote:
> >>> If the result of MBM&CMT tests is failed when Intel Sub-NUMA is
> >>> enabled, print a possible causes of failure.
> >>> Since when the Intel Sub-NUMA Clustering(SNC) feature is enabled,
> >>> the CMT and MBM counters may not be accurate.
> >>>
> >>> Signed-off-by: Shaopeng Tan <[email protected]>
> >>> ---
> >>> Hello,
> >>>
> >>> According to the Intel RDT reference Manual, when the sub-numa
> >>> clustering feature is enabled, the CMT and MBM counters may not be
> >>> accurate.
> >>> When running CMT tests and MBM tests on 2nd Generation Intel Xeon
> >>> Scalable Processor, the result may be "not ok".
> >>> If result of MBM&CMT tests is failed when Intel Sub-NUMA is enabled,
> >>> fix it to print a possible cause of failure, instead of SKIP these
> >>> tests in v1.
> >>>
> >>> Thanks,
> >>>
> >>> tools/testing/selftests/resctrl/Makefile | 1 +
> >>> tools/testing/selftests/resctrl/cmt_test.c | 5 ++-
> >>> tools/testing/selftests/resctrl/mbm_test.c | 5 ++-
> >>> tools/testing/selftests/resctrl/resctrl.h | 2 ++
> >>> .../testing/selftests/resctrl/resctrl_tests.c | 36
> >> +++++++++++++++++++
> >>> tools/testing/selftests/resctrl/resctrlfs.c | 26 ++++++++++++++
> >>> 6 files changed, 73 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/tools/testing/selftests/resctrl/Makefile
> >>> b/tools/testing/selftests/resctrl/Makefile
> >>> index adfd92145e12..6d257f40e6ac 100644
> >>> --- a/tools/testing/selftests/resctrl/Makefile
> >>> +++ b/tools/testing/selftests/resctrl/Makefile
> >>> @@ -1,6 +1,7 @@
> >>> #SPDX-License-Identifier: GPL-2.0
> >>>
> >>> CFLAGS += -g -Wall -O2 -D_FORTIFY_SOURCE=2
> >>> +LDLIBS += -lnuma
> >>>
> >>> TEST_GEN_PROGS := resctrl_tests
> >>> EXTRA_SOURCES := $(wildcard *.c)
> >>> diff --git a/tools/testing/selftests/resctrl/cmt_test.c
> >>> b/tools/testing/selftests/resctrl/cmt_test.c
> >>> index 8968e36db99d..c5a49444c5a0 100644
> >>> --- a/tools/testing/selftests/resctrl/cmt_test.c
> >>> +++ b/tools/testing/selftests/resctrl/cmt_test.c
> >>> @@ -136,8 +136,11 @@ int cmt_resctrl_val(int cpu_no, int n, char
> >> **benchmark_cmd)
> >>> return ret;
> >>>
> >>> ret = check_results(&param, n);
> >>> - if (ret)
> >>> + if (ret) {
> >>> + if (sub_numa_cluster_enable)
> >>> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> >> is enabled, the
> >>> +CMT counters may not be accurate.\n");
> >>> return ret;
> >>> + }
> >>>
> >>> cmt_test_cleanup();
> >>>
> >>> diff --git a/tools/testing/selftests/resctrl/mbm_test.c
> >>> b/tools/testing/selftests/resctrl/mbm_test.c
> >>> index 8392e5c55ed0..7dc1bdf2d0b8 100644
> >>> --- a/tools/testing/selftests/resctrl/mbm_test.c
> >>> +++ b/tools/testing/selftests/resctrl/mbm_test.c
> >>> @@ -136,8 +136,11 @@ int mbm_bw_change(int span, int cpu_no, char
> >> *bw_report, char **benchmark_cmd)
> >>> return ret;
> >>>
> >>> ret = check_results(span);
> >>> - if (ret)
> >>> + if (ret) {
> >>> + if (sub_numa_cluster_enable)
> >>> + ksft_print_msg("Sub-NUMA Clustering(SNC) feature
> >> is enabled, the
> >>> +MBM counters may not be accurate.\n");
> >>> return ret;
> >>> + }
> >>>
> >>> mbm_test_cleanup();
> >>>
> >>> diff --git a/tools/testing/selftests/resctrl/resctrl.h
> >>> b/tools/testing/selftests/resctrl/resctrl.h
> >>> index 1ad10c47e31d..4b8ad4fbd016 100644
> >>> --- a/tools/testing/selftests/resctrl/resctrl.h
> >>> +++ b/tools/testing/selftests/resctrl/resctrl.h
> >>> @@ -76,6 +76,7 @@ extern pid_t bm_pid, ppid;
> >>>
> >>> extern char llc_occup_path[1024];
> >>> extern bool is_amd;
> >>> +extern bool sub_numa_cluster_enable;
> >>>
> >>> bool check_resctrlfs_support(void); int filter_dmesg(void); @@
> >>> -85,6 +86,7 @@ int umount_resctrlfs(void); int
> >>> validate_bw_report_request(char *bw_report); bool
> >>> validate_resctrl_feature_request(const char *resctrl_val); char
> >>> *fgrep(FILE *inf, const char *str);
> >>> +char *fgrep_last_match_line(FILE *inf, const char *str);
> >>> int taskset_benchmark(pid_t bm_pid, int cpu_no); void
> >>> run_benchmark(int signum, siginfo_t *info, void *ucontext); int
> >>> write_schemata(char *ctrlgrp, char *schemata, int cpu_no, diff --git
> >>> a/tools/testing/selftests/resctrl/resctrl_tests.c
> >>> b/tools/testing/selftests/resctrl/resctrl_tests.c
> >>> index 3be0895c492b..bbab4a7f37ed 100644
> >>> --- a/tools/testing/selftests/resctrl/resctrl_tests.c
> >>> +++ b/tools/testing/selftests/resctrl/resctrl_tests.c
> >>> @@ -8,12 +8,15 @@
> >>> * Sai Praneeth Prakhya <[email protected]>,
> >>> * Fenghua Yu <[email protected]>
> >>> */
> >>> +#include <numa.h>
> >>> +#include <string.h>
> >>> #include "resctrl.h"
> >>>
> >>> #define BENCHMARK_ARGS 64
> >>> #define BENCHMARK_ARG_SIZE 64
> >>>
> >>> bool is_amd;
> >>> +bool sub_numa_cluster_enable;
> >>>
> >>> void detect_amd(void)
> >>> {
> >>> @@ -34,6 +37,35 @@ void detect_amd(void)
> >>> fclose(inf);
> >>> }
> >>>
> >>> +void check_sub_numa_cluster(void)
> >>> +{
> >>> + FILE *inf = fopen("/proc/cpuinfo", "r");
> >>> + char *res, *s;
> >>> + int socket_num = 0;
> >>> + int numa_nodes = 0;
> >>> +
> >>> + if (!inf)
> >>> + return;
> >>> +
> >>> + res = fgrep_last_match_line(inf, "physical id");
> >>> +
> >>> + if (res) {
> >>> + s = strpbrk(res, "1234567890");
> >>> + socket_num = atoi(s) + 1;
> >>> + free(res);
> >>> + }
> >>> + fclose(inf);
> >>> +
> >>> + numa_nodes = numa_max_node() + 1;
> >>> +
> >>> + /*
> >>> + * when the Sub-NUMA Clustering(SNC) feature is enabled,
> >>> + * the number of numa nodes is twice the number of sockets.
> >>> + */
> >>> + if (numa_nodes == (2 * socket_num))
> >>> + sub_numa_cluster_enable = true;
> >>> +}
> >>
> >>
> >> Unfortunately there does not seem to be an architectural way to
> >> detect if SNC has been enabled and the above test is fragile wrt the
> >> assumptions about the topology of the system. What we need is a
> >> reliable and future-proof test but I do not know what that should be.
> >
> > I understand your concerns.
> > At least I know SNC affects on the 2nd Generation Intel Xeon.
> > So, how about just printing information when the test is running on 2nd
> Generation Intel Xeon?
> > That is, when the result of MBM&CMT test is "not ok"
> > and if running cpu model is 2nd Generation Intel Xeon, then print
> > information about the possibility of failure (SNC may be enabled).
> > How about this idea?
>
> I think that making this model specific would be hard to get right for all systems
> and hard to maintain. Perhaps we could just print a generic message on failure?
> Something
> like: "Intel CMT and MBM counters may be inaccurate when Sub-NUMA
> Clustering (SNC) is enabled. Ensure SNC is disabled in the BIOS if this system
> supports SNC." I'd be the first to admit that this is not ideal and would
> appreciate suggestions for improvement. Unfortunately we seem to lack a
> reliable and future proof way to detect if SNC is enabled but I do look forward to
> being corrected.
I can't think of a better idea, so I am going to use your suggestion.
If the MBM and CMT tests are running on an Intel CPU and the result of a test is "not ok",
the test will print the following message:
"Intel CMT and MBM counters may be inaccurate when Sub-NUMA Clustering (SNC) is enabled.
Ensure SNC is disabled in the BIOS if this system supports SNC."
Best regards,
Tan Shaopeng