2022-02-01 20:49:54

by Mathieu Desnoyers

[permalink] [raw]
Subject: [RFC PATCH 2/2] selftests/rseq: Implement rseq numa node id field selftest

Test the NUMA node id extension rseq field. Compare it against the value
returned by the getcpu(2) system call while pinned on a specific core.

Signed-off-by: Mathieu Desnoyers <[email protected]>
---
tools/testing/selftests/rseq/basic_test.c | 6 +++
tools/testing/selftests/rseq/rseq-abi.h | 51 +++++++++++++++++++++--
tools/testing/selftests/rseq/rseq.c | 37 ++++++++++++++--
tools/testing/selftests/rseq/rseq.h | 40 ++++++++++++++++++
4 files changed, 127 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
index d8efbfb89193..9601db964b72 100644
--- a/tools/testing/selftests/rseq/basic_test.c
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -22,6 +22,8 @@ void test_cpu_pointer(void)
CPU_ZERO(&test_affinity);
for (i = 0; i < CPU_SETSIZE; i++) {
if (CPU_ISSET(i, &affinity)) {
+ int node;
+
CPU_SET(i, &test_affinity);
sched_setaffinity(0, sizeof(test_affinity),
&test_affinity);
@@ -29,6 +31,10 @@ void test_cpu_pointer(void)
assert(rseq_current_cpu() == i);
assert(rseq_current_cpu_raw() == i);
assert(rseq_cpu_start() == i);
+ node = rseq_fallback_current_node();
+ assert(rseq_current_node() == node);
+ assert(rseq_current_node_raw() == node);
+ assert(rseq_node_start() == node);
CPU_CLR(i, &test_affinity);
}
}
diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
index a8c44d9af71f..7aba1cc0990b 100644
--- a/tools/testing/selftests/rseq/rseq-abi.h
+++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -13,9 +13,9 @@
#include <linux/types.h>
#include <asm/byteorder.h>

-enum rseq_abi_cpu_id_state {
- RSEQ_ABI_CPU_ID_UNINITIALIZED = -1,
- RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2,
+enum rseq_abi_id_state {
+ RSEQ_ABI_ID_UNINITIALIZED = -1,
+ RSEQ_ABI_ID_REGISTRATION_FAILED = -2,
};

enum rseq_abi_flags {
@@ -146,6 +146,51 @@ struct rseq_abi {
* this thread.
*/
__u32 flags;
+ __u32 padding1[3];
+
+ /*
+ * This is the end of the original rseq ABI.
+ * This is a valid end of rseq ABI for the purpose of rseq registration
+ * rseq_len.
+ * The original rseq ABI uses "sizeof(struct rseq)" on registration,
+ * thus requiring the padding above.
+ */
+
+ /*
+ * Restartable sequences node_id_start field. Updated by the
+ * kernel. Read by user-space with single-copy atomicity
+ * semantics. This field should only be read by the thread which
+ * registered this data structure. Aligned on 32-bit. Always
+ * contains a value in the range of possible NUMA node IDs, although the
+ * value may not be the actual current NUMA node ID (e.g. if rseq is not
+ * initialized). This NUMA node ID number value should always be compared
+ * against the value of the node_id field before performing a rseq
+ * commit or returning a value read from a data structure indexed using
+ * the node_id_start value.
+ */
+ __u32 node_id_start;
+
+ /*
+ * Restartable sequences node_id field. Updated by the kernel.
+ * Read by user-space with single-copy atomicity semantics. This
+ * field should only be read by the thread which registered this
+ * data structure. Aligned on 32-bit. Values
+ * RSEQ_ABI_ID_UNINITIALIZED and RSEQ_ABI_ID_REGISTRATION_FAILED
+ * have a special semantic: the former means "rseq uninitialized",
+ * and the latter means "rseq initialization failed". This value is
+ * meant to be read within rseq critical sections and compared
+ * with the node_id_start value previously read, before performing
+ * the commit instruction, or read and compared with the
+ * node_id_start value before returning a value loaded from a data
+ * structure indexed using the node_id_start value.
+ */
+ __u32 node_id;
+
+ /*
+ * This is a valid end of rseq ABI for the purpose of rseq registration
+ * rseq_len. Use the offset immediately after the node_id field as
+ * rseq_len.
+ */
} __attribute__((aligned(4 * sizeof(__u64))));

#endif /* _RSEQ_ABI_H */
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 07ba0d463a96..99b5c3b71ef0 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -27,10 +27,20 @@
#include <signal.h>
#include <limits.h>
#include <dlfcn.h>
+#include <stddef.h>

#include "../kselftest.h"
#include "rseq.h"

+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+#ifndef offsetofend
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+#endif
+
static const int *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p;
static const unsigned int *libc_rseq_flags_p;
@@ -49,7 +59,8 @@ static int rseq_ownership;

static
__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = {
- .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+ .cpu_id = RSEQ_ABI_ID_UNINITIALIZED,
+ .node_id = RSEQ_ABI_ID_UNINITIALIZED,
};

static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
@@ -58,6 +69,11 @@ static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
}

+static int sys_getcpu(unsigned *cpu, unsigned *node)
+{
+ return syscall(__NR_getcpu, cpu, node, NULL);
+}
+
int rseq_available(void)
{
int rc;
@@ -83,7 +99,7 @@ int rseq_register_current_thread(void)
/* Treat libc's ownership as a successful registration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG);
+ rc = sys_rseq(&__rseq_abi, offsetofend(struct rseq_abi, node_id), 0, RSEQ_SIG);
if (rc)
return -1;
assert(rseq_current_cpu_raw() >= 0);
@@ -98,7 +114,7 @@ int rseq_unregister_current_thread(void)
/* Treat libc's ownership as a successful unregistration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ rc = sys_rseq(&__rseq_abi, offsetofend(struct rseq_abi, node_id), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
if (rc)
return -1;
return 0;
@@ -121,7 +137,7 @@ void rseq_init(void)
return;
rseq_ownership = 1;
rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
- rseq_size = sizeof(struct rseq_abi);
+ rseq_size = offsetofend(struct rseq_abi, node_id);
rseq_flags = 0;
}

@@ -146,3 +162,16 @@ int32_t rseq_fallback_current_cpu(void)
}
return cpu;
}
+
+int32_t rseq_fallback_current_node(void)
+{
+ uint32_t cpu_id, node_id;
+ int ret;
+
+ ret = sys_getcpu(&cpu_id, &node_id);
+ if (ret) {
+ perror("sys_getcpu()");
+ return ret;
+ }
+ return (int32_t) node_id;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 6bd0ac466b4a..6fccc87f9025 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -115,6 +115,11 @@ int rseq_unregister_current_thread(void);
*/
int32_t rseq_fallback_current_cpu(void);

+/*
+ * Restartable sequence fallback for reading the current NUMA node number.
+ */
+int32_t rseq_fallback_current_node(void);
+
/*
* Values returned can be either the current CPU number, -1 (rseq is
* uninitialized), or -2 (rseq initialization has failed).
@@ -124,6 +129,15 @@ static inline int32_t rseq_current_cpu_raw(void)
return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id);
}

+/*
+ * Values returned can be either the current NUMA node number, -1 (rseq is
+ * uninitialized), or -2 (rseq initialization has failed).
+ */
+static inline int32_t rseq_current_node_raw(void)
+{
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id);
+}
+
/*
* Returns a possible CPU number, which is typically the current CPU.
* The returned CPU number can be used to prepare for an rseq critical
@@ -140,6 +154,22 @@ static inline uint32_t rseq_cpu_start(void)
return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start);
}

+/*
+ * Returns a possible NUMA node number, which is typically the current NUMA
+ * node. The returned NUMA node number can be used to prepare for an rseq
+ * critical section, which will confirm whether the NUMA node number is indeed
+ * the current one, and whether rseq is initialized.
+ *
+ * The NUMA node number returned by rseq_node_start should always be validated
+ * by passing it to a rseq asm sequence, or by comparing it to the return value
+ * of rseq_current_node_raw() if the rseq asm sequence does not need to be
+ * invoked.
+ */
+static inline uint32_t rseq_node_start(void)
+{
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id_start);
+}
+
static inline uint32_t rseq_current_cpu(void)
{
int32_t cpu;
@@ -150,6 +180,16 @@ static inline uint32_t rseq_current_cpu(void)
return cpu;
}

+static inline uint32_t rseq_current_node(void)
+{
+ int32_t node;
+
+ node = rseq_current_node_raw();
+ if (rseq_unlikely(node < 0))
+ node = rseq_fallback_current_node();
+ return node;
+}
+
static inline void rseq_clear_rseq_cs(void)
{
RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0);
--
2.17.1