This tests clone3() with *set_tid to see if all desired PIDs are working
as expected. The tests are trying multiple invalid input parameters as
well as creating processes while specifying a certain PID in multiple
PID namespaces at the same time.
Signed-off-by: Adrian Reber <[email protected]>
---
tools/testing/selftests/clone3/.gitignore | 1 +
tools/testing/selftests/clone3/Makefile | 2 +-
.../testing/selftests/clone3/clone3_set_tid.c | 345 ++++++++++++++++++
3 files changed, 347 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/clone3/clone3_set_tid.c
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
index 85d9d3ba2524..d56c3c49d869 100644
--- a/tools/testing/selftests/clone3/.gitignore
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -1 +1,2 @@
clone3
+clone3_set_tid
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index ea922c014ae4..2d292545ca8e 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -2,6 +2,6 @@
CFLAGS += -I../../../../usr/include/
-TEST_GEN_PROGS := clone3
+TEST_GEN_PROGS := clone3 clone3_set_tid
include ../lib.mk
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
new file mode 100644
index 000000000000..51196901ccc4
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests are assuming to be running in the host's
+ * PID namespace.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest.h"
+
+#ifndef MAX_PID_NS_LEVEL
+#define MAX_PID_NS_LEVEL 32
+#endif
+
+static int pipe_1[2];
+static int pipe_2[2];
+
+static pid_t raw_clone(struct clone_args *args)
+{
+ return syscall(__NR_clone3, args, sizeof(struct clone_args));
+}
+
+static int call_clone3_set_tid(__u64 *set_tid,
+ size_t set_tid_size,
+ int flags,
+ int expected_pid,
+ int wait_for_it)
+{
+ int status;
+ int ret = 0;
+ pid_t pid = -1;
+ struct clone_args args = {0};
+
+ args.flags = flags;
+ args.exit_signal = SIGCHLD;
+ args.set_tid = (__u64)set_tid;
+ args.set_tid_size = set_tid_size;
+
+ pid = raw_clone(&args);
+ if (pid < 0) {
+ ksft_print_msg("%s - Failed to create new process\n",
+ strerror(errno));
+ return -errno;
+ }
+
+ if (pid == 0) {
+ char tmp = 0;
+ ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
+ getpid(), set_tid[0]);
+ if (wait_for_it) {
+ ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
+ /* Signal the parent that the child is ready */
+ close(pipe_1[0]);
+ write(pipe_1[1], &tmp, 1);
+ close(pipe_1[1]);
+ close(pipe_2[1]);
+ read(pipe_2[0], &tmp, 1);
+ close(pipe_2[0]);
+ }
+
+ if (set_tid[0] != getpid())
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ if (expected_pid == 0 || expected_pid == pid)
+ ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
+ getpid(), pid);
+ else {
+ ksft_print_msg(
+ "Expected child pid %d does not match actual pid %d\n",
+ expected_pid, pid);
+ ret = -1;
+ }
+
+ if (wait(&status) < 0) {
+ ksft_print_msg("Child returned %s\n", strerror(errno));
+ return -errno;
+ }
+ if (WEXITSTATUS(status))
+ return WEXITSTATUS(status);
+
+ return ret;
+}
+
+static void test_clone3_set_tid(__u64 *set_tid,
+ size_t set_tid_size,
+ int flags,
+ int expected,
+ int expected_pid,
+ int wait_for_it)
+{
+ int ret;
+
+ ksft_print_msg(
+ "[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
+ getpid(), set_tid[0], flags);
+ ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid,
+ wait_for_it);
+ ksft_print_msg(
+ "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+ getpid(), set_tid[0], ret, expected);
+ if (ret != expected)
+ ksft_test_result_fail(
+ "[%d] Result (%d) is different than expected (%d)\n",
+ getpid(), ret, expected);
+ else
+ ksft_test_result_pass("[%d] Result (%d) matches expectation (%d)\n",
+ getpid(), ret, expected);
+}
+int main(int argc, char *argv[])
+{
+ FILE *f;
+ char buf;
+ pid_t pid;
+ pid_t ns1;
+ pid_t ns2;
+ pid_t ns3;
+ int status;
+ char *proc;
+ int ret = -1;
+ pid_t ns_pid;
+ int pid_max = 0;
+ uid_t uid = getuid();
+ char line[1024] = {0};
+ __aligned_u64 set_tid[MAX_PID_NS_LEVEL * 2];
+ __aligned_u64 set_tid_small[1];
+
+ if (pipe(pipe_1) == -1 || pipe(pipe_2))
+ ksft_exit_fail_msg("pipe() failed\n");
+
+ ksft_print_header();
+ ksft_set_plan(27);
+
+ f = fopen("/proc/sys/kernel/pid_max", "r");
+ if (f == NULL)
+ ksft_exit_fail_msg(
+ "%s - Could not open /proc/sys/kernel/pid_max\n",
+ strerror(errno));
+ fscanf(f, "%d", &pid_max);
+ fclose(f);
+ ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
+
+ /* Try invalid settings */
+ memset(&set_tid, 0, sizeof(set_tid));
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -E2BIG, 0, 0);
+
+ /* small set_tid array, but maximum set_tid_size */
+ /* Find the current active PID */
+ pid = fork();
+ if (pid == 0) {
+ ksft_print_msg("Child has PID %d\n", getpid());
+ _exit(EXIT_SUCCESS);
+ }
+ (void)wait(NULL);
+ /* After the child has finished, its PID should be free. */
+ set_tid_small[0] = pid;
+ /*
+ * There is a chance that this can return -EFAULT as the actual
+ * set_tid array has only one entry, but we are telling the kernel
+ * that it has the size MAX_PID_NS_LEVEL. This could lead to a
+ * situation where copy_from_user() fails. So far it always
+ * succeeds and copies random data (whatever is after set_tid_small).
+ */
+ test_clone3_set_tid(set_tid_small, MAX_PID_NS_LEVEL, 0, -EINVAL, 0, 0);
+
+ /*
+ * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+ * nested PID namespace.
+ */
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+ memset(&set_tid, 0xff, sizeof(set_tid));
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, -E2BIG, 0, 0);
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -E2BIG, 0, 0);
+ /*
+ * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
+ * nested PID namespace.
+ */
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+
+ memset(&set_tid, 0, sizeof(set_tid));
+ /* Try with an invalid PID */
+ set_tid[0] = 0;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ set_tid[0] = -1;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ /* Claim that the set_tid array actually contains 2 elements. */
+ test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ /* Try it in a new PID namespace */
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ else
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EPERM, 0, 0);
+
+ /*
+ * Try with a valid PID (1) but as non-root. This should fail
+ * with -EPERM if running in the initial user namespace.
+ * As root it should tell us -EEXIST.
+ */
+ set_tid[0] = 1;
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+ else
+ test_clone3_set_tid(set_tid, 1, 0, -EPERM, 0, 0);
+
+ /* Try it in a new PID namespace */
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+ else
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EPERM, 0, 0);
+
+ /* pid_max should fail everywhere */
+ set_tid[0] = pid_max;
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ if (uid == 0)
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ else
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EPERM, 0, 0);
+
+ if (uid != 0) {
+ /*
+ * All remaining tests require root. Tell the framework
+ * that all those tests are skipped as non-root.
+ */
+ ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
+ goto out;
+ }
+
+ /* Find the current active PID */
+ pid = fork();
+ if (pid == 0) {
+ ksft_print_msg("Child has PID %d\n", getpid());
+ usleep(500);
+ _exit(EXIT_SUCCESS);
+ }
+ (void)wait(NULL);
+ /* After the child has finished, its PID should be free. */
+ set_tid[0] = pid;
+ test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+ /* This should fail as there is no PID 1 in that namespace */
+ test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ set_tid[0] = 1;
+ set_tid[1] = pid;
+ test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+
+ ksft_print_msg("unshare PID namespace\n");
+ unshare(CLONE_NEWPID);
+ set_tid[0] = pid;
+ /* This should fail as there is no PID 1 in that namespace */
+ test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+
+ /* Let's create a PID 1 */
+ ns_pid = fork();
+ if (ns_pid == 0) {
+ ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
+ set_tid[0] = 2;
+ test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+ set_tid[0] = 1;
+ set_tid[1] = 42;
+ set_tid[2] = pid;
+ /*
+ * This should fail as there are not enough active PID
+ * namespaces. Again assuming this is running in the host's
+ * PID namespace. Not yet nested.
+ */
+ test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+ /*
+ * This should work and from the parent we should see
+ * something like 'NSpid: pid 42 1'.
+ */
+ test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, 1);
+ _exit(ksft_cnt.ksft_pass);
+ }
+
+ close(pipe_1[1]);
+ close(pipe_2[0]);
+ while (read(pipe_1[0], &buf, 1) > 0) {
+ ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
+ break;
+ }
+
+ asprintf(&proc, "/proc/%d/status", pid);
+ f = fopen(proc, "r");
+ if (f == NULL)
+ ksft_exit_fail_msg(
+ "%s - Could not open %s\n",
+ strerror(errno), proc);
+ while (fgets(line, 1024, f)) {
+ if (strstr(line, "NSpid")) {
+ /* Verify that all generated PIDs are as expected. */
+ sscanf(line, "NSpid:\t%d\t%d\t%d", &ns3, &ns2, &ns1);
+ break;
+ }
+ }
+ fclose(f);
+ free(proc);
+ close(pipe_2[0]);
+ /* Tell the clone3()'d child to finish. */
+ write(pipe_2[1], &buf, 1);
+ close(pipe_2[1]);
+
+ if (wait(&status) < 0) {
+ ksft_print_msg("Child returned %s\n", strerror(errno));
+ ret = -errno;
+ goto out;
+ }
+ if (WEXITSTATUS(status))
+ /*
+ * Update the number of total tests with the tests from the
+ * child processes.
+ */
+ ksft_cnt.ksft_pass = WEXITSTATUS(status);
+
+ if (ns3 == pid && ns2 == 42 && ns1 == 1)
+ ksft_test_result_pass(
+ "PIDs in all namespaces as expected (%d,%d,%d)\n",
+ ns3, ns2, ns1);
+ else
+ ksft_test_result_fail(
+ "PIDs in all namespaces not as expected (%d,%d,%d)\n",
+ ns3, ns2, ns1);
+out:
+ ret = 0;
+
+ return !ret ? ksft_exit_pass() : ksft_exit_fail();
+}
--
2.23.0