2023-11-29 05:35:14

by Hengqi Chen

[permalink] [raw]
Subject: [PATCH v3 0/3] seccomp: Make seccomp filter reusable

This patchset introduces a new operation and a new flag
to split the SECCOMP_SET_MODE_FILTER process into two steps:
load filter and attach filter. Thus we can reuse the filter.

The new SECCOMP_LOAD_FILTER loads the filter and returns a fd
which can be reused for different processes via UDS.
With this new operation, we can eliminate a hot path of JITing
BPF program (the filter) where we apply the same seccomp filter
to thousands of micro VMs on a bare metal instance.

The new flag SECCOMP_FILTER_FLAG_FILTER_FD is used to attach
a filter which is loaded via SECCOMP_LOAD_FILTER and represented
by a seccomp filter fd.

Change logs:
v2 -> v3:
- Stick to cBPF, don't interfere with bpffs

v1 -> v2:
- Add a flag to attach filter from fd
- Update selftests

RFC -> v1:
- Addressed comments from Kees
- Reuse filter copy/create code (patch 1)
- Add a selftest (patch 4)

Hengqi Chen (3):
seccomp: Introduce SECCOMP_LOAD_FILTER operation
seccomp: Introduce new flag SECCOMP_FILTER_FLAG_FILTER_FD
selftests/seccomp: Test seccomp filter load and attach

include/linux/seccomp.h | 3 +-
include/uapi/linux/seccomp.h | 3 +
kernel/seccomp.c | 78 ++++++++++++++++++-
tools/testing/selftests/seccomp/seccomp_bpf.c | 71 +++++++++++++++++
4 files changed, 151 insertions(+), 4 deletions(-)

--
2.34.1


2023-11-29 05:35:22

by Hengqi Chen

[permalink] [raw]
Subject: [PATCH v3 1/3] seccomp: Introduce SECCOMP_LOAD_FILTER operation

This patch adds a new operation named SECCOMP_LOAD_FILTER.
It accepts a sock_fprog the same as SECCOMP_SET_MODE_FILTER
but only performs the loading process. If succeed, returns a
new fd associated with the seccomp filter. The seccomp filter
thus can be reused to attach to different processes.

Signed-off-by: Hengqi Chen <[email protected]>
---
include/uapi/linux/seccomp.h | 1 +
kernel/seccomp.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index dbfc9b37fcae..ee2c83697810 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -16,6 +16,7 @@
#define SECCOMP_SET_MODE_FILTER 1
#define SECCOMP_GET_ACTION_AVAIL 2
#define SECCOMP_GET_NOTIF_SIZES 3
+#define SECCOMP_LOAD_FILTER 4

/* Valid flags for SECCOMP_SET_MODE_FILTER */
#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 255999ba9190..ef956c3d15c7 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1885,6 +1885,18 @@ static bool has_duplicate_listener(struct seccomp_filter *new_child)
return false;
}

+static int seccomp_filter_put(struct inode *inode, struct file *file)
+{
+ struct seccomp_filter *filter = file->private_data;
+
+ __put_seccomp_filter(filter);
+ return 0;
+}
+
+static const struct file_operations seccomp_filter_fops = {
+ .release = seccomp_filter_put,
+};
+
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
@@ -1996,12 +2008,29 @@ static long seccomp_set_mode_filter(unsigned int flags,
seccomp_filter_free(prepared);
return ret;
}
+
+static long seccomp_load_filter(const char __user *filter)
+{
+ struct seccomp_filter *sfilter;
+
+ sfilter = seccomp_prepare_user_filter(filter);
+ if (IS_ERR(sfilter))
+ return PTR_ERR(sfilter);
+
+ return anon_inode_getfd("seccomp-filter", &seccomp_filter_fops,
+ sfilter, O_CLOEXEC);
+}
#else
static inline long seccomp_set_mode_filter(unsigned int flags,
const char __user *filter)
{
return -EINVAL;
}
+
+static inline long seccomp_load_filter(const char __user *filter)
+{
+ return -EINVAL;
+}
#endif

static long seccomp_get_action_avail(const char __user *uaction)
@@ -2063,6 +2092,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
return -EINVAL;

return seccomp_get_notif_sizes(uargs);
+ case SECCOMP_LOAD_FILTER:
+ if (flags != 0)
+ return -EINVAL;
+
+ return seccomp_load_filter(uargs);
default:
return -EINVAL;
}
--
2.34.1

2023-11-29 05:35:43

by Hengqi Chen

[permalink] [raw]
Subject: [PATCH v3 3/3] selftests/seccomp: Test seccomp filter load and attach

Add testcases to exercise the newly added seccomp filter
load and attach functionalities.

Signed-off-by: Hengqi Chen <[email protected]>
---
tools/testing/selftests/seccomp/seccomp_bpf.c | 71 +++++++++++++++++++
1 file changed, 71 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 38f651469968..66eb72e6c1a3 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -4735,6 +4735,77 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}

+TEST(seccomp_filter_load_and_attach)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ int fd, ret, flags;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ flags = 0;
+ fd = seccomp(SECCOMP_LOAD_FILTER, flags, &prog);
+ ASSERT_GT(fd, -1);
+
+ flags = SECCOMP_FILTER_FLAG_FILTER_FD;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &fd);
+ ASSERT_EQ(ret, 0);
+
+ flags = SECCOMP_FILTER_FLAG_FILTER_FD;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &fd);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EEXIST);
+
+ flags = 0;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+ ASSERT_EQ(ret, 0);
+
+ close(fd);
+}
+
+TEST(seccomp_attach_fd_failed)
+{
+ int fd, ret, flags;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_GT(fd, -1);
+
+ /* copy a sock_fprog from a fd */
+ flags = 0;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &fd);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* pass a non seccomp filter fd */
+ flags = SECCOMP_FILTER_FLAG_FILTER_FD;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &fd);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+ close(fd);
+
+ /* pass a invalid fd */
+ fd = -1;
+ flags = SECCOMP_FILTER_FLAG_FILTER_FD;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &fd);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBADF);
+}
+
/*
* TODO:
* - expand NNP testing
--
2.34.1

2023-11-29 05:36:02

by Hengqi Chen

[permalink] [raw]
Subject: [PATCH v3 2/3] seccomp: Introduce new flag SECCOMP_FILTER_FLAG_FILTER_FD

Add a new flag SECCOMP_FILTER_FLAG_FILTER_FD for SECCOMP_SET_MODE_FILTER.
This indicates user supply a seccomp filter fd, not a sock_fprog.
It allows us to attach the seccomp filter that is previously loaded via
SECCOMP_LOAD_FILTER.

Signed-off-by: Hengqi Chen <[email protected]>
---
include/linux/seccomp.h | 3 ++-
include/uapi/linux/seccomp.h | 2 ++
kernel/seccomp.c | 44 +++++++++++++++++++++++++++++++++---
3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 175079552f68..3257042c35fc 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -9,7 +9,8 @@
SECCOMP_FILTER_FLAG_SPEC_ALLOW | \
SECCOMP_FILTER_FLAG_NEW_LISTENER | \
SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \
- SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV)
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV | \
+ SECCOMP_FILTER_FLAG_FILTER_FD)

/* sizeof() the first published struct seccomp_notif_addfd */
#define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ee2c83697810..41e0ca56ef1c 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -26,6 +26,8 @@
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
/* Received notifications wait in killable state (only respond to fatal signals) */
#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+/* Indicates that the filter is a fd obtained from SECCOMP_LOAD_FILTER */
+#define SECCOMP_FILTER_FLAG_FILTER_FD (1UL << 6)

/*
* All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef956c3d15c7..a43a6a6b6b77 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -883,8 +883,11 @@ static long seccomp_attach_filter(unsigned int flags,

/* Validate resulting filter length. */
total_insns = filter->prog->len;
- for (walker = current->seccomp.filter; walker; walker = walker->prev)
+ for (walker = current->seccomp.filter; walker; walker = walker->prev) {
total_insns += walker->prog->len + 4; /* 4 instr penalty */
+ if (walker == filter)
+ return -EEXIST;
+ }
if (total_insns > MAX_INSNS_PER_PATH)
return -ENOMEM;

@@ -1897,6 +1900,38 @@ static const struct file_operations seccomp_filter_fops = {
.release = seccomp_filter_put,
};

+/**
+ * seccomp_prepare_filter_from_fd - prepares filter from a user-supplied fd
+ * @ufd: pointer to fd that refers to a seccomp filter.
+ *
+ * Returns filter on success or an ERR_PTR on failure.
+ */
+static struct seccomp_filter *
+seccomp_prepare_filter_from_fd(const char __user *ufd)
+{
+ struct seccomp_filter *sfilter;
+ struct fd f;
+ int fd;
+
+ if (copy_from_user(&fd, ufd, sizeof(fd)))
+ return ERR_PTR(-EFAULT);
+
+ f = fdget(fd);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ if (f.file->f_op != &seccomp_filter_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ sfilter = f.file->private_data;
+ __get_seccomp_filter(sfilter);
+
+ fdput(f);
+ return sfilter;
+}
+
/**
* seccomp_set_mode_filter: internal function for setting seccomp filter
* @flags: flags to change filter behavior
@@ -1944,7 +1979,10 @@ static long seccomp_set_mode_filter(unsigned int flags,
return -EINVAL;

/* Prepare the new filter before holding any locks. */
- prepared = seccomp_prepare_user_filter(filter);
+ if (flags & SECCOMP_FILTER_FLAG_FILTER_FD)
+ prepared = seccomp_prepare_filter_from_fd(filter);
+ else
+ prepared = seccomp_prepare_user_filter(filter);
if (IS_ERR(prepared))
return PTR_ERR(prepared);

@@ -2005,7 +2043,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
}
}
out_free:
- seccomp_filter_free(prepared);
+ __put_seccomp_filter(prepared);
return ret;
}

--
2.34.1

2023-12-05 08:48:25

by Oliver Sang

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] selftests/seccomp: Test seccomp filter load and attach



Hello,

kernel test robot noticed "kernel-selftests.seccomp.seccomp_bpf.fail" on:

commit: 95084d9b2b5f0b593724819288f3cb4e2c585cb0 ("[PATCH v3 3/3] selftests/seccomp: Test seccomp filter load and attach")
url: https://github.com/intel-lab-lkp/linux/commits/Hengqi-Chen/seccomp-Introduce-SECCOMP_LOAD_FILTER-operation/20231129-134337
base: https://git.kernel.org/cgit/linux/kernel/git/kees/linux.git for-next/seccomp
patch link: https://lore.kernel.org/all/[email protected]/
patch subject: [PATCH v3 3/3] selftests/seccomp: Test seccomp filter load and attach

in testcase: kernel-selftests
version: kernel-selftests-x86_64-60acb023-1_20230329
with following parameters:

group: group-s



compiler: gcc-12
test machine: 36 threads 1 sockets Intel(R) Core(TM) i9-10980XE CPU @ 3.00GHz (Cascade Lake) with 32G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)




If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-lkp/[email protected]


we noticed one test added in this commit can pass, but another will fail.


# # RUN global.seccomp_filter_load_and_attach ...
# # OK global.seccomp_filter_load_and_attach
# ok 56 global.seccomp_filter_load_and_attach
# # RUN global.seccomp_attach_fd_failed ...
# # seccomp_bpf.c:4792:seccomp_attach_fd_failed:Expected errno (22) == EFAULT (14)
# # seccomp_attach_fd_failed: Test terminated by assertion
# # FAIL global.seccomp_attach_fd_failed
# not ok 57 global.seccomp_attach_fd_failed

...

# # FAILED: 97 / 98 tests passed.
# # Totals: pass:97 fail:1 xfail:0 xpass:0 skip:0 error:0
not ok 1 selftests: seccomp: seccomp_bpf # exit=1


we applied the patch set upon 31c65705a8cfa like below:

95084d9b2b5f0 (linux-review/Hengqi-Chen/seccomp-Introduce-SECCOMP_LOAD_FILTER-operation/20231129-134337) selftests/seccomp: Test seccomp filter load and attach
8fcda1c36e519 seccomp: Introduce new flag SECCOMP_FILTER_FLAG_FILTER_FD
bd86f21cfe1e0 seccomp: Introduce SECCOMP_LOAD_FILTER operation
31c65705a8cfa (kees/for-next/seccomp, kees/for-linus/seccomp) perf/benchmark: fix seccomp_unotify benchmark for 32-bit
ce9ecca0238b1 (tag: v6.6-rc2, hyperv/hyperv-next) Linux 6.6-rc2


not sure if this is the correct base, or is there any other dependency to run
new test seccomp_attach_fd_failed?


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231205/[email protected]



--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki