2021-02-05 21:29:10

by Chris Wilson

[permalink] [raw]
Subject: [PATCH] kernel: Expose SYS_kcmp by default

Userspace has discovered the functionality offered by SYS_kcmp and has
started to depend upon it. In particular, Mesa uses SYS_kcmp for
os_same_file_description() in order to identify when two fd (e.g. device
or dmabuf) point to the same struct file. Since they depend on it for
core functionality, lift SYS_kcmp out of the non-default
CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Will Drewry <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Lucas Stach <[email protected]>
---
init/Kconfig | 11 +++++++++++
kernel/Makefile | 2 +-
tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index b77c60f8b963..f62fca13ac5b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1194,6 +1194,7 @@ endif # NAMESPACES
config CHECKPOINT_RESTORE
bool "Checkpoint/restore support"
select PROC_CHILDREN
+ select KCMP
default n
help
Enables additional kernel features in a sake of checkpoint/restore.
@@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool

+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ default y
+ help
+ Enable the file descriptor comparison system call. It provides
+ user-space with the ability to compare two fd to see if they
+ point to the same file, and check other attributes.
+
+ If unsure, say Y.
+
config RSEQ
bool "Enable rseq() system call" if EXPERT
default y
diff --git a/kernel/Makefile b/kernel/Makefile
index aa7368c7eabf..320f1f3941b7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,7 +51,7 @@ obj-y += livepatch/
obj-y += dma/
obj-y += entry/

-obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
+obj-$(CONFIG_KCMP) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 26c72f2b61b1..1b6c7d33c4ff 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -315,7 +315,7 @@ TEST(kcmp)
ret = __filecmp(getpid(), getpid(), 1, 1);
EXPECT_EQ(ret, 0);
if (ret != 0 && errno == ENOSYS)
- SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}

TEST(mode_strict_support)
--
2.20.1


2021-02-06 04:54:11

by Chris Wilson

[permalink] [raw]
Subject: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

Userspace has discovered the functionality offered by SYS_kcmp and has
started to depend upon it. In particular, Mesa uses SYS_kcmp for
os_same_file_description() in order to identify when two fd (e.g. device
or dmabuf) point to the same struct file. Since they depend on it for
core functionality, lift SYS_kcmp out of the non-default
CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.

Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to
deduplicate the per-service file descriptor store.

Note that some distributions such as Ubuntu are already enabling
CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046
Signed-off-by: Chris Wilson <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Will Drewry <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Dave Airlie <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Lucas Stach <[email protected]>
Cc: Rasmus Villemoes <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: [email protected]
Acked-by: Daniel Vetter <[email protected]> # DRM depends on kcmp
Acked-by: Rasmus Villemoes <[email protected]> # systemd uses kcmp

---
v2:
- Default n.
- Borrow help message from man kcmp.
- Export get_epoll_tfile_raw_ptr() for CONFIG_KCMP
v3:
- Select KCMP for CONFIG_DRM
---
drivers/gpu/drm/Kconfig | 3 +++
fs/eventpoll.c | 4 ++--
include/linux/eventpoll.h | 2 +-
init/Kconfig | 11 +++++++++++
kernel/Makefile | 2 +-
tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
6 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 0973f408d75f..af6c6d214d91 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -15,6 +15,9 @@ menuconfig DRM
select I2C_ALGOBIT
select DMA_SHARED_BUFFER
select SYNC_FILE
+# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
+# device and dmabuf fd. Let's make sure that is available for our userspace.
+ select KCMP
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index a829af074eb5..3196474cbe24 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
return epir;
}

-#ifdef CONFIG_CHECKPOINT_RESTORE
+#ifdef CONFIG_KCMP
static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
{
struct rb_node *rbp;
@@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,

return file_raw;
}
-#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_KCMP */

/**
* Adds a new entry to the tail of the list in a lockless way, i.e.
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 0350393465d4..593322c946e6 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -18,7 +18,7 @@ struct file;

#ifdef CONFIG_EPOLL

-#ifdef CONFIG_CHECKPOINT_RESTORE
+#ifdef CONFIG_KCMP
struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
#endif

diff --git a/init/Kconfig b/init/Kconfig
index b77c60f8b963..9cc7436b2f73 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1194,6 +1194,7 @@ endif # NAMESPACES
config CHECKPOINT_RESTORE
bool "Checkpoint/restore support"
select PROC_CHILDREN
+ select KCMP
default n
help
Enables additional kernel features in a sake of checkpoint/restore.
@@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool

+config KCMP
+ bool "Enable kcmp() system call" if EXPERT
+ help
+ Enable the kernel resource comparison system call. It provides
+ user-space with the ability to compare two processes to see if they
+ share a common resource, such as a file descriptor or even virtual
+ memory space.
+
+ If unsure, say N.
+
config RSEQ
bool "Enable rseq() system call" if EXPERT
default y
diff --git a/kernel/Makefile b/kernel/Makefile
index aa7368c7eabf..320f1f3941b7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -51,7 +51,7 @@ obj-y += livepatch/
obj-y += dma/
obj-y += entry/

-obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
+obj-$(CONFIG_KCMP) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
obj-$(CONFIG_PROFILING) += profile.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 26c72f2b61b1..1b6c7d33c4ff 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -315,7 +315,7 @@ TEST(kcmp)
ret = __filecmp(getpid(), getpid(), 1, 1);
EXPECT_EQ(ret, 0);
if (ret != 0 && errno == ENOSYS)
- SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
}

TEST(mode_strict_support)
--
2.20.1

2021-02-06 14:29:32

by Cyrill Gorcunov

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Fri, Feb 05, 2021 at 10:00:12PM +0000, Chris Wilson wrote:
> Userspace has discovered the functionality offered by SYS_kcmp and has
> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> os_same_file_description() in order to identify when two fd (e.g. device
> or dmabuf) point to the same struct file. Since they depend on it for
> core functionality, lift SYS_kcmp out of the non-default
> CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
>
...
Reviewed-by: Cyrill Gorcunov <[email protected]>

2021-02-08 22:15:47

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Fri, Feb 05, 2021 at 10:00:12PM +0000, Chris Wilson wrote:
> Userspace has discovered the functionality offered by SYS_kcmp and has
> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> os_same_file_description() in order to identify when two fd (e.g. device
> or dmabuf) point to the same struct file. Since they depend on it for
> core functionality, lift SYS_kcmp out of the non-default
> CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
>
> Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to
> deduplicate the per-service file descriptor store.
>
> Note that some distributions such as Ubuntu are already enabling
> CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp.
>
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046
> Signed-off-by: Chris Wilson <[email protected]>

Thanks!

Reviewed-by: Kees Cook <[email protected]>

-Kees

> Cc: Kees Cook <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Will Drewry <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Dave Airlie <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: Lucas Stach <[email protected]>
> Cc: Rasmus Villemoes <[email protected]>
> Cc: Cyrill Gorcunov <[email protected]>
> Cc: [email protected]
> Acked-by: Daniel Vetter <[email protected]> # DRM depends on kcmp
> Acked-by: Rasmus Villemoes <[email protected]> # systemd uses kcmp
>
> ---
> v2:
> - Default n.
> - Borrow help message from man kcmp.
> - Export get_epoll_tfile_raw_ptr() for CONFIG_KCMP
> v3:
> - Select KCMP for CONFIG_DRM
> ---
> drivers/gpu/drm/Kconfig | 3 +++
> fs/eventpoll.c | 4 ++--
> include/linux/eventpoll.h | 2 +-
> init/Kconfig | 11 +++++++++++
> kernel/Makefile | 2 +-
> tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
> 6 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 0973f408d75f..af6c6d214d91 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -15,6 +15,9 @@ menuconfig DRM
> select I2C_ALGOBIT
> select DMA_SHARED_BUFFER
> select SYNC_FILE
> +# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
> +# device and dmabuf fd. Let's make sure that is available for our userspace.
> + select KCMP
> help
> Kernel-level support for the Direct Rendering Infrastructure (DRI)
> introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index a829af074eb5..3196474cbe24 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
> return epir;
> }
>
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#ifdef CONFIG_KCMP
> static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
> {
> struct rb_node *rbp;
> @@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
>
> return file_raw;
> }
> -#endif /* CONFIG_CHECKPOINT_RESTORE */
> +#endif /* CONFIG_KCMP */
>
> /**
> * Adds a new entry to the tail of the list in a lockless way, i.e.
> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
> index 0350393465d4..593322c946e6 100644
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -18,7 +18,7 @@ struct file;
>
> #ifdef CONFIG_EPOLL
>
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#ifdef CONFIG_KCMP
> struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
> #endif
>
> diff --git a/init/Kconfig b/init/Kconfig
> index b77c60f8b963..9cc7436b2f73 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1194,6 +1194,7 @@ endif # NAMESPACES
> config CHECKPOINT_RESTORE
> bool "Checkpoint/restore support"
> select PROC_CHILDREN
> + select KCMP
> default n
> help
> Enables additional kernel features in a sake of checkpoint/restore.
> @@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
> config ARCH_HAS_MEMBARRIER_SYNC_CORE
> bool
>
> +config KCMP
> + bool "Enable kcmp() system call" if EXPERT
> + help
> + Enable the kernel resource comparison system call. It provides
> + user-space with the ability to compare two processes to see if they
> + share a common resource, such as a file descriptor or even virtual
> + memory space.
> +
> + If unsure, say N.
> +
> config RSEQ
> bool "Enable rseq() system call" if EXPERT
> default y
> diff --git a/kernel/Makefile b/kernel/Makefile
> index aa7368c7eabf..320f1f3941b7 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -51,7 +51,7 @@ obj-y += livepatch/
> obj-y += dma/
> obj-y += entry/
>
> -obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
> +obj-$(CONFIG_KCMP) += kcmp.o
> obj-$(CONFIG_FREEZER) += freezer.o
> obj-$(CONFIG_PROFILING) += profile.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
> index 26c72f2b61b1..1b6c7d33c4ff 100644
> --- a/tools/testing/selftests/seccomp/seccomp_bpf.c
> +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
> @@ -315,7 +315,7 @@ TEST(kcmp)
> ret = __filecmp(getpid(), getpid(), 1, 1);
> EXPECT_EQ(ret, 0);
> if (ret != 0 && errno == ENOSYS)
> - SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
> + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
> }
>
> TEST(mode_strict_support)
> --
> 2.20.1
>

--
Kees Cook

2021-02-12 13:13:54

by Emil Velikov

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Fri, 5 Feb 2021 at 22:01, Chris Wilson <[email protected]> wrote:
>
> Userspace has discovered the functionality offered by SYS_kcmp and has
> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> os_same_file_description() in order to identify when two fd (e.g. device
> or dmabuf)

As you rightfully point out, SYS_kcmp is a bit of a double-edged sword.
While you mention the CONFIG issue, there is also a portability aspect
(Mesa runs on more than just Linux) as well as sandbox filtering
of the extra syscall.

Last time I looked, the latter was still an issue and mesa was using
SYS_kcmp to compare device node fds.
A far shorter and more portable solution is possible, so let me
prepare a Mesa patch.

-Emil

2021-02-12 13:18:45

by Simon Ser

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Friday, February 12th, 2021 at 1:57 PM, Emil Velikov <[email protected]> wrote:

> On Fri, 5 Feb 2021 at 22:01, Chris Wilson <[email protected]> wrote:
> >
> > Userspace has discovered the functionality offered by SYS_kcmp and has
> > started to depend upon it. In particular, Mesa uses SYS_kcmp for
> > os_same_file_description() in order to identify when two fd (e.g. device
> > or dmabuf)
>
> As you rightfully point out, SYS_kcmp is a bit of a two edged sword.
> While you mention the CONFIG issue, there is also a portability aspect
> (mesa runs on more than just linux) and as well as sandbox filtering
> of the extra syscall.
>
> Last time I looked, the latter was still an issue and mesa was using
> SYS_kcmp to compare device node fds.
> A far shorter and more portable solution is possible, so let me
> prepare a Mesa patch.

Comparing two DMA-BUFs can be done with their inode number, I think.

Comparing two device FDs is more subtle, because of GEM handle
ref'counting. You sometimes really want to check whether two FDs are
backed by the same file *description*. See [1] for details.

[1]: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/110

2021-02-12 14:05:02

by Michel Dänzer

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On 2021-02-12 1:57 p.m., Emil Velikov wrote:
> On Fri, 5 Feb 2021 at 22:01, Chris Wilson <[email protected]> wrote:
>>
>> Userspace has discovered the functionality offered by SYS_kcmp and has
>> started to depend upon it. In particular, Mesa uses SYS_kcmp for
>> os_same_file_description() in order to identify when two fd (e.g. device
>> or dmabuf)
>
> As you rightfully point out, SYS_kcmp is a bit of a two edged sword.
> While you mention the CONFIG issue, there is also a portability aspect
> (mesa runs on more than just linux) and as well as sandbox filtering
> of the extra syscall.
>
> Last time I looked, the latter was still an issue and mesa was using
> SYS_kcmp to compare device node fds.
> A far shorter and more portable solution is possible, so let me
> prepare a Mesa patch.

Make sure to read my comments on
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6881 first. :)


--
Earthling Michel Dänzer | https://redhat.com
Libre software enthusiast | Mesa and X developer

2021-02-12 14:09:35

by Emil Velikov

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Fri, 12 Feb 2021 at 13:14, Simon Ser <[email protected]> wrote:
>
> On Friday, February 12th, 2021 at 1:57 PM, Emil Velikov <[email protected]> wrote:
>
> > On Fri, 5 Feb 2021 at 22:01, Chris Wilson <[email protected]> wrote:
> > >
> > > Userspace has discovered the functionality offered by SYS_kcmp and has
> > > started to depend upon it. In particular, Mesa uses SYS_kcmp for
> > > os_same_file_description() in order to identify when two fd (e.g. device
> > > or dmabuf)
> >
> > As you rightfully point out, SYS_kcmp is a bit of a two edged sword.
> > While you mention the CONFIG issue, there is also a portability aspect
> > (mesa runs on more than just linux) and as well as sandbox filtering
> > of the extra syscall.
> >
> > Last time I looked, the latter was still an issue and mesa was using
> > SYS_kcmp to compare device node fds.
> > A far shorter and more portable solution is possible, so let me
> > prepare a Mesa patch.
>
> Comparing two DMA-BUFs can be done with their inode number, I think.
>
> Comparing two device FDs is more subtle, because of GEM handle
> ref'counting. You sometimes really want to check whether two FDs are
> backed by the same file *description*. See [1] for details.
>
Thanks for the correction and the reference.
Seems like I've short circuited file description table vs file descriptor.

Emil

2021-02-12 14:13:22

by Emil Velikov

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Fri, 12 Feb 2021 at 14:01, Michel Dänzer <[email protected]> wrote:
>
> On 2021-02-12 1:57 p.m., Emil Velikov wrote:
> > On Fri, 5 Feb 2021 at 22:01, Chris Wilson <[email protected]> wrote:
> >>
> >> Userspace has discovered the functionality offered by SYS_kcmp and has
> >> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> >> os_same_file_description() in order to identify when two fd (e.g. device
> >> or dmabuf)
> >
> > As you rightfully point out, SYS_kcmp is a bit of a two edged sword.
> > While you mention the CONFIG issue, there is also a portability aspect
> > (mesa runs on more than just linux) and as well as sandbox filtering
> > of the extra syscall.
> >
> > Last time I looked, the latter was still an issue and mesa was using
> > SYS_kcmp to compare device node fds.
> > A far shorter and more portable solution is possible, so let me
> > prepare a Mesa patch.
>
> Make sure to read my comments on
> https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6881 first. :)
>
Much appreciated. I might have been "slightly" off - pardon for the noise o/

-Emil

2021-02-13 17:55:25

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] kernel: Expose SYS_kcmp by default

Hi!

> Userspace has discovered the functionality offered by SYS_kcmp and has
> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> os_same_file_description() in order to identify when two fd (e.g. device
> or dmabuf) point to the same struct file. Since they depend on it for
> core functionality, lift SYS_kcmp out of the non-default
> CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.

Is it a good idea to enable everything because Mesa uses it for file
descriptors?

This is a really interesting syscall...

Best regards,
Pavel

--
http://www.livejournal.com/~pavelmachek


Attachments:
(No filename) (630.00 B)
signature.asc (201.00 B)
Download all attachments

2021-02-15 09:00:11

by Thomas Zimmermann

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

Hi

Am 05.02.21 um 23:00 schrieb Chris Wilson:
> Userspace has discovered the functionality offered by SYS_kcmp and has
> started to depend upon it. In particular, Mesa uses SYS_kcmp for
> os_same_file_description() in order to identify when two fd (e.g. device
> or dmabuf) point to the same struct file. Since they depend on it for
> core functionality, lift SYS_kcmp out of the non-default
> CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
>
> Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to
> deduplicate the per-service file descriptor store.

This helps a lot with transactional programming in userspace system
code. So FWIW

Acked-by: Thomas Zimmermann <[email protected]>

>
> Note that some distributions such as Ubuntu are already enabling
> CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp.
>
> References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Kees Cook <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Will Drewry <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Dave Airlie <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Cc: Lucas Stach <[email protected]>
> Cc: Rasmus Villemoes <[email protected]>
> Cc: Cyrill Gorcunov <[email protected]>
> Cc: [email protected]
> Acked-by: Daniel Vetter <[email protected]> # DRM depends on kcmp
> Acked-by: Rasmus Villemoes <[email protected]> # systemd uses kcmp
>
> ---
> v2:
> - Default n.
> - Borrow help message from man kcmp.
> - Export get_epoll_tfile_raw_ptr() for CONFIG_KCMP
> v3:
> - Select KCMP for CONFIG_DRM
> ---
> drivers/gpu/drm/Kconfig | 3 +++
> fs/eventpoll.c | 4 ++--
> include/linux/eventpoll.h | 2 +-
> init/Kconfig | 11 +++++++++++
> kernel/Makefile | 2 +-
> tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
> 6 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 0973f408d75f..af6c6d214d91 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -15,6 +15,9 @@ menuconfig DRM
> select I2C_ALGOBIT
> select DMA_SHARED_BUFFER
> select SYNC_FILE
> +# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
> +# device and dmabuf fd. Let's make sure that is available for our userspace.
> + select KCMP
> help
> Kernel-level support for the Direct Rendering Infrastructure (DRI)
> introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index a829af074eb5..3196474cbe24 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
> return epir;
> }
>
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#ifdef CONFIG_KCMP
> static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
> {
> struct rb_node *rbp;
> @@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
>
> return file_raw;
> }
> -#endif /* CONFIG_CHECKPOINT_RESTORE */
> +#endif /* CONFIG_KCMP */
>
> /**
> * Adds a new entry to the tail of the list in a lockless way, i.e.
> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
> index 0350393465d4..593322c946e6 100644
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -18,7 +18,7 @@ struct file;
>
> #ifdef CONFIG_EPOLL
>
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#ifdef CONFIG_KCMP
> struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
> #endif
>
> diff --git a/init/Kconfig b/init/Kconfig
> index b77c60f8b963..9cc7436b2f73 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1194,6 +1194,7 @@ endif # NAMESPACES
> config CHECKPOINT_RESTORE
> bool "Checkpoint/restore support"
> select PROC_CHILDREN
> + select KCMP
> default n
> help
> Enables additional kernel features in a sake of checkpoint/restore.
> @@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
> config ARCH_HAS_MEMBARRIER_SYNC_CORE
> bool
>
> +config KCMP
> + bool "Enable kcmp() system call" if EXPERT
> + help
> + Enable the kernel resource comparison system call. It provides
> + user-space with the ability to compare two processes to see if they
> + share a common resource, such as a file descriptor or even virtual
> + memory space.
> +
> + If unsure, say N.
> +
> config RSEQ
> bool "Enable rseq() system call" if EXPERT
> default y
> diff --git a/kernel/Makefile b/kernel/Makefile
> index aa7368c7eabf..320f1f3941b7 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -51,7 +51,7 @@ obj-y += livepatch/
> obj-y += dma/
> obj-y += entry/
>
> -obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
> +obj-$(CONFIG_KCMP) += kcmp.o
> obj-$(CONFIG_FREEZER) += freezer.o
> obj-$(CONFIG_PROFILING) += profile.o
> obj-$(CONFIG_STACKTRACE) += stacktrace.o
> diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
> index 26c72f2b61b1..1b6c7d33c4ff 100644
> --- a/tools/testing/selftests/seccomp/seccomp_bpf.c
> +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
> @@ -315,7 +315,7 @@ TEST(kcmp)
> ret = __filecmp(getpid(), getpid(), 1, 1);
> EXPECT_EQ(ret, 0);
> if (ret != 0 && errno == ENOSYS)
> - SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
> + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
> }
>
> TEST(mode_strict_support)
>

--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer


Attachments:
OpenPGP_signature (855.00 B)
OpenPGP digital signature

2021-02-15 10:26:18

by Lucas Stach

[permalink] [raw]
Subject: Re: [PATCH] kernel: Expose SYS_kcmp by default

Am Samstag, dem 13.02.2021 um 18:40 +0100 schrieb Pavel Machek:
> Hi!
>
> > Userspace has discovered the functionality offered by SYS_kcmp and has
> > started to depend upon it. In particular, Mesa uses SYS_kcmp for
> > os_same_file_description() in order to identify when two fd (e.g. device
> > or dmabuf) point to the same struct file. Since they depend on it for
> > core functionality, lift SYS_kcmp out of the non-default
> > CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
>
> Is it good idea to enable everything because Mesa uses it for file
> descriptors?
>
> This is really interesting syscall...

As Debian/Ubuntu and Fedora are already shipping with
CONFIG_CHECKPOINT_RESTORE=y in their kernel configs, I don't really see
the need to add further restrictions here. Or this discussion should
have happened a while ago...

Regards,
Lucas

2021-02-16 09:06:22

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v3] kcmp: Support selection of SYS_kcmp without CHECKPOINT_RESTORE

On Mon, Feb 08, 2021 at 02:12:00PM -0800, Kees Cook wrote:
> On Fri, Feb 05, 2021 at 10:00:12PM +0000, Chris Wilson wrote:
> > Userspace has discovered the functionality offered by SYS_kcmp and has
> > started to depend upon it. In particular, Mesa uses SYS_kcmp for
> > os_same_file_description() in order to identify when two fd (e.g. device
> > or dmabuf) point to the same struct file. Since they depend on it for
> > core functionality, lift SYS_kcmp out of the non-default
> > CONFIG_CHECKPOINT_RESTORE into the selectable syscall category.
> >
> > Rasmus Villemoes also pointed out that systemd uses SYS_kcmp to
> > deduplicate the per-service file descriptor store.
> >
> > Note that some distributions such as Ubuntu are already enabling
> > CHECKPOINT_RESTORE in their configs and so, by extension, SYS_kcmp.
> >
> > References: https://gitlab.freedesktop.org/drm/intel/-/issues/3046
> > Signed-off-by: Chris Wilson <[email protected]>
>
> Thanks!
>
> Reviewed-by: Kees Cook <[email protected]>

Thanks for reviews&patch, I stuffed it into a topic branch and plan to
send it to Linus later this week.

Cheers, Daniel

>
> -Kees
>
> > Cc: Kees Cook <[email protected]>
> > Cc: Andy Lutomirski <[email protected]>
> > Cc: Will Drewry <[email protected]>
> > Cc: Andrew Morton <[email protected]>
> > Cc: Dave Airlie <[email protected]>
> > Cc: Daniel Vetter <[email protected]>
> > Cc: Lucas Stach <[email protected]>
> > Cc: Rasmus Villemoes <[email protected]>
> > Cc: Cyrill Gorcunov <[email protected]>
> > Cc: [email protected]
> > Acked-by: Daniel Vetter <[email protected]> # DRM depends on kcmp
> > Acked-by: Rasmus Villemoes <[email protected]> # systemd uses kcmp
> >
> > ---
> > v2:
> > - Default n.
> > - Borrow help message from man kcmp.
> > - Export get_epoll_tfile_raw_ptr() for CONFIG_KCMP
> > v3:
> > - Select KCMP for CONFIG_DRM
> > ---
> > drivers/gpu/drm/Kconfig | 3 +++
> > fs/eventpoll.c | 4 ++--
> > include/linux/eventpoll.h | 2 +-
> > init/Kconfig | 11 +++++++++++
> > kernel/Makefile | 2 +-
> > tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +-
> > 6 files changed, 19 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> > index 0973f408d75f..af6c6d214d91 100644
> > --- a/drivers/gpu/drm/Kconfig
> > +++ b/drivers/gpu/drm/Kconfig
> > @@ -15,6 +15,9 @@ menuconfig DRM
> > select I2C_ALGOBIT
> > select DMA_SHARED_BUFFER
> > select SYNC_FILE
> > +# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
> > +# device and dmabuf fd. Let's make sure that is available for our userspace.
> > + select KCMP
> > help
> > Kernel-level support for the Direct Rendering Infrastructure (DRI)
> > introduced in XFree86 4.0. If you say Y here, you need to select
> > diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> > index a829af074eb5..3196474cbe24 100644
> > --- a/fs/eventpoll.c
> > +++ b/fs/eventpoll.c
> > @@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
> > return epir;
> > }
> >
> > -#ifdef CONFIG_CHECKPOINT_RESTORE
> > +#ifdef CONFIG_KCMP
> > static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff)
> > {
> > struct rb_node *rbp;
> > @@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
> >
> > return file_raw;
> > }
> > -#endif /* CONFIG_CHECKPOINT_RESTORE */
> > +#endif /* CONFIG_KCMP */
> >
> > /**
> > * Adds a new entry to the tail of the list in a lockless way, i.e.
> > diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
> > index 0350393465d4..593322c946e6 100644
> > --- a/include/linux/eventpoll.h
> > +++ b/include/linux/eventpoll.h
> > @@ -18,7 +18,7 @@ struct file;
> >
> > #ifdef CONFIG_EPOLL
> >
> > -#ifdef CONFIG_CHECKPOINT_RESTORE
> > +#ifdef CONFIG_KCMP
> > struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff);
> > #endif
> >
> > diff --git a/init/Kconfig b/init/Kconfig
> > index b77c60f8b963..9cc7436b2f73 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -1194,6 +1194,7 @@ endif # NAMESPACES
> > config CHECKPOINT_RESTORE
> > bool "Checkpoint/restore support"
> > select PROC_CHILDREN
> > + select KCMP
> > default n
> > help
> > Enables additional kernel features in a sake of checkpoint/restore.
> > @@ -1737,6 +1738,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
> > config ARCH_HAS_MEMBARRIER_SYNC_CORE
> > bool
> >
> > +config KCMP
> > + bool "Enable kcmp() system call" if EXPERT
> > + help
> > + Enable the kernel resource comparison system call. It provides
> > + user-space with the ability to compare two processes to see if they
> > + share a common resource, such as a file descriptor or even virtual
> > + memory space.
> > +
> > + If unsure, say N.
> > +
> > config RSEQ
> > bool "Enable rseq() system call" if EXPERT
> > default y
> > diff --git a/kernel/Makefile b/kernel/Makefile
> > index aa7368c7eabf..320f1f3941b7 100644
> > --- a/kernel/Makefile
> > +++ b/kernel/Makefile
> > @@ -51,7 +51,7 @@ obj-y += livepatch/
> > obj-y += dma/
> > obj-y += entry/
> >
> > -obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
> > +obj-$(CONFIG_KCMP) += kcmp.o
> > obj-$(CONFIG_FREEZER) += freezer.o
> > obj-$(CONFIG_PROFILING) += profile.o
> > obj-$(CONFIG_STACKTRACE) += stacktrace.o
> > diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
> > index 26c72f2b61b1..1b6c7d33c4ff 100644
> > --- a/tools/testing/selftests/seccomp/seccomp_bpf.c
> > +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
> > @@ -315,7 +315,7 @@ TEST(kcmp)
> > ret = __filecmp(getpid(), getpid(), 1, 1);
> > EXPECT_EQ(ret, 0);
> > if (ret != 0 && errno == ENOSYS)
> > - SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)");
> > + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
> > }
> >
> > TEST(mode_strict_support)
> > --
> > 2.20.1
> >
>
> --
> Kees Cook

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch