2023-05-30 19:20:10

by ~akihirosuda

[permalink] [raw]
Subject: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

This sysctl limits groups who can create a new userns without
CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
vulnerabilities around userns.

The sysctl value format is same as "net.ipv4.ping_group_range".

To disable creating new unprivileged userns, set the sysctl value to "1
0" in the initial userns.

To allow everyone to create new userns, set the sysctl value to "0
4294967294". This is the default value.

This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
Ubuntu [1] and Debian GNU/Linux.

Link: https://git.launchpad.net/~ubuntu-
kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Signed-off-by: Akihiro Suda <[email protected]>

Akihiro Suda (3):
net/ipv4: split group_range logic to kernel/group_range.c
group_range: allow GID from 2147483648 to 4294967294
userns: add sysctl "kernel.userns_group_range"

include/linux/group_range.h | 18 +++++
include/linux/user_namespace.h | 5 ++
include/net/netns/ipv4.h | 9 +--
include/net/ping.h | 6 --
kernel/Makefile | 2 +-
kernel/fork.c | 24 +++++++
kernel/group_range.c | 123 +++++++++++++++++++++++++++++++++
kernel/sysctl.c | 30 ++++++++
kernel/user.c | 9 +++
net/ipv4/ping.c | 39 +----------
net/ipv4/sysctl_net_ipv4.c | 56 ++-------------
11 files changed, 219 insertions(+), 102 deletions(-)
create mode 100644 include/linux/group_range.h
create mode 100644 kernel/group_range.c

--
2.38.4


2023-05-30 19:23:22

by ~akihirosuda

[permalink] [raw]
Subject: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"

From: Akihiro Suda <[email protected]>

This sysctl limits groups who can create a new userns without CAP_SYS_ADMIN
in the current userns, so as to mitigate potential kernel vulnerabilities
around userns.

The sysctl value format is same as "net.ipv4.ping_group_range".

To disable creating new unprivileged userns, set the sysctl value to "1 0"
in the initial userns.

To allow everyone to create new userns, set the sysctl value to
"0 4294967294". This is the default value.

This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
Ubuntu [1] and Debian GNU/Linux.

Link: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Signed-off-by: Akihiro Suda <[email protected]>
---
include/linux/user_namespace.h | 5 +++++
kernel/fork.c | 24 ++++++++++++++++++++++++
kernel/sysctl.c | 30 ++++++++++++++++++++++++++++++
kernel/user.c | 9 +++++++++
4 files changed, 68 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 45f09bec02c4..b8b5a982f818 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -11,6 +11,10 @@
#include <linux/sysctl.h>
#include <linux/err.h>

+#ifdef CONFIG_SYSCTL
+#include <linux/group_range.h>
+#endif
+
#define UID_GID_MAP_MAX_BASE_EXTENTS 5
#define UID_GID_MAP_MAX_EXTENTS 340

@@ -98,6 +102,7 @@ struct user_namespace {
#ifdef CONFIG_SYSCTL
struct ctl_table_set set;
struct ctl_table_header *sysctls;
+ struct group_range group_range;
#endif
struct ucounts *ucounts;
long ucount_max[UCOUNT_COUNTS];
diff --git a/kernel/fork.c b/kernel/fork.c
index ed4e01daccaa..1e8debdf0896 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -111,6 +111,10 @@
#define CREATE_TRACE_POINTS
#include <trace/events/task.h>

+#ifdef CONFIG_USER_NS
+#include <linux/group_range.h>
+#endif
+
/*
* Minimum number of threads to boot the kernel
*/
@@ -2235,6 +2239,16 @@ static void rv_task_fork(struct task_struct *p)
#define rv_task_fork(p) do {} while (0)
#endif

+#ifdef CONFIG_USER_NS
+static bool userns_clone_is_allowed(void)
+{
+ if (capable(CAP_SYS_ADMIN))
+ return true;
+
+ return check_current_group_range(&current_user_ns()->group_range);
+}
+#endif
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -2266,6 +2280,11 @@ __latent_entropy struct task_struct *copy_process(
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
return ERR_PTR(-EINVAL);

+#ifdef CONFIG_USER_NS
+ if ((clone_flags & CLONE_NEWUSER) && !userns_clone_is_allowed())
+ return ERR_PTR(-EPERM);
+#endif
+
/*
* Thread groups must share signals as well, and detached threads
* can only be started up within the thread group.
@@ -3340,6 +3359,11 @@ static int check_unshare_flags(unsigned long unshare_flags)
return -EINVAL;
}

+#ifdef CONFIG_USER_NS
+ if ((unshare_flags & CLONE_NEWUSER) && !userns_clone_is_allowed())
+ return -EPERM;
+#endif
+
return 0;
}

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bfe53e835524..ace7bf0fe9fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -80,6 +80,9 @@
#ifdef CONFIG_RT_MUTEXES
#include <linux/rtmutex.h>
#endif
+#ifdef CONFIG_USER_NS
+#include <linux/group_range.h>
+#endif

/* shared constants to be used in various sysctls */
const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1615,6 +1618,24 @@ int proc_do_static_key(struct ctl_table *table, int write,
return ret;
}

+#ifdef CONFIG_USER_NS
+static struct group_range *userns_group_range_func(struct ctl_table *table)
+{
+ struct user_namespace *user_ns =
+ container_of(table->data, struct user_namespace, group_range.range);
+
+ return &user_ns->group_range;
+}
+
+/* Validate changes from /proc interface. */
+static int userns_group_range(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ return sysctl_group_range(userns_group_range_func, table,
+ write, buffer, lenp, ppos);
+}
+#endif
+
static struct ctl_table kern_table[] = {
{
.procname = "panic",
@@ -1623,6 +1644,15 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_USER_NS
+ {
+ .procname = "userns_group_range",
+ .data = &init_user_ns.group_range.range,
+ .maxlen = sizeof(init_user_ns.group_range.range),
+ .mode = 0644,
+ .proc_handler = userns_group_range,
+ },
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
diff --git a/kernel/user.c b/kernel/user.c
index d667debeafd6..4704c93f62f9 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -20,6 +20,10 @@
#include <linux/user_namespace.h>
#include <linux/proc_ns.h>

+#ifdef CONFIG_SYSCTL
+#include <linux/group_range.h>
+#endif
+
/*
* userns count is 1 for root user, 1 for init_uts_ns,
* and 1 for... ?
@@ -67,6 +71,11 @@ struct user_namespace init_user_ns = {
.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
.keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
#endif
+#ifdef CONFIG_SYSCTL
+ .group_range = {
+ .range = {0, ((gid_t)~0U) - 1},
+ },
+#endif
};
EXPORT_SYMBOL_GPL(init_user_ns);

--
2.38.4

2023-05-30 22:30:37

by Paul Moore

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <[email protected]> wrote:
>
> This sysctl limits groups who can create a new userns without
> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> vulnerabilities around userns.
>
> The sysctl value format is same as "net.ipv4.ping_group_range".
>
> To disable creating new unprivileged userns, set the sysctl value to "1
> 0" in the initial userns.
>
> To allow everyone to create new userns, set the sysctl value to "0
> 4294967294". This is the default value.
>
> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> Ubuntu [1] and Debian GNU/Linux.
>
> Link: https://git.launchpad.net/~ubuntu-
> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]

Given the challenges around adding access controls to userns
operations, have you considered using the LSM support that was added
upstream last year? The relevant LSM hook can be found in commit
7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
and although only SELinux currently provides an access control
implementation, there is no reason you couldn't add support for your
favorite LSM, or even just a simple BPF LSM to enforce the group
controls as you've described them here.

> Akihiro Suda (3):
> net/ipv4: split group_range logic to kernel/group_range.c
> group_range: allow GID from 2147483648 to 4294967294
> userns: add sysctl "kernel.userns_group_range"
>
> include/linux/group_range.h | 18 +++++
> include/linux/user_namespace.h | 5 ++
> include/net/netns/ipv4.h | 9 +--
> include/net/ping.h | 6 --
> kernel/Makefile | 2 +-
> kernel/fork.c | 24 +++++++
> kernel/group_range.c | 123 +++++++++++++++++++++++++++++++++
> kernel/sysctl.c | 30 ++++++++
> kernel/user.c | 9 +++
> net/ipv4/ping.c | 39 +----------
> net/ipv4/sysctl_net_ipv4.c | 56 ++-------------
> 11 files changed, 219 insertions(+), 102 deletions(-)
> create mode 100644 include/linux/group_range.h
> create mode 100644 kernel/group_range.c

--
paul-moore.com

2023-05-31 04:41:20

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"

Hi ~akihirosuda,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linux/master]

url: https://github.com/intel-lab-lkp/linux/commits/akihirosuda/group_range-allow-GID-from-2147483648-to-4294967294/20230531-030041
base: linux/master
patch link: https://lore.kernel.org/r/168547265011.24337.4306067683997517082-3%40git.sr.ht
patch subject: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range"
config: um-x86_64_defconfig (https://download.01.org/0day-ci/archive/20230531/[email protected]/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/7d893a64ad74171c7ad9aa2296da7bc4214bdf37
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review akihirosuda/group_range-allow-GID-from-2147483648-to-4294967294/20230531-030041
git checkout 7d893a64ad74171c7ad9aa2296da7bc4214bdf37
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=um SUBARCH=x86_64 olddefconfig
make W=1 O=build_dir ARCH=um SUBARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
31 | struct user_namespace init_user_ns = {
| ^
......
40 | },
| }
......
50 | },
| }
......
60 | },
| }
61 | },
62 | .ns.count = REFCOUNT_INIT(3),
| }
......
65 | .ns.inum = PROC_USER_INIT_INO,
| }
......
76 | .range = {0, ((gid_t)~0U) - 1},
| {} { }
>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
31 | struct user_namespace init_user_ns = {
| ^
......
40 | },
| }
......
50 | },
| }
......
60 | },
| }
61 | },
62 | .ns.count = REFCOUNT_INIT(3),
| }
......
65 | .ns.inum = PROC_USER_INIT_INO,
| }
......
76 | .range = {0, ((gid_t)~0U) - 1},
| {} { }
>> kernel/user.c:31:38: warning: missing braces around initializer [-Wmissing-braces]
31 | struct user_namespace init_user_ns = {
| ^
......
40 | },
| }
......
50 | },
| }
......
60 | },
| }
61 | },
62 | .ns.count = REFCOUNT_INIT(3),
| }
......
65 | .ns.inum = PROC_USER_INIT_INO,
| }
......
76 | .range = {0, ((gid_t)~0U) - 1},
| {} { }


vim +31 kernel/user.c

7d893a64ad7417 Akihiro Suda 2023-05-30 26
59607db367c57f Serge E. Hallyn 2011-03-23 27 /*
59607db367c57f Serge E. Hallyn 2011-03-23 28 * userns count is 1 for root user, 1 for init_uts_ns,
59607db367c57f Serge E. Hallyn 2011-03-23 29 * and 1 for... ?
59607db367c57f Serge E. Hallyn 2011-03-23 30 */
aee16ce73c71a2 Pavel Emelyanov 2008-02-08 @31 struct user_namespace init_user_ns = {
22d917d80e8428 Eric W. Biederman 2011-11-17 32 .uid_map = {
22d917d80e8428 Eric W. Biederman 2011-11-17 33 .nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25 34 {
22d917d80e8428 Eric W. Biederman 2011-11-17 35 .extent[0] = {
22d917d80e8428 Eric W. Biederman 2011-11-17 36 .first = 0,
22d917d80e8428 Eric W. Biederman 2011-11-17 37 .lower_first = 0,
4b06a81f1daee6 Eric W. Biederman 2012-05-19 38 .count = 4294967295U,
22d917d80e8428 Eric W. Biederman 2011-11-17 39 },
22d917d80e8428 Eric W. Biederman 2011-11-17 40 },
aa4bf44dc851c6 Christian Brauner 2017-10-25 41 },
22d917d80e8428 Eric W. Biederman 2011-11-17 42 .gid_map = {
22d917d80e8428 Eric W. Biederman 2011-11-17 43 .nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25 44 {
22d917d80e8428 Eric W. Biederman 2011-11-17 45 .extent[0] = {
22d917d80e8428 Eric W. Biederman 2011-11-17 46 .first = 0,
22d917d80e8428 Eric W. Biederman 2011-11-17 47 .lower_first = 0,
4b06a81f1daee6 Eric W. Biederman 2012-05-19 48 .count = 4294967295U,
22d917d80e8428 Eric W. Biederman 2011-11-17 49 },
22d917d80e8428 Eric W. Biederman 2011-11-17 50 },
aa4bf44dc851c6 Christian Brauner 2017-10-25 51 },
f76d207a66c3a5 Eric W. Biederman 2012-08-30 52 .projid_map = {
f76d207a66c3a5 Eric W. Biederman 2012-08-30 53 .nr_extents = 1,
aa4bf44dc851c6 Christian Brauner 2017-10-25 54 {
f76d207a66c3a5 Eric W. Biederman 2012-08-30 55 .extent[0] = {
f76d207a66c3a5 Eric W. Biederman 2012-08-30 56 .first = 0,
f76d207a66c3a5 Eric W. Biederman 2012-08-30 57 .lower_first = 0,
f76d207a66c3a5 Eric W. Biederman 2012-08-30 58 .count = 4294967295U,
f76d207a66c3a5 Eric W. Biederman 2012-08-30 59 },
f76d207a66c3a5 Eric W. Biederman 2012-08-30 60 },
aa4bf44dc851c6 Christian Brauner 2017-10-25 61 },
265cbd62e034cb Kirill Tkhai 2020-08-03 62 .ns.count = REFCOUNT_INIT(3),
783291e6900292 Eric W. Biederman 2011-11-17 63 .owner = GLOBAL_ROOT_UID,
783291e6900292 Eric W. Biederman 2011-11-17 64 .group = GLOBAL_ROOT_GID,
435d5f4bb2ccba Al Viro 2014-10-31 65 .ns.inum = PROC_USER_INIT_INO,
33c429405a2c8d Al Viro 2014-11-01 66 #ifdef CONFIG_USER_NS
33c429405a2c8d Al Viro 2014-11-01 67 .ns.ops = &userns_operations,
33c429405a2c8d Al Viro 2014-11-01 68 #endif
9cc46516ddf497 Eric W. Biederman 2014-12-02 69 .flags = USERNS_INIT_FLAGS,
b206f281d0ee14 David Howells 2019-06-26 70 #ifdef CONFIG_KEYS
b206f281d0ee14 David Howells 2019-06-26 71 .keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
0f44e4d976f96c David Howells 2019-06-26 72 .keyring_sem = __RWSEM_INITIALIZER(init_user_ns.keyring_sem),
f36f8c75ae2e7d David Howells 2013-09-24 73 #endif
7d893a64ad7417 Akihiro Suda 2023-05-30 74 #ifdef CONFIG_SYSCTL
7d893a64ad7417 Akihiro Suda 2023-05-30 75 .group_range = {
7d893a64ad7417 Akihiro Suda 2023-05-30 76 .range = {0, ((gid_t)~0U) - 1},
7d893a64ad7417 Akihiro Suda 2023-05-30 77 },
7d893a64ad7417 Akihiro Suda 2023-05-30 78 #endif
aee16ce73c71a2 Pavel Emelyanov 2008-02-08 79 };
aee16ce73c71a2 Pavel Emelyanov 2008-02-08 80 EXPORT_SYMBOL_GPL(init_user_ns);
aee16ce73c71a2 Pavel Emelyanov 2008-02-08 81

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-05-31 08:05:50

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

On Tue, May 30, 2023 at 05:58:48PM -0400, Paul Moore wrote:
> On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <[email protected]> wrote:
> >
> > This sysctl limits groups who can create a new userns without
> > CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> > vulnerabilities around userns.
> >
> > The sysctl value format is same as "net.ipv4.ping_group_range".
> >
> > To disable creating new unprivileged userns, set the sysctl value to "1
> > 0" in the initial userns.
> >
> > To allow everyone to create new userns, set the sysctl value to "0
> > 4294967294". This is the default value.
> >
> > This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> > Ubuntu [1] and Debian GNU/Linux.
> >
> > Link: https://git.launchpad.net/~ubuntu-
> > kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
>
> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year? The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

Yes. Please, no more sysctls...

2023-06-02 00:20:14

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

~akihirosuda <[email protected]> writes:

> This sysctl limits groups who can create a new userns without
> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> vulnerabilities around userns.
>
> The sysctl value format is same as "net.ipv4.ping_group_range".
>
> To disable creating new unprivileged userns, set the sysctl value to "1
> 0" in the initial userns.
>
> To allow everyone to create new userns, set the sysctl value to "0
> 4294967294". This is the default value.
>
> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> Ubuntu [1] and Debian GNU/Linux.
>
> Link: https://git.launchpad.net/~ubuntu-
> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
>
> Signed-off-by: Akihiro Suda <[email protected]>

How does this functionally differ from what already exists
user.max_user_namespaces?

Given that setns exists I don't see limiting creation of user namespaces
by group being meaningful, if your goal is to reduce the attack surface
of the kernel to mitigate potential kernel vulnerabilities.

How does this functionality interact with the use of setgroups in a user
namespace?

What is the value of a group_range inside of a newly created user
namespace? How does that work to maintain the policy you are trying to
implement?

Eric

2023-06-02 00:37:25

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

Paul Moore <[email protected]> writes:

> On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <[email protected]> wrote:
>>
>> This sysctl limits groups who can create a new userns without
>> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
>> vulnerabilities around userns.
>>
>> The sysctl value format is same as "net.ipv4.ping_group_range".
>>
>> To disable creating new unprivileged userns, set the sysctl value to "1
>> 0" in the initial userns.
>>
>> To allow everyone to create new userns, set the sysctl value to "0
>> 4294967294". This is the default value.
>>
>> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
>> Ubuntu [1] and Debian GNU/Linux.
>>
>> Link: https://git.launchpad.net/~ubuntu-
>> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
>
> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year? The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),


Paul how have you handled the real world regression I reported against
chromium?

Paul are you aware that the LSM hook can not be used to achieve the
objective of this patchset?

> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

Is there a publicly available SELinux policy that uses that LSM hook?

Eric

2023-06-02 01:12:05

by Paul Moore

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <[email protected]> wrote:
> Paul Moore <[email protected]> writes:
> > On Tue, May 30, 2023 at 2:50 PM ~akihirosuda <[email protected]> wrote:
> >>
> >> This sysctl limits groups who can create a new userns without
> >> CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel
> >> vulnerabilities around userns.
> >>
> >> The sysctl value format is same as "net.ipv4.ping_group_range".
> >>
> >> To disable creating new unprivileged userns, set the sysctl value to "1
> >> 0" in the initial userns.
> >>
> >> To allow everyone to create new userns, set the sysctl value to "0
> >> 4294967294". This is the default value.
> >>
> >> This sysctl replaces "kernel.unprivileged_userns_clone" that is found in
> >> Ubuntu [1] and Debian GNU/Linux.
> >>
> >> Link: https://git.launchpad.net/~ubuntu-
> >> kernel/ubuntu/+source/linux/+git/jammy/commit?id=3422764 [1]
> >
> > Given the challenges around adding access controls to userns
> > operations, have you considered using the LSM support that was added
> > upstream last year? The relevant LSM hook can be found in commit
> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
>
> Paul how have you handled the real world regression I reported against
> chromium?

I don't track chromium development.

> Paul are you aware that the LSM hook can not be used to achieve the
> objective of this patchset?

/me shrugs

I thought one could look into a cred struct using a BPF LSM, which
would allow one to make access control decisions based on group ID,
but I will be the first to admit I'm not a BPF LSM expert.
Regardless, one could introduce a group ID check into a LSM if they
were so inclined.

I also find it slightly amusing that you are arguing against my reply
that was discussing *not* adding another userns control point; of all
people I thought you would be supportive of this ... /me shrugs again.

> > and although only SELinux currently provides an access control
> > implementation, there is no reason you couldn't add support for your
> > favorite LSM, or even just a simple BPF LSM to enforce the group
> > controls as you've described them here.
>
> Is there a publicly available SELinux policy that uses that LSM hook?

I have no idea, I don't track all of the publicly available SELinux
policies because frankly it doesn't matter; the SELinux feature
exists, and it is my role to support and maintain it. There are
LSM/SELinux features which are not widely exercised in general purpose
SELinux policies for various reasons, but *are* used in specialized
environments that are not often discussed on public mailing lists.

--
paul-moore.com

2023-06-02 01:48:13

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

Paul Moore <[email protected]> writes:

> On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <[email protected]> wrote:
>> Paul Moore <[email protected]> writes:
>> >
>> > Given the challenges around adding access controls to userns
>> > operations, have you considered using the LSM support that was added
>> > upstream last year? The relevant LSM hook can be found in commit
>> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
>>
>> Paul how have you handled the real world regression I reported against
>> chromium?
>
> I don't track chromium development.

You have chosen to be the maintainer and I reported it to you.

>> Paul are you aware that the LSM hook can not be used to achieve the
>> objective of this patchset?
>
> /me shrugs
>

[snip parts about performing a group id check]

The LSM hook you added does not have the technical capability to reduce
the attack surface to mitigate bugs in the kernel. It is the
ineffectiveness of the hook not the permission check that I was
referring to.

Eric

2023-06-02 15:21:24

by Paul Moore

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

On Thu, Jun 1, 2023 at 9:41 PM Eric W. Biederman <[email protected]> wrote:
> Paul Moore <[email protected]> writes:
> > On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <[email protected]> wrote:
> >> Paul Moore <[email protected]> writes:
> >> >
> >> > Given the challenges around adding access controls to userns
> >> > operations, have you considered using the LSM support that was added
> >> > upstream last year? The relevant LSM hook can be found in commit
> >> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> >>
> >> Paul how have you handled the real world regression I reported against
> >> chromium?
> >
> > I don't track chromium development.
>
> You have chosen to be the maintainer and I reported it to you.

I just dug through all of the mail I've received from you over the
past two (?) years, as well as checking the LSM archive on lore and I
don't see any bug reports from you directed at the upstream LSM or
SELinux code ... perhaps I missed something, do you have a pointer?

Also, for the sake of clarification, I do not maintain any part of
Chromium or Chrome OS. I do maintain the upstream LSM, SELinux,
audit, and labeled networking subsystems in the Linux Kernel as well
as a couple of userspace packages.

> >> Paul are you aware that the LSM hook can not be used to achieve the
> >> objective of this patchset?
> >
> > /me shrugs
>
> [snip parts about performing a group id check]

My comments here were only discussing the possibility of performing a
group ID based access control check; I made no claims about the
desirability of such a check, and I have no interest in rehashing our
old debates.

--
paul-moore.com

2023-06-02 21:06:39

by Akihiro Suda

[permalink] [raw]
Subject: Re: [PATCH linux 0/3] [PATCH] userns: add sysctl "kernel.userns_group_range"

Thank you all for feedbacks,

> (Paul)

> Given the challenges around adding access controls to userns
> operations, have you considered using the LSM support that was added
> upstream last year?

I'll consider this.

> The relevant LSM hook can be found in commit
> 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> and although only SELinux currently provides an access control
> implementation, there is no reason you couldn't add support for your
> favorite LSM, or even just a simple BPF LSM to enforce the group
> controls as you've described them here.

My intent is to provide an universal knob that works for both SELinux
distros and AppArmor distros.
So I guess I should try to use BPF LSM (and find out how its end-user
UX in the userspace can be simplified just like sysctl).

---
> (Christian)

> Yes. Please, no more sysctls...

I'll try to find another way, such as (BPF) LSM.

---
> (Eric)

> How does this functionally differ from what already exists
> user.max_user_namespaces?

My patch is not about the numbers of the UserNS, but about who is
permitted to create UserNS,
which can be a potential attack surface to pwn the root in the initial UserNS.

> Given that setns exists I don't see limiting creation of user namespaces
> by group being meaningful, if your goal is to reduce the attack surface
> of the kernel to mitigate potential kernel vulnerabilities.

Yes, that's my goal.
The intent is to allow creating UserNS only for (semi-trusted) human
users who need to run unprivileged (aka rootless) containers.
Creating UserNS is expected to be prohibited for system daemon
accounts and human users who do not need (or who are not trusted
enough) to run containers.

This configuration should be more secure than just allowing everybody
to run unprivileged (aka rootless) containers, and also more secure
than just disabling UserNS and running everything as the root.

> How does this functionality interact with the use of setgroups in a user
> namespace?
>
> What is the value of a group_range inside of a newly created user
> namespace? How does that work to maintain the policy you are trying to
> implement?

In a child UserNS, the group_range file is expected to use the mapped
IDs, not the initial UserNS IDs.
(So, the range can't be just initialized with `.range = {0,
((gid_t)~0U) - 1}`. My patch v1 is wrong.)

> Paul are you aware that the LSM hook can not be used to achieve the
> objective of this patchset?

What would be an obstacle to using an LSM hook for this? (with an
addition of GID checks)

2023年6月2日(金) 23:50 Paul Moore <[email protected]>:
>
> On Thu, Jun 1, 2023 at 9:41 PM Eric W. Biederman <[email protected]> wrote:
> > Paul Moore <[email protected]> writes:
> > > On Thu, Jun 1, 2023 at 8:14 PM Eric W. Biederman <[email protected]> wrote:
> > >> Paul Moore <[email protected]> writes:
> > >> >
> > >> > Given the challenges around adding access controls to userns
> > >> > operations, have you considered using the LSM support that was added
> > >> > upstream last year? The relevant LSM hook can be found in commit
> > >> > 7cd4c5c2101c ("security, lsm: Introduce security_create_user_ns()"),
> > >>
> > >> Paul how have you handled the real world regression I reported against
> > >> chromium?
> > >
> > > I don't track chromium development.
> >
> > You have chosen to be the maintainer and I reported it to you.
>
> I just dug through all of the mail I've received from you over the
> past two (?) years, as well as checking the LSM archive on lore and I
> don't see any bug reports from you directed at the upstream LSM or
> SELinux code ... perhaps I missed something, do you have a pointer?
>
> Also, for the sake of clarification, I do not maintain any part of
> Chromium or Chrome OS. I do maintain the upstream LSM, SELinux,
> audit, and labeled networking subsystems in the Linux Kernel as well
> as a couple of userspace packages.
>
> > >> Paul are you aware that the LSM hook can not be used to achieve the
> > >> objective of this patchset?
> > >
> > > /me shrugs
> >
> > [snip parts about performing a group id check]
>
> My comments here were only discussing the possibility of performing a
> group ID based access control check; I made no claims about the
> desirability of such a check, and I have no interest in rehashing our
> old debates.
>
> --
> paul-moore.com