Setup securityfs_ns with symlinks, directories, and files for IMA
namespacing support. The same directory structure that IMA uses on the
host is also created for the namespacing case.
Increment the user namespace's refcount_teardown value by '1' once
securityfs_ns has been successfully setup since the initialization of the
filesystem causes an additional reference to the user namespace to be
taken. The early teardown function will delete the file system and release
the additional reference.
The securityfs_ns file and directory ownerships cannot be set when the
filesystem is setup since at this point the user namespace has not been
configured yet by the user and therefore the ownership mappings are not
available, yet. Therefore, adjust the file and directory ownerships when
an inode's function for determining the permissions of a file or directory
is accessed.
This filesystem can now be mounted as follows:
mount -t securityfs_ns /sys/kernel/security/ /sys/kernel/security/
The following directories, symlinks, and files are then available.
$ ls -l sys/kernel/security/
total 0
lr--r--r--. 1 nobody nobody 0 Nov 27 06:44 ima -> integrity/ima
drwxr-xr-x. 3 nobody nobody 0 Nov 27 06:44 integrity
$ ls -l sys/kernel/security/ima/
total 0
-r--r-----. 1 root root 0 Nov 27 06:44 ascii_runtime_measurements
-r--r-----. 1 root root 0 Nov 27 06:44 binary_runtime_measurements
-rw-------. 1 root root 0 Nov 27 06:44 policy
-r--r-----. 1 root root 0 Nov 27 06:44 runtime_measurements_count
-r--r-----. 1 root root 0 Nov 27 06:44 violations
Signed-off-by: Stefan Berger <[email protected]>
---
include/linux/ima.h | 17 +++
security/integrity/ima/ima.h | 2 +
security/integrity/ima/ima_fs.c | 178 ++++++++++++++++++++++-
security/integrity/ima/ima_init_ima_ns.c | 6 +-
security/integrity/ima/ima_ns.c | 4 +-
5 files changed, 203 insertions(+), 4 deletions(-)
diff --git a/include/linux/ima.h b/include/linux/ima.h
index fe08919df326..a2c5e516f706 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -221,6 +221,18 @@ struct ima_h_table {
struct hlist_head queue[IMA_MEASURE_HTABLE_SIZE];
};
+enum {
+ IMAFS_DENTRY_INTEGRITY_DIR = 0,
+ IMAFS_DENTRY_DIR,
+ IMAFS_DENTRY_SYMLINK,
+ IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS,
+ IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS,
+ IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT,
+ IMAFS_DENTRY_VIOLATIONS,
+ IMAFS_DENTRY_IMA_POLICY,
+ IMAFS_DENTRY_LAST
+};
+
struct ima_namespace {
struct kref kref;
struct user_namespace *user_ns;
@@ -267,6 +279,11 @@ struct ima_namespace {
struct mutex ima_write_mutex;
unsigned long ima_fs_flags;
int valid_policy;
+
+ struct dentry *dentry[IMAFS_DENTRY_LAST];
+ struct vfsmount *mount;
+ int mount_count;
+ bool file_ownership_fixes_done;
};
extern struct ima_namespace init_ima_ns;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index bb9763cd5fb1..9bcd71bb716c 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -139,6 +139,8 @@ struct ns_status {
/* Internal IMA function definitions */
int ima_init(void);
int ima_fs_init(void);
+int ima_fs_ns_init(struct ima_namespace *ns);
+void ima_fs_ns_free(struct ima_namespace *ns);
int ima_add_template_entry(struct ima_namespace *ns,
struct ima_template_entry *entry, int violation,
const char *op, struct inode *inode,
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 6766bb8262f2..9a14be520268 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -22,6 +22,7 @@
#include <linux/parser.h>
#include <linux/vmalloc.h>
#include <linux/ima.h>
+#include <linux/namei.h>
#include "ima.h"
@@ -436,8 +437,13 @@ static int ima_release_policy(struct inode *inode, struct file *file)
ima_update_policy(ns);
#if !defined(CONFIG_IMA_WRITE_POLICY) && !defined(CONFIG_IMA_READ_POLICY)
- securityfs_remove(ima_policy);
- ima_policy = NULL;
+ if (ns == &init_ima_ns) {
+ securityfs_remove(ima_policy);
+ ima_policy = NULL;
+ } else {
+ securityfs_ns_remove(ns->dentry[IMAFS_DENTRY_POLICY]);
+ ns->dentry[IMAFS_DENTRY_POLICY] = NULL;
+ }
#elif defined(CONFIG_IMA_WRITE_POLICY)
clear_bit(IMA_FS_BUSY, &ns->ima_fs_flags);
#elif defined(CONFIG_IMA_READ_POLICY)
@@ -509,3 +515,171 @@ int __init ima_fs_init(void)
securityfs_remove(ima_policy);
return -1;
}
+
+/*
+ * Fix the ownership (uid/gid) of the dentry's that couldn't be set at the
+ * time of their creation because the user namespace wasn't configured, yet.
+ */
+static void ima_fs_ns_fixup_uid_gid(struct ima_namespace *ns)
+{
+ struct inode *inode;
+ size_t i;
+
+ if (ns->file_ownership_fixes_done ||
+ ns->user_ns->uid_map.nr_extents == 0)
+ return;
+
+ ns->file_ownership_fixes_done = true;
+ for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
+ if (!ns->dentry[i])
+ continue;
+ inode = ns->dentry[i]->d_inode;
+ inode->i_uid = make_kuid(ns->user_ns, 0);
+ inode->i_gid = make_kgid(ns->user_ns, 0);
+ }
+}
+
+/* Fix the permissions when a file is opened */
+int ima_fs_ns_permission(struct user_namespace *mnt_userns, struct inode *inode,
+ int mask)
+{
+ ima_fs_ns_fixup_uid_gid(get_current_ns());
+ return generic_permission(mnt_userns, inode, mask);
+}
+
+const struct inode_operations ima_fs_ns_inode_operations = {
+ .lookup = simple_lookup,
+ .permission = ima_fs_ns_permission,
+};
+
+int ima_fs_ns_init(struct ima_namespace *ns)
+{
+ struct dentry *parent;
+
+ ns->mount = securityfs_ns_create_mount(ns->user_ns);
+ if (IS_ERR(ns->mount)) {
+ ns->mount = NULL;
+ return -1;
+ }
+ ns->mount_count += 1;
+
+ ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR] =
+ securityfs_ns_create_dir("integrity", NULL,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR])) {
+ ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_DIR] =
+ securityfs_ns_create_dir("ima", ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR],
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_DIR])) {
+ ns->dentry[IMAFS_DENTRY_DIR] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_SYMLINK] =
+ securityfs_ns_create_symlink("ima", NULL, "integrity/ima", NULL,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_SYMLINK])) {
+ ns->dentry[IMAFS_DENTRY_SYMLINK] = NULL;
+ goto out;
+ }
+
+ parent = ns->dentry[IMAFS_DENTRY_DIR];
+ ns->dentry[IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS] =
+ securityfs_ns_create_file("binary_runtime_measurements",
+ S_IRUSR | S_IRGRP, parent, NULL,
+ &ima_measurements_ops,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS])) {
+ ns->dentry[IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS] =
+ securityfs_ns_create_file("ascii_runtime_measurements",
+ S_IRUSR | S_IRGRP, parent, NULL,
+ &ima_ascii_measurements_ops,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS])) {
+ ns->dentry[IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT] =
+ securityfs_ns_create_file("runtime_measurements_count",
+ S_IRUSR | S_IRGRP, parent, NULL,
+ &ima_measurements_count_ops,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT])) {
+ ns->dentry[IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_VIOLATIONS] =
+ securityfs_ns_create_file("violations", S_IRUSR | S_IRGRP,
+ parent, NULL, &ima_htable_violations_ops,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_VIOLATIONS])) {
+ ns->dentry[IMAFS_DENTRY_VIOLATIONS] = NULL;
+ goto out;
+ }
+
+ ns->dentry[IMAFS_DENTRY_IMA_POLICY] =
+ securityfs_ns_create_file("policy", POLICY_FILE_FLAGS,
+ parent, NULL,
+ &ima_measure_policy_ops,
+ &ima_fs_ns_inode_operations,
+ &ns->mount, &ns->mount_count);
+ if (IS_ERR(ns->dentry[IMAFS_DENTRY_IMA_POLICY])) {
+ ns->dentry[IMAFS_DENTRY_IMA_POLICY] = NULL;
+ goto out;
+ }
+
+ /* Adjust the trigger for user namespace's early teardown of dependent
+ * namespaces. Due to the filesystem there's an additional reference
+ * to the user namespace.
+ */
+ ns->user_ns->refcount_teardown += 1;
+
+ return 0;
+
+out:
+ ima_fs_ns_free(ns);
+
+ return -1;
+}
+
+void ima_fs_ns_free(struct ima_namespace *ns)
+{
+ size_t i;
+
+ for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
+ switch (i) {
+ case IMAFS_DENTRY_DIR:
+ case IMAFS_DENTRY_INTEGRITY_DIR:
+ /* files first */
+ continue;
+ }
+ securityfs_ns_remove(ns->dentry[i], &ns->mount, &ns->mount_count);
+ ns->dentry[i] = NULL;
+ }
+ securityfs_ns_remove(ns->dentry[IMAFS_DENTRY_DIR], &ns->mount, &ns->mount_count);
+ ns->dentry[IMAFS_DENTRY_DIR] = NULL;
+ securityfs_ns_remove(ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR], &ns->mount, &ns->mount_count);
+ ns->dentry[IMAFS_DENTRY_INTEGRITY_DIR] = NULL;
+
+ if (ns->mount) {
+ mntput(ns->mount);
+ ns->mount_count -= 1;
+ }
+ ns->mount = NULL;
+}
diff --git a/security/integrity/ima/ima_init_ima_ns.c b/security/integrity/ima/ima_init_ima_ns.c
index 22ff74e85a5f..86a89502c0c5 100644
--- a/security/integrity/ima/ima_init_ima_ns.c
+++ b/security/integrity/ima/ima_init_ima_ns.c
@@ -20,6 +20,8 @@
int ima_init_namespace(struct ima_namespace *ns)
{
+ int rc = 0;
+
ns->ns_status_tree = RB_ROOT;
rwlock_init(&ns->ns_status_lock);
ns->ns_status_cache = KMEM_CACHE(ns_status, SLAB_PANIC);
@@ -52,8 +54,10 @@ int ima_init_namespace(struct ima_namespace *ns)
mutex_init(&ns->ima_write_mutex);
ns->valid_policy = 1;
ns->ima_fs_flags = 0;
+ if (ns != &init_ima_ns)
+ rc = ima_fs_ns_init(ns);
- return 0;
+ return rc;
}
int __init ima_ns_init(void)
diff --git a/security/integrity/ima/ima_ns.c b/security/integrity/ima/ima_ns.c
index e7ad52b79f99..c687e840441a 100644
--- a/security/integrity/ima/ima_ns.c
+++ b/security/integrity/ima/ima_ns.c
@@ -67,7 +67,9 @@ struct ima_namespace *copy_ima_ns(struct ima_namespace *old_ns,
void ima_ns_userns_early_teardown(struct ima_namespace *ns)
{
-}
+ pr_debug("%s: ns=0x%lx\n", __func__, (unsigned long)ns);
+ ima_fs_ns_free(ns);
+};
EXPORT_SYMBOL(ima_ns_userns_early_teardown);
static void destroy_ima_ns(struct ima_namespace *ns)
--
2.31.1
On Tue, 2021-11-30 at 11:06 -0500, Stefan Berger wrote:
[...]
> +
> +/*
> + * Fix the ownership (uid/gid) of the dentry's that couldn't be set
> at the
> + * time of their creation because the user namespace wasn't
> configured, yet.
> + */
> +static void ima_fs_ns_fixup_uid_gid(struct ima_namespace *ns)
> +{
> + struct inode *inode;
> + size_t i;
> +
> + if (ns->file_ownership_fixes_done ||
> + ns->user_ns->uid_map.nr_extents == 0)
> + return;
> +
> + ns->file_ownership_fixes_done = true;
> + for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
> + if (!ns->dentry[i])
> + continue;
> + inode = ns->dentry[i]->d_inode;
> + inode->i_uid = make_kuid(ns->user_ns, 0);
> + inode->i_gid = make_kgid(ns->user_ns, 0);
> + }
> +}
> +
> +/* Fix the permissions when a file is opened */
> +int ima_fs_ns_permission(struct user_namespace *mnt_userns, struct
> inode *inode,
> + int mask)
> +{
> + ima_fs_ns_fixup_uid_gid(get_current_ns());
> + return generic_permission(mnt_userns, inode, mask);
> +}
> +
> +const struct inode_operations ima_fs_ns_inode_operations = {
> + .lookup = simple_lookup,
> + .permission = ima_fs_ns_permission,
> +};
> +
In theory this uid/gid shifting should have already been done for you
and all of the above code should be unnecessary. What is supposed to
happen is that the mount of securityfs_ns in the new user namespace
should pick up a superblock s_user_ns for that new user namespace. Now
inode_alloc() uses i_uid_write(inode, 0) which maps back through the
s_user_ns to obtain the owner of the user namespace.
What can happen is that if you do the inode allocation before (or even
without) writing to the uid_map file, it maps back through an empty map
and ends up with -1 for i_uid ... is this what you're seeing?
James
On 12/1/21 12:56, James Bottomley wrote:
> On Tue, 2021-11-30 at 11:06 -0500, Stefan Berger wrote:
> [...]
>> +
>> +/*
>> + * Fix the ownership (uid/gid) of the dentry's that couldn't be set
>> at the
>> + * time of their creation because the user namespace wasn't
>> configured, yet.
>> + */
>> +static void ima_fs_ns_fixup_uid_gid(struct ima_namespace *ns)
>> +{
>> + struct inode *inode;
>> + size_t i;
>> +
>> + if (ns->file_ownership_fixes_done ||
>> + ns->user_ns->uid_map.nr_extents == 0)
>> + return;
>> +
>> + ns->file_ownership_fixes_done = true;
>> + for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
>> + if (!ns->dentry[i])
>> + continue;
>> + inode = ns->dentry[i]->d_inode;
>> + inode->i_uid = make_kuid(ns->user_ns, 0);
>> + inode->i_gid = make_kgid(ns->user_ns, 0);
>> + }
>> +}
>> +
>> +/* Fix the permissions when a file is opened */
>> +int ima_fs_ns_permission(struct user_namespace *mnt_userns, struct
>> inode *inode,
>> + int mask)
>> +{
>> + ima_fs_ns_fixup_uid_gid(get_current_ns());
>> + return generic_permission(mnt_userns, inode, mask);
>> +}
>> +
>> +const struct inode_operations ima_fs_ns_inode_operations = {
>> + .lookup = simple_lookup,
>> + .permission = ima_fs_ns_permission,
>> +};
>> +
> In theory this uid/gid shifting should have already been done for you
> and all of the above code should be unnecessary. What is supposed to
> happen is that the mount of securityfs_ns in the new user namespace
> should pick up a superblock s_user_ns for that new user namespace. Now
> inode_alloc() uses i_uid_write(inode, 0) which maps back through the
> s_user_ns to obtain the owner of the user namespace.
>
> What can happen is that if you do the inode allocation before (or even
> without) writing to the uid_map file, it maps back through an empty map
> and ends up with -1 for i_uid ... is this what you're seeing?
I tried this with runc and a user namespace active mapping uid 1000 on
the host to uid 0 in the container. There I run into the problem that
all of the files and directories without the above work-around are
mapped to 'nobody', just like all the files in sysfs in this case are
also mapped to nobody. This code resolved the issue.
sh-5.1# ls -l /sys/
total 0
drwxr-xr-x. 2 nobody nobody 0 Dec 1 18:06 block
drwxr-xr-x. 28 nobody nobody 0 Dec 1 18:06 bus
drwxr-xr-x. 54 nobody nobody 0 Dec 1 18:06 class
drwxr-xr-x. 4 nobody nobody 0 Dec 1 18:06 dev
drwxr-xr-x. 15 nobody nobody 0 Dec 1 18:06 devices
drwxrwxrwt. 2 root root 40 Dec 1 18:06 firmware
drwxr-xr-x. 9 nobody nobody 0 Dec 1 18:06 fs
drwxr-xr-x. 16 nobody nobody 0 Dec 1 18:06 kernel
drwxr-xr-x. 161 nobody nobody 0 Dec 1 18:06 module
drwxr-xr-x. 3 nobody nobody 0 Dec 1 18:06 power
sh-5.1# ls -l /sys/kernel/security/
total 0
lr--r--r--. 1 nobody nobody 0 Dec 1 18:06 ima -> integrity/ima
drwxr-xr-x. 3 nobody nobody 0 Dec 1 18:06 integrity
sh-5.1# ls -l /sys/kernel/security/ima/
total 0
-r--r-----. 1 root root 0 Dec 1 18:06 ascii_runtime_measurements
-r--r-----. 1 root root 0 Dec 1 18:06 binary_runtime_measurements
-rw-------. 1 root root 0 Dec 1 18:06 policy
-r--r-----. 1 root root 0 Dec 1 18:06 runtime_measurements_count
-r--r-----. 1 root root 0 Dec 1 18:06 violations
The nobody's are obviously sufficient to cd into the directories, but
for file accesses I wanted to see root and no changes to permissions.
Stefan
>
> James
>
>
On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
> On 12/1/21 12:56, James Bottomley wrote:
[...]
> I tried this with runc and a user namespace active mapping uid 1000
> on the host to uid 0 in the container. There I run into the problem
> that all of the files and directories without the above work-around
> are mapped to 'nobody', just like all the files in sysfs in this case
> are also mapped to nobody. This code resolved the issue.
So I applied your patches with the permission shift commented out and
instrumented inode_alloc() to see where it might be failing and I
actually find it all works as expected for me:
ejb@testdeb:~> unshare -r --user --mount --ima
root@testdeb:~# mount -t securityfs_ns none /sys/kernel/security
root@testdeb:~# ls -l /sys/kernel/security/ima/
total 0
-r--r----- 1 root root 0 Dec 1 19:11 ascii_runtime_measurements
-r--r----- 1 root root 0 Dec 1 19:11 binary_runtime_measurements
-rw------- 1 root root 0 Dec 1 19:11 policy
-r--r----- 1 root root 0 Dec 1 19:11 runtime_measurements_count
-r--r----- 1 root root 0 Dec 1 19:11 violations
I think your problem is something to do with how runc is installing the
uid/gid mappings. If it's installing them after the security_ns inodes
are created then they get the -1 value (because no mappings exist in
s_user_ns). I can even demonstrate this by forcing unshare to enter
the IMA namespace before writing the mapping values and I'll see
"nobody nogroup" above like you do.
I also see the instrumentation telling me that i_write_uid() is mapping
back to 1000 in the former case and -1 in the latter.
James
On 12/1/21 14:21, James Bottomley wrote:
> On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
>> On 12/1/21 12:56, James Bottomley wrote:
> [...]
>> I tried this with runc and a user namespace active mapping uid 1000
>> on the host to uid 0 in the container. There I run into the problem
>> that all of the files and directories without the above work-around
>> are mapped to 'nobody', just like all the files in sysfs in this case
>> are also mapped to nobody. This code resolved the issue.
> So I applied your patches with the permission shift commented out and
> instrumented inode_alloc() to see where it might be failing and I
> actually find it all works as expected for me:
>
> ejb@testdeb:~> unshare -r --user --mount --ima
> root@testdeb:~# mount -t securityfs_ns none /sys/kernel/security
> root@testdeb:~# ls -l /sys/kernel/security/ima/
> total 0
> -r--r----- 1 root root 0 Dec 1 19:11 ascii_runtime_measurements
> -r--r----- 1 root root 0 Dec 1 19:11 binary_runtime_measurements
> -rw------- 1 root root 0 Dec 1 19:11 policy
> -r--r----- 1 root root 0 Dec 1 19:11 runtime_measurements_count
> -r--r----- 1 root root 0 Dec 1 19:11 violations
>
> I think your problem is something to do with how runc is installing the
> uid/gid mappings. If it's installing them after the security_ns inodes
> are created then they get the -1 value (because no mappings exist in
> s_user_ns). I can even demonstrate this by forcing unshare to enter
> the IMA namespace before writing the mapping values and I'll see
> "nobody nogroup" above like you do.
I am surprised you get this mapping even after commenting the permission
adjustments... it doesn't work for me when I comment them out:
[stefanb@ima-ns-dev rootfs]$ unshare -r --user --mount
[root@ima-ns-dev rootfs]# mount -t securityfs_ns none /sys/kernel/security/
[root@ima-ns-dev rootfs]# cd /sys/kernel/security/ima/
[root@ima-ns-dev ima]# ls -l
total 0
-r--r-----. 1 nobody nobody 0 Dec 1 15:20 ascii_runtime_measurements
-r--r-----. 1 nobody nobody 0 Dec 1 15:20 binary_runtime_measurements
-rw-------. 1 nobody nobody 0 Dec 1 15:20 policy
-r--r-----. 1 nobody nobody 0 Dec 1 15:20 runtime_measurements_count
-r--r-----. 1 nobody nobody 0 Dec 1 15:20 violations
[root@ima-ns-dev ima]# cat /proc/self/uid_map
0 1000 1
[root@ima-ns-dev ima]# cat /proc/self/gid_map
0 1000 1
The initialization of securityfs and setup of files and directories
happens at the same time as the IMA namespace is created. At this time
there are no user mappings available, so that's why I need to make the
adjustments 'late'.
Stefan
On Wed, 2021-12-01 at 15:25 -0500, Stefan Berger wrote:
> On 12/1/21 14:21, James Bottomley wrote:
> > On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
> > > On 12/1/21 12:56, James Bottomley wrote:
> > [...]
> > > I tried this with runc and a user namespace active mapping uid
> > > 1000 on the host to uid 0 in the container. There I run into the
> > > problem that all of the files and directories without the above
> > > work-around are mapped to 'nobody', just like all the files in
> > > sysfs in this case are also mapped to nobody. This code resolved
> > > the issue.
> > So I applied your patches with the permission shift commented out
> > and instrumented inode_alloc() to see where it might be failing and
> > I actually find it all works as expected for me:
> >
> > ejb@testdeb:~> unshare -r --user --mount --ima
> > root@testdeb:~# mount -t securityfs_ns none /sys/kernel/security
> > root@testdeb:~# ls -l /sys/kernel/security/ima/
> > total 0
> > -r--r----- 1 root root 0 Dec 1 19:11 ascii_runtime_measurements
> > -r--r----- 1 root root 0 Dec 1 19:11 binary_runtime_measurements
> > -rw------- 1 root root 0 Dec 1 19:11 policy
> > -r--r----- 1 root root 0 Dec 1 19:11 runtime_measurements_count
> > -r--r----- 1 root root 0 Dec 1 19:11 violations
> >
> > I think your problem is something to do with how runc is installing
> > the uid/gid mappings. If it's installing them after the
> > security_ns inodes are created then they get the -1 value (because
> > no mappings exist in s_user_ns). I can even demonstrate this by
> > forcing unshare to enter the IMA namespace before writing the
> > mapping values and I'll see "nobody nogroup" above like you do.
>
> I am surprised you get this mapping even after commenting the
> permission adjustments... it doesn't work for me when I comment them
> out:
>
> [stefanb@ima-ns-dev rootfs]$ unshare -r --user --mount
> [root@ima-ns-dev rootfs]# mount -t securityfs_ns none
> /sys/kernel/security/
> [root@ima-ns-dev rootfs]# cd /sys/kernel/security/ima/
> [root@ima-ns-dev ima]# ls -l
> total 0
> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 ascii_runtime_measurements
> -r--r-----. 1 nobody nobody 0 Dec 1 15:20
> binary_runtime_measurements
> -rw-------. 1 nobody nobody 0 Dec 1 15:20 policy
> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 runtime_measurements_count
> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 violations
> [root@ima-ns-dev ima]# cat /proc/self/uid_map
> 0 1000 1
> [root@ima-ns-dev ima]# cat /proc/self/gid_map
> 0 1000 1
>
> The initialization of securityfs and setup of files and directories
> happens at the same time as the IMA namespace is created. At this
> time there are no user mappings available, so that's why I need to
> make the adjustments 'late'.
There is one other possible difference: To get the correct s_user_ns
on the securityfs_ns mount, the mount namespace itself has to be owned
by the user namespace ... is runc doing that correctly? I always
forget this detail because unshare does it correctly automatically but
it means you must unshare the user namespace first and then unshare the
mount namespace (or do it in the same sys call because the kernel will
get the correct order).
James
On 12/1/21 16:11, James Bottomley wrote:
> On Wed, 2021-12-01 at 15:25 -0500, Stefan Berger wrote:
>> On 12/1/21 14:21, James Bottomley wrote:
>>> On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
>>>> On 12/1/21 12:56, James Bottomley wrote:
>>> [...]
>>>> I tried this with runc and a user namespace active mapping uid
>>>> 1000 on the host to uid 0 in the container. There I run into the
>>>> problem that all of the files and directories without the above
>>>> work-around are mapped to 'nobody', just like all the files in
>>>> sysfs in this case are also mapped to nobody. This code resolved
>>>> the issue.
>>> So I applied your patches with the permission shift commented out
>>> and instrumented inode_alloc() to see where it might be failing and
>>> I actually find it all works as expected for me:
>>>
>>> ejb@testdeb:~> unshare -r --user --mount --ima
>>> root@testdeb:~# mount -t securityfs_ns none /sys/kernel/security
>>> root@testdeb:~# ls -l /sys/kernel/security/ima/
>>> total 0
>>> -r--r----- 1 root root 0 Dec 1 19:11 ascii_runtime_measurements
>>> -r--r----- 1 root root 0 Dec 1 19:11 binary_runtime_measurements
>>> -rw------- 1 root root 0 Dec 1 19:11 policy
>>> -r--r----- 1 root root 0 Dec 1 19:11 runtime_measurements_count
>>> -r--r----- 1 root root 0 Dec 1 19:11 violations
>>>
>>> I think your problem is something to do with how runc is installing
>>> the uid/gid mappings. If it's installing them after the
>>> security_ns inodes are created then they get the -1 value (because
>>> no mappings exist in s_user_ns). I can even demonstrate this by
>>> forcing unshare to enter the IMA namespace before writing the
>>> mapping values and I'll see "nobody nogroup" above like you do.
>> I am surprised you get this mapping even after commenting the
>> permission adjustments... it doesn't work for me when I comment them
>> out:
>>
>> [stefanb@ima-ns-dev rootfs]$ unshare -r --user --mount
>> [root@ima-ns-dev rootfs]# mount -t securityfs_ns none
>> /sys/kernel/security/
>> [root@ima-ns-dev rootfs]# cd /sys/kernel/security/ima/
>> [root@ima-ns-dev ima]# ls -l
>> total 0
>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 ascii_runtime_measurements
>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20
>> binary_runtime_measurements
>> -rw-------. 1 nobody nobody 0 Dec 1 15:20 policy
>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 runtime_measurements_count
>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 violations
>> [root@ima-ns-dev ima]# cat /proc/self/uid_map
>> 0 1000 1
>> [root@ima-ns-dev ima]# cat /proc/self/gid_map
>> 0 1000 1
>>
>> The initialization of securityfs and setup of files and directories
>> happens at the same time as the IMA namespace is created. At this
>> time there are no user mappings available, so that's why I need to
>> make the adjustments 'late'.
> There is one other possible difference: To get the correct s_user_ns
I am currently wondering why I cannot re-create your setup while
disabling the remapping...
> on the securityfs_ns mount, the mount namespace itself has to be owned
> by the user namespace ... is runc doing that correctly? I always
Following an strace of 'runc create' I see an unshare(CLONE_NEWUSER) by
a process before it does an
unshare(CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWNET),
so this seems to be doing it in the order you suggest.
Also, runc seems to have its own set of struggles. I am not sure we
would be able to ask them to accommodate us to do it 'correctly' - it
doesn't sound so 'easy' for them either to get everything under the hood:
https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/nsexec.c#L919
* In order for this unsharing code to be more extensible we need
to split
* up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal
case
* would be if we did clone(CLONE_NEWUSER) and the other namespaces
* separately, but because of SELinux issues we cannot really do
that. But
[...]
* However, if we unshare(2) the user namespace *before* we
clone(2), then
* all hell breaks loose.
sounds like fun
So, I am not quite sure whether I am working around an issue of runc but
for that I would like to first be able to re-create your successful
setup to see what's different.
Stefan
> forget this detail because unshare does it correctly automatically but
> it means you must unshare the user namespace first and then unshare the
> mount namespace (or do it in the same sys call because the kernel will
> get the correct order).
>
> James
>
>
On Wed, 2021-12-01 at 16:34 -0500, Stefan Berger wrote:
> On 12/1/21 16:11, James Bottomley wrote:
> > On Wed, 2021-12-01 at 15:25 -0500, Stefan Berger wrote:
> > > On 12/1/21 14:21, James Bottomley wrote:
> > > > On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
> > > > > On 12/1/21 12:56, James Bottomley wrote:
> > > > [...]
> > > > > I tried this with runc and a user namespace active mapping
> > > > > uid
> > > > > 1000 on the host to uid 0 in the container. There I run into
> > > > > the
> > > > > problem that all of the files and directories without the
> > > > > above
> > > > > work-around are mapped to 'nobody', just like all the files
> > > > > in
> > > > > sysfs in this case are also mapped to nobody. This code
> > > > > resolved
> > > > > the issue.
> > > > So I applied your patches with the permission shift commented
> > > > out
> > > > and instrumented inode_alloc() to see where it might be failing
> > > > and
> > > > I actually find it all works as expected for me:
> > > >
> > > > ejb@testdeb:~> unshare -r --user --mount --ima
> > > > root@testdeb:~# mount -t securityfs_ns none
> > > > /sys/kernel/security
> > > > root@testdeb:~# ls -l /sys/kernel/security/ima/
> > > > total 0
> > > > -r--r----- 1 root root 0 Dec 1 19:11
> > > > ascii_runtime_measurements
> > > > -r--r----- 1 root root 0 Dec 1 19:11
> > > > binary_runtime_measurements
> > > > -rw------- 1 root root 0 Dec 1 19:11 policy
> > > > -r--r----- 1 root root 0 Dec 1 19:11
> > > > runtime_measurements_count
> > > > -r--r----- 1 root root 0 Dec 1 19:11 violations
> > > >
> > > > I think your problem is something to do with how runc is
> > > > installing
> > > > the uid/gid mappings. If it's installing them after the
> > > > security_ns inodes are created then they get the -1 value
> > > > (because
> > > > no mappings exist in s_user_ns). I can even demonstrate this
> > > > by
> > > > forcing unshare to enter the IMA namespace before writing the
> > > > mapping values and I'll see "nobody nogroup" above like you do.
> > > I am surprised you get this mapping even after commenting the
> > > permission adjustments... it doesn't work for me when I comment
> > > them
> > > out:
> > >
> > > [stefanb@ima-ns-dev rootfs]$ unshare -r --user --mount
> > > [root@ima-ns-dev rootfs]# mount -t securityfs_ns none
> > > /sys/kernel/security/
> > > [root@ima-ns-dev rootfs]# cd /sys/kernel/security/ima/
> > > [root@ima-ns-dev ima]# ls -l
> > > total 0
> > > -r--r-----. 1 nobody nobody 0 Dec 1 15:20
> > > ascii_runtime_measurements
> > > -r--r-----. 1 nobody nobody 0 Dec 1 15:20
> > > binary_runtime_measurements
> > > -rw-------. 1 nobody nobody 0 Dec 1 15:20 policy
> > > -r--r-----. 1 nobody nobody 0 Dec 1 15:20
> > > runtime_measurements_count
> > > -r--r-----. 1 nobody nobody 0 Dec 1 15:20 violations
> > > [root@ima-ns-dev ima]# cat /proc/self/uid_map
> > > 0 1000 1
> > > [root@ima-ns-dev ima]# cat /proc/self/gid_map
> > > 0 1000 1
> > >
> > > The initialization of securityfs and setup of files and
> > > directories
> > > happens at the same time as the IMA namespace is created. At this
> > > time there are no user mappings available, so that's why I need
> > > to
> > > make the adjustments 'late'.
> > There is one other possible difference: To get the correct
> > s_user_ns
>
> I am currently wondering why I cannot re-create your setup while
> disabling the remapping...
OK, I think I figured it out. When I applied your patches, it was on
top of my existing ones, so I had to massage them a bit.
Your problem is the securityfs inode creation is triggered inside
create_user_ns, which means it happens *before* ushare writes to the
proc/self/uid_map file, so the securityfs_inodes are always created on
an empty mapping and i_write_uid always sets the inode uid to -1.
I don't see this because my setup for everything is triggered off the
first use of the IMA namespace. You'd need to have some type of lazy
setup of the inodes as well to give unshare time to install the uid/gid
mappings.
James
On 12/1/21 17:01, James Bottomley wrote:
> On Wed, 2021-12-01 at 16:34 -0500, Stefan Berger wrote:
>> On 12/1/21 16:11, James Bottomley wrote:
>>> On Wed, 2021-12-01 at 15:25 -0500, Stefan Berger wrote:
>>>> On 12/1/21 14:21, James Bottomley wrote:
>>>>> On Wed, 2021-12-01 at 13:11 -0500, Stefan Berger wrote:
>>>>>> On 12/1/21 12:56, James Bottomley wrote:
>>>>> [...]
>>>>>> I tried this with runc and a user namespace active mapping
>>>>>> uid
>>>>>> 1000 on the host to uid 0 in the container. There I run into
>>>>>> the
>>>>>> problem that all of the files and directories without the
>>>>>> above
>>>>>> work-around are mapped to 'nobody', just like all the files
>>>>>> in
>>>>>> sysfs in this case are also mapped to nobody. This code
>>>>>> resolved
>>>>>> the issue.
>>>>> So I applied your patches with the permission shift commented
>>>>> out
>>>>> and instrumented inode_alloc() to see where it might be failing
>>>>> and
>>>>> I actually find it all works as expected for me:
>>>>>
>>>>> ejb@testdeb:~> unshare -r --user --mount --ima
>>>>> root@testdeb:~# mount -t securityfs_ns none
>>>>> /sys/kernel/security
>>>>> root@testdeb:~# ls -l /sys/kernel/security/ima/
>>>>> total 0
>>>>> -r--r----- 1 root root 0 Dec 1 19:11
>>>>> ascii_runtime_measurements
>>>>> -r--r----- 1 root root 0 Dec 1 19:11
>>>>> binary_runtime_measurements
>>>>> -rw------- 1 root root 0 Dec 1 19:11 policy
>>>>> -r--r----- 1 root root 0 Dec 1 19:11
>>>>> runtime_measurements_count
>>>>> -r--r----- 1 root root 0 Dec 1 19:11 violations
>>>>>
>>>>> I think your problem is something to do with how runc is
>>>>> installing
>>>>> the uid/gid mappings. If it's installing them after the
>>>>> security_ns inodes are created then they get the -1 value
>>>>> (because
>>>>> no mappings exist in s_user_ns). I can even demonstrate this
>>>>> by
>>>>> forcing unshare to enter the IMA namespace before writing the
>>>>> mapping values and I'll see "nobody nogroup" above like you do.
>>>> I am surprised you get this mapping even after commenting the
>>>> permission adjustments... it doesn't work for me when I comment
>>>> them
>>>> out:
>>>>
>>>> [stefanb@ima-ns-dev rootfs]$ unshare -r --user --mount
>>>> [root@ima-ns-dev rootfs]# mount -t securityfs_ns none
>>>> /sys/kernel/security/
>>>> [root@ima-ns-dev rootfs]# cd /sys/kernel/security/ima/
>>>> [root@ima-ns-dev ima]# ls -l
>>>> total 0
>>>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20
>>>> ascii_runtime_measurements
>>>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20
>>>> binary_runtime_measurements
>>>> -rw-------. 1 nobody nobody 0 Dec 1 15:20 policy
>>>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20
>>>> runtime_measurements_count
>>>> -r--r-----. 1 nobody nobody 0 Dec 1 15:20 violations
>>>> [root@ima-ns-dev ima]# cat /proc/self/uid_map
>>>> 0 1000 1
>>>> [root@ima-ns-dev ima]# cat /proc/self/gid_map
>>>> 0 1000 1
>>>>
>>>> The initialization of securityfs and setup of files and
>>>> directories
>>>> happens at the same time as the IMA namespace is created. At this
>>>> time there are no user mappings available, so that's why I need
>>>> to
>>>> make the adjustments 'late'.
>>> There is one other possible difference: To get the correct
>>> s_user_ns
>> I am currently wondering why I cannot re-create your setup while
>> disabling the remapping...
> OK, I think I figured it out. When I applied your patches, it was on
> top of my existing ones, so I had to massage them a bit.
>
> Your problem is the securityfs inode creation is triggered inside
> create_user_ns, which means it happens *before* ushare writes to the
> proc/self/uid_map file, so the securityfs_inodes are always created on
> an empty mapping and i_write_uid always sets the inode uid to -1.
Right, the initialization of the filesystem is quite early.
>
> I don't see this because my setup for everything is triggered off the
> first use of the IMA namespace. You'd need to have some type of lazy
> setup of the inodes as well to give unshare time to install the uid/gid
> mappings.
What could trigger that? A callback while mounting - but I am not sure
where to hook into then. What is your mechanisms to trigger as the
'first use of the IMA namespace'? What is 'use' here?
Stefan
>
> James
>
>
On Wed, 2021-12-01 at 17:09 -0500, Stefan Berger wrote:
> On 12/1/21 17:01, James Bottomley wrote:
[...]
> > OK, I think I figured it out. When I applied your patches, it was
> > on top of my existing ones, so I had to massage them a bit.
> >
> > Your problem is the securityfs inode creation is triggered inside
> > create_user_ns, which means it happens *before* ushare writes to
> > the proc/self/uid_map file, so the securityfs_inodes are always
> > created on an empty mapping and i_write_uid always sets the inode
> > uid to -1.
>
> Right, the initialization of the filesystem is quite early.
>
>
> > I don't see this because my setup for everything is triggered off
> > the first use of the IMA namespace. You'd need to have some type
> > of lazy setup of the inodes as well to give unshare time to install
> > the uid/gidmappings.
>
> What could trigger that? A callback while mounting - but I am not
> sure where to hook into then. What is your mechanisms to trigger as
> the 'first use of the IMA namespace'? What is 'use' here?
use for me is first event that gets logged in the new namespace.
However, I don't think this is a good trigger, it's just a random thing
I was playing with. Perhaps trigger on mount is a good one ... that
could be done from securityfs_ns_init_fs_context?
James
On 12/1/21 17:19, James Bottomley wrote:
> On Wed, 2021-12-01 at 17:09 -0500, Stefan Berger wrote:
>> On 12/1/21 17:01, James Bottomley wrote:
>>
>>
>>
>>> I don't see this because my setup for everything is triggered off
>>> the first use of the IMA namespace. You'd need to have some type
>>> of lazy setup of the inodes as well to give unshare time to install
>>> the uid/gidmappings.
>> What could trigger that? A callback while mounting - but I am not
>> sure where to hook into then. What is your mechanisms to trigger as
>> the 'first use of the IMA namespace'? What is 'use' here?
> use for me is first event that gets logged in the new namespace.
>
> However, I don't think this is a good trigger, it's just a random thing
> I was playing with. Perhaps trigger on mount is a good one ... that
> could be done from securityfs_ns_init_fs_context?
Yes, this here does the trick now for late init also with runc. The late
uid adjustments are gone.
static int securityfs_ns_init_fs_context(struct fs_context *fc)
{
int rc;
if (fc->user_ns->ima_ns->late_fs_init) {
rc = fc->user_ns->ima_ns->late_fs_init(fc->user_ns);
if (rc)
return rc;
}
fc->ops = &securityfs_ns_context_ops;
return 0;
}
Stefan
>
> James
>
>
>
>
On Tue, Nov 30, 2021 at 11:06:54AM -0500, Stefan Berger wrote:
> Setup securityfs_ns with symlinks, directories, and files for IMA
> namespacing support. The same directory structure that IMA uses on the
> host is also created for the namespacing case.
>
> Increment the user namespace's refcount_teardown value by '1' once
> securityfs_ns has been successfully setup since the initialization of the
> filesystem causes an additional reference to the user namespace to be
> taken. The early teardown function will delete the file system and release
> the additional reference.
>
> The securityfs_ns file and directory ownerships cannot be set when the
> filesystem is setup since at this point the user namespace has not been
> configured yet by the user and therefore the ownership mappings are not
> available, yet. Therefore, adjust the file and directory ownerships when
> an inode's function for determining the permissions of a file or directory
> is accessed.
>
> This filesystem can now be mounted as follows:
>
> mount -t securityfs_ns /sys/kernel/security/ /sys/kernel/security/
>
> The following directories, symlinks, and files are then available.
>
> $ ls -l sys/kernel/security/
> total 0
> lr--r--r--. 1 nobody nobody 0 Nov 27 06:44 ima -> integrity/ima
> drwxr-xr-x. 3 nobody nobody 0 Nov 27 06:44 integrity
>
> $ ls -l sys/kernel/security/ima/
> total 0
> -r--r-----. 1 root root 0 Nov 27 06:44 ascii_runtime_measurements
> -r--r-----. 1 root root 0 Nov 27 06:44 binary_runtime_measurements
> -rw-------. 1 root root 0 Nov 27 06:44 policy
> -r--r-----. 1 root root 0 Nov 27 06:44 runtime_measurements_count
> -r--r-----. 1 root root 0 Nov 27 06:44 violations
>
> Signed-off-by: Stefan Berger <[email protected]>
> ---
> include/linux/ima.h | 17 +++
> security/integrity/ima/ima.h | 2 +
> security/integrity/ima/ima_fs.c | 178 ++++++++++++++++++++++-
> security/integrity/ima/ima_init_ima_ns.c | 6 +-
> security/integrity/ima/ima_ns.c | 4 +-
> 5 files changed, 203 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/ima.h b/include/linux/ima.h
> index fe08919df326..a2c5e516f706 100644
> --- a/include/linux/ima.h
> +++ b/include/linux/ima.h
> @@ -221,6 +221,18 @@ struct ima_h_table {
> struct hlist_head queue[IMA_MEASURE_HTABLE_SIZE];
> };
>
> +enum {
> + IMAFS_DENTRY_INTEGRITY_DIR = 0,
> + IMAFS_DENTRY_DIR,
> + IMAFS_DENTRY_SYMLINK,
> + IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS,
> + IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS,
> + IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT,
> + IMAFS_DENTRY_VIOLATIONS,
> + IMAFS_DENTRY_IMA_POLICY,
> + IMAFS_DENTRY_LAST
> +};
> +
> struct ima_namespace {
> struct kref kref;
> struct user_namespace *user_ns;
> @@ -267,6 +279,11 @@ struct ima_namespace {
> struct mutex ima_write_mutex;
> unsigned long ima_fs_flags;
> int valid_policy;
> +
> + struct dentry *dentry[IMAFS_DENTRY_LAST];
> + struct vfsmount *mount;
> + int mount_count;
> + bool file_ownership_fixes_done;
> };
>
> extern struct ima_namespace init_ima_ns;
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index bb9763cd5fb1..9bcd71bb716c 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -139,6 +139,8 @@ struct ns_status {
> /* Internal IMA function definitions */
> int ima_init(void);
> int ima_fs_init(void);
> +int ima_fs_ns_init(struct ima_namespace *ns);
> +void ima_fs_ns_free(struct ima_namespace *ns);
> int ima_add_template_entry(struct ima_namespace *ns,
> struct ima_template_entry *entry, int violation,
> const char *op, struct inode *inode,
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 6766bb8262f2..9a14be520268 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -22,6 +22,7 @@
> #include <linux/parser.h>
> #include <linux/vmalloc.h>
> #include <linux/ima.h>
> +#include <linux/namei.h>
>
> #include "ima.h"
>
> @@ -436,8 +437,13 @@ static int ima_release_policy(struct inode *inode, struct file *file)
>
> ima_update_policy(ns);
> #if !defined(CONFIG_IMA_WRITE_POLICY) && !defined(CONFIG_IMA_READ_POLICY)
> - securityfs_remove(ima_policy);
> - ima_policy = NULL;
> + if (ns == &init_ima_ns) {
> + securityfs_remove(ima_policy);
> + ima_policy = NULL;
> + } else {
> + securityfs_ns_remove(ns->dentry[IMAFS_DENTRY_POLICY]);
> + ns->dentry[IMAFS_DENTRY_POLICY] = NULL;
> + }
> #elif defined(CONFIG_IMA_WRITE_POLICY)
> clear_bit(IMA_FS_BUSY, &ns->ima_fs_flags);
> #elif defined(CONFIG_IMA_READ_POLICY)
> @@ -509,3 +515,171 @@ int __init ima_fs_init(void)
> securityfs_remove(ima_policy);
> return -1;
> }
> +
> +/*
> + * Fix the ownership (uid/gid) of the dentry's that couldn't be set at the
> + * time of their creation because the user namespace wasn't configured, yet.
> + */
> +static void ima_fs_ns_fixup_uid_gid(struct ima_namespace *ns)
> +{
> + struct inode *inode;
> + size_t i;
> +
> + if (ns->file_ownership_fixes_done ||
> + ns->user_ns->uid_map.nr_extents == 0)
> + return;
> +
> + ns->file_ownership_fixes_done = true;
> + for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
> + if (!ns->dentry[i])
> + continue;
> + inode = ns->dentry[i]->d_inode;
> + inode->i_uid = make_kuid(ns->user_ns, 0);
> + inode->i_gid = make_kgid(ns->user_ns, 0);
> + }
> +}
> +
> +/* Fix the permissions when a file is opened */
> +int ima_fs_ns_permission(struct user_namespace *mnt_userns, struct inode *inode,
> + int mask)
> +{
> + ima_fs_ns_fixup_uid_gid(get_current_ns());
As noted later in the thread if this is required it means something is
buggy in the current code. That shouldn't be needed.
I think there's a more fundamental issue here. The correct way to do all
this would be to restructure securityfs at least how it works inside of
user namespaces. Currently, securityfs works like debugfs: a single
shared superblock that is pinned by each new inode that is created via:
simple_pin_fs(&fs_type, &mount, &mount_count);
simple_release_fs(&mount, &mount_count);
and each mount surfaces the same superblock. Ideally making securityfs
mountable inside of user namespaces should get you a new superblock.
Functions that create files for the ima ns would then be called inside
->fill_super etc.
On 12/2/21 08:18, Christian Brauner wrote:
> On Tue, Nov 30, 2021 at 11:06:54AM -0500, Stefan Berger wrote:
>> Setup securityfs_ns with symlinks, directories, and files for IMA
>> namespacing support. The same directory structure that IMA uses on the
>> host is also created for the namespacing case.
>>
>> Increment the user namespace's refcount_teardown value by '1' once
>> securityfs_ns has been successfully setup since the initialization of the
>> filesystem causes an additional reference to the user namespace to be
>> taken. The early teardown function will delete the file system and release
>> the additional reference.
>>
>> The securityfs_ns file and directory ownerships cannot be set when the
>> filesystem is setup since at this point the user namespace has not been
>> configured yet by the user and therefore the ownership mappings are not
>> available, yet. Therefore, adjust the file and directory ownerships when
>> an inode's function for determining the permissions of a file or directory
>> is accessed.
>>
>> This filesystem can now be mounted as follows:
>>
>> mount -t securityfs_ns /sys/kernel/security/ /sys/kernel/security/
>>
>> The following directories, symlinks, and files are then available.
>>
>> $ ls -l sys/kernel/security/
>> total 0
>> lr--r--r--. 1 nobody nobody 0 Nov 27 06:44 ima -> integrity/ima
>> drwxr-xr-x. 3 nobody nobody 0 Nov 27 06:44 integrity
>>
>> $ ls -l sys/kernel/security/ima/
>> total 0
>> -r--r-----. 1 root root 0 Nov 27 06:44 ascii_runtime_measurements
>> -r--r-----. 1 root root 0 Nov 27 06:44 binary_runtime_measurements
>> -rw-------. 1 root root 0 Nov 27 06:44 policy
>> -r--r-----. 1 root root 0 Nov 27 06:44 runtime_measurements_count
>> -r--r-----. 1 root root 0 Nov 27 06:44 violations
>>
>> Signed-off-by: Stefan Berger <[email protected]>
>> ---
>> include/linux/ima.h | 17 +++
>> security/integrity/ima/ima.h | 2 +
>> security/integrity/ima/ima_fs.c | 178 ++++++++++++++++++++++-
>> security/integrity/ima/ima_init_ima_ns.c | 6 +-
>> security/integrity/ima/ima_ns.c | 4 +-
>> 5 files changed, 203 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/ima.h b/include/linux/ima.h
>> index fe08919df326..a2c5e516f706 100644
>> --- a/include/linux/ima.h
>> +++ b/include/linux/ima.h
>> @@ -221,6 +221,18 @@ struct ima_h_table {
>> struct hlist_head queue[IMA_MEASURE_HTABLE_SIZE];
>> };
>>
>> +enum {
>> + IMAFS_DENTRY_INTEGRITY_DIR = 0,
>> + IMAFS_DENTRY_DIR,
>> + IMAFS_DENTRY_SYMLINK,
>> + IMAFS_DENTRY_BINARY_RUNTIME_MEASUREMENTS,
>> + IMAFS_DENTRY_ASCII_RUNTIME_MEASUREMENTS,
>> + IMAFS_DENTRY_RUNTIME_MEASUREMENTS_COUNT,
>> + IMAFS_DENTRY_VIOLATIONS,
>> + IMAFS_DENTRY_IMA_POLICY,
>> + IMAFS_DENTRY_LAST
>> +};
>> +
>> struct ima_namespace {
>> struct kref kref;
>> struct user_namespace *user_ns;
>> @@ -267,6 +279,11 @@ struct ima_namespace {
>> struct mutex ima_write_mutex;
>> unsigned long ima_fs_flags;
>> int valid_policy;
>> +
>> + struct dentry *dentry[IMAFS_DENTRY_LAST];
>> + struct vfsmount *mount;
>> + int mount_count;
>> + bool file_ownership_fixes_done;
>> };
>>
>> extern struct ima_namespace init_ima_ns;
>> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
>> index bb9763cd5fb1..9bcd71bb716c 100644
>> --- a/security/integrity/ima/ima.h
>> +++ b/security/integrity/ima/ima.h
>> @@ -139,6 +139,8 @@ struct ns_status {
>> /* Internal IMA function definitions */
>> int ima_init(void);
>> int ima_fs_init(void);
>> +int ima_fs_ns_init(struct ima_namespace *ns);
>> +void ima_fs_ns_free(struct ima_namespace *ns);
>> int ima_add_template_entry(struct ima_namespace *ns,
>> struct ima_template_entry *entry, int violation,
>> const char *op, struct inode *inode,
>> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
>> index 6766bb8262f2..9a14be520268 100644
>> --- a/security/integrity/ima/ima_fs.c
>> +++ b/security/integrity/ima/ima_fs.c
>> @@ -22,6 +22,7 @@
>> #include <linux/parser.h>
>> #include <linux/vmalloc.h>
>> #include <linux/ima.h>
>> +#include <linux/namei.h>
>>
>> #include "ima.h"
>>
>> @@ -436,8 +437,13 @@ static int ima_release_policy(struct inode *inode, struct file *file)
>>
>> ima_update_policy(ns);
>> #if !defined(CONFIG_IMA_WRITE_POLICY) && !defined(CONFIG_IMA_READ_POLICY)
>> - securityfs_remove(ima_policy);
>> - ima_policy = NULL;
>> + if (ns == &init_ima_ns) {
>> + securityfs_remove(ima_policy);
>> + ima_policy = NULL;
>> + } else {
>> + securityfs_ns_remove(ns->dentry[IMAFS_DENTRY_POLICY]);
>> + ns->dentry[IMAFS_DENTRY_POLICY] = NULL;
>> + }
>> #elif defined(CONFIG_IMA_WRITE_POLICY)
>> clear_bit(IMA_FS_BUSY, &ns->ima_fs_flags);
>> #elif defined(CONFIG_IMA_READ_POLICY)
>> @@ -509,3 +515,171 @@ int __init ima_fs_init(void)
>> securityfs_remove(ima_policy);
>> return -1;
>> }
>> +
>> +/*
>> + * Fix the ownership (uid/gid) of the dentry's that couldn't be set at the
>> + * time of their creation because the user namespace wasn't configured, yet.
>> + */
>> +static void ima_fs_ns_fixup_uid_gid(struct ima_namespace *ns)
>> +{
>> + struct inode *inode;
>> + size_t i;
>> +
>> + if (ns->file_ownership_fixes_done ||
>> + ns->user_ns->uid_map.nr_extents == 0)
>> + return;
>> +
>> + ns->file_ownership_fixes_done = true;
>> + for (i = 0; i < IMAFS_DENTRY_LAST; i++) {
>> + if (!ns->dentry[i])
>> + continue;
>> + inode = ns->dentry[i]->d_inode;
>> + inode->i_uid = make_kuid(ns->user_ns, 0);
>> + inode->i_gid = make_kgid(ns->user_ns, 0);
>> + }
>> +}
>> +
>> +/* Fix the permissions when a file is opened */
>> +int ima_fs_ns_permission(struct user_namespace *mnt_userns, struct inode *inode,
>> + int mask)
>> +{
>> + ima_fs_ns_fixup_uid_gid(get_current_ns());
> As noted later in the thread if this is required it means something is
> buggy in the current code. That shouldn't be needed.
I fixed this yesterday with late initialization:
https://lkml.org/lkml/2021/12/1/1181
>
> I think there's a more fundamental issue here. The correct way to do all
> this would be to restructure securityfs at least how it works inside of
> user namespaces. Currently, securityfs works like debugfs: a single
> shared superblock that is pinned by each new inode that is created via:
>
> simple_pin_fs(&fs_type, &mount, &mount_count);
> simple_release_fs(&mount, &mount_count);
>
> and each mount surfaces the same superblock. Ideally making securityfs
> mountable inside of user namespaces should get you a new superblock.
> Functions that create files for the ima ns would then be called inside
> ->fill_super etc.
So this would be the wrong place to do it? I moved it there because this
is called late (upon mounting) when the configuration of the user
namespace has completed.
static int securityfs_ns_init_fs_context(struct fs_context *fc)
{
int rc;
if (fc->user_ns->ima_ns->late_fs_init) {
rc = fc->user_ns->ima_ns->late_fs_init(fc->user_ns);
if (rc)
return rc;
}
fc->ops = &securityfs_ns_context_ops;
return 0;
}
Stefan