Move the ima_write_mutex, ima_fs_flag, and valid_policy variables into
ima_namespace. This way each IMA namespace can set those variables
independently.
Signed-off-by: Stefan Berger <[email protected]>
---
include/linux/ima.h | 5 ++++
security/integrity/ima/ima_fs.c | 32 +++++++++++-------------
security/integrity/ima/ima_init_ima_ns.c | 4 +++
3 files changed, 23 insertions(+), 18 deletions(-)
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 2ce801bfc449..3aaf6e806db4 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -261,6 +261,11 @@ struct ima_namespace {
struct ima_h_table ima_htable;
struct list_head ima_measurements;
unsigned long binary_runtime_size;
+
+ /* IMA's filesystem */
+ struct mutex ima_write_mutex;
+ unsigned long ima_fs_flags;
+ int valid_policy;
};
extern struct ima_namespace init_ima_ns;
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 38b1c26479b3..0e582ceecc7f 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -25,8 +25,6 @@
#include "ima.h"
-static DEFINE_MUTEX(ima_write_mutex);
-
bool ima_canonical_fmt;
static int __init default_canonical_fmt_setup(char *str)
{
@@ -37,8 +35,6 @@ static int __init default_canonical_fmt_setup(char *str)
}
__setup("ima_canonical_fmt", default_canonical_fmt_setup);
-static int valid_policy = 1;
-
static ssize_t ima_show_htable_value(char __user *buf, size_t count,
loff_t *ppos, atomic_long_t *val)
{
@@ -339,7 +335,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
goto out;
}
- result = mutex_lock_interruptible(&ima_write_mutex);
+ result = mutex_lock_interruptible(&ns->ima_write_mutex);
if (result < 0)
goto out_free;
@@ -354,12 +350,12 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
} else {
result = ima_parse_add_rule(ns, data);
}
- mutex_unlock(&ima_write_mutex);
+ mutex_unlock(&ns->ima_write_mutex);
out_free:
kfree(data);
out:
if (result < 0)
- valid_policy = 0;
+ ns->valid_policy = 0;
return result;
}
@@ -376,8 +372,6 @@ enum ima_fs_flags {
IMA_FS_BUSY,
};
-static unsigned long ima_fs_flags;
-
#ifdef CONFIG_IMA_READ_POLICY
static const struct seq_operations ima_policy_seqops = {
.start = ima_policy_start,
@@ -392,6 +386,8 @@ static const struct seq_operations ima_policy_seqops = {
*/
static int ima_open_policy(struct inode *inode, struct file *filp)
{
+ struct ima_namespace *ns = get_current_ns();
+
if (!(filp->f_flags & O_WRONLY)) {
#ifndef CONFIG_IMA_READ_POLICY
return -EACCES;
@@ -403,7 +399,7 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
return seq_open(filp, &ima_policy_seqops);
#endif
}
- if (test_and_set_bit(IMA_FS_BUSY, &ima_fs_flags))
+ if (test_and_set_bit(IMA_FS_BUSY, &ns->ima_fs_flags))
return -EBUSY;
return 0;
}
@@ -417,25 +413,25 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
*/
static int ima_release_policy(struct inode *inode, struct file *file)
{
- const char *cause = valid_policy ? "completed" : "failed";
struct ima_namespace *ns = get_current_ns();
+ const char *cause = ns->valid_policy ? "completed" : "failed";
if ((file->f_flags & O_ACCMODE) == O_RDONLY)
return seq_release(inode, file);
- if (valid_policy && ima_check_policy(ns) < 0) {
+ if (ns->valid_policy && ima_check_policy(ns) < 0) {
cause = "failed";
- valid_policy = 0;
+ ns->valid_policy = 0;
}
pr_info("policy update %s\n", cause);
integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
- "policy_update", cause, !valid_policy, 0);
+ "policy_update", cause, !ns->valid_policy, 0);
- if (!valid_policy) {
+ if (!ns->valid_policy) {
ima_delete_rules(ns);
- valid_policy = 1;
- clear_bit(IMA_FS_BUSY, &ima_fs_flags);
+ ns->valid_policy = 1;
+ clear_bit(IMA_FS_BUSY, &ns->ima_fs_flags);
return 0;
}
@@ -444,7 +440,7 @@ static int ima_release_policy(struct inode *inode, struct file *file)
securityfs_remove(ima_policy);
ima_policy = NULL;
#elif defined(CONFIG_IMA_WRITE_POLICY)
- clear_bit(IMA_FS_BUSY, &ima_fs_flags);
+ clear_bit(IMA_FS_BUSY, &ns->ima_fs_flags);
#elif defined(CONFIG_IMA_READ_POLICY)
inode->i_mode &= ~S_IWUSR;
#endif
diff --git a/security/integrity/ima/ima_init_ima_ns.c b/security/integrity/ima/ima_init_ima_ns.c
index b097aad7b156..162c94e06d13 100644
--- a/security/integrity/ima/ima_init_ima_ns.c
+++ b/security/integrity/ima/ima_init_ima_ns.c
@@ -49,6 +49,10 @@ int ima_init_namespace(struct ima_namespace *ns)
else
ns->binary_runtime_size = ULONG_MAX;
+ mutex_init(&ns->ima_write_mutex);
+ ns->valid_policy = 1;
+ ns->ima_fs_flags = 0;
+
return 0;
}
--
2.31.1
On Wed, Dec 08, 2021 at 05:18:15PM -0500, Stefan Berger wrote:
> Move the ima_write_mutex, ima_fs_flag, and valid_policy variables into
> ima_namespace. This way each IMA namespace can set those variables
> independently.
>
> Signed-off-by: Stefan Berger <[email protected]>
> ---
> include/linux/ima.h | 5 ++++
> security/integrity/ima/ima_fs.c | 32 +++++++++++-------------
> security/integrity/ima/ima_init_ima_ns.c | 4 +++
> 3 files changed, 23 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/ima.h b/include/linux/ima.h
> index 2ce801bfc449..3aaf6e806db4 100644
> --- a/include/linux/ima.h
> +++ b/include/linux/ima.h
> @@ -261,6 +261,11 @@ struct ima_namespace {
> struct ima_h_table ima_htable;
> struct list_head ima_measurements;
> unsigned long binary_runtime_size;
> +
> + /* IMA's filesystem */
> + struct mutex ima_write_mutex;
> + unsigned long ima_fs_flags;
> + int valid_policy;
> };
>
> extern struct ima_namespace init_ima_ns;
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 38b1c26479b3..0e582ceecc7f 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -25,8 +25,6 @@
>
> #include "ima.h"
>
> -static DEFINE_MUTEX(ima_write_mutex);
> -
> bool ima_canonical_fmt;
> static int __init default_canonical_fmt_setup(char *str)
> {
> @@ -37,8 +35,6 @@ static int __init default_canonical_fmt_setup(char *str)
> }
> __setup("ima_canonical_fmt", default_canonical_fmt_setup);
>
> -static int valid_policy = 1;
> -
> static ssize_t ima_show_htable_value(char __user *buf, size_t count,
> loff_t *ppos, atomic_long_t *val)
> {
> @@ -339,7 +335,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> goto out;
> }
>
> - result = mutex_lock_interruptible(&ima_write_mutex);
> + result = mutex_lock_interruptible(&ns->ima_write_mutex);
> if (result < 0)
> goto out_free;
>
> @@ -354,12 +350,12 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> } else {
> result = ima_parse_add_rule(ns, data);
> }
> - mutex_unlock(&ima_write_mutex);
> + mutex_unlock(&ns->ima_write_mutex);
> out_free:
> kfree(data);
> out:
> if (result < 0)
> - valid_policy = 0;
> + ns->valid_policy = 0;
>
> return result;
> }
> @@ -376,8 +372,6 @@ enum ima_fs_flags {
> IMA_FS_BUSY,
> };
>
> -static unsigned long ima_fs_flags;
> -
> #ifdef CONFIG_IMA_READ_POLICY
> static const struct seq_operations ima_policy_seqops = {
> .start = ima_policy_start,
> @@ -392,6 +386,8 @@ static const struct seq_operations ima_policy_seqops = {
> */
> static int ima_open_policy(struct inode *inode, struct file *filp)
> {
> + struct ima_namespace *ns = get_current_ns();
> +
I'm a bit confused here. In all those callbacks:
.open = ima_open_policy,
.write = ima_write_policy,
.release = ima_release_policy,
you're calling get_current_ns() at the top of it. What guarantees that
the same ima_namespace is returned here? What if the fd is sent to
someone who is in a different user namespace and the write to that
file?
Maybe I'm just confused but wouldn't you want something like this?
From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Thu, 9 Dec 2021 20:07:02 +0100
Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
---
security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
1 file changed, 38 insertions(+), 5 deletions(-)
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 583462b29cb5..d5b302b925b8 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
static ssize_t ima_write_policy(struct file *file, const char __user *buf,
size_t datalen, loff_t *ppos)
{
- struct ima_namespace *ns = get_current_ns();
+ struct ima_namespace *ns;
+ struct user_namespace *user_ns;
char *data;
ssize_t result;
+ user_ns = ima_filp_private(filp);
+ ns = user_ns->ima_ns
+
if (datalen >= PAGE_SIZE)
datalen = PAGE_SIZE - 1;
@@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
};
#endif
+static struct user_namespace *ima_filp_private(struct file *filp)
+{
+ if (!(filp->f_flags & O_WRONLY)) {
+#ifdef CONFIG_IMA_READ_POLICY
+ struct seq_file *seq;
+
+ seq = filp->private_data;
+ return seq->private;
+#endif
+ }
+ return filp->private_data;
+}
+
/*
* ima_open_policy: sequentialize access to the policy file
*/
static int ima_open_policy(struct inode *inode, struct file *filp)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = current_user_ns();
+ struct ima_namespace *ns = user_ns->ima_ns;
if (!(filp->f_flags & O_WRONLY)) {
#ifndef CONFIG_IMA_READ_POLICY
return -EACCES;
#else
+ int err;
+ struct seq_file *seq;
+
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
return -EACCES;
- if (!mac_admin_ns_capable(ima_user_ns(ns)))
+ if (!mac_admin_ns_capable(user_ns))
return -EPERM;
- return seq_open(filp, &ima_policy_seqops);
+ err = seq_open(filp, &ima_policy_seqops);
+ if (err)
+ return err;
+
+ seq = filp->private_data;
+ seq->private = user_ns;
+ return 0;
#endif
}
if (test_and_set_bit(IMA_FS_BUSY, &ns->ima_fs_flags))
return -EBUSY;
+
+ filp->private_data = user_ns;
return 0;
}
@@ -405,9 +434,13 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
*/
static int ima_release_policy(struct inode *inode, struct file *file)
{
- struct ima_namespace *ns = get_current_ns();
+ struct ima_namespace *ns;
+ struct user_namespace *user_ns;
const char *cause = ns->valid_policy ? "completed" : "failed";
+ user_ns = ima_filp_private(filp);
+ ns = user_ns->ima_ns
+
if ((file->f_flags & O_ACCMODE) == O_RDONLY)
return seq_release(inode, file);
--
2.30.2
On 12/9/21 14:11, Christian Brauner wrote:
> On Wed, Dec 08, 2021 at 05:18:15PM -0500, Stefan Berger wrote:
>> Move the ima_write_mutex, ima_fs_flag, and valid_policy variables into
>> ima_namespace. This way each IMA namespace can set those variables
>> independently.
>>
>> Signed-off-by: Stefan Berger <[email protected]>
>> ---
>> include/linux/ima.h | 5 ++++
>> security/integrity/ima/ima_fs.c | 32 +++++++++++-------------
>> security/integrity/ima/ima_init_ima_ns.c | 4 +++
>> 3 files changed, 23 insertions(+), 18 deletions(-)
>>
>> diff --git a/include/linux/ima.h b/include/linux/ima.h
>> index 2ce801bfc449..3aaf6e806db4 100644
>> --- a/include/linux/ima.h
>> +++ b/include/linux/ima.h
>> @@ -261,6 +261,11 @@ struct ima_namespace {
>> struct ima_h_table ima_htable;
>> struct list_head ima_measurements;
>> unsigned long binary_runtime_size;
>> +
>> + /* IMA's filesystem */
>> + struct mutex ima_write_mutex;
>> + unsigned long ima_fs_flags;
>> + int valid_policy;
>> };
>>
>> extern struct ima_namespace init_ima_ns;
>> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
>> index 38b1c26479b3..0e582ceecc7f 100644
>> --- a/security/integrity/ima/ima_fs.c
>> +++ b/security/integrity/ima/ima_fs.c
>> @@ -25,8 +25,6 @@
>>
>> #include "ima.h"
>>
>> -static DEFINE_MUTEX(ima_write_mutex);
>> -
>> bool ima_canonical_fmt;
>> static int __init default_canonical_fmt_setup(char *str)
>> {
>> @@ -37,8 +35,6 @@ static int __init default_canonical_fmt_setup(char *str)
>> }
>> __setup("ima_canonical_fmt", default_canonical_fmt_setup);
>>
>> -static int valid_policy = 1;
>> -
>> static ssize_t ima_show_htable_value(char __user *buf, size_t count,
>> loff_t *ppos, atomic_long_t *val)
>> {
>> @@ -339,7 +335,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
>> goto out;
>> }
>>
>> - result = mutex_lock_interruptible(&ima_write_mutex);
>> + result = mutex_lock_interruptible(&ns->ima_write_mutex);
>> if (result < 0)
>> goto out_free;
>>
>> @@ -354,12 +350,12 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
>> } else {
>> result = ima_parse_add_rule(ns, data);
>> }
>> - mutex_unlock(&ima_write_mutex);
>> + mutex_unlock(&ns->ima_write_mutex);
>> out_free:
>> kfree(data);
>> out:
>> if (result < 0)
>> - valid_policy = 0;
>> + ns->valid_policy = 0;
>>
>> return result;
>> }
>> @@ -376,8 +372,6 @@ enum ima_fs_flags {
>> IMA_FS_BUSY,
>> };
>>
>> -static unsigned long ima_fs_flags;
>> -
>> #ifdef CONFIG_IMA_READ_POLICY
>> static const struct seq_operations ima_policy_seqops = {
>> .start = ima_policy_start,
>> @@ -392,6 +386,8 @@ static const struct seq_operations ima_policy_seqops = {
>> */
>> static int ima_open_policy(struct inode *inode, struct file *filp)
>> {
>> + struct ima_namespace *ns = get_current_ns();
>> +
> I'm a bit confused here. In all those callbacks:
> .open = ima_open_policy,
> .write = ima_write_policy,
> .release = ima_release_policy,
> you're calling get_current_ns() at the top of it. What guarantees that
> the same ima_namespace is returned here? What if the fd is sent to
> someone who is in a different user namespace and the write to that
> file?
>
> Maybe I'm just confused but wouldn't you want something like this?
I hadn't thought about inheritance or passing fds. But yes. I will adopt
your patch and extend all the files to tie them to the user namespace
they are opened in...
Thanks.
>
> From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Thu, 9 Dec 2021 20:07:02 +0100
> Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
>
> ---
> security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
> 1 file changed, 38 insertions(+), 5 deletions(-)
>
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 583462b29cb5..d5b302b925b8 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
> static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> size_t datalen, loff_t *ppos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct ima_namespace *ns;
> + struct user_namespace *user_ns;
> char *data;
> ssize_t result;
>
> + user_ns = ima_filp_private(filp);
> + ns = user_ns->ima_ns
> +
> if (datalen >= PAGE_SIZE)
> datalen = PAGE_SIZE - 1;
>
> @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
> };
> #endif
>
> +static struct user_namespace *ima_filp_private(struct file *filp)
> +{
> + if (!(filp->f_flags & O_WRONLY)) {
> +#ifdef CONFIG_IMA_READ_POLICY
> + struct seq_file *seq;
> +
> + seq = filp->private_data;
> + return seq->private;
> +#endif
> + }
> + return filp->private_data;
> +}
> +
> /*
> * ima_open_policy: sequentialize access to the policy file
> */
> static int ima_open_policy(struct inode *inode, struct file *filp)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = current_user_ns();
> + struct ima_namespace *ns = user_ns->ima_ns;
>
> if (!(filp->f_flags & O_WRONLY)) {
> #ifndef CONFIG_IMA_READ_POLICY
> return -EACCES;
> #else
> + int err;
> + struct seq_file *seq;
> +
> if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
> return -EACCES;
> - if (!mac_admin_ns_capable(ima_user_ns(ns)))
> + if (!mac_admin_ns_capable(user_ns))
> return -EPERM;
> - return seq_open(filp, &ima_policy_seqops);
> + err = seq_open(filp, &ima_policy_seqops);
> + if (err)
> + return err;
> +
> + seq = filp->private_data;
> + seq->private = user_ns;
> + return 0;
> #endif
> }
> if (test_and_set_bit(IMA_FS_BUSY, &ns->ima_fs_flags))
> return -EBUSY;
> +
> + filp->private_data = user_ns;
> return 0;
> }
>
> @@ -405,9 +434,13 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
> */
> static int ima_release_policy(struct inode *inode, struct file *file)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct ima_namespace *ns;
> + struct user_namespace *user_ns;
> const char *cause = ns->valid_policy ? "completed" : "failed";
>
> + user_ns = ima_filp_private(filp);
> + ns = user_ns->ima_ns
> +
> if ((file->f_flags & O_ACCMODE) == O_RDONLY)
> return seq_release(inode, file);
>
On 12/9/21 14:11, Christian Brauner wrote:
>
> From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Thu, 9 Dec 2021 20:07:02 +0100
> Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
>
> ---
> security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
> 1 file changed, 38 insertions(+), 5 deletions(-)
>
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 583462b29cb5..d5b302b925b8 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
> static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> size_t datalen, loff_t *ppos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct ima_namespace *ns;
> + struct user_namespace *user_ns;
> char *data;
> ssize_t result;
>
> + user_ns = ima_filp_private(filp);
> + ns = user_ns->ima_ns
> +
> if (datalen >= PAGE_SIZE)
> datalen = PAGE_SIZE - 1;
>
> @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
> };
> #endif
>
> +static struct user_namespace *ima_filp_private(struct file *filp)
> +{
> + if (!(filp->f_flags & O_WRONLY)) {
> +#ifdef CONFIG_IMA_READ_POLICY
> + struct seq_file *seq;
> +
> + seq = filp->private_data;
> + return seq->private;
> +#endif
> + }
> + return filp->private_data;
> +}
> +
> /*
> * ima_open_policy: sequentialize access to the policy file
> */
> static int ima_open_policy(struct inode *inode, struct file *filp)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = current_user_ns();
Do we have to take a reference on the user namespace assuming one can
open the file, pass the fd down the hierarchy, and then the user
namespace with the opened file goes away? Or is there anything else that
keeps the user namespace alive?
On Thu, Dec 09, 2021 at 07:57:02PM -0500, Stefan Berger wrote:
>
> On 12/9/21 14:11, Christian Brauner wrote:
> >
> > From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
> > From: Christian Brauner <[email protected]>
> > Date: Thu, 9 Dec 2021 20:07:02 +0100
> > Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
> >
> > ---
> > security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
> > 1 file changed, 38 insertions(+), 5 deletions(-)
> >
> > diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> > index 583462b29cb5..d5b302b925b8 100644
> > --- a/security/integrity/ima/ima_fs.c
> > +++ b/security/integrity/ima/ima_fs.c
> > @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
> > static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> > size_t datalen, loff_t *ppos)
> > {
> > - struct ima_namespace *ns = get_current_ns();
> > + struct ima_namespace *ns;
> > + struct user_namespace *user_ns;
> > char *data;
> > ssize_t result;
> > + user_ns = ima_filp_private(filp);
> > + ns = user_ns->ima_ns
> > +
> > if (datalen >= PAGE_SIZE)
> > datalen = PAGE_SIZE - 1;
> > @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
> > };
> > #endif
> > +static struct user_namespace *ima_filp_private(struct file *filp)
> > +{
> > + if (!(filp->f_flags & O_WRONLY)) {
> > +#ifdef CONFIG_IMA_READ_POLICY
> > + struct seq_file *seq;
> > +
> > + seq = filp->private_data;
> > + return seq->private;
> > +#endif
> > + }
> > + return filp->private_data;
> > +}
> > +
> > /*
> > * ima_open_policy: sequentialize access to the policy file
> > */
> > static int ima_open_policy(struct inode *inode, struct file *filp)
> > {
> > - struct ima_namespace *ns = get_current_ns();
> > + struct user_namespace *user_ns = current_user_ns();
>
>
> Do we have to take a reference on the user namespace assuming one can open
> the file, pass the fd down the hierarchy, and then the user namespace with
> the opened file goes away? Or is there anything else that keeps the user
> namespace alive?
No, we don't. When ima_policy_open() is called we do current_user_ns()
but that will be guaranteed to be identical to filp->f_cred->user_ns.
And f_cred is a reference that has been taken when the vfs allocated a
struct file for this .open call so won't go away until the last fput.
My proposal is also too complicated, I think.
(The booster is giving me the same side-effects as my second shot so
this looks like two good days of fever and headache. So I'll use that as
an excuse. :))
Your patch series as it stands has a bit of a security issue with those
get_current_ns() calls across differnet file/seq_file operations.
You have to make an architectural decision, I think. I see two sensible
options:
1. The relevant ima_ns that .open/.read/.write operate on is always taken
to be the ima_ns of the filesystem's userns, i.e.
sb->s_user_ns->ima_ns.
This - but I'm not an ima person - makes the most sense to me and the
semantics are straightforward. If I write to a file to alter some
policy then I expect the ima namespace of the user namespace to be
affected that the securityfs instance was mounted in.
2. The relevant ima_ns that .open/.read/.write operate on is always
taken to be the one of the opener. I don't really like that as that
gets weird if for some complicated reason the caller is not located
in the userns the filesystem was mounted in (weird mount propagation
scenario or sm). It also feels strange to operate on an ima_ns that's
different from s_user_ns->ima_ns in a securityfs instance.
So I think I would propose you do sm:
From 6c8018f14f22e7bc2255dcebab96f9b8c39a8459 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Fri, 10 Dec 2021 10:31:27 +0100
Subject: [PATCH 1/2] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
The relevant ima_ns that .open/.read/.write operate on is always taken to be
the ima_ns of the filesystem's userns, i.e. sb->s_user_ns->ima_ns. This - but
I'm not an ima person - makes the most sense to me and the semantics are
straightforward. If I write to a file to alter some policy then I expect the
ima namespace of the user namespace to be affected that the securityfs instance
was mounted in.
---
security/integrity/ima/ima_fs.c | 30 ++++++++++++++++++-----------
security/integrity/ima/ima_policy.c | 8 ++++++--
2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index 778983fd9a73..95b7b9ec2a76 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -49,7 +49,8 @@ static ssize_t ima_show_htable_violations(struct file *filp,
char __user *buf,
size_t count, loff_t *ppos)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
return ima_show_htable_value(buf, count, ppos, &ns->ima_htable.violations);
}
@@ -63,7 +64,8 @@ static ssize_t ima_show_measurements_count(struct file *filp,
char __user *buf,
size_t count, loff_t *ppos)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
return ima_show_htable_value(buf, count, ppos, &ns->ima_htable.len);
}
@@ -76,7 +78,9 @@ static const struct file_operations ima_measurements_count_ops = {
/* returns pointer to hlist_node */
static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
{
- struct ima_namespace *ns = get_current_ns();
+ const struct file *filp = m->file;
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
loff_t l = *pos;
struct ima_queue_entry *qe;
@@ -94,7 +98,9 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ima_namespace *ns = get_current_ns();
+ const struct file *filp = m->file;
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
struct ima_queue_entry *qe = v;
/* lock protects when reading beyond last element
@@ -273,9 +279,8 @@ static const struct file_operations ima_ascii_measurements_ops = {
.release = seq_release,
};
-static ssize_t ima_read_policy(char *path)
+static ssize_t ima_read_policy(struct ima_namespace *ns, char *path)
{
- struct ima_namespace *ns = get_current_ns();
void *data = NULL;
char *datap;
size_t size;
@@ -317,7 +322,8 @@ static ssize_t ima_read_policy(char *path)
static ssize_t ima_write_policy(struct file *file, const char __user *buf,
size_t datalen, loff_t *ppos)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = file->f_path.mnt->mnt_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
char *data;
ssize_t result;
@@ -340,7 +346,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
goto out_free;
if (data[0] == '/') {
- result = ima_read_policy(data);
+ result = ima_read_policy(ns, data);
} else if (ima_appraise & IMA_APPRAISE_POLICY) {
pr_err("signed policy file (specified as an absolute pathname) required\n");
integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
@@ -378,7 +384,8 @@ static const struct seq_operations ima_policy_seqops = {
*/
static int ima_open_policy(struct inode *inode, struct file *filp)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = inode->i_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
if (!(filp->f_flags & O_WRONLY)) {
#ifndef CONFIG_IMA_READ_POLICY
@@ -386,7 +393,7 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
#else
if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
return -EACCES;
- if (!mac_admin_ns_capable(ns->user_ns))
+ if (!mac_admin_ns_capable(user_ns))
return -EPERM;
return seq_open(filp, &ima_policy_seqops);
#endif
@@ -405,7 +412,8 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
*/
static int ima_release_policy(struct inode *inode, struct file *file)
{
- struct ima_namespace *ns = get_current_ns();
+ struct user_namespace *user_ns = inode->i_sb->s_user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
const char *cause = ns->valid_policy ? "completed" : "failed";
if ((file->f_flags & O_ACCMODE) == O_RDONLY)
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 747dca6131d6..41e5f17ec44d 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1908,7 +1908,9 @@ static const char *const mask_tokens[] = {
void *ima_policy_start(struct seq_file *m, loff_t *pos)
{
- struct ima_namespace *ns = get_current_ns();
+ const struct file *filp = m->file;
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
loff_t l = *pos;
struct ima_rule_entry *entry;
struct list_head *ima_rules_tmp;
@@ -1928,7 +1930,9 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos)
void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
{
struct ima_rule_entry *entry = v;
- struct ima_namespace *ns = get_current_ns();
+ const struct file *filp = m->file;
+ struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->user_ns;
+ struct ima_namespace *ns = user_ns->ima_ns;
rcu_read_lock();
entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
--
2.30.2
From ecf25d6b2b5895005d4103169bdb55d970e7a865 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Fri, 10 Dec 2021 11:56:25 +0100
Subject: [PATCH 2/2] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
securityfs: don't allow mounting from outside the filesystem's userns
If we ever need to allow that we should revisit the semantics.
---
security/inode.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/security/inode.c b/security/inode.c
index eaccba7017d9..71f9634228f3 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -43,7 +43,10 @@ static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
static const struct tree_descr files[] = {{""}};
struct user_namespace *ns = fc->user_ns;
- int error;
+ int error = -EINVAL;
+
+ if (WARN_ON(ns != current_user_ns()))
+ return error;
error = simple_fill_super(sb, SECURITYFS_MAGIC, files);
if (error)
--
2.30.2
On 12/10/21 06:32, Christian Brauner wrote:
> On Thu, Dec 09, 2021 at 07:57:02PM -0500, Stefan Berger wrote:
>> On 12/9/21 14:11, Christian Brauner wrote:
>>> From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
>>> From: Christian Brauner <[email protected]>
>>> Date: Thu, 9 Dec 2021 20:07:02 +0100
>>> Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
>>>
>>> ---
>>> security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
>>> 1 file changed, 38 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
>>> index 583462b29cb5..d5b302b925b8 100644
>>> --- a/security/integrity/ima/ima_fs.c
>>> +++ b/security/integrity/ima/ima_fs.c
>>> @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
>>> static ssize_t ima_write_policy(struct file *file, const char __user *buf,
>>> size_t datalen, loff_t *ppos)
>>> {
>>> - struct ima_namespace *ns = get_current_ns();
>>> + struct ima_namespace *ns;
>>> + struct user_namespace *user_ns;
>>> char *data;
>>> ssize_t result;
>>> + user_ns = ima_filp_private(filp);
>>> + ns = user_ns->ima_ns
>>> +
>>> if (datalen >= PAGE_SIZE)
>>> datalen = PAGE_SIZE - 1;
>>> @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
>>> };
>>> #endif
>>> +static struct user_namespace *ima_filp_private(struct file *filp)
>>> +{
>>> + if (!(filp->f_flags & O_WRONLY)) {
>>> +#ifdef CONFIG_IMA_READ_POLICY
>>> + struct seq_file *seq;
>>> +
>>> + seq = filp->private_data;
>>> + return seq->private;
>>> +#endif
>>> + }
>>> + return filp->private_data;
>>> +}
>>> +
>>> /*
>>> * ima_open_policy: sequentialize access to the policy file
>>> */
>>> static int ima_open_policy(struct inode *inode, struct file *filp)
>>> {
>>> - struct ima_namespace *ns = get_current_ns();
>>> + struct user_namespace *user_ns = current_user_ns();
>>
>> Do we have to take a reference on the user namespace assuming one can open
>> the file, pass the fd down the hierarchy, and then the user namespace with
>> the opened file goes away? Or is there anything else that keeps the user
>> namespace alive?
> No, we don't. When ima_policy_open() is called we do current_user_ns()
> but that will be guaranteed to be identical to filp->f_cred->user_ns.
> And f_cred is a reference that has been taken when the vfs allocated a
> struct file for this .open call so won't go away until the last fput.
>
> My proposal is also too complicated, I think.
> (The booster is giving me the same side-effects as my second shot so
> this looks like two good days of fever and headache. So I'll use that as
> an excuse. :))
>
> Your patch series as it stands has a bit of a security issue with those
> get_current_ns() calls across differnet file/seq_file operations.
>
> You have to make an architectural decision, I think. I see two sensible
> options:
> 1. The relevant ima_ns that .open/.read/.write operate on is always taken
> to be the ima_ns of the filesystem's userns, i.e.
> sb->s_user_ns->ima_ns.
> This - but I'm not an ima person - makes the most sense to me and the
> semantics are straightforward. If I write to a file to alter some
> policy then I expect the ima namespace of the user namespace to be
> affected that the securityfs instance was mounted in.
> 2. The relevant ima_ns that .open/.read/.write operate on is always
> taken to be the one of the opener. I don't really like that as that
> gets weird if for some complicated reason the caller is not located
> in the userns the filesystem was mounted in (weird mount propagation
> scenario or sm). It also feels strange to operate on an ima_ns that's
> different from s_user_ns->ima_ns in a securityfs instance.
We have this situation because one can setns() to another mount
namespaces but the data shown by SecurityFS lives in a user namespace,
right? And now we need to decide whether to affect the data in the user
namespace that did the open (option 2) or to which the SecurityFS
belongs to (option 1). If we were to open a regular file it would be
option 1, so we should probably not break that existing semantic and
also choose option 1 unless there one wasn't allowed to choose the user
namespace the SecurityFS files belonged to then it should be option 2
but then we have file descriptor passing where 'being allowed' can
change depending on who is reading/writing a file... Is there anything
that would prevent us from setns()'ing to that target user namespace so
that we would now see that of a user namespace that we are not allowed
to see?
Following man page of setns:
" User namespaces
A process reassociating itself with a user namespace must
have the CAP_SYS_ADMIN capability in the target user
namespace. (This necessarily implies that it is only
possible to join a descendant user namespace.) Upon
successfully joining a user namespace, a process is
granted all capabilities in that namespace, regardless of
its user and group IDs."
So if we choose option 1 maybe we have to test for this capability upon
every read/write from/to a file?
Stefan
>
> So I think I would propose you do sm:
>
> From 6c8018f14f22e7bc2255dcebab96f9b8c39a8459 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Fri, 10 Dec 2021 10:31:27 +0100
> Subject: [PATCH 1/2] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
>
> The relevant ima_ns that .open/.read/.write operate on is always taken to be
> the ima_ns of the filesystem's userns, i.e. sb->s_user_ns->ima_ns. This - but
> I'm not an ima person - makes the most sense to me and the semantics are
> straightforward. If I write to a file to alter some policy then I expect the
> ima namespace of the user namespace to be affected that the securityfs instance
> was mounted in.
> ---
> security/integrity/ima/ima_fs.c | 30 ++++++++++++++++++-----------
> security/integrity/ima/ima_policy.c | 8 ++++++--
> 2 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> index 778983fd9a73..95b7b9ec2a76 100644
> --- a/security/integrity/ima/ima_fs.c
> +++ b/security/integrity/ima/ima_fs.c
> @@ -49,7 +49,8 @@ static ssize_t ima_show_htable_violations(struct file *filp,
> char __user *buf,
> size_t count, loff_t *ppos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
>
> return ima_show_htable_value(buf, count, ppos, &ns->ima_htable.violations);
> }
> @@ -63,7 +64,8 @@ static ssize_t ima_show_measurements_count(struct file *filp,
> char __user *buf,
> size_t count, loff_t *ppos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
>
> return ima_show_htable_value(buf, count, ppos, &ns->ima_htable.len);
> }
> @@ -76,7 +78,9 @@ static const struct file_operations ima_measurements_count_ops = {
> /* returns pointer to hlist_node */
> static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + const struct file *filp = m->file;
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
> loff_t l = *pos;
> struct ima_queue_entry *qe;
>
> @@ -94,7 +98,9 @@ static void *ima_measurements_start(struct seq_file *m, loff_t *pos)
>
> static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + const struct file *filp = m->file;
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
> struct ima_queue_entry *qe = v;
>
> /* lock protects when reading beyond last element
> @@ -273,9 +279,8 @@ static const struct file_operations ima_ascii_measurements_ops = {
> .release = seq_release,
> };
>
> -static ssize_t ima_read_policy(char *path)
> +static ssize_t ima_read_policy(struct ima_namespace *ns, char *path)
> {
> - struct ima_namespace *ns = get_current_ns();
> void *data = NULL;
> char *datap;
> size_t size;
> @@ -317,7 +322,8 @@ static ssize_t ima_read_policy(char *path)
> static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> size_t datalen, loff_t *ppos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = file->f_path.mnt->mnt_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
> char *data;
> ssize_t result;
>
> @@ -340,7 +346,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> goto out_free;
>
> if (data[0] == '/') {
> - result = ima_read_policy(data);
> + result = ima_read_policy(ns, data);
> } else if (ima_appraise & IMA_APPRAISE_POLICY) {
> pr_err("signed policy file (specified as an absolute pathname) required\n");
> integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL,
> @@ -378,7 +384,8 @@ static const struct seq_operations ima_policy_seqops = {
> */
> static int ima_open_policy(struct inode *inode, struct file *filp)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = inode->i_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
>
> if (!(filp->f_flags & O_WRONLY)) {
> #ifndef CONFIG_IMA_READ_POLICY
> @@ -386,7 +393,7 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
> #else
> if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
> return -EACCES;
> - if (!mac_admin_ns_capable(ns->user_ns))
> + if (!mac_admin_ns_capable(user_ns))
> return -EPERM;
> return seq_open(filp, &ima_policy_seqops);
> #endif
> @@ -405,7 +412,8 @@ static int ima_open_policy(struct inode *inode, struct file *filp)
> */
> static int ima_release_policy(struct inode *inode, struct file *file)
> {
> - struct ima_namespace *ns = get_current_ns();
> + struct user_namespace *user_ns = inode->i_sb->s_user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
> const char *cause = ns->valid_policy ? "completed" : "failed";
>
> if ((file->f_flags & O_ACCMODE) == O_RDONLY)
> diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
> index 747dca6131d6..41e5f17ec44d 100644
> --- a/security/integrity/ima/ima_policy.c
> +++ b/security/integrity/ima/ima_policy.c
> @@ -1908,7 +1908,9 @@ static const char *const mask_tokens[] = {
>
> void *ima_policy_start(struct seq_file *m, loff_t *pos)
> {
> - struct ima_namespace *ns = get_current_ns();
> + const struct file *filp = m->file;
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
> loff_t l = *pos;
> struct ima_rule_entry *entry;
> struct list_head *ima_rules_tmp;
> @@ -1928,7 +1930,9 @@ void *ima_policy_start(struct seq_file *m, loff_t *pos)
> void *ima_policy_next(struct seq_file *m, void *v, loff_t *pos)
> {
> struct ima_rule_entry *entry = v;
> - struct ima_namespace *ns = get_current_ns();
> + const struct file *filp = m->file;
> + struct user_namespace *user_ns = filp->f_path.mnt->mnt_sb->user_ns;
> + struct ima_namespace *ns = user_ns->ima_ns;
>
> rcu_read_lock();
> entry = list_entry_rcu(entry->list.next, struct ima_rule_entry, list);
On Fri, 2021-12-10 at 08:57 -0500, Stefan Berger wrote:
> On 12/10/21 06:32, Christian Brauner wrote:
> > On Thu, Dec 09, 2021 at 07:57:02PM -0500, Stefan Berger wrote:
> > > On 12/9/21 14:11, Christian Brauner wrote:
> > > > From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17
> > > > 00:00:00 2001
> > > > From: Christian Brauner <[email protected]>
> > > > Date: Thu, 9 Dec 2021 20:07:02 +0100
> > > > Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
> > > >
> > > > ---
> > > > security/integrity/ima/ima_fs.c | 43
> > > > +++++++++++++++++++++++++++++----
> > > > 1 file changed, 38 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/security/integrity/ima/ima_fs.c
> > > > b/security/integrity/ima/ima_fs.c
> > > > index 583462b29cb5..d5b302b925b8 100644
> > > > --- a/security/integrity/ima/ima_fs.c
> > > > +++ b/security/integrity/ima/ima_fs.c
> > > > @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char
> > > > *path)
> > > > static ssize_t ima_write_policy(struct file *file, const
> > > > char __user *buf,
> > > > size_t datalen, loff_t *ppos)
> > > > {
> > > > - struct ima_namespace *ns = get_current_ns();
> > > > + struct ima_namespace *ns;
> > > > + struct user_namespace *user_ns;
> > > > char *data;
> > > > ssize_t result;
> > > > + user_ns = ima_filp_private(filp);
> > > > + ns = user_ns->ima_ns
> > > > +
> > > > if (datalen >= PAGE_SIZE)
> > > > datalen = PAGE_SIZE - 1;
> > > > @@ -373,26 +377,51 @@ static const struct seq_operations
> > > > ima_policy_seqops = {
> > > > };
> > > > #endif
> > > > +static struct user_namespace *ima_filp_private(struct file
> > > > *filp)
> > > > +{
> > > > + if (!(filp->f_flags & O_WRONLY)) {
> > > > +#ifdef CONFIG_IMA_READ_POLICY
> > > > + struct seq_file *seq;
> > > > +
> > > > + seq = filp->private_data;
> > > > + return seq->private;
> > > > +#endif
> > > > + }
> > > > + return filp->private_data;
> > > > +}
> > > > +
> > > > /*
> > > > * ima_open_policy: sequentialize access to the policy file
> > > > */
> > > > static int ima_open_policy(struct inode *inode, struct file
> > > > *filp)
> > > > {
> > > > - struct ima_namespace *ns = get_current_ns();
> > > > + struct user_namespace *user_ns = current_user_ns();
> > >
> > > Do we have to take a reference on the user namespace assuming one
> > > can open
> > > the file, pass the fd down the hierarchy, and then the user
> > > namespace with
> > > the opened file goes away? Or is there anything else that keeps
> > > the user
> > > namespace alive?
> > No, we don't. When ima_policy_open() is called we do
> > current_user_ns() but that will be guaranteed to be identical to
> > filp->f_cred->user_ns. And f_cred is a reference that has been
> > taken when the vfs allocated a struct file for this .open call so
> > won't go away until the last fput.
> >
> > My proposal is also too complicated, I think.
> > (The booster is giving me the same side-effects as my second shot
> > so this looks like two good days of fever and headache. So I'll use
> > that as an excuse. :))
> >
> > Your patch series as it stands has a bit of a security issue with
> > those get_current_ns() calls across differnet file/seq_file
> > operations.
> > You have to make an architectural decision, I think. I see two
> > sensible options:
> > 1. The relevant ima_ns that .open/.read/.write operate on is always
> > taken to be the ima_ns of the filesystem's userns, i.e. sb-
> > >s_user_ns->ima_ns.
> > This - but I'm not an ima person - makes the most sense to me
> > and the semantics are straightforward. If I write to a file to
> > alter some policy then I expect the ima namespace of the user
> > namespace to be affected that the securityfs instance was mounted
> > in.
> > 2. The relevant ima_ns that .open/.read/.write operate on is always
> > taken to be the one of the opener. I don't really like that as that
> > gets weird if for some complicated reason the caller is not located
> > in the userns the filesystem was mounted in (weird mount
> > propagation scenario or sm). It also feels strange to operate on an
> > ima_ns that's different from s_user_ns->ima_ns in a securityfs
> > instance.
>
> We have this situation because one can setns() to another mount
> namespaces but the data shown by SecurityFS lives in a user
> namespace, right?
Well, not necessarily. There is another case where only the userns is
unshared and securityfs is never mounted inside the container. If the
process has the capability to open the securityfs files (kubernetes
privileged container, say), what should it see? The analogue with the
pid namespace says it should see the contents of the what the parent
had mounted because if it wanted to see its own it would have done a
mount of securityfs inside the userns. This argues for sb->s_user_ns-
>ima_ns.
for the setns mount namespace case, the vfsmnt tree is duplicated, so
if the securityfs sb->s_user_ns is your user namespace in the prior
mount namespace, it will end up being so in the new one. sb->s_user_ns
only changes on actual mount.
> And now we need to decide whether to affect the data in the
> user namespace that did the open (option 2) or to which the
> SecurityFS belongs to (option 1). If we were to open a regular file
> it would be option 1, so we should probably not break that existing
> semantic and also choose option 1 unless there one wasn't allowed to
> choose the user namespace the SecurityFS files belonged to then it
> should be option 2
Once the userns is unshared, IMA accounting is done inside the
namespace. However, in order to see the results, the container must
mount securityfs in the userns. I can't think of a good reason why a
privileged container should want to be accounted separately but see the
results of its parents, but similarly I can't see why a pid namespace
should want to see /proc of its parent either ... yet that's the
semantic we have today.
> but then we have file descriptor passing where 'being allowed' can
> change depending on who is reading/writing a file... Is there
> anything that would prevent us from setns()'ing to that target user
> namespace so that we would now see that of a user namespace that we
> are not allowed to see?
If you're able to setns to a user namespace, you logically have all its
privileges, so that problem shouldn't arise.
Option 2 is basically sliding back towards securityfs magically
changing properties depending on which userns is asking. If we're
going to support that, I don't see what was wrong with the owner/guid
magically changing as well like I first propsed. If we're going to
insist on a new mount of securityfs, I think it has to function cleanly
like the pid namespace, so option 1 is required.
James
On 12/10/21 06:32, Christian Brauner wrote:
> From ecf25d6b2b5895005d4103169bdb55d970e7a865 Mon Sep 17 00:00:00 2001
> From: Christian Brauner<[email protected]>
> Date: Fri, 10 Dec 2021 11:56:25 +0100
> Subject: [PATCH 2/2] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
>
> securityfs: don't allow mounting from outside the filesystem's userns
>
> If we ever need to allow that we should revisit the semantics.
> ---
> security/inode.c | 5 ++++-
> 1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/security/inode.c b/security/inode.c
> index eaccba7017d9..71f9634228f3 100644
> --- a/security/inode.c
> +++ b/security/inode.c
> @@ -43,7 +43,10 @@ static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc)
> {
> static const struct tree_descr files[] = {{""}};
> struct user_namespace *ns = fc->user_ns;
> - int error;
> + int error = -EINVAL;
> +
> + if (WARN_ON(ns != current_user_ns()))
> + return error;
>
> error = simple_fill_super(sb, SECURITYFS_MAGIC, files);
> if (error)
Oops, I hadn't seen this patch. How can one 'mount from outside the
filesystem's userns'?
On Fri, Dec 10, 2021 at 03:08:27PM -0500, Stefan Berger wrote:
>
> On 12/10/21 06:32, Christian Brauner wrote:
> > From ecf25d6b2b5895005d4103169bdb55d970e7a865 Mon Sep 17 00:00:00 2001
> > From: Christian Brauner<[email protected]>
> > Date: Fri, 10 Dec 2021 11:56:25 +0100
> > Subject: [PATCH 2/2] !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
> >
> > securityfs: don't allow mounting from outside the filesystem's userns
> >
> > If we ever need to allow that we should revisit the semantics.
> > ---
> > security/inode.c | 5 ++++-
> > 1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/security/inode.c b/security/inode.c
> > index eaccba7017d9..71f9634228f3 100644
> > --- a/security/inode.c
> > +++ b/security/inode.c
> > @@ -43,7 +43,10 @@ static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc)
> > {
> > static const struct tree_descr files[] = {{""}};
> > struct user_namespace *ns = fc->user_ns;
> > - int error;
> > + int error = -EINVAL;
> > +
> > + if (WARN_ON(ns != current_user_ns()))
> > + return error;
> > error = simple_fill_super(sb, SECURITYFS_MAGIC, files);
> > if (error)
>
>
> Oops, I hadn't seen this patch. How can one 'mount from outside the
> filesystem's userns'?
The new mount api is generic enough that this could be possible and it
might gain that ability explicitly at some point in the future for the
sake of delegated mounting. So that's just a good hardening measure.
On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
>
> On 12/10/21 06:32, Christian Brauner wrote:
> > On Thu, Dec 09, 2021 at 07:57:02PM -0500, Stefan Berger wrote:
> > > On 12/9/21 14:11, Christian Brauner wrote:
> > > > From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
> > > > From: Christian Brauner <[email protected]>
> > > > Date: Thu, 9 Dec 2021 20:07:02 +0100
> > > > Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
> > > >
> > > > ---
> > > > security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
> > > > 1 file changed, 38 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> > > > index 583462b29cb5..d5b302b925b8 100644
> > > > --- a/security/integrity/ima/ima_fs.c
> > > > +++ b/security/integrity/ima/ima_fs.c
> > > > @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
> > > > static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> > > > size_t datalen, loff_t *ppos)
> > > > {
> > > > - struct ima_namespace *ns = get_current_ns();
> > > > + struct ima_namespace *ns;
> > > > + struct user_namespace *user_ns;
> > > > char *data;
> > > > ssize_t result;
> > > > + user_ns = ima_filp_private(filp);
> > > > + ns = user_ns->ima_ns
> > > > +
> > > > if (datalen >= PAGE_SIZE)
> > > > datalen = PAGE_SIZE - 1;
> > > > @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
> > > > };
> > > > #endif
> > > > +static struct user_namespace *ima_filp_private(struct file *filp)
> > > > +{
> > > > + if (!(filp->f_flags & O_WRONLY)) {
> > > > +#ifdef CONFIG_IMA_READ_POLICY
> > > > + struct seq_file *seq;
> > > > +
> > > > + seq = filp->private_data;
> > > > + return seq->private;
> > > > +#endif
> > > > + }
> > > > + return filp->private_data;
> > > > +}
> > > > +
> > > > /*
> > > > * ima_open_policy: sequentialize access to the policy file
> > > > */
> > > > static int ima_open_policy(struct inode *inode, struct file *filp)
> > > > {
> > > > - struct ima_namespace *ns = get_current_ns();
> > > > + struct user_namespace *user_ns = current_user_ns();
> > >
> > > Do we have to take a reference on the user namespace assuming one can open
> > > the file, pass the fd down the hierarchy, and then the user namespace with
> > > the opened file goes away? Or is there anything else that keeps the user
> > > namespace alive?
> > No, we don't. When ima_policy_open() is called we do current_user_ns()
> > but that will be guaranteed to be identical to filp->f_cred->user_ns.
> > And f_cred is a reference that has been taken when the vfs allocated a
> > struct file for this .open call so won't go away until the last fput.
> >
> > My proposal is also too complicated, I think.
> > (The booster is giving me the same side-effects as my second shot so
> > this looks like two good days of fever and headache. So I'll use that as
> > an excuse. :))
> >
> > Your patch series as it stands has a bit of a security issue with those
> > get_current_ns() calls across differnet file/seq_file operations.
> >
> > You have to make an architectural decision, I think. I see two sensible
> > options:
> > 1. The relevant ima_ns that .open/.read/.write operate on is always taken
> > to be the ima_ns of the filesystem's userns, i.e.
> > sb->s_user_ns->ima_ns.
> > This - but I'm not an ima person - makes the most sense to me and the
> > semantics are straightforward. If I write to a file to alter some
> > policy then I expect the ima namespace of the user namespace to be
> > affected that the securityfs instance was mounted in.
> > 2. The relevant ima_ns that .open/.read/.write operate on is always
> > taken to be the one of the opener. I don't really like that as that
> > gets weird if for some complicated reason the caller is not located
> > in the userns the filesystem was mounted in (weird mount propagation
> > scenario or sm). It also feels strange to operate on an ima_ns that's
> > different from s_user_ns->ima_ns in a securityfs instance.
>
> We have this situation because one can setns() to another mount namespaces
> but the data shown by SecurityFS lives in a user namespace, right? And now
> we need to decide whether to affect the data in the user namespace that did
> the open (option 2) or to which the SecurityFS belongs to (option 1). If we
> were to open a regular file it would be option 1, so we should probably not
> break that existing semantic and also choose option 1 unless there one
> wasn't allowed to choose the user namespace the SecurityFS files belonged to
> then it should be option 2 but then we have file descriptor passing where
> 'being allowed' can change depending on who is reading/writing a file... Is
A general remark that's always worth repeating: under no circumstances
should the object that you're operating on change from .open to
.read/.write. Consider the following very rough sketch:
pid = fork():
if (pid == 0) {
// create USERNS1
unshare(CLONE_NEWUSER);
// write an idmapping for getuid() to userns 0
mount("", "/sys/kernel/security", "securityfs", [...]);
int fd_ima = open("/sys/kernel/security/some_ima_file", O_WRONLY);
// Send fd_ima to parent via SCM_RIGHTS
}
// Receive fd_ima from child.
write(fd_ima, "SET_OPTION_ON_INIT_USER_NS", ...);
That example above is an instant security issue if the ima_ns changed
depending on the caller since it means you could open a file in an
unprivileged userns and then funnel it to a random process in
init_user_ns that you control via SCM_RIGHTS and have it do the write()
and instead of USERNS1->ima_ns you'd suddenly have changed
init_user_ns->ima_ns.
And this is what your previous patch with these multiple
get_current_ns() calls allowed.
So the write in the example above must operate on USERNS1->ima_ns.
Now, both option 1 and 2 will ensure that the ima_ns that is operated on
is always the same across .open/.read/.write:
1. securityfs->sb->s_user_ns->ima_ns this is trivially guaranteed
because the filesystem's user namespace doesn't change no matter
where you access that filesystem from.
2. securityfs_file->f_cred->user_ns->ima_ns this is guaranteed because
f_cred stashes the openers credentials.
What I fundamentally dislike about option 2 (f_cred) is - and I tried to
say that in my prior mail - that what ima_ns is operated on depends on
the userns of the process that called .open, i.e. the contents in
securityfs change depending on the opener's userns.
And I think that is fundamentally sloppy semantics. The contents of a
filesystem should generally not depend on the caller if it can be
avoided. We have a few instances where this is the case (e.g. some
procfs sysctls) but especially for securityfs this does not make sense.
> there anything that would prevent us from setns()'ing to that target user
> namespace so that we would now see that of a user namespace that we are not
> allowed to see?
If you're really worried about someone being able to access a securityfs
instance whose userns doesn't match the userns the securityfs instance
was mounted in there are multiple ways to fix it. The one that I tend to
prefer is:
From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
From: Christian Brauner <[email protected]>
Date: Fri, 10 Dec 2021 11:47:37 +0100
Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
securityfs: only allow access to securityfs from within same namespace
Limit opening of securityfs files to callers located in the same namespace.
---
security/inode.c | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/security/inode.c b/security/inode.c
index eaccba7017d9..9eaf757c08cb 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
.fs_flags = FS_USERNS_MOUNT,
};
+static int securityfs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
+{
+ int err;
+
+ err = generic_permission(&init_user_ns, inode, mask);
+ if (!err) {
+ if (inode->i_sb->s_user_ns != current_user_ns())
+ err = -EACCES;
+ }
+
+ return err;
+}
+
+const struct inode_operations securityfs_dir_inode_operations = {
+ .permission = securityfs_permission,
+ .lookup = simple_lookup,
+};
+
+const struct file_operations securityfs_dir_operations = {
+ .permission = securityfs_permission,
+ .open = dcache_dir_open,
+ .release = dcache_dir_close,
+ .llseek = dcache_dir_lseek,
+ .read = generic_read_dir,
+ .iterate_shared = dcache_readdir,
+ .fsync = noop_fsync,
+};
+
/**
* securityfs_create_dentry - create a dentry in the securityfs filesystem
*
@@ -167,8 +196,8 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
inode->i_private = data;
if (S_ISDIR(mode)) {
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ inode->i_op = &securityfs_dir_inode_operations;
+ inode->i_fop = &securityfs_dir_operations;
inc_nlink(inode);
inc_nlink(dir);
} else if (S_ISLNK(mode)) {
--
2.30.2
>
> Following man page of setns:
>
> " User namespaces
> A process reassociating itself with a user namespace must
> have the CAP_SYS_ADMIN capability in the target user
> namespace. (This necessarily implies that it is only
> possible to join a descendant user namespace.) Upon
> successfully joining a user namespace, a process is
> granted all capabilities in that namespace, regardless of
> its user and group IDs."
>
>
> So if we choose option 1 maybe we have to test for this capability upon
> every read/write from/to a file?
>
In general, never do permission checking at read/write time unless the
read/write fundamentally depends on what is read or written. Clean
semantics will do permission checking once at open time. If you really
really need to do permission checking at .read/.write time you need to
use f_cred possibly calling override_creds/revert_creds() while doing so
but simply don't do it.
On Sat, Dec 11, 2021 at 10:50:26AM +0100, Christian Brauner wrote:
> On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
> >
> > On 12/10/21 06:32, Christian Brauner wrote:
> > > On Thu, Dec 09, 2021 at 07:57:02PM -0500, Stefan Berger wrote:
> > > > On 12/9/21 14:11, Christian Brauner wrote:
> > > > > From 1f03dc427c583d5e9ebc9ebe9de77c3c535bbebe Mon Sep 17 00:00:00 2001
> > > > > From: Christian Brauner <[email protected]>
> > > > > Date: Thu, 9 Dec 2021 20:07:02 +0100
> > > > > Subject: [PATCH] !!!! HERE BE DRAGONS - UNTESTED !!!!
> > > > >
> > > > > ---
> > > > > security/integrity/ima/ima_fs.c | 43 +++++++++++++++++++++++++++++----
> > > > > 1 file changed, 38 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
> > > > > index 583462b29cb5..d5b302b925b8 100644
> > > > > --- a/security/integrity/ima/ima_fs.c
> > > > > +++ b/security/integrity/ima/ima_fs.c
> > > > > @@ -317,10 +317,14 @@ static ssize_t ima_read_policy(char *path)
> > > > > static ssize_t ima_write_policy(struct file *file, const char __user *buf,
> > > > > size_t datalen, loff_t *ppos)
> > > > > {
> > > > > - struct ima_namespace *ns = get_current_ns();
> > > > > + struct ima_namespace *ns;
> > > > > + struct user_namespace *user_ns;
> > > > > char *data;
> > > > > ssize_t result;
> > > > > + user_ns = ima_filp_private(filp);
> > > > > + ns = user_ns->ima_ns
> > > > > +
> > > > > if (datalen >= PAGE_SIZE)
> > > > > datalen = PAGE_SIZE - 1;
> > > > > @@ -373,26 +377,51 @@ static const struct seq_operations ima_policy_seqops = {
> > > > > };
> > > > > #endif
> > > > > +static struct user_namespace *ima_filp_private(struct file *filp)
> > > > > +{
> > > > > + if (!(filp->f_flags & O_WRONLY)) {
> > > > > +#ifdef CONFIG_IMA_READ_POLICY
> > > > > + struct seq_file *seq;
> > > > > +
> > > > > + seq = filp->private_data;
> > > > > + return seq->private;
> > > > > +#endif
> > > > > + }
> > > > > + return filp->private_data;
> > > > > +}
> > > > > +
> > > > > /*
> > > > > * ima_open_policy: sequentialize access to the policy file
> > > > > */
> > > > > static int ima_open_policy(struct inode *inode, struct file *filp)
> > > > > {
> > > > > - struct ima_namespace *ns = get_current_ns();
> > > > > + struct user_namespace *user_ns = current_user_ns();
> > > >
> > > > Do we have to take a reference on the user namespace assuming one can open
> > > > the file, pass the fd down the hierarchy, and then the user namespace with
> > > > the opened file goes away? Or is there anything else that keeps the user
> > > > namespace alive?
> > > No, we don't. When ima_policy_open() is called we do current_user_ns()
> > > but that will be guaranteed to be identical to filp->f_cred->user_ns.
> > > And f_cred is a reference that has been taken when the vfs allocated a
> > > struct file for this .open call so won't go away until the last fput.
> > >
> > > My proposal is also too complicated, I think.
> > > (The booster is giving me the same side-effects as my second shot so
> > > this looks like two good days of fever and headache. So I'll use that as
> > > an excuse. :))
> > >
> > > Your patch series as it stands has a bit of a security issue with those
> > > get_current_ns() calls across differnet file/seq_file operations.
> > >
> > > You have to make an architectural decision, I think. I see two sensible
> > > options:
> > > 1. The relevant ima_ns that .open/.read/.write operate on is always taken
> > > to be the ima_ns of the filesystem's userns, i.e.
> > > sb->s_user_ns->ima_ns.
> > > This - but I'm not an ima person - makes the most sense to me and the
> > > semantics are straightforward. If I write to a file to alter some
> > > policy then I expect the ima namespace of the user namespace to be
> > > affected that the securityfs instance was mounted in.
> > > 2. The relevant ima_ns that .open/.read/.write operate on is always
> > > taken to be the one of the opener. I don't really like that as that
> > > gets weird if for some complicated reason the caller is not located
> > > in the userns the filesystem was mounted in (weird mount propagation
> > > scenario or sm). It also feels strange to operate on an ima_ns that's
> > > different from s_user_ns->ima_ns in a securityfs instance.
> >
> > We have this situation because one can setns() to another mount namespaces
> > but the data shown by SecurityFS lives in a user namespace, right? And now
> > we need to decide whether to affect the data in the user namespace that did
> > the open (option 2) or to which the SecurityFS belongs to (option 1). If we
> > were to open a regular file it would be option 1, so we should probably not
> > break that existing semantic and also choose option 1 unless there one
> > wasn't allowed to choose the user namespace the SecurityFS files belonged to
> > then it should be option 2 but then we have file descriptor passing where
> > 'being allowed' can change depending on who is reading/writing a file... Is
>
> A general remark that's always worth repeating: under no circumstances
> should the object that you're operating on change from .open to
> .read/.write. Consider the following very rough sketch:
>
> pid = fork():
> if (pid == 0) {
> // create USERNS1
> unshare(CLONE_NEWUSER);
>
> // write an idmapping for getuid() to userns 0
>
> mount("", "/sys/kernel/security", "securityfs", [...]);
>
> int fd_ima = open("/sys/kernel/security/some_ima_file", O_WRONLY);
>
> // Send fd_ima to parent via SCM_RIGHTS
> }
> // Receive fd_ima from child.
>
> write(fd_ima, "SET_OPTION_ON_INIT_USER_NS", ...);
>
> That example above is an instant security issue if the ima_ns changed
> depending on the caller since it means you could open a file in an
> unprivileged userns and then funnel it to a random process in
> init_user_ns that you control via SCM_RIGHTS and have it do the write()
> and instead of USERNS1->ima_ns you'd suddenly have changed
> init_user_ns->ima_ns.
> And this is what your previous patch with these multiple
> get_current_ns() calls allowed.
>
> So the write in the example above must operate on USERNS1->ima_ns.
>
> Now, both option 1 and 2 will ensure that the ima_ns that is operated on
> is always the same across .open/.read/.write:
> 1. securityfs->sb->s_user_ns->ima_ns this is trivially guaranteed
> because the filesystem's user namespace doesn't change no matter
> where you access that filesystem from.
> 2. securityfs_file->f_cred->user_ns->ima_ns this is guaranteed because
> f_cred stashes the openers credentials.
>
> What I fundamentally dislike about option 2 (f_cred) is - and I tried to
> say that in my prior mail - that what ima_ns is operated on depends on
> the userns of the process that called .open, i.e. the contents in
> securityfs change depending on the opener's userns.
>
> And I think that is fundamentally sloppy semantics. The contents of a
> filesystem should generally not depend on the caller if it can be
> avoided. We have a few instances where this is the case (e.g. some
> procfs sysctls) but especially for securityfs this does not make sense.
>
> > there anything that would prevent us from setns()'ing to that target user
> > namespace so that we would now see that of a user namespace that we are not
> > allowed to see?
>
> If you're really worried about someone being able to access a securityfs
> instance whose userns doesn't match the userns the securityfs instance
> was mounted in there are multiple ways to fix it. The one that I tend to
> prefer is:
>
> From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Fri, 10 Dec 2021 11:47:37 +0100
> Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
>
> securityfs: only allow access to securityfs from within same namespace
>
> Limit opening of securityfs files to callers located in the same namespace.
>
> ---
> security/inode.c | 33 +++++++++++++++++++++++++++++++--
> 1 file changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/security/inode.c b/security/inode.c
> index eaccba7017d9..9eaf757c08cb 100644
> --- a/security/inode.c
> +++ b/security/inode.c
> @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> .fs_flags = FS_USERNS_MOUNT,
> };
>
> +static int securityfs_permission(struct user_namespace *mnt_userns,
> + struct inode *inode, int mask)
> +{
> + int err;
> +
> + err = generic_permission(&init_user_ns, inode, mask);
> + if (!err) {
> + if (inode->i_sb->s_user_ns != current_user_ns())
> + err = -EACCES;
> + }
Thinking about this a better alternative might be (on top of my
previous suggestion):
diff --git a/security/inode.c b/security/inode.c
index 0000b2bd4c0c..a540372e4c8c 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -86,15 +86,15 @@ static struct file_system_type fs_type = {
static int securityfs_permission(struct user_namespace *mnt_userns,
struct inode *inode, int mask)
{
- int err;
-
- err = generic_permission(&init_user_ns, inode, mask);
- if (!err) {
- if (inode->i_sb->s_user_ns != current_user_ns())
- err = -EACCES;
- }
-
- return err;
+ /*
+ * Restrict access to securityfs to callers whose user
+ * namespace is the same or an ancestor of the user namespace
+ * this securityfs instance was mounted in.
+ */
+ if (!in_userns(current_user_ns(), inode->i_sb->s_user_ns))
+ return -EACCES;
+
+ return generic_permission(&init_user_ns, inode, mask);
}
const struct inode_operations securityfs_dir_inode_operations = {
On 12/11/21 04:50, Christian Brauner wrote:
> On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
>>
>>
>> there anything that would prevent us from setns()'ing to that target user
>> namespace so that we would now see that of a user namespace that we are not
>> allowed to see?
> If you're really worried about someone being able to access a securityfs
> instance whose userns doesn't match the userns the securityfs instance
> was mounted in there are multiple ways to fix it. The one that I tend to
> prefer is:
>
> From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> From: Christian Brauner <[email protected]>
> Date: Fri, 10 Dec 2021 11:47:37 +0100
> Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
>
> securityfs: only allow access to securityfs from within same namespace
>
> Limit opening of securityfs files to callers located in the same namespace.
>
> ---
> security/inode.c | 33 +++++++++++++++++++++++++++++++--
> 1 file changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/security/inode.c b/security/inode.c
> index eaccba7017d9..9eaf757c08cb 100644
> --- a/security/inode.c
> +++ b/security/inode.c
> @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> .fs_flags = FS_USERNS_MOUNT,
> };
>
> +static int securityfs_permission(struct user_namespace *mnt_userns,
> + struct inode *inode, int mask)
> +{
> + int err;
> +
> + err = generic_permission(&init_user_ns, inode, mask);
> + if (!err) {
> + if (inode->i_sb->s_user_ns != current_user_ns())
> + err = -EACCES;
> + }
> +
> + return err;
> +}
> +
> +const struct inode_operations securityfs_dir_inode_operations = {
> + .permission = securityfs_permission,
> + .lookup = simple_lookup,
> +};
> +
> +const struct file_operations securityfs_dir_operations = {
> + .permission = securityfs_permission,
This interface function on file operations doesn't exist.
I'll use the inode_operations and also hook it to the root dentry of the
super_block. Then there's no need to have this check on symlinks and
files...
> + .open = dcache_dir_open,
> + .release = dcache_dir_close,
> + .llseek = dcache_dir_lseek,
> + .read = generic_read_dir,
> + .iterate_shared = dcache_readdir,
> + .fsync = noop_fsync,
> +};
> +
> /**
> * securityfs_create_dentry - create a dentry in the securityfs filesystem
> *
> @@ -167,8 +196,8 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
> inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
> inode->i_private = data;
> if (S_ISDIR(mode)) {
> - inode->i_op = &simple_dir_inode_operations;
> - inode->i_fop = &simple_dir_operations;
> + inode->i_op = &securityfs_dir_inode_operations;
> + inode->i_fop = &securityfs_dir_operations;
> inc_nlink(inode);
> inc_nlink(dir);
> } else if (S_ISLNK(mode)) {
On Mon, Dec 13, 2021 at 10:33:40AM -0500, Stefan Berger wrote:
>
> On 12/11/21 04:50, Christian Brauner wrote:
> > On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
> > >
> > >
> > > there anything that would prevent us from setns()'ing to that target user
> > > namespace so that we would now see that of a user namespace that we are not
> > > allowed to see?
> > If you're really worried about someone being able to access a securityfs
> > instance whose userns doesn't match the userns the securityfs instance
> > was mounted in there are multiple ways to fix it. The one that I tend to
> > prefer is:
> >
> > From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> > From: Christian Brauner <[email protected]>
> > Date: Fri, 10 Dec 2021 11:47:37 +0100
> > Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
> >
> > securityfs: only allow access to securityfs from within same namespace
> >
> > Limit opening of securityfs files to callers located in the same namespace.
> >
> > ---
> > security/inode.c | 33 +++++++++++++++++++++++++++++++--
> > 1 file changed, 31 insertions(+), 2 deletions(-)
> >
> > diff --git a/security/inode.c b/security/inode.c
> > index eaccba7017d9..9eaf757c08cb 100644
> > --- a/security/inode.c
> > +++ b/security/inode.c
> > @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> > .fs_flags = FS_USERNS_MOUNT,
> > };
> > +static int securityfs_permission(struct user_namespace *mnt_userns,
> > + struct inode *inode, int mask)
> > +{
> > + int err;
> > +
> > + err = generic_permission(&init_user_ns, inode, mask);
> > + if (!err) {
> > + if (inode->i_sb->s_user_ns != current_user_ns())
> > + err = -EACCES;
> > + }
> > +
> > + return err;
> > +}
> > +
> > +const struct inode_operations securityfs_dir_inode_operations = {
> > + .permission = securityfs_permission,
> > + .lookup = simple_lookup,
> > +};
> > +
> > +const struct file_operations securityfs_dir_operations = {
> > + .permission = securityfs_permission,
>
>
> This interface function on file operations doesn't exist.
It's almost as if the subject line of this patch warned about its draft
character. That was supposed for regular files.
>
> I'll use the inode_operations and also hook it to the root dentry of the
> super_block. Then there's no need to have this check on symlinks and
> files...
Don't special case the inode_operations for the root inode!
If a privileged process opens an fd refering to a struct file for the
root inode and leaks it to an unprivileged process by accident the
unprivileged process can open any file or directory beneath via openat()
and friends.
Symlinks don't need a .permission handler anyway because they just
contain the name of another file and that is checked for .permission
unless you have a separate .getlink handler where you want to restrict
link contents further.
But regular files need to have a .permission method see openat().
On Mon, Dec 13, 2021 at 04:50:20PM +0100, Christian Brauner wrote:
> On Mon, Dec 13, 2021 at 10:33:40AM -0500, Stefan Berger wrote:
> >
> > On 12/11/21 04:50, Christian Brauner wrote:
> > > On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
> > > >
> > > >
> > > > there anything that would prevent us from setns()'ing to that target user
> > > > namespace so that we would now see that of a user namespace that we are not
> > > > allowed to see?
> > > If you're really worried about someone being able to access a securityfs
> > > instance whose userns doesn't match the userns the securityfs instance
> > > was mounted in there are multiple ways to fix it. The one that I tend to
> > > prefer is:
> > >
> > > From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> > > From: Christian Brauner <[email protected]>
> > > Date: Fri, 10 Dec 2021 11:47:37 +0100
> > > Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
> > >
> > > securityfs: only allow access to securityfs from within same namespace
> > >
> > > Limit opening of securityfs files to callers located in the same namespace.
> > >
> > > ---
> > > security/inode.c | 33 +++++++++++++++++++++++++++++++--
> > > 1 file changed, 31 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/security/inode.c b/security/inode.c
> > > index eaccba7017d9..9eaf757c08cb 100644
> > > --- a/security/inode.c
> > > +++ b/security/inode.c
> > > @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> > > .fs_flags = FS_USERNS_MOUNT,
> > > };
> > > +static int securityfs_permission(struct user_namespace *mnt_userns,
> > > + struct inode *inode, int mask)
> > > +{
> > > + int err;
> > > +
> > > + err = generic_permission(&init_user_ns, inode, mask);
> > > + if (!err) {
> > > + if (inode->i_sb->s_user_ns != current_user_ns())
> > > + err = -EACCES;
> > > + }
> > > +
> > > + return err;
> > > +}
> > > +
> > > +const struct inode_operations securityfs_dir_inode_operations = {
> > > + .permission = securityfs_permission,
> > > + .lookup = simple_lookup,
> > > +};
> > > +
> > > +const struct file_operations securityfs_dir_operations = {
> > > + .permission = securityfs_permission,
> >
> >
> > This interface function on file operations doesn't exist.
>
> It's almost as if the subject line of this patch warned about its draft
> character. That was supposed for regular files.
>
> >
> > I'll use the inode_operations and also hook it to the root dentry of the
> > super_block. Then there's no need to have this check on symlinks and
> > files...
>
> Don't special case the inode_operations for the root inode!
> If a privileged process opens an fd refering to a struct file for the
s/a privileged process/a process that is located in an ancestor userns
of the securityfs instance
> root inode and leaks it to an unprivileged process by accident the
s/unprivileged process/process located in a descendant userns
> unprivileged process can open any file or directory beneath via openat()
> and friends.
On 12/13/21 10:50, Christian Brauner wrote:
> On Mon, Dec 13, 2021 at 10:33:40AM -0500, Stefan Berger wrote:
>> On 12/11/21 04:50, Christian Brauner wrote:
>>> On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
>>>>
>>>> there anything that would prevent us from setns()'ing to that target user
>>>> namespace so that we would now see that of a user namespace that we are not
>>>> allowed to see?
>>> If you're really worried about someone being able to access a securityfs
>>> instance whose userns doesn't match the userns the securityfs instance
>>> was mounted in there are multiple ways to fix it. The one that I tend to
>>> prefer is:
>>>
>>> From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
>>> From: Christian Brauner <[email protected]>
>>> Date: Fri, 10 Dec 2021 11:47:37 +0100
>>> Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
>>>
>>> securityfs: only allow access to securityfs from within same namespace
>>>
>>> Limit opening of securityfs files to callers located in the same namespace.
>>>
>>> ---
>>> security/inode.c | 33 +++++++++++++++++++++++++++++++--
>>> 1 file changed, 31 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/security/inode.c b/security/inode.c
>>> index eaccba7017d9..9eaf757c08cb 100644
>>> --- a/security/inode.c
>>> +++ b/security/inode.c
>>> @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
>>> .fs_flags = FS_USERNS_MOUNT,
>>> };
>>> +static int securityfs_permission(struct user_namespace *mnt_userns,
>>> + struct inode *inode, int mask)
>>> +{
>>> + int err;
>>> +
>>> + err = generic_permission(&init_user_ns, inode, mask);
>>> + if (!err) {
>>> + if (inode->i_sb->s_user_ns != current_user_ns())
>>> + err = -EACCES;
>>> + }
>>> +
>>> + return err;
>>> +}
>>> +
>>> +const struct inode_operations securityfs_dir_inode_operations = {
>>> + .permission = securityfs_permission,
>>> + .lookup = simple_lookup,
>>> +};
>>> +
>>> +const struct file_operations securityfs_dir_operations = {
>>> + .permission = securityfs_permission,
>>
>> This interface function on file operations doesn't exist.
> It's almost as if the subject line of this patch warned about its draft
> character. That was supposed for regular files.
>
>> I'll use the inode_operations and also hook it to the root dentry of the
>> super_block. Then there's no need to have this check on symlinks and
>> files...
> Don't special case the inode_operations for the root inode!
I modified the inode_operations *also* for the root node, since that one
is initialized with &simple_dir_inode_operationsby simple_fill_super, so
I didn't want to miss it...
> If a privileged process opens an fd refering to a struct file for the
> root inode and leaks it to an unprivileged process by accident the
> unprivileged process can open any file or directory beneath via openat()
> and friends.
>
> Symlinks don't need a .permission handler anyway because they just
> contain the name of another file and that is checked for .permission
> unless you have a separate .getlink handler where you want to restrict
> link contents further.
>
> But regular files need to have a .permission method see openat().
So here's what I have now for the hardening.
diff --git a/security/inode.c b/security/inode.c
index fee01ff4d831..a0d9f086e3d5 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -26,6 +26,29 @@
static struct vfsmount *init_securityfs_mount;
static int init_securityfs_mount_count;
+static int securityfs_permission(struct user_namespace *mnt_userns,
+ struct inode *inode, int mask)
+{
+ int err;
+
+ err = generic_permission(&init_user_ns, inode, mask);
+ if (!err) {
+ if (inode->i_sb->s_user_ns != current_user_ns())
+ err = -EACCES;
+ }
+
+ return err;
+}
+
+const struct inode_operations securityfs_dir_inode_operations = {
+ .permission = securityfs_permission,
+ .lookup = simple_lookup,
+};
+
+const struct inode_operations securityfs_file_inode_operations = {
+ .permission = securityfs_permission,
+};
+
static void securityfs_free_inode(struct inode *inode)
{
if (S_ISLNK(inode->i_mode))
@@ -41,20 +64,25 @@ static const struct super_operations
securityfs_super_operations = {
static int securityfs_fill_super(struct super_block *sb, struct
fs_context *fc)
{
static const struct tree_descr files[] = {{""}};
+ struct user_namespace *ns = fc->user_ns;
int error;
+ if (WARN_ON(ns != current_user_ns()))
+ return -EINVAL;
+
error = simple_fill_super(sb, SECURITYFS_MAGIC, files);
if (error)
return error;
sb->s_op = &securityfs_super_operations;
+ sb->s_root->d_inode->i_op = &securityfs_dir_inode_operations;
return 0;
}
[...]
@@ -157,7 +186,7 @@ static struct dentry *securityfs_create_dentry(const
char *name, umode_t mode,
inode->i_atime = inode->i_mtime = inode->i_ctime =
current_time(inode);
inode->i_private = data;
if (S_ISDIR(mode)) {
- inode->i_op = &simple_dir_inode_operations;
+ inode->i_op = &securityfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
inc_nlink(inode);
inc_nlink(dir);
@@ -165,10 +194,10 @@ static struct dentry
*securityfs_create_dentry(const char *name, umode_t mode,
inode->i_op = iops ? iops :
&simple_symlink_inode_operations;
inode->i_link = data;
} else {
+ inode->i_op = &securityfs_file_inode_operations;
inode->i_fop = fops;
}
d_instantiate(dentry, inode);
On Mon, Dec 13, 2021 at 11:25:28AM -0500, Stefan Berger wrote:
>
> On 12/13/21 10:50, Christian Brauner wrote:
> > On Mon, Dec 13, 2021 at 10:33:40AM -0500, Stefan Berger wrote:
> > > On 12/11/21 04:50, Christian Brauner wrote:
> > > > On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
> > > > >
> > > > > there anything that would prevent us from setns()'ing to that target user
> > > > > namespace so that we would now see that of a user namespace that we are not
> > > > > allowed to see?
> > > > If you're really worried about someone being able to access a securityfs
> > > > instance whose userns doesn't match the userns the securityfs instance
> > > > was mounted in there are multiple ways to fix it. The one that I tend to
> > > > prefer is:
> > > >
> > > > From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> > > > From: Christian Brauner <[email protected]>
> > > > Date: Fri, 10 Dec 2021 11:47:37 +0100
> > > > Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
> > > >
> > > > securityfs: only allow access to securityfs from within same namespace
> > > >
> > > > Limit opening of securityfs files to callers located in the same namespace.
> > > >
> > > > ---
> > > > security/inode.c | 33 +++++++++++++++++++++++++++++++--
> > > > 1 file changed, 31 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/security/inode.c b/security/inode.c
> > > > index eaccba7017d9..9eaf757c08cb 100644
> > > > --- a/security/inode.c
> > > > +++ b/security/inode.c
> > > > @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> > > > .fs_flags = FS_USERNS_MOUNT,
> > > > };
> > > > +static int securityfs_permission(struct user_namespace *mnt_userns,
> > > > + struct inode *inode, int mask)
> > > > +{
> > > > + int err;
> > > > +
> > > > + err = generic_permission(&init_user_ns, inode, mask);
> > > > + if (!err) {
> > > > + if (inode->i_sb->s_user_ns != current_user_ns())
> > > > + err = -EACCES;
> > > > + }
> > > > +
> > > > + return err;
> > > > +}
> > > > +
> > > > +const struct inode_operations securityfs_dir_inode_operations = {
> > > > + .permission = securityfs_permission,
> > > > + .lookup = simple_lookup,
> > > > +};
> > > > +
> > > > +const struct file_operations securityfs_dir_operations = {
> > > > + .permission = securityfs_permission,
> > >
> > > This interface function on file operations doesn't exist.
> > It's almost as if the subject line of this patch warned about its draft
> > character. That was supposed for regular files.
> >
> > > I'll use the inode_operations and also hook it to the root dentry of the
> > > super_block. Then there's no need to have this check on symlinks and
> > > files...
> > Don't special case the inode_operations for the root inode!
>
> I modified the inode_operations *also* for the root node, since that one is
> initialized with &simple_dir_inode_operationsby simple_fill_super, so I
> didn't want to miss it...
>
>
> > If a privileged process opens an fd refering to a struct file for the
> > root inode and leaks it to an unprivileged process by accident the
> > unprivileged process can open any file or directory beneath via openat()
> > and friends.
> >
> > Symlinks don't need a .permission handler anyway because they just
> > contain the name of another file and that is checked for .permission
> > unless you have a separate .getlink handler where you want to restrict
> > link contents further.
> >
> > But regular files need to have a .permission method see openat().
>
> So here's what I have now for the hardening.
>
>
> diff --git a/security/inode.c b/security/inode.c
> index fee01ff4d831..a0d9f086e3d5 100644
> --- a/security/inode.c
> +++ b/security/inode.c
> @@ -26,6 +26,29 @@
> static struct vfsmount *init_securityfs_mount;
> static int init_securityfs_mount_count;
>
> +static int securityfs_permission(struct user_namespace *mnt_userns,
> + struct inode *inode, int mask)
> +{
> + int err;
> +
> + err = generic_permission(&init_user_ns, inode, mask);
> + if (!err) {
> + if (inode->i_sb->s_user_ns != current_user_ns())
> + err = -EACCES;
Please consider
https://lore.kernel.org/lkml/20211211104527.7cp4mbznjpcijfqx@wittgenstein
otherwise looks ok.
On Mon, Dec 13, 2021 at 04:50:20PM +0100, Christian Brauner wrote:
> On Mon, Dec 13, 2021 at 10:33:40AM -0500, Stefan Berger wrote:
> >
> > On 12/11/21 04:50, Christian Brauner wrote:
> > > On Fri, Dec 10, 2021 at 08:57:11AM -0500, Stefan Berger wrote:
> > > >
> > > >
> > > > there anything that would prevent us from setns()'ing to that target user
> > > > namespace so that we would now see that of a user namespace that we are not
> > > > allowed to see?
> > > If you're really worried about someone being able to access a securityfs
> > > instance whose userns doesn't match the userns the securityfs instance
> > > was mounted in there are multiple ways to fix it. The one that I tend to
> > > prefer is:
> > >
> > > From e0ff6a8dcc573763568e685dd70d1547efd68df9 Mon Sep 17 00:00:00 2001
> > > From: Christian Brauner <[email protected]>
> > > Date: Fri, 10 Dec 2021 11:47:37 +0100
> > > Subject: !!!! HERE BE DRAGONS - COMPLETELY UNTESTED !!!!
> > >
> > > securityfs: only allow access to securityfs from within same namespace
> > >
> > > Limit opening of securityfs files to callers located in the same namespace.
> > >
> > > ---
> > > security/inode.c | 33 +++++++++++++++++++++++++++++++--
> > > 1 file changed, 31 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/security/inode.c b/security/inode.c
> > > index eaccba7017d9..9eaf757c08cb 100644
> > > --- a/security/inode.c
> > > +++ b/security/inode.c
> > > @@ -80,6 +80,35 @@ static struct file_system_type fs_type = {
> > > .fs_flags = FS_USERNS_MOUNT,
> > > };
> > > +static int securityfs_permission(struct user_namespace *mnt_userns,
> > > + struct inode *inode, int mask)
> > > +{
> > > + int err;
> > > +
> > > + err = generic_permission(&init_user_ns, inode, mask);
> > > + if (!err) {
> > > + if (inode->i_sb->s_user_ns != current_user_ns())
> > > + err = -EACCES;
> > > + }
> > > +
> > > + return err;
> > > +}
> > > +
> > > +const struct inode_operations securityfs_dir_inode_operations = {
> > > + .permission = securityfs_permission,
> > > + .lookup = simple_lookup,
> > > +};
> > > +
> > > +const struct file_operations securityfs_dir_operations = {
> > > + .permission = securityfs_permission,
> >
> >
> > This interface function on file operations doesn't exist.
>
> It's almost as if the subject line of this patch warned about its draft
> character. That was supposed for regular files.
Mah, I deleted the ;) after it on accident. I wasn't mocking. :)