2013-06-03 18:59:16

by Eric Paris

[permalink] [raw]
Subject: [RFC PATCH 1/2] selinux: merge selinux_inode_permission and inode_has_perm

selinux_inode_permission had some heavy lifting done to make it more
performance polite. But it still does largely the same thing as
inode_has_perm. So move that work into inode_has_perm and call
inode_has_perm from selinux_inode_permission.

Signed-off-by: Eric Paris <[email protected]>
---
security/selinux/hooks.c | 92 ++++++++++++++++++++++--------------------------
1 file changed, 42 insertions(+), 50 deletions(-)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5c6f2cd..cfecb52 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1494,17 +1494,41 @@ static int task_has_system(struct task_struct *tsk,
SECCLASS_SYSTEM, perms, NULL);
}

+static noinline int audit_inode_permission(struct inode *inode,
+ struct common_audit_data *adp,
+ u32 perms, u32 audited, u32 denied,
+ unsigned flags)
+{
+ struct common_audit_data ad;
+ struct inode_security_struct *isec = inode->i_security;
+ int rc;
+
+ if (!adp) {
+ ad.type = LSM_AUDIT_DATA_INODE;
+ ad.u.inode = inode;
+ adp = &ad;
+ }
+
+ rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms,
+ audited, denied, adp, flags);
+ if (rc)
+ return rc;
+ return 0;
+}
+
/* Check whether a task has a particular permission to an inode.
The 'adp' parameter is optional and allows other audit
data to be passed (e.g. the dentry). */
static int inode_has_perm(const struct cred *cred,
struct inode *inode,
- u32 perms,
+ u32 perms, u32 dontaudit,
struct common_audit_data *adp,
unsigned flags)
{
struct inode_security_struct *isec;
- u32 sid;
+ struct av_decision avd;
+ u32 sid, denied, audited;
+ int rc, rc2;

validate_creds(cred);

@@ -1514,6 +1538,14 @@ static int inode_has_perm(const struct cred *cred,
sid = cred_sid(cred);
isec = inode->i_security;

+ rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
+ audited = avc_audit_required(perms, &avd, rc, dontaudit, &denied);
+ if (likely(!audited))
+ return rc;
+
+ rc2 = audit_inode_permission(inode, adp, perms, audited, denied, flags);
+ if (rc2)
+ return rc2;
return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
}

@@ -1529,7 +1561,7 @@ static inline int dentry_has_perm(const struct cred *cred,

ad.type = LSM_AUDIT_DATA_DENTRY;
ad.u.dentry = dentry;
- return inode_has_perm(cred, inode, av, &ad, 0);
+ return inode_has_perm(cred, inode, av, 0, &ad, 0);
}

/* Same as inode_has_perm, but pass explicit audit data containing
@@ -1544,7 +1576,7 @@ static inline int path_has_perm(const struct cred *cred,

ad.type = LSM_AUDIT_DATA_PATH;
ad.u.path = *path;
- return inode_has_perm(cred, inode, av, &ad, 0);
+ return inode_has_perm(cred, inode, av, 0, &ad, 0);
}

/* Check whether a task can use an open file descriptor to
@@ -1580,7 +1612,7 @@ static int file_has_perm(const struct cred *cred,
/* av is zero if only checking access to the descriptor. */
rc = 0;
if (av)
- rc = inode_has_perm(cred, inode, av, &ad, 0);
+ rc = inode_has_perm(cred, inode, av, 0, &ad, 0);

out:
return rc;
@@ -2635,64 +2667,24 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
return dentry_has_perm(cred, dentry, FILE__READ);
}

-static noinline int audit_inode_permission(struct inode *inode,
- u32 perms, u32 audited, u32 denied,
- unsigned flags)
-{
- struct common_audit_data ad;
- struct inode_security_struct *isec = inode->i_security;
- int rc;
-
- ad.type = LSM_AUDIT_DATA_INODE;
- ad.u.inode = inode;
-
- rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms,
- audited, denied, &ad, flags);
- if (rc)
- return rc;
- return 0;
-}
-
static int selinux_inode_permission(struct inode *inode, int mask)
{
const struct cred *cred = current_cred();
- u32 perms;
- bool from_access;
+ u32 perms, dontaudit = 0;
unsigned flags = mask & MAY_NOT_BLOCK;
- struct inode_security_struct *isec;
- u32 sid;
- struct av_decision avd;
- int rc, rc2;
- u32 audited, denied;

- from_access = mask & MAY_ACCESS;
+ if (mask & MAY_ACCESS)
+ dontaudit = FILE__AUDIT_ACCESS;
+
mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);

/* No permission to check. Existence test. */
if (!mask)
return 0;

- validate_creds(cred);
-
- if (unlikely(IS_PRIVATE(inode)))
- return 0;
-
perms = file_mask_to_av(inode->i_mode, mask);

- sid = cred_sid(cred);
- isec = inode->i_security;
-
- rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
- audited = avc_audit_required(perms, &avd, rc,
- from_access ? FILE__AUDIT_ACCESS : 0,
- &denied);
- if (likely(!audited))
- return rc;
-
- rc2 = audit_inode_permission(inode, perms, audited, denied, flags);
- if (rc2)
- return rc2;
- return rc;
+ return inode_has_perm(cred, inode, perms, dontaudit, NULL, flags);
}

static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
--
1.8.2.1


2013-06-03 18:59:22

by Eric Paris

[permalink] [raw]
Subject: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode

This patch adds a cache of selinux security checks into struct inode.
It is protected by the seq counter against updates by other nodes. This
has a measurable impact on one benchmark Linus mentioned. The cpu
time using make to check a huge project for changes. It is going to
have a negative impact on cases where tasks with different labels are
accessing the same object. In these cases each one will grab the i_lock
to reset the in inode cache. The only place I imagine this would be
common would be with shared libraries. But as those are typically
openned and mmapped, they don't have continuous checks...

Stock Kernel:
8.23% make [k] __d_lookup_rcu
6.27% make [k] link_path_walk
3.91% make [k] selinux_inode_permission <----
3.37% make [k] avc_has_perm_noaudit <----
2.26% make [k] lookup_fast
2.12% make [k] system_call
1.86% make [k] path_lookupat
1.82% make [k] inode_has_perm.isra.32.constprop.61 <----
1.57% make [k] copy_user_enhanced_fast_string
1.48% make [k] generic_permission
1.34% make [k] __audit_syscall_exit
1.31% make [k] kmem_cache_free
1.24% make [k] kmem_cache_alloc
1.20% make [k] generic_fillattr
1.12% make [k] __inode_permission
1.06% make [k] dput
1.04% make [k] strncpy_from_user
1.04% make [k] _raw_spin_lock
Total: 3.91 + 3.37 + 1.82 = 9.1%

My Changes:
8.54% make [k] __d_lookup_rcu
6.52% make [k] link_path_walk
3.93% make [k] inode_has_perm <----
2.31% make [k] lookup_fast
2.05% make [k] system_call
1.79% make [k] path_lookupat
1.72% make [k] generic_permission
1.50% make [k] __audit_syscall_exit
1.49% make [k] selinux_inode_permission <----
1.47% make [k] copy_user_enhanced_fast_string
1.28% make [k] __inode_permission
1.23% make [k] kmem_cache_alloc
1.19% make [k] _raw_spin_lock
1.15% make [k] lg_local_lock
1.10% make [k] strncpy_from_user
1.10% make [k] dput
1.08% make [k] kmem_cache_free
1.08% make [k] generic_fillattr
Total: 3.93 + 1.49 = 5.42

In inode_has_perm the big time takers are loading cred->sid and the
raw_seqcount_begin(inode->i_security_seccount). I'm not certain how to
make either of those much faster.

In selinux_inode_permission() we spend time in getting current->cred and
in calling inode_has_perm().

Signed-off-by: Eric Paris <[email protected]>
---
include/linux/fs.h | 5 +++
security/selinux/hooks.c | 62 ++++++++++++++++++++++++++++++++++---
security/selinux/include/security.h | 1 +
security/selinux/ss/services.c | 5 +++
4 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 43db02e..5268cf3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -535,6 +535,11 @@ struct inode {
struct address_space *i_mapping;

#ifdef CONFIG_SECURITY
+ seqcount_t i_security_seqcount;
+ u32 i_last_task_sid;
+ u32 i_last_granting;
+ u32 i_last_perms;
+ u32 i_audit_allow;
void *i_security;
#endif

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index cfecb52..00dd6d9 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -82,6 +82,7 @@
#include <linux/user_namespace.h>
#include <linux/export.h>
#include <linux/msg.h>
+#include <linux/seqlock.h>
#include <linux/shm.h>

#include "avc.h"
@@ -207,6 +208,7 @@ static int inode_alloc_security(struct inode *inode)
if (!isec)
return -ENOMEM;

+ seqcount_init(&inode->i_security_seqcount);
mutex_init(&isec->lock);
INIT_LIST_HEAD(&isec->list);
isec->inode = inode;
@@ -1516,6 +1518,44 @@ static noinline int audit_inode_permission(struct inode *inode,
return 0;
}

+static bool inode_has_perm_cached(u32 sid, struct inode *inode, u32 perms)
+{
+ unsigned seq;
+ u32 last_task_sid;
+ u32 last_perms;
+ u32 last_granting;
+
+ seq = raw_seqcount_begin(&inode->i_security_seqcount);
+ last_task_sid = inode->i_last_task_sid;
+ last_perms = inode->i_last_perms;
+ last_granting = inode->i_last_granting;
+
+ /* something changed, bail! */
+ if (read_seqcount_retry(&inode->i_security_seqcount, seq))
+ return false;
+
+ return sid == last_task_sid && (perms & last_perms) == perms &&
+ security_get_latest_granting() == last_granting;
+}
+
+static void inode_set_perm_cache(struct inode *inode, u32 task_sid, u32 perms,
+ u32 granting, u32 audit_allow)
+{
+ spin_lock(&inode->i_lock);
+ write_seqcount_begin(&inode->i_security_seqcount);
+ if (inode->i_last_task_sid == task_sid &&
+ inode->i_last_granting == granting) {
+ inode->i_last_perms |= perms;
+ } else {
+ inode->i_last_task_sid = task_sid;
+ inode->i_last_perms = perms;
+ inode->i_last_granting = granting;
+ inode->i_audit_allow = audit_allow;
+ }
+ write_seqcount_end(&inode->i_security_seqcount);
+ spin_unlock(&inode->i_lock);
+}
+
/* Check whether a task has a particular permission to an inode.
The 'adp' parameter is optional and allows other audit
data to be passed (e.g. the dentry). */
@@ -1525,7 +1565,6 @@ static int inode_has_perm(const struct cred *cred,
struct common_audit_data *adp,
unsigned flags)
{
- struct inode_security_struct *isec;
struct av_decision avd;
u32 sid, denied, audited;
int rc, rc2;
@@ -1536,9 +1575,23 @@ static int inode_has_perm(const struct cred *cred,
return 0;

sid = cred_sid(cred);
- isec = inode->i_security;

- rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
+ if (inode_has_perm_cached(sid, inode, perms)) {
+ rc = 0;
+ avd.allowed = -1;
+ avd.auditallow = inode->i_audit_allow;
+ avd.auditdeny = -1;
+ avd.seqno = 0;
+ avd.flags = 0;
+ } else {
+ struct inode_security_struct *isec;
+
+ isec = inode->i_security;
+
+ rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
+ if (!rc)
+ inode_set_perm_cache(inode, sid, perms, avd.seqno, avd.auditallow);
+ }
audited = avc_audit_required(perms, &avd, rc, dontaudit, &denied);
if (likely(!audited))
return rc;
@@ -1546,7 +1599,7 @@ static int inode_has_perm(const struct cred *cred,
rc2 = audit_inode_permission(inode, adp, perms, audited, denied, flags);
if (rc2)
return rc2;
- return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
+ return rc;
}

/* Same as inode_has_perm, but pass explicit audit data containing
@@ -2841,6 +2894,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
return;
}

+ inode_set_perm_cache(inode, 0, 0, 0, 0);
isec->sid = newsid;
return;
}
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 6d38851..ec7d984 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -85,6 +85,7 @@ extern int selinux_policycap_openperm;
/* limitation of boundary depth */
#define POLICYDB_BOUNDS_MAXDEPTH 4

+u32 security_get_latest_granting(void);
int security_mls_enabled(void);

int security_load_policy(void *data, size_t len);
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index b4feecc3..c6687ab 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -87,6 +87,11 @@ int ss_initialized;
*/
static u32 latest_granting;

+u32 security_get_latest_granting(void)
+{
+ return latest_granting;
+}
+
/* Forward declaration. */
static int context_struct_to_string(struct context *context, char **scontext,
u32 *scontext_len);
--
1.8.2.1

2013-06-03 19:34:07

by Eric Paris

[permalink] [raw]
Subject: Re: [RFC PATCH 1/2] selinux: merge selinux_inode_permission and inode_has_perm

On Mon, 2013-06-03 at 14:59 -0400, Eric Paris wrote:
> selinux_inode_permission had some heavy lifting done to make it more
> performance polite. But it still does largely the same thing as
> inode_has_perm. So move that work into inode_has_perm and call
> inode_has_perm from selinux_inode_permission.
>
> Signed-off-by: Eric Paris <[email protected]>
> ---
> security/selinux/hooks.c | 92 ++++++++++++++++++++++--------------------------
> 1 file changed, 42 insertions(+), 50 deletions(-)
>
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 5c6f2cd..cfecb52 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c

> @@ -1514,6 +1538,14 @@ static int inode_has_perm(const struct cred *cred,
> sid = cred_sid(cred);
> isec = inode->i_security;
>
> + rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
> + audited = avc_audit_required(perms, &avd, rc, dontaudit, &denied);
> + if (likely(!audited))
> + return rc;
> +
> + rc2 = audit_inode_permission(inode, adp, perms, audited, denied, flags);
> + if (rc2)
> + return rc2;
> return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
> }
>

Should just return rc, not avc_has_perm_flags(). I fixed that in the
2/2 patch and this should work just fine. Kills a little performance,
but still works.

-Eric

2013-06-03 20:26:40

by Casey Schaufler

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode

On 6/3/2013 11:59 AM, Eric Paris wrote:
> This patch adds a cache of selinux security checks into struct inode.

This violates the security blob architecture of the LSM.

Security module specific optimizations in the VFS layer are
probably going to be pointless if (when) we go to stackable
security modules. I have based all of the work I've been
doing on stacking modules on the blob pointer architecture.
Building special case code into the stacking infrastructure
to accommodate this optimization, which will apply only if
SELinux is being used and no other inode blob using LSM is
in use, will be possible, but heinous.

We (the LSM community) agreed to the blob pointer architecture
for a number of reasons, one of which was that it keeps all of
the details of the implementation hidden from the VFS. Pulling
SELinux specific code into the VFS violates that dramatically.

I understand that Linus has chimed in on this issue and has
spoken well of the potential performance improvement. I suggest
that if we have a module independent "cache" we (for values of
"we" that include modules other than SELinux) will be better off.
*I understand that today only SELinux and Smack use i_security.*
I do not expect that to continue into the future.


> It is protected by the seq counter against updates by other nodes. This
> has a measurable impact on one benchmark Linus mentioned. The cpu
> time using make to check a huge project for changes. It is going to
> have a negative impact on cases where tasks with different labels are
> accessing the same object. In these cases each one will grab the i_lock
> to reset the in inode cache. The only place I imagine this would be
> common would be with shared libraries. But as those are typically
> openned and mmapped, they don't have continuous checks...
>
> Stock Kernel:
> 8.23% make [k] __d_lookup_rcu
> 6.27% make [k] link_path_walk
> 3.91% make [k] selinux_inode_permission <----
> 3.37% make [k] avc_has_perm_noaudit <----
> 2.26% make [k] lookup_fast
> 2.12% make [k] system_call
> 1.86% make [k] path_lookupat
> 1.82% make [k] inode_has_perm.isra.32.constprop.61 <----
> 1.57% make [k] copy_user_enhanced_fast_string
> 1.48% make [k] generic_permission
> 1.34% make [k] __audit_syscall_exit
> 1.31% make [k] kmem_cache_free
> 1.24% make [k] kmem_cache_alloc
> 1.20% make [k] generic_fillattr
> 1.12% make [k] __inode_permission
> 1.06% make [k] dput
> 1.04% make [k] strncpy_from_user
> 1.04% make [k] _raw_spin_lock
> Total: 3.91 + 3.37 + 1.82 = 9.1%
>
> My Changes:
> 8.54% make [k] __d_lookup_rcu
> 6.52% make [k] link_path_walk
> 3.93% make [k] inode_has_perm <----
> 2.31% make [k] lookup_fast
> 2.05% make [k] system_call
> 1.79% make [k] path_lookupat
> 1.72% make [k] generic_permission
> 1.50% make [k] __audit_syscall_exit
> 1.49% make [k] selinux_inode_permission <----
> 1.47% make [k] copy_user_enhanced_fast_string
> 1.28% make [k] __inode_permission
> 1.23% make [k] kmem_cache_alloc
> 1.19% make [k] _raw_spin_lock
> 1.15% make [k] lg_local_lock
> 1.10% make [k] strncpy_from_user
> 1.10% make [k] dput
> 1.08% make [k] kmem_cache_free
> 1.08% make [k] generic_fillattr
> Total: 3.93 + 1.49 = 5.42
>
> In inode_has_perm the big time takers are loading cred->sid and the
> raw_seqcount_begin(inode->i_security_seccount). I'm not certain how to
> make either of those much faster.
>
> In selinux_inode_permission() we spend time in getting current->cred and
> in calling inode_has_perm().
>
> Signed-off-by: Eric Paris <[email protected]>
> ---
> include/linux/fs.h | 5 +++
> security/selinux/hooks.c | 62 ++++++++++++++++++++++++++++++++++---
> security/selinux/include/security.h | 1 +
> security/selinux/ss/services.c | 5 +++
> 4 files changed, 69 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 43db02e..5268cf3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -535,6 +535,11 @@ struct inode {
> struct address_space *i_mapping;
>
> #ifdef CONFIG_SECURITY

Shouldn't these be under #ifdef SELINUX?
They're pointless without SELinux.

> + seqcount_t i_security_seqcount;
> + u32 i_last_task_sid;
> + u32 i_last_granting;
> + u32 i_last_perms;
> + u32 i_audit_allow;
> void *i_security;
> #endif
>
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index cfecb52..00dd6d9 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -82,6 +82,7 @@
> #include <linux/user_namespace.h>
> #include <linux/export.h>
> #include <linux/msg.h>
> +#include <linux/seqlock.h>
> #include <linux/shm.h>
>
> #include "avc.h"
> @@ -207,6 +208,7 @@ static int inode_alloc_security(struct inode *inode)
> if (!isec)
> return -ENOMEM;
>
> + seqcount_init(&inode->i_security_seqcount);
> mutex_init(&isec->lock);
> INIT_LIST_HEAD(&isec->list);
> isec->inode = inode;
> @@ -1516,6 +1518,44 @@ static noinline int audit_inode_permission(struct inode *inode,
> return 0;
> }
>
> +static bool inode_has_perm_cached(u32 sid, struct inode *inode, u32 perms)
> +{
> + unsigned seq;
> + u32 last_task_sid;
> + u32 last_perms;
> + u32 last_granting;
> +
> + seq = raw_seqcount_begin(&inode->i_security_seqcount);
> + last_task_sid = inode->i_last_task_sid;
> + last_perms = inode->i_last_perms;
> + last_granting = inode->i_last_granting;
> +
> + /* something changed, bail! */
> + if (read_seqcount_retry(&inode->i_security_seqcount, seq))
> + return false;
> +
> + return sid == last_task_sid && (perms & last_perms) == perms &&
> + security_get_latest_granting() == last_granting;
> +}
> +
> +static void inode_set_perm_cache(struct inode *inode, u32 task_sid, u32 perms,
> + u32 granting, u32 audit_allow)
> +{
> + spin_lock(&inode->i_lock);
> + write_seqcount_begin(&inode->i_security_seqcount);
> + if (inode->i_last_task_sid == task_sid &&
> + inode->i_last_granting == granting) {
> + inode->i_last_perms |= perms;
> + } else {
> + inode->i_last_task_sid = task_sid;
> + inode->i_last_perms = perms;
> + inode->i_last_granting = granting;
> + inode->i_audit_allow = audit_allow;
> + }
> + write_seqcount_end(&inode->i_security_seqcount);
> + spin_unlock(&inode->i_lock);
> +}
> +
> /* Check whether a task has a particular permission to an inode.
> The 'adp' parameter is optional and allows other audit
> data to be passed (e.g. the dentry). */
> @@ -1525,7 +1565,6 @@ static int inode_has_perm(const struct cred *cred,
> struct common_audit_data *adp,
> unsigned flags)
> {
> - struct inode_security_struct *isec;
> struct av_decision avd;
> u32 sid, denied, audited;
> int rc, rc2;
> @@ -1536,9 +1575,23 @@ static int inode_has_perm(const struct cred *cred,
> return 0;
>
> sid = cred_sid(cred);
> - isec = inode->i_security;
>
> - rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
> + if (inode_has_perm_cached(sid, inode, perms)) {
> + rc = 0;
> + avd.allowed = -1;
> + avd.auditallow = inode->i_audit_allow;
> + avd.auditdeny = -1;
> + avd.seqno = 0;
> + avd.flags = 0;
> + } else {
> + struct inode_security_struct *isec;
> +
> + isec = inode->i_security;
> +
> + rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
> + if (!rc)
> + inode_set_perm_cache(inode, sid, perms, avd.seqno, avd.auditallow);
> + }
> audited = avc_audit_required(perms, &avd, rc, dontaudit, &denied);
> if (likely(!audited))
> return rc;
> @@ -1546,7 +1599,7 @@ static int inode_has_perm(const struct cred *cred,
> rc2 = audit_inode_permission(inode, adp, perms, audited, denied, flags);
> if (rc2)
> return rc2;
> - return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
> + return rc;
> }
>
> /* Same as inode_has_perm, but pass explicit audit data containing
> @@ -2841,6 +2894,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
> return;
> }
>
> + inode_set_perm_cache(inode, 0, 0, 0, 0);
> isec->sid = newsid;
> return;
> }
> diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
> index 6d38851..ec7d984 100644
> --- a/security/selinux/include/security.h
> +++ b/security/selinux/include/security.h
> @@ -85,6 +85,7 @@ extern int selinux_policycap_openperm;
> /* limitation of boundary depth */
> #define POLICYDB_BOUNDS_MAXDEPTH 4
>
> +u32 security_get_latest_granting(void);
> int security_mls_enabled(void);
>
> int security_load_policy(void *data, size_t len);
> diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
> index b4feecc3..c6687ab 100644
> --- a/security/selinux/ss/services.c
> +++ b/security/selinux/ss/services.c
> @@ -87,6 +87,11 @@ int ss_initialized;
> */
> static u32 latest_granting;
>
> +u32 security_get_latest_granting(void)
> +{
> + return latest_granting;
> +}
> +
> /* Forward declaration. */
> static int context_struct_to_string(struct context *context, char **scontext,
> u32 *scontext_len);

2013-06-03 21:32:24

by Linus Torvalds

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode



On Mon, 3 Jun 2013, Eric Paris wrote:
>
> #ifdef CONFIG_SECURITY
> + seqcount_t i_security_seqcount;
> + u32 i_last_task_sid;
> + u32 i_last_granting;
> + u32 i_last_perms;
> + u32 i_audit_allow;
> void *i_security;
> #endif

This is much too big. I was really hoping for "another word that the
security layer can use" or similar.

Something this big would be acceptable if it would be a *generic* security
cache, and others could use it too, and would avoid ever actually calling
into any security layer at all (ie we could do the checks entirely at the
VFS layer). Then it would be fine. But for just the fact that SELinux is
too slow? No.

Linus

2013-06-03 23:18:40

by Eric Paris

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode

On Tue, 2013-06-04 at 06:31 +0900, Linus Torvalds wrote:
>
>
> On Mon, 3 Jun 2013, Eric Paris wrote:
> >
> > #ifdef CONFIG_SECURITY
> > + seqcount_t i_security_seqcount;
> > + u32 i_last_task_sid;
> > + u32 i_last_granting;
> > + u32 i_last_perms;
> > + u32 i_audit_allow;
> > void *i_security;
> > #endif
>
> This is much too big. I was really hoping for "another word that the
> security layer can use" or similar.

Not sure how that can work :-(

> Something this big would be acceptable if it would be a *generic* security
> cache, and others could use it too, and would avoid ever actually calling
> into any security layer at all (ie we could do the checks entirely at the
> VFS layer). Then it would be fine. But for just the fact that SELinux is
> too slow? No.

There is nothing about it that can't be VFS-erized. The fields are:

readlockless way to get the data
which task was allowed
which perms were allowed
what generation of security policy allowed it
what perms should be forced to call security hook anyway

defining "perms" from a VFS PoV is hard.

doing any of this with 'stacking' is hard. Then again, I'm only so so
on the value of stacking. I've waffled a few times...

I can do it entirely inside selinux, but we are still going to have the
cache hit you were originally seeing as we dereference isec to get the
info....

2013-06-04 01:00:12

by Casey Schaufler

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode

On 6/3/2013 1:26 PM, Casey Schaufler wrote:
> On 6/3/2013 11:59 AM, Eric Paris wrote:
>> This patch adds a cache of selinux security checks into struct inode.
> This violates the security blob architecture of the LSM.
>
> Security module specific optimizations in the VFS layer are
> probably going to be pointless if (when) we go to stackable
> security modules. I have based all of the work I've been
> doing on stacking modules on the blob pointer architecture.
> Building special case code into the stacking infrastructure
> to accommodate this optimization, which will apply only if
> SELinux is being used and no other inode blob using LSM is
> in use, will be possible, but heinous.

After further review it turns out that this patch set won't
affect the stacking after all, because the work is still getting
done in the SELinux code. So put me down as having philosophical
objections, but not substantive ones.

>
> We (the LSM community) agreed to the blob pointer architecture
> for a number of reasons, one of which was that it keeps all of
> the details of the implementation hidden from the VFS. Pulling
> SELinux specific code into the VFS violates that dramatically.
>
> I understand that Linus has chimed in on this issue and has
> spoken well of the potential performance improvement. I suggest
> that if we have a module independent "cache" we (for values of
> "we" that include modules other than SELinux) will be better off.
> *I understand that today only SELinux and Smack use i_security.*
> I do not expect that to continue into the future.
>
>
>> It is protected by the seq counter against updates by other nodes. This
>> has a measurable impact on one benchmark Linus mentioned. The cpu
>> time using make to check a huge project for changes. It is going to
>> have a negative impact on cases where tasks with different labels are
>> accessing the same object. In these cases each one will grab the i_lock
>> to reset the in inode cache. The only place I imagine this would be
>> common would be with shared libraries. But as those are typically
>> openned and mmapped, they don't have continuous checks...
>>
>> Stock Kernel:
>> 8.23% make [k] __d_lookup_rcu
>> 6.27% make [k] link_path_walk
>> 3.91% make [k] selinux_inode_permission <----
>> 3.37% make [k] avc_has_perm_noaudit <----
>> 2.26% make [k] lookup_fast
>> 2.12% make [k] system_call
>> 1.86% make [k] path_lookupat
>> 1.82% make [k] inode_has_perm.isra.32.constprop.61 <----
>> 1.57% make [k] copy_user_enhanced_fast_string
>> 1.48% make [k] generic_permission
>> 1.34% make [k] __audit_syscall_exit
>> 1.31% make [k] kmem_cache_free
>> 1.24% make [k] kmem_cache_alloc
>> 1.20% make [k] generic_fillattr
>> 1.12% make [k] __inode_permission
>> 1.06% make [k] dput
>> 1.04% make [k] strncpy_from_user
>> 1.04% make [k] _raw_spin_lock
>> Total: 3.91 + 3.37 + 1.82 = 9.1%
>>
>> My Changes:
>> 8.54% make [k] __d_lookup_rcu
>> 6.52% make [k] link_path_walk
>> 3.93% make [k] inode_has_perm <----
>> 2.31% make [k] lookup_fast
>> 2.05% make [k] system_call
>> 1.79% make [k] path_lookupat
>> 1.72% make [k] generic_permission
>> 1.50% make [k] __audit_syscall_exit
>> 1.49% make [k] selinux_inode_permission <----
>> 1.47% make [k] copy_user_enhanced_fast_string
>> 1.28% make [k] __inode_permission
>> 1.23% make [k] kmem_cache_alloc
>> 1.19% make [k] _raw_spin_lock
>> 1.15% make [k] lg_local_lock
>> 1.10% make [k] strncpy_from_user
>> 1.10% make [k] dput
>> 1.08% make [k] kmem_cache_free
>> 1.08% make [k] generic_fillattr
>> Total: 3.93 + 1.49 = 5.42
>>
>> In inode_has_perm the big time takers are loading cred->sid and the
>> raw_seqcount_begin(inode->i_security_seccount). I'm not certain how to
>> make either of those much faster.
>>
>> In selinux_inode_permission() we spend time in getting current->cred and
>> in calling inode_has_perm().
>>
>> Signed-off-by: Eric Paris <[email protected]>
>> ---
>> include/linux/fs.h | 5 +++
>> security/selinux/hooks.c | 62 ++++++++++++++++++++++++++++++++++---
>> security/selinux/include/security.h | 1 +
>> security/selinux/ss/services.c | 5 +++
>> 4 files changed, 69 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 43db02e..5268cf3 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -535,6 +535,11 @@ struct inode {
>> struct address_space *i_mapping;
>>
>> #ifdef CONFIG_SECURITY
> Shouldn't these be under #ifdef SELINUX?
> They're pointless without SELinux.
>
>> + seqcount_t i_security_seqcount;
>> + u32 i_last_task_sid;
>> + u32 i_last_granting;
>> + u32 i_last_perms;
>> + u32 i_audit_allow;
>> void *i_security;
>> #endif
>>
>> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
>> index cfecb52..00dd6d9 100644
>> --- a/security/selinux/hooks.c
>> +++ b/security/selinux/hooks.c
>> @@ -82,6 +82,7 @@
>> #include <linux/user_namespace.h>
>> #include <linux/export.h>
>> #include <linux/msg.h>
>> +#include <linux/seqlock.h>
>> #include <linux/shm.h>
>>
>> #include "avc.h"
>> @@ -207,6 +208,7 @@ static int inode_alloc_security(struct inode *inode)
>> if (!isec)
>> return -ENOMEM;
>>
>> + seqcount_init(&inode->i_security_seqcount);
>> mutex_init(&isec->lock);
>> INIT_LIST_HEAD(&isec->list);
>> isec->inode = inode;
>> @@ -1516,6 +1518,44 @@ static noinline int audit_inode_permission(struct inode *inode,
>> return 0;
>> }
>>
>> +static bool inode_has_perm_cached(u32 sid, struct inode *inode, u32 perms)
>> +{
>> + unsigned seq;
>> + u32 last_task_sid;
>> + u32 last_perms;
>> + u32 last_granting;
>> +
>> + seq = raw_seqcount_begin(&inode->i_security_seqcount);
>> + last_task_sid = inode->i_last_task_sid;
>> + last_perms = inode->i_last_perms;
>> + last_granting = inode->i_last_granting;
>> +
>> + /* something changed, bail! */
>> + if (read_seqcount_retry(&inode->i_security_seqcount, seq))
>> + return false;
>> +
>> + return sid == last_task_sid && (perms & last_perms) == perms &&
>> + security_get_latest_granting() == last_granting;
>> +}
>> +
>> +static void inode_set_perm_cache(struct inode *inode, u32 task_sid, u32 perms,
>> + u32 granting, u32 audit_allow)
>> +{
>> + spin_lock(&inode->i_lock);
>> + write_seqcount_begin(&inode->i_security_seqcount);
>> + if (inode->i_last_task_sid == task_sid &&
>> + inode->i_last_granting == granting) {
>> + inode->i_last_perms |= perms;
>> + } else {
>> + inode->i_last_task_sid = task_sid;
>> + inode->i_last_perms = perms;
>> + inode->i_last_granting = granting;
>> + inode->i_audit_allow = audit_allow;
>> + }
>> + write_seqcount_end(&inode->i_security_seqcount);
>> + spin_unlock(&inode->i_lock);
>> +}
>> +
>> /* Check whether a task has a particular permission to an inode.
>> The 'adp' parameter is optional and allows other audit
>> data to be passed (e.g. the dentry). */
>> @@ -1525,7 +1565,6 @@ static int inode_has_perm(const struct cred *cred,
>> struct common_audit_data *adp,
>> unsigned flags)
>> {
>> - struct inode_security_struct *isec;
>> struct av_decision avd;
>> u32 sid, denied, audited;
>> int rc, rc2;
>> @@ -1536,9 +1575,23 @@ static int inode_has_perm(const struct cred *cred,
>> return 0;
>>
>> sid = cred_sid(cred);
>> - isec = inode->i_security;
>>
>> - rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
>> + if (inode_has_perm_cached(sid, inode, perms)) {
>> + rc = 0;
>> + avd.allowed = -1;
>> + avd.auditallow = inode->i_audit_allow;
>> + avd.auditdeny = -1;
>> + avd.seqno = 0;
>> + avd.flags = 0;
>> + } else {
>> + struct inode_security_struct *isec;
>> +
>> + isec = inode->i_security;
>> +
>> + rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd);
>> + if (!rc)
>> + inode_set_perm_cache(inode, sid, perms, avd.seqno, avd.auditallow);
>> + }
>> audited = avc_audit_required(perms, &avd, rc, dontaudit, &denied);
>> if (likely(!audited))
>> return rc;
>> @@ -1546,7 +1599,7 @@ static int inode_has_perm(const struct cred *cred,
>> rc2 = audit_inode_permission(inode, adp, perms, audited, denied, flags);
>> if (rc2)
>> return rc2;
>> - return avc_has_perm_flags(sid, isec->sid, isec->sclass, perms, adp, flags);
>> + return rc;
>> }
>>
>> /* Same as inode_has_perm, but pass explicit audit data containing
>> @@ -2841,6 +2894,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
>> return;
>> }
>>
>> + inode_set_perm_cache(inode, 0, 0, 0, 0);
>> isec->sid = newsid;
>> return;
>> }
>> diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
>> index 6d38851..ec7d984 100644
>> --- a/security/selinux/include/security.h
>> +++ b/security/selinux/include/security.h
>> @@ -85,6 +85,7 @@ extern int selinux_policycap_openperm;
>> /* limitation of boundary depth */
>> #define POLICYDB_BOUNDS_MAXDEPTH 4
>>
>> +u32 security_get_latest_granting(void);
>> int security_mls_enabled(void);
>>
>> int security_load_policy(void *data, size_t len);
>> diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
>> index b4feecc3..c6687ab 100644
>> --- a/security/selinux/ss/services.c
>> +++ b/security/selinux/ss/services.c
>> @@ -87,6 +87,11 @@ int ss_initialized;
>> */
>> static u32 latest_granting;
>>
>> +u32 security_get_latest_granting(void)
>> +{
>> + return latest_granting;
>> +}
>> +
>> /* Forward declaration. */
>> static int context_struct_to_string(struct context *context, char **scontext,
>> u32 *scontext_len);
>
> --
> This message was distributed to subscribers of the selinux mailing list.
> If you no longer wish to subscribe, send mail to [email protected] with
> the words "unsubscribe selinux" without quotes as the message.
>

2013-06-04 02:53:03

by Casey Schaufler

[permalink] [raw]
Subject: Re: [RFC PATCH 2/2] SELinux: cache inode checks inside struct inode

On 6/3/2013 4:18 PM, Eric Paris wrote:
> On Tue, 2013-06-04 at 06:31 +0900, Linus Torvalds wrote:
>>
>> On Mon, 3 Jun 2013, Eric Paris wrote:
>>>
>>> #ifdef CONFIG_SECURITY
>>> + seqcount_t i_security_seqcount;
>>> + u32 i_last_task_sid;
>>> + u32 i_last_granting;
>>> + u32 i_last_perms;
>>> + u32 i_audit_allow;
>>> void *i_security;
>>> #endif
>> This is much too big. I was really hoping for "another word that the
>> security layer can use" or similar.
> Not sure how that can work :-(
>
>> Something this big would be acceptable if it would be a *generic* security
>> cache, and others could use it too, and would avoid ever actually calling
>> into any security layer at all (ie we could do the checks entirely at the
>> VFS layer). Then it would be fine. But for just the fact that SELinux is
>> too slow? No.
> There is nothing about it that can't be VFS-erized. The fields are:
>
> readlockless way to get the data
> which task was allowed
> which perms were allowed
> what generation of security policy allowed it
> what perms should be forced to call security hook anyway

You've defined all of these things as u32 (that is, secids).
Secids are an SELinux artifact and do not belong in general
purpose interfaces.

> defining "perms" from a VFS PoV is hard.

Yes, it is. But I think that for this to work we can't
be looking at the perms, we have to be looking at a previous
determination that can be used to accurately predict what the
outcome would be if we did look at the perms. If, for example,
we saved the pid (in a reuse safe way) of the last process to
successfully access the inode you wouldn't need to look at any
perms (unless the configuration changed) for that pid again.

Yeah, yeah, I know that is an oversimplification that would
likely gain precious little advantage.I seriously doubt it would
be worth doing.

> doing any of this with 'stacking' is hard. Then again, I'm only so so
> on the value of stacking. I've waffled a few times...

That's another reason why cashing certain attributes isn't a good idea,
but that caching a result might have value.

> I can do it entirely inside selinux, but we are still going to have the
> cache hit you were originally seeing as we dereference isec to get the
> info....
>
>
>
> --
> This message was distributed to subscribers of the selinux mailing list.
> If you no longer wish to subscribe, send mail to [email protected] with
> the words "unsubscribe selinux" without quotes as the message.
>