2005-01-04 21:55:28

by Serge E. Hallyn

[permalink] [raw]
Subject: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

The attached patch introduces a __vm_enough_memory function in
security/security.c which is used by cap_vm_enough_memory,
dummy_vm_enough_memory, and selinux_vm_enough_memory. This has
been discussed on the lsm mailing list.

Are there any objections to or comments on this patch?

thanks,
-serge

Signed-off-by: Serge Hallyn <[email protected]>

Index: linux-2.6.10-mm1/include/linux/security.h
===================================================================
--- linux-2.6.10-mm1.orig/include/linux/security.h 2005-01-04 16:42:10.000000000 -0600
+++ linux-2.6.10-mm1/include/linux/security.h 2005-01-04 16:42:33.000000000 -0600
@@ -1900,6 +1900,7 @@ extern int register_security (struct sec
extern int unregister_security (struct security_operations *ops);
extern int mod_reg_security (const char *name, struct security_operations *ops);
extern int mod_unreg_security (const char *name, struct security_operations *ops);
+extern int __vm_enough_memory (long pages, int cap_sys_admin);


#else /* CONFIG_SECURITY */
Index: linux-2.6.10-mm1/security/commoncap.c
===================================================================
--- linux-2.6.10-mm1.orig/security/commoncap.c 2005-01-04 16:42:10.000000000 -0600
+++ linux-2.6.10-mm1/security/commoncap.c 2005-01-04 16:42:33.000000000 -0600
@@ -316,90 +316,10 @@ int cap_syslog (int type)
return 0;
}

-/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 0 means there is enough memory for the allocation to
- * succeed and -ENOMEM implies there is not.
- *
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
- *
- * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
- * Additional code 2002 Jul 20 by Robert Love.
- */
int cap_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
-
- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- unsigned long n;
-
- free = get_page_cache_size();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for root
- */
- if (!capable(CAP_SYS_ADMIN))
- free -= free / 32;
-
- if (free > pages)
- return 0;
-
- /*
- * nr_free_pages() is very expensive on large systems,
- * only call if we're about to fail.
- */
- n = nr_free_pages();
- if (!capable(CAP_SYS_ADMIN))
- n -= n / 32;
- free += n;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- /*
- * Leave the last 3% for root
- */
- if (!capable(CAP_SYS_ADMIN))
- allowed -= allowed / 32;
- allowed += total_swap_pages;
-
- /* Leave the last 3% for root */
- if (current->euid)
- allowed -= allowed / 32;
-
- /* Don't let a single process grow too big:
- leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
-
- vm_unacct_memory(pages);
-
- return -ENOMEM;
+ return __vm_enough_memory(pages,
+ (cap_capable(current, CAP_SYS_ADMIN) == 0));
}

EXPORT_SYMBOL(cap_capable);
Index: linux-2.6.10-mm1/security/dummy.c
===================================================================
--- linux-2.6.10-mm1.orig/security/dummy.c 2005-01-04 16:42:10.000000000 -0600
+++ linux-2.6.10-mm1/security/dummy.c 2005-01-04 16:42:33.000000000 -0600
@@ -111,69 +111,10 @@ static int dummy_settime(struct timespec
return 0;
}

-/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 0 means there is enough memory for the allocation to
- * succeed and -ENOMEM implies there is not.
- *
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
- */
static int dummy_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
-
- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- free = get_page_cache_size();
- free += nr_free_pages();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for root
- */
- if (current->euid)
- free -= free / 32;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- allowed += total_swap_pages;
-
- /* Leave the last 3% for root */
- if (current->euid)
- allowed -= allowed / 32;
-
- /* Don't let a single process grow too big:
- leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
-
- vm_unacct_memory(pages);
-
- return -ENOMEM;
+ return __vm_enough_memory(pages,
+ (dummy_capable(current, CAP_SYS_ADMIN) == 0));
}

static int dummy_bprm_alloc_security (struct linux_binprm *bprm)
Index: linux-2.6.10-mm1/security/security.c
===================================================================
--- linux-2.6.10-mm1.orig/security/security.c 2005-01-04 16:42:10.000000000 -0600
+++ linux-2.6.10-mm1/security/security.c 2005-01-04 16:44:19.000000000 -0600
@@ -17,6 +17,10 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/security.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>

#define SECURITY_FRAMEWORK_VERSION "1.0.0"

@@ -173,6 +177,90 @@ int mod_unreg_security(const char *name,
return security_ops->unregister_security(name, ops);
}

+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ */
+int __vm_enough_memory(long pages, int cap_sys_admin)
+{
+ unsigned long free, allowed;
+
+ vm_acct_memory(pages);
+
+ /*
+ * Sometimes we want to use more memory than we have
+ */
+ if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+ return 0;
+
+ if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+ unsigned long n;
+
+ free = get_page_cache_size();
+ free += nr_swap_pages;
+
+ /*
+ * Any slabs which are created with the
+ * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+ * which are reclaimable, under pressure. The dentry
+ * cache and most inode caches should fall into this
+ */
+ free += atomic_read(&slab_reclaim_pages);
+
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ free -= free / 32;
+
+ if (free > pages)
+ return 0;
+
+ /*
+ * nr_free_pages() is very expensive on large systems,
+ * only call if we're about to fail.
+ */
+ n = nr_free_pages();
+ if (!cap_sys_admin)
+ n -= n / 32;
+ free += n;
+
+ if (free > pages)
+ return 0;
+ vm_unacct_memory(pages);
+ return -ENOMEM;
+ }
+
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ allowed -= allowed / 32;
+ allowed += total_swap_pages;
+
+ /* Don't let a single process grow too big:
+ leave 3% of the size of this process for other processes */
+ allowed -= current->mm->total_vm / 32;
+
+ if (atomic_read(&vm_committed_space) < allowed)
+ return 0;
+
+ vm_unacct_memory(pages);
+
+ return -ENOMEM;
+}
+
/**
* capable - calls the currently loaded security module's capable() function with the specified capability
* @cap: the requested capability level.
@@ -201,3 +289,4 @@ EXPORT_SYMBOL_GPL(mod_reg_security);
EXPORT_SYMBOL_GPL(mod_unreg_security);
EXPORT_SYMBOL(capable);
EXPORT_SYMBOL(security_ops);
+EXPORT_SYMBOL(__vm_enough_memory);
Index: linux-2.6.10-mm1/security/selinux/hooks.c
===================================================================
--- linux-2.6.10-mm1.orig/security/selinux/hooks.c 2005-01-04 16:42:10.000000000 -0600
+++ linux-2.6.10-mm1/security/selinux/hooks.c 2005-01-04 16:42:33.000000000 -0600
@@ -1521,69 +1521,26 @@ static int selinux_syslog(int type)
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
*
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
- *
- * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
- * Additional code 2002 Jul 20 by Robert Love.
+ * Note that secondary_ops->capable and task_has_perm return 0 if
+ * the capability is granted, but __vm_enough_memory requires 1 if
+ * the capability is granted.
*/
static int selinux_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
- int rc;
+ int rc, cap_sys_admin = 0;
struct task_security_struct *tsec = current->security;

- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- free = get_page_cache_size();
- free += nr_free_pages();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for privileged processes.
- * Don't audit the check, as it is applied to all processes
- * that allocate mappings.
- */
- rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
- if (!rc) {
- rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
- SECCLASS_CAPABILITY,
- CAP_TO_MASK(CAP_SYS_ADMIN), NULL);
- }
- if (rc)
- free -= free / 32;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- allowed += total_swap_pages;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
+ rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
+ if (rc == 0)
+ cap_sys_admin = avc_has_perm_noaudit(tsec->sid, tsec->sid,
+ SECCLASS_CAPABILITY,
+ CAP_TO_MASK(CAP_SYS_ADMIN),
+ NULL);

- vm_unacct_memory(pages);
+ if (rc == 0)
+ cap_sys_admin = 1;

- return -ENOMEM;
+ return __vm_enough_memory(pages, cap_sys_admin);
}

/* binprm security operations */


2005-01-04 22:19:39

by Stephen Smalley

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

On Tue, 2005-01-04 at 16:48, Serge E. Hallyn wrote:
> The attached patch introduces a __vm_enough_memory function in
> security/security.c which is used by cap_vm_enough_memory,
> dummy_vm_enough_memory, and selinux_vm_enough_memory. This has
> been discussed on the lsm mailing list.

> + * Note that secondary_ops->capable and task_has_perm return 0 if
> + * the capability is granted, but __vm_enough_memory requires 1 if
> + * the capability is granted.

Should be avc_has_perm_noaudit, right?

> + rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
> + if (rc == 0)
> + cap_sys_admin = avc_has_perm_noaudit(tsec->sid, tsec->sid,
> + SECCLASS_CAPABILITY,
> + CAP_TO_MASK(CAP_SYS_ADMIN),
> + NULL);

Shouldn't this be 'rc = avc_has_perm...'? And I'd suggest retaining the
comment from the original about why we don't want to audit here.

> - vm_unacct_memory(pages);
> + if (rc == 0)
> + cap_sys_admin = 1;
>
> - return -ENOMEM;
> + return __vm_enough_memory(pages, cap_sys_admin);
> }

--
Stephen Smalley <[email protected]>
National Security Agency

2005-01-04 22:28:10

by Chris Wright

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

* Stephen Smalley ([email protected]) wrote:
> On Tue, 2005-01-04 at 17:17, Chris Wright wrote:
> > * Serge E. Hallyn ([email protected]) wrote:
> >
> > I'm fine with this with a few nits. Although I don't think it will apply
> > to current bk which has merge error in this area right now. Stephen,
> > are you ok with the way this one generates audit messages?
>
> Looks like the patch (with suggested fixes) will preserve the current
> behavior, i.e. no audit message generation for SELinux from the
> vm_enough_memory hook, while still auditing real uses of CAP_SYS_ADMIN
> elsewhere. That is what we want.

Good, thanks.
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2005-01-04 22:23:11

by Chris Wright

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

* Serge E. Hallyn ([email protected]) wrote:

I'm fine with this with a few nits. Although I don't think it will apply
to current bk which has merge error in this area right now. Stephen,
are you ok with the way this one generates audit messages?

> + return __vm_enough_memory(pages,
> + (cap_capable(current, CAP_SYS_ADMIN) == 0));

A temp variable isn't going to be costly and makes it easier to read.

> + return __vm_enough_memory(pages,
> + (dummy_capable(current, CAP_SYS_ADMIN) == 0));

same here

> + * Note that secondary_ops->capable and task_has_perm return 0 if
> + * the capability is granted, but __vm_enough_memory requires 1 if
> + * the capability is granted.
> */
> static int selinux_vm_enough_memory(long pages)
> {
> - unsigned long free, allowed;
> - int rc;
> + int rc, cap_sys_admin = 0;
<snip>
> + rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
> + if (rc == 0)
> + cap_sys_admin = avc_has_perm_noaudit(tsec->sid, tsec->sid,
> + SECCLASS_CAPABILITY,
> + CAP_TO_MASK(CAP_SYS_ADMIN),
> + NULL);
>
> - vm_unacct_memory(pages);
> + if (rc == 0)
> + cap_sys_admin = 1;

This sure looks wrong. Did you mean rc = avc_has_perm_noaudit()?

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2005-01-04 22:57:31

by Stephen Smalley

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

On Tue, 2005-01-04 at 17:17, Chris Wright wrote:
> * Serge E. Hallyn ([email protected]) wrote:
>
> I'm fine with this with a few nits. Although I don't think it will apply
> to current bk which has merge error in this area right now. Stephen,
> are you ok with the way this one generates audit messages?

Looks like the patch (with suggested fixes) will preserve the current
behavior, i.e. no audit message generation for SELinux from the
vm_enough_memory hook, while still auditing real uses of CAP_SYS_ADMIN
elsewhere. That is what we want.

--
Stephen Smalley <[email protected]>
National Security Agency

2005-01-04 23:03:00

by Nikita Danilov

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

"Serge E. Hallyn" <[email protected]> writes:

> The attached patch introduces a __vm_enough_memory function in
> security/security.c which is used by cap_vm_enough_memory,
> dummy_vm_enough_memory, and selinux_vm_enough_memory. This has
> been discussed on the lsm mailing list.
>
> Are there any objections to or comments on this patch?
>
> thanks,
> -serge
>
> Signed-off-by: Serge Hallyn <[email protected]>
>

[...]

> -
> - return -ENOMEM;
> + return __vm_enough_memory(pages,
> + (cap_capable(current, CAP_SYS_ADMIN) == 0));

I don't think that CAP_SYS_ADMIN is proper capability for this:
CAP_SYS_ADMIN is catch-all that should be used only when no other
capability covers action being performed. In this case CAP_SYS_RESOURCE
seems to be a better fit, after all __vm_enough_memory() controls access
to a resource, plus file systems use CAP_SYS_RESOURCE to protect disk
blocks reserved for "root".

> }

[...]

Nikita.

2005-01-05 01:24:05

by Alan

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

On Maw, 2005-01-04 at 22:59, Nikita Danilov wrote:
> I don't think that CAP_SYS_ADMIN is proper capability for this:
> CAP_SYS_ADMIN is catch-all that should be used only when no other
> capability covers action being performed. In this case CAP_SYS_RESOURCE
> seems to be a better fit, after all __vm_enough_memory() controls access
> to a resource, plus file systems use CAP_SYS_RESOURCE to protect disk
> blocks reserved for "root".

Its a heuristic for "system process" and works rather well.
CAP_SYS_RESOURCE is about ignoring limits, this is about saving the
administrators backside.

2005-01-05 11:26:28

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

On Tue, Jan 04, 2005 at 03:48:33PM -0600, Serge E. Hallyn wrote:
> The attached patch introduces a __vm_enough_memory function in
> security/security.c which is used by cap_vm_enough_memory,
> dummy_vm_enough_memory, and selinux_vm_enough_memory. This has
> been discussed on the lsm mailing list.
>
> Are there any objections to or comments on this patch?

Maybe this function should go into mm/ ?

2005-01-05 13:18:18

by Hugh Dickins

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

On Wed, 5 Jan 2005, Christoph Hellwig wrote:
> On Tue, Jan 04, 2005 at 03:48:33PM -0600, Serge E. Hallyn wrote:
> > The attached patch introduces a __vm_enough_memory function in
> > security/security.c which is used by cap_vm_enough_memory,
> > dummy_vm_enough_memory, and selinux_vm_enough_memory. This has
> > been discussed on the lsm mailing list.
> >
> > Are there any objections to or comments on this patch?
>
> Maybe this function should go into mm/ ?

My thought exactly: yes, please do move it back into mm/,
where it started out before security/ came into the picture.

But where in mm? I suppose either mm/mmap.c where vm_committed_space
is still declared, or (that strange ragbag) mm/swap.c where the
CONFIG_SMP vm_acct_memory is.

Hugh

2005-01-05 16:33:45

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

Attached is a new version of the patch introducing the __vm_enough_memory
helper, which takes into account yesterday's suggestions. The selinux/hooks.c
typo was definately a dangerous bug, and __vm_enough_memory() has been moved
to vm_enough_memory()'s original location in mm/mmap.c.

Thank you all for the comments.

-serge

Signed-off-by: Serge Hallyn <[email protected]>

Index: linux-2.6.10-bk8/include/linux/mm.h
===================================================================
--- linux-2.6.10-bk8.orig/include/linux/mm.h 2005-01-05 08:59:54.000000000 -0600
+++ linux-2.6.10-bk8/include/linux/mm.h 2005-01-05 10:41:13.000000000 -0600
@@ -704,6 +704,7 @@ static inline void vma_nonlinear_insert(
}

/* mmap.c */
+extern int __vm_enough_memory(long pages, int cap_sys_admin);
extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
extern struct vm_area_struct *vma_merge(struct mm_struct *,
Index: linux-2.6.10-bk8/mm/mmap.c
===================================================================
--- linux-2.6.10-bk8.orig/mm/mmap.c 2005-01-05 08:59:55.000000000 -0600
+++ linux-2.6.10-bk8/mm/mmap.c 2005-01-05 10:42:33.000000000 -0600
@@ -61,10 +61,98 @@ int sysctl_overcommit_ratio = 50; /* def
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
atomic_t vm_committed_space = ATOMIC_INIT(0);

+/*
+ * Check that a process has enough memory to allocate a new virtual
+ * mapping. 0 means there is enough memory for the allocation to
+ * succeed and -ENOMEM implies there is not.
+ *
+ * We currently support three overcommit policies, which are set via the
+ * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ *
+ * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
+ * Additional code 2002 Jul 20 by Robert Love.
+ *
+ * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise.
+ *
+ * Note this is a helper function intended to be used by LSMs which
+ * wish to use this logic.
+ */
+int __vm_enough_memory(long pages, int cap_sys_admin)
+{
+ unsigned long free, allowed;
+
+ vm_acct_memory(pages);
+
+ /*
+ * Sometimes we want to use more memory than we have
+ */
+ if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
+ return 0;
+
+ if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
+ unsigned long n;
+
+ free = get_page_cache_size();
+ free += nr_swap_pages;
+
+ /*
+ * Any slabs which are created with the
+ * SLAB_RECLAIM_ACCOUNT flag claim to have contents
+ * which are reclaimable, under pressure. The dentry
+ * cache and most inode caches should fall into this
+ */
+ free += atomic_read(&slab_reclaim_pages);
+
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ free -= free / 32;
+
+ if (free > pages)
+ return 0;
+
+ /*
+ * nr_free_pages() is very expensive on large systems,
+ * only call if we're about to fail.
+ */
+ n = nr_free_pages();
+ if (!cap_sys_admin)
+ n -= n / 32;
+ free += n;
+
+ if (free > pages)
+ return 0;
+ vm_unacct_memory(pages);
+ return -ENOMEM;
+ }
+
+ allowed = (totalram_pages - hugetlb_total_pages())
+ * sysctl_overcommit_ratio / 100;
+ /*
+ * Leave the last 3% for root
+ */
+ if (!cap_sys_admin)
+ allowed -= allowed / 32;
+ allowed += total_swap_pages;
+
+ /* Don't let a single process grow too big:
+ leave 3% of the size of this process for other processes */
+ allowed -= current->mm->total_vm / 32;
+
+ if (atomic_read(&vm_committed_space) < allowed)
+ return 0;
+
+ vm_unacct_memory(pages);
+
+ return -ENOMEM;
+}
+
EXPORT_SYMBOL(sysctl_overcommit_memory);
EXPORT_SYMBOL(sysctl_overcommit_ratio);
EXPORT_SYMBOL(sysctl_max_map_count);
EXPORT_SYMBOL(vm_committed_space);
+EXPORT_SYMBOL(__vm_enough_memory);

/*
* Requires inode->i_mapping->i_mmap_lock
Index: linux-2.6.10-bk8/security/commoncap.c
===================================================================
--- linux-2.6.10-bk8.orig/security/commoncap.c 2005-01-05 08:59:55.000000000 -0600
+++ linux-2.6.10-bk8/security/commoncap.c 2005-01-05 10:43:59.000000000 -0600
@@ -316,86 +316,13 @@ int cap_syslog (int type)
return 0;
}

-/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 0 means there is enough memory for the allocation to
- * succeed and -ENOMEM implies there is not.
- *
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
- *
- * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
- * Additional code 2002 Jul 20 by Robert Love.
- */
int cap_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
+ int cap_sys_admin = 0;

- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- unsigned long n;
-
- free = get_page_cache_size();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for root
- */
- if (!capable(CAP_SYS_ADMIN))
- free -= free / 32;
-
- if (free > pages)
- return 0;
-
- /*
- * nr_free_pages() is very expensive on large systems,
- * only call if we're about to fail.
- */
- n = nr_free_pages();
- if (!capable(CAP_SYS_ADMIN))
- n -= n / 32;
- free += n;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- /*
- * Leave the last 3% for root
- */
- if (!capable(CAP_SYS_ADMIN))
- allowed -= allowed / 32;
- allowed += total_swap_pages;
-
- /* Don't let a single process grow too big:
- leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
-
- vm_unacct_memory(pages);
-
- return -ENOMEM;
+ if (cap_capable(current, CAP_SYS_ADMIN) == 0)
+ cap_sys_admin = 1;
+ return __vm_enough_memory(pages, cap_sys_admin);
}

EXPORT_SYMBOL(cap_capable);
Index: linux-2.6.10-bk8/security/dummy.c
===================================================================
--- linux-2.6.10-bk8.orig/security/dummy.c 2005-01-05 10:39:16.000000000 -0600
+++ linux-2.6.10-bk8/security/dummy.c 2005-01-05 10:51:50.000000000 -0600
@@ -108,69 +108,13 @@ static int dummy_settime(struct timespec
return 0;
}

-/*
- * Check that a process has enough memory to allocate a new virtual
- * mapping. 0 means there is enough memory for the allocation to
- * succeed and -ENOMEM implies there is not.
- *
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
- */
static int dummy_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
+ int cap_sys_admin = 0;

- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- free = get_page_cache_size();
- free += nr_free_pages();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for root
- */
- if (current->euid)
- free -= free / 32;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- allowed += total_swap_pages;
-
- /* Leave the last 3% for root */
- if (current->euid)
- allowed -= allowed / 32;
-
- /* Don't let a single process grow too big:
- leave 3% of the size of this process for other processes */
- allowed -= current->mm->total_vm / 32;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
-
- vm_unacct_memory(pages);
-
- return -ENOMEM;
+ if (dummy_capable(current, CAP_SYS_ADMIN) == 0)
+ cap_sys_admin = 1;
+ return __vm_enough_memory(pages, cap_sys_admin);
}

static int dummy_bprm_alloc_security (struct linux_binprm *bprm)
Index: linux-2.6.10-bk8/security/selinux/hooks.c
===================================================================
--- linux-2.6.10-bk8.orig/security/selinux/hooks.c 2005-01-05 10:39:16.000000000 -0600
+++ linux-2.6.10-bk8/security/selinux/hooks.c 2005-01-05 10:47:04.000000000 -0600
@@ -1515,69 +1515,29 @@ static int selinux_syslog(int type)
* mapping. 0 means there is enough memory for the allocation to
* succeed and -ENOMEM implies there is not.
*
- * We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting
+ * Note that secondary_ops->capable and task_has_perm_noaudit return 0
+ * if the capability is granted, but __vm_enough_memory requires 1 if
+ * the capability is granted.
*
- * Strict overcommit modes added 2002 Feb 26 by Alan Cox.
- * Additional code 2002 Jul 20 by Robert Love.
+ * Do not audit the selinux permission check, as this is applied to all
+ * processes that allocate mappings.
*/
static int selinux_vm_enough_memory(long pages)
{
- unsigned long free, allowed;
- int rc;
+ int rc, cap_sys_admin = 0;
struct task_security_struct *tsec = current->security;

- vm_acct_memory(pages);
-
- /*
- * Sometimes we want to use more memory than we have
- */
- if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS)
- return 0;
-
- if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
- free = get_page_cache_size();
- free += nr_free_pages();
- free += nr_swap_pages;
-
- /*
- * Any slabs which are created with the
- * SLAB_RECLAIM_ACCOUNT flag claim to have contents
- * which are reclaimable, under pressure. The dentry
- * cache and most inode caches should fall into this
- */
- free += atomic_read(&slab_reclaim_pages);
-
- /*
- * Leave the last 3% for privileged processes.
- * Don't audit the check, as it is applied to all processes
- * that allocate mappings.
- */
- rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
- if (!rc) {
- rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
- SECCLASS_CAPABILITY,
- CAP_TO_MASK(CAP_SYS_ADMIN), NULL);
- }
- if (rc)
- free -= free / 32;
-
- if (free > pages)
- return 0;
- vm_unacct_memory(pages);
- return -ENOMEM;
- }
-
- allowed = (totalram_pages - hugetlb_total_pages())
- * sysctl_overcommit_ratio / 100;
- allowed += total_swap_pages;
-
- if (atomic_read(&vm_committed_space) < allowed)
- return 0;
+ rc = secondary_ops->capable(current, CAP_SYS_ADMIN);
+ if (rc == 0)
+ rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
+ SECCLASS_CAPABILITY,
+ CAP_TO_MASK(CAP_SYS_ADMIN),
+ NULL);

- vm_unacct_memory(pages);
+ if (rc == 0)
+ cap_sys_admin = 1;

- return -ENOMEM;
+ return __vm_enough_memory(pages, cap_sys_admin);
}

/* binprm security operations */

2005-01-05 19:26:00

by Chris Wright

[permalink] [raw]
Subject: Re: [RFC] [PATCH] merge *_vm_enough_memory()s into a common helper

* Serge E. Hallyn ([email protected]) wrote:
> Attached is a new version of the patch introducing the __vm_enough_memory
> helper, which takes into account yesterday's suggestions. The selinux/hooks.c
> typo was definately a dangerous bug, and __vm_enough_memory() has been moved
> to vm_enough_memory()'s original location in mm/mmap.c.

Looks alright.

> Signed-off-by: Serge Hallyn <[email protected]>

Signed-off-by: Chris Wright <[email protected]>

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net