2004-10-06 19:14:21

by Serge E. Hallyn

[permalink] [raw]
Subject: (patch 1/3) lsm: add control over /proc/<pid> visibility


Attached is a patch which introduces a new LSM hook,
security_task_lookup. This hook allows an LSM to mediate visibility of
/proc/<pid> on a per-pid level. The bsdjail lsm which will be sent
next is a user of this hook.

Please apply.

Signed-off-by: Serge E. Hallyn <[email protected]>

diff -Nrup linux-2.6.9-rc3-bk6/fs/proc/base.c
linux-2.6.9-rc3-bk6-jail/fs/proc/base.c
--- linux-2.6.9-rc3-bk6/fs/proc/base.c 2004-10-06 10:07:55.000000000
-0500
+++ linux-2.6.9-rc3-bk6-jail/fs/proc/base.c 2004-10-06
10:51:04.000000000 -0500
@@ -1683,6 +1683,8 @@ static int get_tgid_list(int index, unsi
int tgid = p->pid;
if (!pid_alive(p))
continue;
+ if (security_task_lookup(p))
+ continue;
if (--index >= 0)
continue;
tgids[nr_tgids] = tgid;
diff -Nrup linux-2.6.9-rc3-bk6/include/linux/security.h
linux-2.6.9-rc3-bk6-jail/include/linux/security.h
--- linux-2.6.9-rc3-bk6/include/linux/security.h 2004-08-14
00:37:30.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/include/linux/security.h 2004-10-06
10:51:04.000000000 -0500
@@ -627,6 +627,11 @@ struct swap_info_struct;
* Set the security attributes in @p->security for a kernel thread
that
* is being reparented to the init task.
* @p contains the task_struct for the kernel thread.
+ * @task_lookup:
+ * Check permission to see the /proc/<pid> entry for process @p.
+ * @p contains the task_struct for task <pid> which is being looked
+ * up under /proc
+ * return 0 if permission is granted.
* @task_to_inode:
* Set the security attributes for an inode based on an associated
task's
* security attributes, e.g. for /proc/pid inodes.
@@ -1152,6 +1157,7 @@ struct security_operations {
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
void (*task_reparent_to_init) (struct task_struct * p);
+ int (*task_lookup)(struct task_struct *p);
void (*task_to_inode)(struct task_struct *p, struct inode *inode);

int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag);
@@ -1751,6 +1757,11 @@ static inline void security_task_reparen
security_ops->task_reparent_to_init (p);
}

+static inline int security_task_lookup(struct task_struct *p)
+{
+ return security_ops->task_lookup(p);
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct
inode *inode)
{
security_ops->task_to_inode(p, inode);
@@ -2386,6 +2397,11 @@ static inline void security_task_reparen
cap_task_reparent_to_init (p);
}

+static inline int security_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct
inode *inode)
{ }

diff -Nrup linux-2.6.9-rc3-bk6/security/dummy.c
linux-2.6.9-rc3-bk6-jail/security/dummy.c
--- linux-2.6.9-rc3-bk6/security/dummy.c 2004-10-06 10:11:29.000000000
-0500
+++ linux-2.6.9-rc3-bk6-jail/security/dummy.c 2004-10-06
10:51:04.000000000 -0500
@@ -616,6 +616,11 @@ static void dummy_task_reparent_to_init
return;
}

+static int dummy_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static void dummy_task_to_inode(struct task_struct *p, struct inode
*inode)
{ }

@@ -978,6 +983,7 @@ void security_fixup_ops (struct security
set_to_dummy_if_null(ops, task_kill);
set_to_dummy_if_null(ops, task_prctl);
set_to_dummy_if_null(ops, task_reparent_to_init);
+ set_to_dummy_if_null(ops, task_lookup);
set_to_dummy_if_null(ops, task_to_inode);
set_to_dummy_if_null(ops, ipc_permission);
set_to_dummy_if_null(ops, msg_msg_alloc_security);



2004-10-06 19:18:28

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module


Attached is a patch against the security Kconfig and Makefile to support
bsdjail, as well as the bsdjail.c file itself. bsdjail offers
functionality similar to (but more limited than) the vserver patch.

A process in a jail lives under a chroot which is not vulnerable to the
well-known chdir(...)(etc)chroot(.) attack against normal chroots, and
may be locked to one ip address. For additional features, please see
Documentation/bsdjail.txt, which is included in the next patch.

Changelog:
Sep 10, 2004: original version
Sep 12, 2004: add ipv6 support
Sep 13, 2004: support simultaneous ipv4+ipv6
Oct 6, 2004: move kref release function to kref_put from kref_init

Please apply.

Signed-off-by: Serge E. Hallyn <[email protected]>

diff -Nrup linux-2.6.9-rc3-bk6/security/bsdjail.c linux-2.6.9-rc3-bk6-jail/security/bsdjail.c
--- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-06 12:20:54.000000000 -0500
@@ -0,0 +1,1525 @@
+/*
+ * File: linux/security/bsdjail.c
+ * Author: Serge Hallyn ([email protected])
+ * Date: Sep 12, 2004
+ *
+ * (See Documentation/bsdjail.txt for more information)
+ *
+ * Copyright (C) 2004 International Business Machines <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/pagemap.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <linux/mount.h>
+#include <asm/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/seq_file.h>
+#include <linux/un.h>
+#include <linux/smp_lock.h>
+#include <linux/kref.h>
+
+static int jail_debug = 0;
+MODULE_PARM(jail_debug, "i");
+MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n");
+
+#define DBG 0
+#define WARN 1
+#define bsdj_debug(how, fmt, arg... ) \
+ do { \
+ if ( how || jail_debug ) \
+ printk(KERN_NOTICE "%s: %s: " fmt, \
+ MY_NAME, __FUNCTION__, \
+ ## arg ); \
+ } while ( 0 )
+
+#define MY_NAME "bsdjail"
+
+/* flag to keep track of how we were registered */
+static int secondary = 0;
+
+/*
+ * The task structure holding jail information.
+ * Taskp->security points to one of these (or is null).
+ * There is exactly one jail_struct for each jail. If >1 process
+ * are in the same jail, they share the same jail_struct.
+ */
+struct jail_struct {
+ struct kref kref;
+
+ /* these are set on writes to /proc/<pid>/attr/exec */
+ char *root_pathname; /* char * containing path to use as jail / */
+ char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
+ char *ip6_addr_name; /* char * containing ip6 addr to use for jail */
+
+ /* these are set when a jail becomes active */
+ __u32 addr4; /* internal form of ip4_addr_name */
+ struct in6_addr addr6; /* internal form of ip6_addr_name */
+
+ struct dentry *dentry; /* dentry of fs root */
+ struct vfsmount *mnt; /* vfsmnt of fs root */
+
+ /* Resource limits. 0 = no limit */
+ int max_nrtask; /* maximum number of tasks within this jail. */
+ int cur_nrtask; /* current number of tasks within this jail. */
+ long maxtimeslice; /* max timeslice in ms for procs in this jail */
+ long nice; /* nice level for processes in this jail */
+ long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */
+/* values for the jail_flags field */
+#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */
+#define GOT_IPV4 2
+#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */
+ char jail_flags;
+};
+
+#define in_use(x) (x->jail_flags & IN_USE)
+#define set_in_use(x) (x->jail_flags |= IN_USE)
+
+#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6))
+#define got_ipv4(x) (x->jail_flags & (GOT_IPV4))
+#define got_ipv6(x) (x->jail_flags & (GOT_IPV6))
+#define set_ipv4(x) (x->jail_flags |= GOT_IPV4)
+#define set_ipv6(x) (x->jail_flags |= GOT_IPV6)
+#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4)
+#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6)
+
+/*
+ * structs, defines, and functions to cope with stacking
+ */
+
+#define get_task_security(task) (task->security)
+#define get_inode_security(inode) (inode->i_security)
+#define get_sock_security(sock) (sock->sk_security)
+#define get_file_security(file) (file->f_security)
+#define get_ipc_security(ipc) (ipc->security)
+
+#define jail_of(proc) (get_task_security(proc))
+
+/*
+ * disable_jail: A jail which was in use, but has no references
+ * left, is disabled - we free up the mountpoint and dentry, and
+ * give up our reference on the module.
+ *
+ * don't need to put namespace, it will be done automatically
+ * when the last process in jail is put.
+ * DO need to put the dentry and vfsmount
+ */
+static void
+disable_jail(struct jail_struct *tsec)
+{
+ dput(tsec->dentry);
+ mntput(tsec->mnt);
+ module_put(THIS_MODULE);
+}
+
+
+static void free_jail(struct jail_struct *tsec)
+{
+ if (!tsec)
+ return;
+
+ if (tsec->root_pathname)
+ kfree(tsec->root_pathname);
+ if (tsec->ip4_addr_name)
+ kfree(tsec->ip4_addr_name);
+ if (tsec->ip6_addr_name)
+ kfree(tsec->ip6_addr_name);
+ kfree(tsec);
+}
+
+/* release_jail:
+ * Callback for kref_put to use for releasing a jail when its
+ * last user exits.
+ */
+static void release_jail(struct kref *kref)
+{
+ struct jail_struct *tsec;
+
+ tsec = container_of(kref,struct jail_struct,kref);
+ disable_jail(tsec);
+ free_jail(tsec);
+}
+
+#define set_task_security(task,data) task->security = data
+#define set_inode_security(inode,data) inode->i_security = data
+#define set_sock_security(sock,data) sock->sk_security = data
+#define set_file_security(file,data) file->f_security = data
+#define set_ipc_security(ipc,data) ipc.security = data
+
+/*
+ * jail_task_free_security: this is the callback hooked into LSM.
+ * If there was no task->security field for bsdjail, do nothing.
+ * If there was, but it was never put into use, free the jail.
+ * If there was, and the jail is in use, then decrement the usage
+ * count, and disable and free the jail if the usage count hits 0.
+ */
+static void jail_task_free_security(struct task_struct *task)
+{
+ struct jail_struct *tsec;
+
+ tsec = get_task_security(task);
+
+ if (!tsec)
+ return;
+
+ if (!in_use(tsec)) {
+ /*
+ * someone did 'echo -n x > /proc/<pid>/attr/exec' but
+ * then forked before execing. Nuke the old info.
+ */
+ free_jail(tsec);
+ set_task_security(task,NULL);
+ return;
+ }
+ tsec->cur_nrtask--;
+ /* If this was the last process in the jail, delete the jail */
+ kref_put(&tsec->kref, release_jail);
+}
+
+static struct jail_struct *
+alloc_task_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec;
+ tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL);
+ if (!tsec)
+ return ERR_PTR(-ENOMEM);
+ memset(tsec, 0, sizeof(struct jail_struct));
+ set_task_security(tsk, tsec);
+ return tsec;
+}
+
+static inline int
+in_jail(struct task_struct *t)
+{
+ struct jail_struct *tsec = jail_of(t);
+
+ if (tsec && in_use(tsec))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * If a network address was passed into /proc/<pid>/attr/exec,
+ * then process in its jail will only be allowed to bind/listen
+ * to that address.
+ */
+static void
+setup_netaddress(struct jail_struct *tsec)
+{
+ unsigned int a,b,c,d, i;
+ unsigned int x[8];
+
+ unset_got_ipv4(tsec);
+ tsec->addr4 = 0;
+ unset_got_ipv6(tsec);
+ ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0);
+
+ if (tsec->ip4_addr_name) {
+ if (sscanf(tsec->ip4_addr_name,"%u.%u.%u.%u",&a,&b,&c,&d)!=4)
+ return;
+ if (a>255 || b>255 || c>255 || d>255)
+ return;
+ tsec->addr4 = htonl((a<<24)|(b<<16)|(c<<8)|d);
+ set_ipv4(tsec);
+ bsdj_debug(DBG, "Network (ipv4) set up (%s)\n",
+ tsec->ip4_addr_name);
+ }
+
+ if (tsec->ip6_addr_name) {
+ if (sscanf(tsec->ip6_addr_name,"%x:%x:%x:%x:%x:%x:%x:%x",
+ &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6],
+ &x[7]) != 8) {
+ printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__,
+ tsec->ip6_addr_name);
+ return;
+ }
+ for (i=0; i<8; i++) {
+ if (x[i] > 65535) {
+ printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i);
+ return;
+ }
+ tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]);
+ }
+ set_ipv6(tsec);
+ bsdj_debug(DBG, "Network (ipv6) set up (%s)\n",
+ tsec->ip6_addr_name);
+ }
+}
+
+/*
+ * enable_jail:
+ * Called when a process is placed into a new jail to handle the
+ * actual creation of the jail.
+ * Creates namespace
+ * Sets process root+pwd
+ * Stores the requested ip address
+ * Registers a unique pseudo-proc filesystem for this jail
+ */
+static int enable_jail(struct task_struct *tsk)
+{
+ struct nameidata nd;
+ struct jail_struct *tsec;
+ int retval = -EFAULT;
+
+ tsec = jail_of(tsk);
+ if (!tsec || !tsec->root_pathname)
+ goto out;
+
+ /*
+ * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside
+ * the jail don't affect the jail. But it's not necessary, and
+ * requires exporting copy_namespace from fs/namespace.c
+ *
+ * Actually, it would also be useful for truly hiding
+ * information about mounts which do not exist in this jail.
+#define USE_JAIL_NAMESPACE
+ */
+#ifdef USE_JAIL_NAMESPACE
+ bsdj_debug(DBG, "bsdjail: copying namespace.\n");
+ retval = -EPERM;
+ if (copy_namespace(CLONE_NEWNS, tsk))
+ goto out;
+ bsdj_debug(DBG, "bsdjail: copied namespace.\n");
+#endif
+
+ /* find our new root directory */
+ bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname);
+ retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+ if (retval)
+ goto out;
+
+ bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname);
+
+ /* and set the fsroot to it */
+ set_fs_root(tsk->fs, nd.mnt, nd.dentry);
+ set_fs_pwd(tsk->fs, nd.mnt, nd.dentry);
+
+ bsdj_debug(DBG, "bsdjail: root has been set. Have fun.\n");
+
+ /* set up networking */
+ if (tsec->ip4_addr_name || tsec->ip6_addr_name)
+ setup_netaddress(tsec);
+
+ tsec->cur_nrtask = 1;
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+ if (tsec->max_data) {
+ current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data;
+ current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock;
+ current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock;
+ }
+ if (tsec->maxtimeslice) {
+ current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ }
+ /* success and end */
+ tsec->mnt = mntget(nd.mnt);
+ tsec->dentry = dget(nd.dentry);
+ path_release(&nd);
+ kref_init(&tsec->kref);
+ set_in_use(tsec);
+
+ /* won't let ourselves be removed until this jail goes away */
+ try_module_get(THIS_MODULE);
+
+ return 0;
+
+out:
+ return retval;
+}
+
+/*
+ * LSM /proc/<pid>/attr hooks.
+ * You may write into /proc/<pid>/attr/exec:
+ * root /some/path
+ * ip 2.2.2.2
+ * These values will be used on the next exec() to set up your jail
+ * (assuming you're not already in a jail)
+ */
+static int
+jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
+{
+ struct jail_struct *tsec = jail_of(current);
+ long val;
+ int start, len;
+
+ if (tsec && in_use(tsec))
+ return -EINVAL; /* let them guess why */
+
+ if (p != current || strcmp(name, "exec"))
+ return -EPERM;
+
+ if (strncmp(value, "root ", 5)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->root_pathname)
+ kfree(tsec->root_pathname);
+ start = 5;
+ len = size-start;
+ tsec->root_pathname = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->root_pathname)
+ return -ENOMEM;
+ strlcpy(tsec->root_pathname, value+start, len+1);
+ } else if (strncmp(value, "ip ", 3)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->ip4_addr_name)
+ kfree(tsec->ip4_addr_name);
+ start = 3;
+ len = size-start;
+ tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->ip4_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip4_addr_name, value+start, len+1);
+ } else if (strncmp(value, "ip6 ", 4) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->ip6_addr_name)
+ kfree(tsec->ip6_addr_name);
+ start = 4;
+ len = size-start;
+ tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->ip6_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip6_addr_name, value+start, len+1);
+
+ /* the next two are equivalent */
+ } else if (strncmp(value, "slice ", 6)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+6, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "timeslice ", 10)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+10, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "nrtask ", 7)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = (int) simple_strtol(value+7, NULL, 0);
+ if (val < 1)
+ return -EINVAL;
+ tsec->max_nrtask = val;
+ } else if (strncmp(value, "memlock ", 8)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+8, NULL, 0);
+ tsec->max_memlock = val;
+ } else if (strncmp(value, "data ", 5)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->max_data = val;
+ } else if (strncmp(value, "nice ", 5)==0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->nice = val;
+ } else
+ return -EINVAL;
+
+ return size;
+}
+
+static int print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt)
+{
+ int len = 0;
+
+ if (j->ip4_addr_name)
+ len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name);
+ if (j->ip6_addr_name)
+ len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name);
+
+ return snprintf(buf, maxcnt, "No network information\n");
+}
+
+/*
+ * LSM /proc/<pid>/attr read hook.
+ *
+ * /proc/$$/attr/current output:
+ * If the reading process, say process 1001, is in a jail, then
+ * cat /proc/999/attr/current
+ * will print networking information.
+ * If the reading process, say process 1001, is not in a jail, then
+ * cat /proc/999/attr/current
+ * will return
+ * root: (root of jail)
+ * ip: (ip address of jail)
+ * if 999 is in a jail, or
+ * -EINVAL
+ * if 999 is not in a jail.
+ *
+ * /proc/$$/attr/exec output:
+ * A process in a jail gets -EINVAL for /proc/$$/attr/exec.
+ * A process not in a jail gets hints on starting a jail.
+ */
+static int
+jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+{
+ struct jail_struct *tsec;
+ int err = 0;
+
+ if (in_jail(current)) {
+ if (strcmp(name, "current")==0) {
+ /* provide network info */
+ err = print_jail_net_info(jail_of(current), value,
+ size);
+ return err;
+ }
+ return -EINVAL; /* let them guess why */
+ }
+
+ if (strcmp(name, "exec") == 0) {
+ /* Print usage some help */
+ err = snprintf(value, size,
+ "Valid keywords:\n"
+ "root <pathname>\n"
+ "ip <ip4-addr>\n"
+ "ip6 <ip6-addr>\n"
+ "nrtask <max number of tasks in this jail>\n"
+ "nice <nice level for processes in this jail>\n"
+ "slice <max timeslice per process in msecs>\n"
+ "data <max data size per process in bytes>\n"
+ "memlock <max lockable memory per process in bytes>\n");
+ return err;
+ }
+
+ if (strcmp(name, "current"))
+ return -EPERM;
+
+ tsec = jail_of(p);
+ if (!tsec || !in_use(tsec)) {
+ err = snprintf(value, size, "Not Jailed\n");
+ } else {
+ err = snprintf(value, size,
+ "Root: %s\nIPv4: %s\nIPv6: %s\n"
+ "max_nrtask %d current nrtask %d max_timeslice %lu "
+ "nice %lu\n"
+ "max_memlock %lu max_data %lu\n",
+ tsec->root_pathname,
+ tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)",
+ tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)",
+ tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice,
+ tsec->nice, tsec->max_data, tsec->max_memlock);
+ }
+
+ return err;
+}
+
+/*
+ * Forbid a process in a jail from sending a signal to a process in another
+ * (or no) jail through file sigio.
+ *
+ * We consider the process which set the fowner to be the one sending the
+ * signal, rather than the one writing to the file. Therefore we store the
+ * jail of a process during jail_file_set_fowner, then check that against
+ * the jail of the process receiving the signal.
+ */
+static int
+jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown,
+ int fd, int reason)
+{
+ struct file *file;
+ struct jail_struct *tsec, *fsec;
+
+ if (!in_jail(current))
+ return 0;
+
+ file = (struct file *)((long)fown - offsetof(struct file,f_owner));
+ tsec = jail_of(tsk);
+ fsec = get_file_security(file);
+
+ if (fsec != tsec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_file_set_fowner(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = jail_of(current);
+ set_file_security(file, tsec);
+ if (tsec)
+ kref_get(&tsec->kref);
+
+ return 0;
+}
+
+static void free_ipc_security(struct kern_ipc_perm *ipc)
+{
+ struct jail_struct *tsec;
+
+ tsec = get_ipc_security(ipc);
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ set_ipc_security((*ipc), NULL);
+}
+
+static void free_file_security(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = get_file_security(file);
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ set_file_security(file, NULL);
+}
+
+static void free_inode_security(struct inode *inode)
+{
+ struct jail_struct *tsec;
+
+ tsec = get_inode_security(inode);
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ set_inode_security(inode, NULL);
+}
+
+/*
+ * LSM ptrace hook:
+ * process in jail may not ptrace process not in the same jail
+ */
+static int
+jail_ptrace (struct task_struct *tracer, struct task_struct *tracee)
+{
+ struct jail_struct *tsec = jail_of(tracer);
+
+ if (tsec && in_use(tsec)) {
+ if (tsec == jail_of(tracee))
+ return 0;
+ return -EPERM;
+ }
+ return 0;
+}
+
+/*
+ * process in jail may only use one (aliased) ip address. If they try to
+ * attach to 127.0.0.1, that is remapped to their own address. If some
+ * other address (and not their own), deny permission
+ */
+static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen);
+
+#define loopbackaddr htonl((127 << 24) | 1)
+
+static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen, struct jail_struct *tsec)
+{
+ struct sockaddr_in *inaddr;
+ __u32 sin_addr, jailaddr;
+
+ if (!got_ipv4(tsec))
+ return -EPERM;
+
+ inaddr = (struct sockaddr_in *)address;
+ sin_addr = inaddr->sin_addr.s_addr;
+ jailaddr = tsec->addr4;
+
+ if (sin_addr == jailaddr)
+ return 0;
+
+ if (sin_addr == loopbackaddr || !sin_addr) {
+ bsdj_debug(DBG, "Got a loopback or 0 address\n");
+ sin_addr = jailaddr;
+ bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n",
+ NIPQUAD(sin_addr));
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static inline int
+jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen,
+ struct jail_struct *tsec)
+{
+ struct sockaddr_in6 *inaddr6;
+ struct in6_addr *sin6_addr, *jailaddr;
+
+ if (!got_ipv6(tsec))
+ return -EPERM;
+
+ inaddr6 = (struct sockaddr_in6 *)address;
+ sin6_addr = &inaddr6->sin6_addr;
+ jailaddr = &tsec->addr6;
+
+ if (ipv6_addr_cmp(jailaddr, sin6_addr)==0)
+ return 0;
+
+ if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback)==0) {
+ ipv6_addr_copy(sin6_addr, jailaddr);
+ return 0;
+ }
+
+ printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__);
+ printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ "j %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ __FUNCTION__,
+ NIP6(*sin6_addr),
+ NIP6(*jailaddr));
+
+ return -EPERM;
+}
+
+static int
+jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ if (sock->sk->sk_family == AF_UNIX)
+ return jail_socket_unix_bind(sock, address, addrlen);
+
+ if (!got_network(tsec))
+ /* If we want to be strict, we could just
+ * deny net access when lacking a pseudo ip.
+ * For now we just allow it. */
+ return 0;
+
+ switch(address->sa_family) {
+ case AF_INET:
+ return jail_inet4_bind(sock, address, addrlen, tsec);
+
+ case AF_INET6:
+ return jail_inet6_bind(sock, address, addrlen, tsec);
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * If locked in an ipv6 jail, don't let them use ipv4, and vice versa
+ */
+static int
+jail_socket_create(int family, int type, int protocol, int kern)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec) || kern || !got_network(tsec))
+ return 0;
+
+ switch(family) {
+ case AF_INET:
+ if (got_ipv4(tsec))
+ return 0;
+ return -EPERM;
+ case AF_INET6:
+ if (got_ipv6(tsec))
+ return 0;
+ return -EPERM;
+ default:
+ return 0;
+ };
+
+ return 0;
+}
+
+static void
+jail_socket_post_create(struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec) || kern || !got_network(tsec))
+ return;
+
+ switch(family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ inet->saddr = tsec->addr4;
+ break;
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ ipv6_addr_copy(&inet6->saddr, &tsec->addr6);
+ break;
+ default:
+ break;
+ };
+
+ return;
+}
+
+static int
+jail_socket_listen(struct socket *sock, int backlog)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec) || !got_network(tsec))
+ return 0;
+
+ switch (sock->sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ if (inet->saddr == tsec->addr4)
+ return 0;
+ return -EPERM;
+
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6)==0)
+ return 0;
+ return -EPERM;
+
+ default:
+ return 0;
+
+ }
+}
+
+static void free_sock_security(struct sock *sk)
+{
+ struct jail_struct *tsec;
+
+ tsec = get_sock_security(sk);
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ set_sock_security(sk, NULL);
+}
+
+/*
+ * The next three (socket) hooks prevent a process in a jail from sending
+ * data to a abstract unix domain socket which was bound outside the jail.
+ */
+static int
+jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_un *sunaddr;
+ struct jail_struct *tsec;
+
+ if (sock->sk->sk_family != AF_UNIX)
+ return 0;
+
+ sunaddr = (struct sockaddr_un *)address;
+ if (sunaddr->sun_path[0] != 0)
+ return 0;
+
+ tsec = jail_of(current);
+ set_sock_security(sock->sk, tsec);
+ if (tsec)
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+/*
+ * Note - we deny sends both from unjailed to jailed, and from jailed
+ * to unjailed. As well as, of course between different jails.
+ */
+static int
+jail_socket_unix_may_send(struct socket *sock, struct socket *other)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = jail_of(current); /* jail of sending process */
+ ssec = get_sock_security(other->sk); /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_socket_unix_stream_connect(struct socket *sock,
+ struct socket *other, struct sock *newsk)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = jail_of(current); /* jail of sending process */
+ ssec = get_sock_security(other->sk); /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_mount(char * dev_name, struct nameidata *nd, char * type,
+ unsigned long flags, void * data)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_umount(struct vfsmount *mnt, int flags)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * process in jail may not:
+ * use nice
+ * change network config
+ * load/unload modules
+ */
+static int
+jail_capable (struct task_struct *tsk, int cap)
+{
+ if (in_jail(tsk)) {
+ if (cap == CAP_SYS_NICE)
+ return -EPERM;
+ if (cap == CAP_NET_ADMIN)
+ return -EPERM;
+ if (cap == CAP_SYS_MODULE)
+ return -EPERM;
+ if (cap == CAP_SYS_RAWIO)
+ return -EPERM;
+ }
+
+ if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0)
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * jail_security_task_create:
+ *
+ * If the current process is ina a jail, and that jail is about to exceed a
+ * maximum number of processes, then refuse to fork. If the maximum number
+ * of jails is listed as 0, then there is no limit for this jail, and we allow
+ * all forks.
+ */
+static inline int
+jail_security_task_create (unsigned long clone_flags)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask)
+ return -EPERM;
+ return 0;
+}
+
+/*
+ * The child of a process in a jail belongs in the same jail
+ */
+static int
+jail_task_alloc_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ set_task_security(tsk, tsec);
+ kref_get(&tsec->kref);
+ tsec->cur_nrtask++;
+ if (tsec->maxtimeslice) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ }
+ if (tsec->max_data) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_data;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock;
+ }
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+
+ return 0;
+}
+
+static int
+jail_bprm_alloc_security(struct linux_binprm *bprm)
+{
+ struct jail_struct *tsec;
+ int ret;
+
+ tsec = jail_of(current);
+ if (!tsec)
+ return 0;
+
+ if (in_use(tsec))
+ return 0;
+
+ if (tsec->root_pathname) {
+ ret = enable_jail(current);
+ if (ret) {
+ /* if we failed, nix out the root/ip requests */
+ jail_task_free_security(current);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Process in jail may not create devices
+ * Thanks to Brad Spender for pointing out fifos should be allowed.
+ */
+/* TODO: We may want to allow /dev/log, at least... */
+static int
+jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ if (!in_jail(current))
+ return 0;
+
+ if (S_ISFIFO(mode))
+ return 0;
+
+ return -EPERM;
+}
+
+/* yanked from fs/proc/base.c */
+static unsigned name_to_int(struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ while (len-- > 0) {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ }
+ return n;
+out:
+ return ~0U;
+}
+
+/*
+ * jail_proc_inode_permission:
+ * called only when current is in a jail, and is trying to reach
+ * /proc/<pid>. We check whether <pid> is in the same jail as
+ * current. If not, permission is denied.
+ *
+ * NOTE: On the one hand, the task_to_inode(inode)->i_security
+ * approach seems cleaner, but on the other, this prevents us
+ * from unloading bsdjail for awhile...
+ */
+static int
+jail_proc_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = jail_of(current);
+ struct dentry *dentry = nd->dentry;
+ unsigned pid;
+
+ pid = name_to_int(dentry);
+ if (pid == ~0U) {
+ struct qstr *dname = &dentry->d_name;
+ if (strcmp(dname->name, "scsi")==0 ||
+ strcmp(dname->name, "sys")==0 ||
+ strcmp(dname->name, "ide")==0)
+ return -EPERM;
+ return 0;
+ }
+
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (get_inode_security(inode) != tsec)
+ return -ENOENT;
+
+ return 0;
+}
+
+/*
+ * Here is our attempt to prevent chroot escapes.
+ */
+static int
+is_jailroot_parent(struct dentry *candidate, struct dentry *root,
+ struct vfsmount *rootmnt)
+{
+ if (candidate == root)
+ return 0;
+
+ /* simple case: fs->root/.. == candidate */
+ if (root->d_parent == candidate)
+ return 1;
+
+ /*
+ * now more complicated: if fs->root is a mounted directory,
+ * then chdir(..) out of fs->root, at follow_dotdot, will follow
+ * the fs->root mount point. So we must check the parent dir of
+ * the fs->root mount point.
+ */
+ if (rootmnt->mnt_root == root && rootmnt->mnt_mountpoint!=root) {
+ root = rootmnt->mnt_mountpoint;
+ rootmnt = rootmnt->mnt_parent;
+ return is_jailroot_parent(candidate, root, rootmnt);
+ }
+
+ return 0;
+}
+
+/*
+ * A process in a jail may not see that /proc/<pid> exists for
+ * process not in its jail
+ * Unfortunately we can't pretend that pid for the starting process
+ * is 1, as vserver does.
+ */
+static int jail_task_lookup(struct task_struct *p)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec)
+ return 0;
+ if (tsec == jail_of(p))
+ return 0;
+ return -EPERM;
+}
+/*
+ * security_task_to_inode:
+ * Set inode->security = task's jail.
+ */
+static void jail_task_to_inode(struct task_struct *p, struct inode *inode)
+{
+ struct jail_struct *tsec = jail_of(p);
+
+ if (!tsec || !in_use(tsec))
+ return;
+ if (get_inode_security(inode))
+ return;
+ kref_get(&tsec->kref);
+ set_inode_security(inode, tsec);
+}
+
+/*
+ * inode_permission:
+ * If we are trying to look into certain /proc files from in a jail, we
+ * may deny permission.
+ * If we are trying to cd(..), but the cwd is the root of our jail, then
+ * permission is denied.
+ */
+static int
+jail_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ if (!nd)
+ return 0;
+
+ if (nd->dentry &&
+ strcmp(nd->dentry->d_sb->s_type->name, "proc")==0) {
+ return jail_proc_inode_permission(inode, mask, nd);
+
+ }
+
+ if (!(mask&MAY_EXEC))
+ return 0;
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return 0;
+
+ if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) {
+ bsdj_debug(WARN,"Attempt to chdir(..) out of jail!\n"
+ "(%s is a subdir of %s)\n",
+ tsec->dentry->d_name.name,
+ nd->dentry->d_name.name);
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * A function which returns -ENOENT if dentry is the dentry for
+ * a /proc/<pid> directory. It returns 0 otherwise.
+ */
+static inline int
+generic_procpid_check(struct dentry *dentry)
+{
+ struct jail_struct *jail = jail_of(current);
+ unsigned pid = name_to_int(dentry);
+
+ if (!jail || !in_use(jail))
+ return 0;
+ if (pid == ~0U)
+ return 0;
+ if (strcmp(dentry->d_sb->s_type->name, "proc")!=0)
+ return 0;
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (get_inode_security(dentry->d_inode) != jail)
+ return -ENOENT;
+ return 0;
+}
+
+/*
+ * We want getattr to fail on /proc/<pid> to prevent leakage through, for
+ * instance, ls -d.
+ */
+static int
+jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* This probably is not necessary - /proc does not support xattrs? */
+static int
+jail_inode_getxattr(struct dentry *dentry, char *name)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* process in jail may not send signal to process not in the same jail */
+static int
+jail_task_kill(struct task_struct *p, struct siginfo *info, int sig)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ if (tsec == jail_of(p))
+ return 0;
+
+ if (sig==SIGCHLD)
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit jailed process' abilities to muck with resource
+ * limits
+ */
+static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+static int jail_task_setscheduler (struct task_struct *p, int policy,
+ struct sched_param *lp)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit IPC access.
+ */
+
+static inline int
+basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
+{
+ struct jail_struct *tsec = jail_of(target);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+
+ if (get_ipc_security(p) != tsec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
+{
+ return basic_ipc_security_check(ipcp, current);
+}
+
+static int
+jail_shm_alloc_security (struct shmid_kernel *shp)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+ set_ipc_security(shp->shm_perm, tsec);
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_shm_free_security (struct shmid_kernel *shp)
+{
+ free_ipc_security(&shp->shm_perm);
+}
+
+static int
+jail_shm_associate (struct shmid_kernel *shp, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SHM_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_msg_queue_alloc(struct msg_queue *msq)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+ set_ipc_security(msq->q_perm, tsec);
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_msg_queue_free(struct msg_queue *msq)
+{
+ free_ipc_security(&msq->q_perm);
+}
+
+static int jail_msg_queue_associate(struct msg_queue *msq, int flag)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == MSG_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+ struct task_struct *target, long type, int mode)
+
+{
+ return basic_ipc_security_check(&msq->q_perm, target);
+}
+
+static int
+jail_sem_alloc_security(struct sem_array *sma)
+{
+ struct jail_struct *tsec = jail_of(current);
+
+ if (!tsec || !in_use(tsec))
+ return 0;
+ set_ipc_security(sma->sem_perm, tsec);
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_sem_free_security(struct sem_array *sma)
+{
+ free_ipc_security(&sma->sem_perm);
+}
+
+static int
+jail_sem_associate(struct sem_array *sma, int semflg)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semctl(struct sem_array *sma, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SEM_INFO)
+ return 0;
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops,
+ int alter)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static struct security_operations bsdjail_security_ops = {
+ .ptrace = jail_ptrace,
+ .capable = jail_capable,
+
+ .task_kill = jail_task_kill,
+ .task_alloc_security = jail_task_alloc_security,
+ .task_free_security = jail_task_free_security,
+ .bprm_alloc_security = jail_bprm_alloc_security,
+ .task_create = jail_security_task_create,
+ .task_to_inode = jail_task_to_inode,
+ .task_lookup = jail_task_lookup,
+
+ .task_setrlimit = jail_task_setrlimit,
+ .task_setscheduler = jail_task_setscheduler,
+
+ .setprocattr = jail_setprocattr,
+ .getprocattr = jail_getprocattr,
+
+ .file_set_fowner = jail_file_set_fowner,
+ .file_send_sigiotask = jail_file_send_sigiotask,
+ .file_free_security = free_file_security,
+
+ .socket_bind = jail_socket_bind,
+ .socket_listen = jail_socket_listen,
+ .socket_create = jail_socket_create,
+ .socket_post_create = jail_socket_post_create,
+ .unix_stream_connect = jail_socket_unix_stream_connect,
+ .unix_may_send = jail_socket_unix_may_send,
+ .sk_free_security = free_sock_security,
+
+ .inode_mknod = jail_inode_mknod,
+ .inode_permission = jail_inode_permission,
+ .inode_free_security = free_inode_security,
+ .inode_getattr = jail_inode_getattr,
+ .inode_getxattr = jail_inode_getxattr,
+ .sb_mount = jail_mount,
+ .sb_umount = jail_umount,
+
+ .ipc_permission = jail_ipc_permission,
+ .shm_alloc_security = jail_shm_alloc_security,
+ .shm_free_security = jail_shm_free_security,
+ .shm_associate = jail_shm_associate,
+ .shm_shmctl = jail_shm_shmctl,
+ .shm_shmat = jail_shm_shmat,
+
+ .msg_queue_alloc_security = jail_msg_queue_alloc,
+ .msg_queue_free_security = jail_msg_queue_free,
+ .msg_queue_associate = jail_msg_queue_associate,
+ .msg_queue_msgctl = jail_msg_queue_msgctl,
+ .msg_queue_msgsnd = jail_msg_queue_msgsnd,
+ .msg_queue_msgrcv = jail_msg_queue_msgrcv,
+
+ .sem_alloc_security = jail_sem_alloc_security,
+ .sem_free_security = jail_sem_free_security,
+ .sem_associate = jail_sem_associate,
+ .sem_semctl = jail_sem_semctl,
+ .sem_semop = jail_sem_semop,
+};
+
+static int __init bsdjail_init (void)
+{
+ int rc = 0;
+
+ if (register_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO
+ "Failure registering BSD Jail module with the kernel\n");
+
+ rc = mod_reg_security(MY_NAME, &bsdjail_security_ops);
+ if (rc < 0) {
+ printk (KERN_INFO "Failure registering BSD Jail "
+ " module with primary security module.\n");
+ return -EINVAL;
+ }
+ secondary = 1;
+ }
+ printk (KERN_INFO "BSD Jail module initialized.\n");
+
+ return 0;
+}
+
+static void __exit bsdjail_exit (void)
+{
+ if (secondary) {
+ if (mod_unreg_security (MY_NAME, &bsdjail_security_ops))
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ " module with primary module.\n");
+ } else {
+ if (unregister_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ "module with the kernel\n");
+ }
+ }
+
+ printk (KERN_INFO "BSD Jail module removed\n");
+}
+
+security_initcall (bsdjail_init);
+module_exit (bsdjail_exit);
+
+MODULE_DESCRIPTION("BSD Jail LSM.");
+MODULE_LICENSE("GPL");
diff -Nrup linux-2.6.9-rc3-bk6/security/Kconfig linux-2.6.9-rc3-bk6-jail/security/Kconfig
--- linux-2.6.9-rc3-bk6/security/Kconfig 2004-10-06 10:08:02.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Kconfig 2004-10-06 10:52:13.000000000 -0500
@@ -46,5 +46,16 @@ config SECURITY_ROOTPLUG

source security/selinux/Kconfig

+config SECURITY_BSDJAIL
+ tristate "BSD Jail LSM"
+ depends on SECURITY
+ select SECURITY_NETWORK
+ help
+ Provides BSD Jail compartmentalization functionality.
+ See Documentation/bsdjail.txt for more information and
+ usage instructions.
+
+ If you are unsure how to answer this question, answer N.
+
endmenu

diff -Nrup linux-2.6.9-rc3-bk6/security/Makefile linux-2.6.9-rc3-bk6-jail/security/Makefile
--- linux-2.6.9-rc3-bk6/security/Makefile 2004-08-14 00:37:26.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Makefile 2004-10-06 10:52:13.000000000 -0500
@@ -15,3 +15,4 @@ obj-$(CONFIG_SECURITY) += security.o d
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
+obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o


2004-10-06 19:20:44

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 3/3] lsm: add bsdjail documentation


Attached is a patch carrying the documentation for the bsdjail LSM.

Please apply.

Signed-off-by: Serge E. Hallyn <[email protected]>

diff -Nrup linux-2.6.9-rc3-bk6/Documentation/bsdjail.txt linux-2.6.9-rc3-bk6-jail/Documentation/bsdjail.txt
--- linux-2.6.9-rc3-bk6/Documentation/bsdjail.txt 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.9-rc3-bk6-jail/Documentation/bsdjail.txt 2004-10-06 10:51:46.000000000 -0500
@@ -0,0 +1,99 @@
+BSD Jail Linux Security Module
+Serge E. Hallyn <[email protected]>
+
+Description:
+
+Implements a subset of the BSD Jail functionality as a Linux LSM.
+What is currently implemented:
+
+ If a proces is in a jail, it:
+
+ 1. Is locked under a chroot (as are all children) which is not
+ vulnerable to the well-known chdir(..)(etc)chroot(.) escape.
+ 2. Cannot mount or umount
+ 3. Cannot send signals outside of jail
+ 4. Cannot ptrace processes outside of jail
+ 5. Cannot create devices
+ 6. Cannot renice processes
+ 7. Cannot load or unload modules
+ 8. Cannot change network settings
+ 9. May be assigned a specific ip address which will be used
+ for all it's socket binds.
+ 10. Cannot see contents of /proc/<pid> entries of processes not in the
+ same jail. (We hide their existence for convenience's sake, but
+ their existance can still be detected using, for instance, statfs)
+ 11. Has no CAP_SYS_RAWIO capability (no ioperm/iopl)
+ 12. May not share IPC resources with processes outside its own jail.
+ 13. May find it's valid network address (if restricted) under
+ /proc/$$/attr/current.
+
+WARNINGS:
+The security of this module is very much dependent on the security
+of the rest of the system. You must carefully think through your
+use of the system.
+
+Some examples:
+ 1. If you leave /dev/hda1 in the jail, processes in the
+ jail can access that filesystem (i.e. /sbin/debugfs).
+ 2. If you provide root access within a jail, this can of
+ course be used to setuid binaries in the jail. Combined
+ with an unjailed regular user account, this gives jailed
+ users unjailed root access. (thanks to Brad Spender for
+ pointing this out). To protect against this, use jails
+ in private namespaces, with the jail filesystems mounted
+ ONLY within the jail namespaces. For instance:
+
+$ # (Make sure /dev/hdc5 is not mounted anywhere)
+$ new_namespace_shell /bin/bash
+$ mount /dev/hdc5 /opt
+$ mount -t proc proc /opt/proc
+$ echo -n "root /opt" > /proc/$$/attr/exec
+$ echo -n "ip 9.53.94.111" > /proc/$$/attr/exec
+$ exec /bin/sh
+$ sshd
+$ apachectl start
+$ exit
+
+How to use:
+ 1. modprobe bsdjail
+ [ 1.5 /sbin/ifconfig eth0:0 2.2.2.2;
+ 1.6 /sbin/route add -host 2.2.2.2 dev eth0:0
+ (optional) ]
+ 2. Make sure the root filesystem (ie /dev/hdc5) is not mounted
+ anywhere else.
+ 3. exec_private_namespace /bin/sh
+ 4. mount /dev/hdc5 /opt
+ 5. mount -t proc proc /opt/proc
+ 6. echo -n "root /opt" > /proc/$$/attr/exec
+ echo -n "ip 2.2.2.2" > /proc/$$/attr/exec (optional)
+ 7. exec /bin/sh
+ 8. sshd
+ 9. exit
+
+The new shell will now run in a private jail on the filesystem on
+/dev/hdc5. If proc has been mounted under /dev/hdc5, then a "ps -auxw"
+under the jailed shell will show only entries for processes started under
+that jail.
+
+If a private IP was specified for the jail, then
+ cat /proc/$$/attr/current
+will show the address for the private network device. Other network
+devices will be visible through /sbin/ifconfig -a, but not usable.
+
+If the reading process is not in a jail, then
+ cat /proc/$$/attr/current
+returns information about the root and ip * for the target process,
+or "Not Jailed" if the target process is not jailed.
+
+Cat /proc/$$/attr/exec gives a list of the valid keywords to cat into
+/proc/$$/attr/exec when starting a jail.
+
+Current valid keywords for creating a jail are:
+
+ root: Root of jail's fs
+ ip: Ip addr for this jail
+ nrtask: Number of tasks in this jail
+ nice: The nice level for this jail. (maybe should be min/max?)
+ slice: Max timeslice per process
+ data: Max size of DATA segment per process
+ memlock: Max size of memory which can be locked per process


2004-10-06 23:32:00

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

Serge Hallyn <[email protected]> wrote:
>
>
> Attached is a patch against the security Kconfig and Makefile to support
> bsdjail, as well as the bsdjail.c file itself. bsdjail offers
> functionality similar to (but more limited than) the vserver patch.

I don't recall anyone requesting this feature. Tell me why we should add
it to Linux?

> +
> +#define in_use(x) (x->jail_flags & IN_USE)
> +#define set_in_use(x) (x->jail_flags |= IN_USE)
> +
> +#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6))
> +#define got_ipv4(x) (x->jail_flags & (GOT_IPV4))
> +#define got_ipv6(x) (x->jail_flags & (GOT_IPV6))
> +#define set_ipv4(x) (x->jail_flags |= GOT_IPV4)
> +#define set_ipv6(x) (x->jail_flags |= GOT_IPV6)
> +#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4)
> +#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6)
> +#define get_task_security(task) (task->security)
> +#define get_inode_security(inode) (inode->i_security)
> +#define get_sock_security(sock) (sock->sk_security)
> +#define get_file_security(file) (file->f_security)
> +#define get_ipc_security(ipc) (ipc->security)
> +#define jail_of(proc) (get_task_security(proc))
> +

The above tricks may make the code easier to type, but I find they make the
code harder for others to read, and that's more important. We prefer to
open-code such things.

> + if (tsec->root_pathname)
> + kfree(tsec->root_pathname);
> + if (tsec->ip4_addr_name)
> + kfree(tsec->ip4_addr_name);
> + if (tsec->ip6_addr_name)
> + kfree(tsec->ip6_addr_name);

kfree(0) is permitted. Some people like to do the double test anyway but I
don't think it adds much here.

> + set_task_security(task,NULL);

whitespace nit: In some places you have spaces after the commas and in
others you do not.

> + kref_put(&tsec->kref, release_jail);

This is the preferred style.

2004-10-07 04:09:49

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

Thank you for the feedback. I have implemented these changes, but
want to run a few tests tomorrow before I send them out to make
sure I didn't break anything...

Quoting Andrew Morton ([email protected]):
> Serge Hallyn <[email protected]> wrote:
> >
> >
> > Attached is a patch against the security Kconfig and Makefile to support
> > bsdjail, as well as the bsdjail.c file itself. bsdjail offers
> > functionality similar to (but more limited than) the vserver patch.
>
> I don't recall anyone requesting this feature. Tell me why we should add
> it to Linux?

Because it gives Linux a functionality like FreeBSD's jail and Solaris'
zones in an unobtrusive manner, without impacting users who don't wish
to use it (except for the extra security_task_lookup function calls).
It allows me (for instance) to compartmentalize apache and sendmail by
running them in different jails. Or offer family members, customers,
or whoever, ssh accounts into seemingly distinct boxes, which are simply
sshd's under different jails at different network aliases. Each would
see their own private filesystems and network, have their own usage
limits, and (mostly) not see processes outside their respective jails.
They can't {un,}load modules, ptrace unjailed processes or send signals
to them, create devices, mount, or umount. It is functionality which
otherwise would have to be achieved by running vmware or uml, but far
more lightweight, since no new OS needs to be run. (Once read-only bind
mounts are implemented, it will become even more lightweight, as large
pieces of filesystem trees will be shareable readonly between jails.)

thanks,
-serge

2004-10-07 06:18:55

by James Morris

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

On Thu, 7 Oct 2004, Serge E. Hallyn wrote:

> Because it gives Linux a functionality like FreeBSD's jail and Solaris'
> zones in an unobtrusive manner, without impacting users who don't wish
> to use it (except for the extra security_task_lookup function calls).

Yes, as an LSM module, it can be configured out. I think it's a good use
of the LSM framework, and may be useful for people migrating to Linux from
legacy Solaris and FreeBSD.


- James
--
James Morris
<[email protected]>


2004-10-07 06:24:08

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

James Morris <[email protected]> wrote:
>
> On Thu, 7 Oct 2004, Serge E. Hallyn wrote:
>
> > Because it gives Linux a functionality like FreeBSD's jail and Solaris'
> > zones in an unobtrusive manner, without impacting users who don't wish
> > to use it (except for the extra security_task_lookup function calls).
>
> Yes, as an LSM module, it can be configured out. I think it's a good use
> of the LSM framework, and may be useful for people migrating to Linux from
> legacy Solaris and FreeBSD.

Sure, but that's a bit speculative for adding a feature to the mainline
kernel.

Is there vendor pull for this feature? Do IBM have customers requiring it?

"someone might like this" is not a sufficient basis for adding stuff to the
kernel, sorry.

2004-10-07 13:11:38

by Alan

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

On Iau, 2004-10-07 at 00:26, Andrew Morton wrote:
> I don't recall anyone requesting this feature. Tell me why we should add
> it to Linux?

Subject to the code cleanups and stuff you've noted I'd actually like to
see BSD jail stuff in our security modules because it has the virtue of
simplicity. If it can be extended to do all of vserver even better. J
Random Admin has a good chance at configuring BSD jails etups. J Random
Admin needs some serious tools that don't exist to set up SELinux the
same way.

In the security world simplicity is often a virtue, both in code and
concepts.

Alan

2004-10-07 17:00:52

by Chris Wright

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

* Andrew Morton ([email protected]) wrote:
> James Morris <[email protected]> wrote:
> > On Thu, 7 Oct 2004, Serge E. Hallyn wrote:
> >
> > > Because it gives Linux a functionality like FreeBSD's jail and Solaris'
> > > zones in an unobtrusive manner, without impacting users who don't wish
> > > to use it (except for the extra security_task_lookup function calls).
> >
> > Yes, as an LSM module, it can be configured out. I think it's a good use
> > of the LSM framework, and may be useful for people migrating to Linux from
> > legacy Solaris and FreeBSD.
>
> Sure, but that's a bit speculative for adding a feature to the mainline
> kernel.

Which feature are you concerned over, the additional hook or the
new module? The module is a no-op for anybody who doesn't want it.
I can't vouch for the number of users of this module although I've seen
some positive feedback from users. One nice bit is that it goes a way
towards helping vserver which does have quite a few users. This module
really demonstrates one of the points of LSM...to support multiple
security models.

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2004-10-07 19:00:31

by Chris Wright

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

* Andrew Morton ([email protected]) wrote:
> Chris Wright <[email protected]> wrote:
> > * Andrew Morton ([email protected]) wrote:
> > Which feature are you concerned over, the additional hook or the
> > new module?
>
> I am concerned about the presence of new code - simple as that.

Understood.

> We need to be able to demonstrate that the new code is sufficiently useful
> to a sufficiently large number of people as to warrant the cost of
> maintaining it in the tree for the rest of eternity.

That's fine. Serge, can you enlighten us with an idea of the users of
this code?

> > The module is a no-op for anybody who doesn't want it.
>
> It still needs to be maintained.

Absolutely.

> > I can't vouch for the number of users of this module although I've seen
> > some positive feedback from users. One nice bit is that it goes a way
> > towards helping vserver which does have quite a few users.
>
> Tell us more.

One portion of the vserver project (that which has to do with security
and isolation) could be largely covered by this work. And vserver
is an active project with many users AFAICT. The vserver maintainer
has expressed some interest in this as well. The other portion of the
project, which does the resource limiting has a decent chance of working
well with something like CKRM or similar.

> > This module
> > really demonstrates one of the points of LSM...to support multiple
> > security models.
>
> Sure. But that doesn't mean that those modules have to live at kernel.org
> rather than, say, at bsdjail.sourceforge.net.

I agree, some userbase does wonders to justify mainlining the code.

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2004-10-07 19:23:00

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

Attached is a new version of the bsdjail patch with the requested code
cleanups applied.

Changelog:
Sep 10, 2004: original version
Sep 12, 2004: add ipv6 support
Sep 13, 2004: support simultaneous ipv4+ipv6
Oct 6, 2004: move kref release function to kref_put from kref_init
Oct 7, 2004: requested code cleanups (mainly nix #defines)

thanks,
-serge

Signed-Off-By: Serge E. Hallyn <[email protected]>


diff -Nrup linux-2.6.9-rc3-bk6/security/bsdjail.c linux-2.6.9-rc3-bk6-jail/security/bsdjail.c
--- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-07 11:30:21.000000000 -0500
@@ -0,0 +1,1495 @@
+/*
+ * File: linux/security/bsdjail.c
+ * Author: Serge Hallyn ([email protected])
+ * Date: Sep 12, 2004
+ *
+ * (See Documentation/bsdjail.txt for more information)
+ *
+ * Copyright (C) 2004 International Business Machines <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/pagemap.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <linux/mount.h>
+#include <asm/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/seq_file.h>
+#include <linux/un.h>
+#include <linux/smp_lock.h>
+#include <linux/kref.h>
+
+static int jail_debug = 0;
+MODULE_PARM(jail_debug, "i");
+MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n");
+
+#define DBG 0
+#define WARN 1
+#define bsdj_debug(how, fmt, arg... ) \
+ do { \
+ if ( how || jail_debug ) \
+ printk(KERN_NOTICE "%s: %s: " fmt, \
+ MY_NAME, __FUNCTION__, \
+ ## arg ); \
+ } while ( 0 )
+
+#define MY_NAME "bsdjail"
+
+/* flag to keep track of how we were registered */
+static int secondary = 0;
+
+/*
+ * The task structure holding jail information.
+ * Taskp->security points to one of these (or is null).
+ * There is exactly one jail_struct for each jail. If >1 process
+ * are in the same jail, they share the same jail_struct.
+ */
+struct jail_struct {
+ struct kref kref;
+
+ /* these are set on writes to /proc/<pid>/attr/exec */
+ char *root_pathname; /* char * containing path to use as jail / */
+ char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
+ char *ip6_addr_name; /* char * containing ip6 addr to use for jail */
+
+ /* these are set when a jail becomes active */
+ __u32 addr4; /* internal form of ip4_addr_name */
+ struct in6_addr addr6; /* internal form of ip6_addr_name */
+
+ struct dentry *dentry; /* dentry of fs root */
+ struct vfsmount *mnt; /* vfsmnt of fs root */
+
+ /* Resource limits. 0 = no limit */
+ int max_nrtask; /* maximum number of tasks within this jail. */
+ int cur_nrtask; /* current number of tasks within this jail. */
+ long maxtimeslice; /* max timeslice in ms for procs in this jail */
+ long nice; /* nice level for processes in this jail */
+ long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */
+/* values for the jail_flags field */
+#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */
+#define GOT_IPV4 2
+#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */
+ char jail_flags;
+};
+
+/*
+ * disable_jail: A jail which was in use, but has no references
+ * left, is disabled - we free up the mountpoint and dentry, and
+ * give up our reference on the module.
+ *
+ * don't need to put namespace, it will be done automatically
+ * when the last process in jail is put.
+ * DO need to put the dentry and vfsmount
+ */
+static void
+disable_jail(struct jail_struct *tsec)
+{
+ dput(tsec->dentry);
+ mntput(tsec->mnt);
+ module_put(THIS_MODULE);
+}
+
+
+static void free_jail(struct jail_struct *tsec)
+{
+ if (!tsec)
+ return;
+
+ kfree(tsec->root_pathname);
+ kfree(tsec->ip4_addr_name);
+ kfree(tsec->ip6_addr_name);
+ kfree(tsec);
+}
+
+/* release_jail:
+ * Callback for kref_put to use for releasing a jail when its
+ * last user exits.
+ */
+static void release_jail(struct kref *kref)
+{
+ struct jail_struct *tsec;
+
+ tsec = container_of(kref, struct jail_struct, kref);
+ disable_jail(tsec);
+ free_jail(tsec);
+}
+
+/*
+ * jail_task_free_security: this is the callback hooked into LSM.
+ * If there was no task->security field for bsdjail, do nothing.
+ * If there was, but it was never put into use, free the jail.
+ * If there was, and the jail is in use, then decrement the usage
+ * count, and disable and free the jail if the usage count hits 0.
+ */
+static void jail_task_free_security(struct task_struct *task)
+{
+ struct jail_struct *tsec;
+
+ tsec = task->security;
+
+ if (!tsec)
+ return;
+
+ if (!(tsec->jail_flags & IN_USE)) {
+ /*
+ * someone did 'echo -n x > /proc/<pid>/attr/exec' but
+ * then forked before execing. Nuke the old info.
+ */
+ free_jail(tsec);
+ task->security = NULL;
+ return;
+ }
+ tsec->cur_nrtask--;
+ /* If this was the last process in the jail, delete the jail */
+ kref_put(&tsec->kref, release_jail);
+}
+
+static struct jail_struct *
+alloc_task_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec;
+ tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL);
+ if (!tsec)
+ return ERR_PTR(-ENOMEM);
+ memset(tsec, 0, sizeof(struct jail_struct));
+ tsk->security = tsec;
+ return tsec;
+}
+
+static inline int
+in_jail(struct task_struct *t)
+{
+ struct jail_struct *tsec = t->security;
+
+ if (tsec && (tsec->jail_flags & IN_USE))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * If a network address was passed into /proc/<pid>/attr/exec,
+ * then process in its jail will only be allowed to bind/listen
+ * to that address.
+ */
+static void
+setup_netaddress(struct jail_struct *tsec)
+{
+ unsigned int a, b, c, d, i;
+ unsigned int x[8];
+
+ tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6);
+ tsec->addr4 = 0;
+ ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0);
+
+ if (tsec->ip4_addr_name) {
+ if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u",
+ &a, &b, &c, &d) != 4)
+ return;
+ if (a>255 || b>255 || c>255 || d>255)
+ return;
+ tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d);
+ tsec->jail_flags |= GOT_IPV4;
+ bsdj_debug(DBG, "Network (ipv4) set up (%s)\n",
+ tsec->ip4_addr_name);
+ }
+
+ if (tsec->ip6_addr_name) {
+ if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x",
+ &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6],
+ &x[7]) != 8) {
+ printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__,
+ tsec->ip6_addr_name);
+ return;
+ }
+ for (i=0; i<8; i++) {
+ if (x[i] > 65535) {
+ printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i);
+ return;
+ }
+ tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]);
+ }
+ tsec->jail_flags |= GOT_IPV6;
+ bsdj_debug(DBG, "Network (ipv6) set up (%s)\n",
+ tsec->ip6_addr_name);
+ }
+}
+
+/*
+ * enable_jail:
+ * Called when a process is placed into a new jail to handle the
+ * actual creation of the jail.
+ * Creates namespace
+ * Sets process root+pwd
+ * Stores the requested ip address
+ * Registers a unique pseudo-proc filesystem for this jail
+ */
+static int enable_jail(struct task_struct *tsk)
+{
+ struct nameidata nd;
+ struct jail_struct *tsec = tsk->security;;
+ int retval = -EFAULT;
+
+ if (!tsec || !tsec->root_pathname)
+ goto out;
+
+ /*
+ * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside
+ * the jail don't affect the jail. But it's not necessary, and
+ * requires exporting copy_namespace from fs/namespace.c
+ *
+ * Actually, it would also be useful for truly hiding
+ * information about mounts which do not exist in this jail.
+#define USE_JAIL_NAMESPACE
+ */
+#ifdef USE_JAIL_NAMESPACE
+ bsdj_debug(DBG, "bsdjail: copying namespace.\n");
+ retval = -EPERM;
+ if (copy_namespace(CLONE_NEWNS, tsk))
+ goto out;
+ bsdj_debug(DBG, "bsdjail: copied namespace.\n");
+#endif
+
+ /* find our new root directory */
+ bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname);
+ retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+ if (retval)
+ goto out;
+
+ bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname);
+
+ /* and set the fsroot to it */
+ set_fs_root(tsk->fs, nd.mnt, nd.dentry);
+ set_fs_pwd(tsk->fs, nd.mnt, nd.dentry);
+
+ bsdj_debug(DBG, "bsdjail: root has been set. Have fun.\n");
+
+ /* set up networking */
+ if (tsec->ip4_addr_name || tsec->ip6_addr_name)
+ setup_netaddress(tsec);
+
+ tsec->cur_nrtask = 1;
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+ if (tsec->max_data) {
+ current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data;
+ current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock;
+ current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock;
+ }
+ if (tsec->maxtimeslice) {
+ current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ }
+ /* success and end */
+ tsec->mnt = mntget(nd.mnt);
+ tsec->dentry = dget(nd.dentry);
+ path_release(&nd);
+ kref_init(&tsec->kref);
+ tsec->jail_flags |= IN_USE;
+
+ /* won't let ourselves be removed until this jail goes away */
+ try_module_get(THIS_MODULE);
+
+ return 0;
+
+out:
+ return retval;
+}
+
+/*
+ * LSM /proc/<pid>/attr hooks.
+ * You may write into /proc/<pid>/attr/exec:
+ * root /some/path
+ * ip 2.2.2.2
+ * These values will be used on the next exec() to set up your jail
+ * (assuming you're not already in a jail)
+ */
+static int
+jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
+{
+ struct jail_struct *tsec = current->security;
+ long val;
+ int start, len;
+
+ if (tsec && (tsec->jail_flags & IN_USE))
+ return -EINVAL; /* let them guess why */
+
+ if (p != current || strcmp(name, "exec"))
+ return -EPERM;
+
+ if (strncmp(value, "root ", 5) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->root_pathname)
+ kfree(tsec->root_pathname);
+ start = 5;
+ len = size-start;
+ tsec->root_pathname = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->root_pathname)
+ return -ENOMEM;
+ strlcpy(tsec->root_pathname, value+start, len+1);
+ } else if (strncmp(value, "ip ", 3) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->ip4_addr_name)
+ kfree(tsec->ip4_addr_name);
+ start = 3;
+ len = size-start;
+ tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->ip4_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip4_addr_name, value+start, len+1);
+ } else if (strncmp(value, "ip6 ", 4) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ if (tsec->ip6_addr_name)
+ kfree(tsec->ip6_addr_name);
+ start = 4;
+ len = size-start;
+ tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL);
+ if (!tsec->ip6_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip6_addr_name, value+start, len+1);
+
+ /* the next two are equivalent */
+ } else if (strncmp(value, "slice ", 6) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+6, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "timeslice ", 10) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+10, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "nrtask ", 7) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = (int) simple_strtol(value+7, NULL, 0);
+ if (val < 1)
+ return -EINVAL;
+ tsec->max_nrtask = val;
+ } else if (strncmp(value, "memlock ", 8) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+8, NULL, 0);
+ tsec->max_memlock = val;
+ } else if (strncmp(value, "data ", 5) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->max_data = val;
+ } else if (strncmp(value, "nice ", 5) == 0) {
+ if (!tsec)
+ tsec = alloc_task_security(current);
+ if (IS_ERR(tsec))
+ return -ENOMEM;
+
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->nice = val;
+ } else
+ return -EINVAL;
+
+ return size;
+}
+
+static int print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt)
+{
+ int len = 0;
+
+ if (j->ip4_addr_name)
+ len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name);
+ if (j->ip6_addr_name)
+ len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name);
+
+ return snprintf(buf, maxcnt, "No network information\n");
+}
+
+/*
+ * LSM /proc/<pid>/attr read hook.
+ *
+ * /proc/$$/attr/current output:
+ * If the reading process, say process 1001, is in a jail, then
+ * cat /proc/999/attr/current
+ * will print networking information.
+ * If the reading process, say process 1001, is not in a jail, then
+ * cat /proc/999/attr/current
+ * will return
+ * root: (root of jail)
+ * ip: (ip address of jail)
+ * if 999 is in a jail, or
+ * -EINVAL
+ * if 999 is not in a jail.
+ *
+ * /proc/$$/attr/exec output:
+ * A process in a jail gets -EINVAL for /proc/$$/attr/exec.
+ * A process not in a jail gets hints on starting a jail.
+ */
+static int
+jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+{
+ struct jail_struct *tsec;
+ int err = 0;
+
+ if (in_jail(current)) {
+ if (strcmp(name, "current") == 0) {
+ /* provide network info */
+ err = print_jail_net_info(current->security, value,
+ size);
+ return err;
+ }
+ return -EINVAL; /* let them guess why */
+ }
+
+ if (strcmp(name, "exec") == 0) {
+ /* Print usage some help */
+ err = snprintf(value, size,
+ "Valid keywords:\n"
+ "root <pathname>\n"
+ "ip <ip4-addr>\n"
+ "ip6 <ip6-addr>\n"
+ "nrtask <max number of tasks in this jail>\n"
+ "nice <nice level for processes in this jail>\n"
+ "slice <max timeslice per process in msecs>\n"
+ "data <max data size per process in bytes>\n"
+ "memlock <max lockable memory per process in bytes>\n");
+ return err;
+ }
+
+ if (strcmp(name, "current"))
+ return -EPERM;
+
+ tsec = p->security;
+ if (!tsec || !(tsec->jail_flags & IN_USE)) {
+ err = snprintf(value, size, "Not Jailed\n");
+ } else {
+ err = snprintf(value, size,
+ "Root: %s\nIPv4: %s\nIPv6: %s\n"
+ "max_nrtask %d current nrtask %d max_timeslice %lu "
+ "nice %lu\n"
+ "max_memlock %lu max_data %lu\n",
+ tsec->root_pathname,
+ tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)",
+ tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)",
+ tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice,
+ tsec->nice, tsec->max_data, tsec->max_memlock);
+ }
+
+ return err;
+}
+
+/*
+ * Forbid a process in a jail from sending a signal to a process in another
+ * (or no) jail through file sigio.
+ *
+ * We consider the process which set the fowner to be the one sending the
+ * signal, rather than the one writing to the file. Therefore we store the
+ * jail of a process during jail_file_set_fowner, then check that against
+ * the jail of the process receiving the signal.
+ */
+static int
+jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown,
+ int fd, int reason)
+{
+ struct file *file;
+ struct jail_struct *tsec, *fsec;
+
+ if (!in_jail(current))
+ return 0;
+
+ file = (struct file *) ((long)fown - offsetof(struct file, f_owner));
+ tsec = tsk->security;
+ fsec = file->f_security;
+
+ if (fsec != tsec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_file_set_fowner(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = current->security;
+ file->f_security = tsec;
+ if (tsec)
+ kref_get(&tsec->kref);
+
+ return 0;
+}
+
+static void free_ipc_security(struct kern_ipc_perm *ipc)
+{
+ struct jail_struct *tsec;
+
+ tsec = ipc->security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ ipc->security = NULL;
+}
+
+static void free_file_security(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = file->f_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ file->f_security = NULL;
+}
+
+static void free_inode_security(struct inode *inode)
+{
+ struct jail_struct *tsec;
+
+ tsec = inode->i_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ inode->i_security = NULL;
+}
+
+/*
+ * LSM ptrace hook:
+ * process in jail may not ptrace process not in the same jail
+ */
+static int
+jail_ptrace (struct task_struct *tracer, struct task_struct *tracee)
+{
+ struct jail_struct *tsec = tracer->security;
+
+ if (tsec && (tsec->jail_flags & IN_USE)) {
+ if (tsec == tracee->security)
+ return 0;
+ return -EPERM;
+ }
+ return 0;
+}
+
+/*
+ * process in jail may only use one (aliased) ip address. If they try to
+ * attach to 127.0.0.1, that is remapped to their own address. If some
+ * other address (and not their own), deny permission
+ */
+static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen);
+
+#define loopbackaddr htonl((127 << 24) | 1)
+
+static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen, struct jail_struct *tsec)
+{
+ struct sockaddr_in *inaddr;
+ __u32 sin_addr, jailaddr;
+
+ if (!(tsec->jail_flags & GOT_IPV4))
+ return -EPERM;
+
+ inaddr = (struct sockaddr_in *) address;
+ sin_addr = inaddr->sin_addr.s_addr;
+ jailaddr = tsec->addr4;
+
+ if (sin_addr == jailaddr)
+ return 0;
+
+ if (sin_addr == loopbackaddr || !sin_addr) {
+ bsdj_debug(DBG, "Got a loopback or 0 address\n");
+ sin_addr = jailaddr;
+ bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n",
+ NIPQUAD(sin_addr));
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static inline int
+jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen,
+ struct jail_struct *tsec)
+{
+ struct sockaddr_in6 *inaddr6;
+ struct in6_addr *sin6_addr, *jailaddr;
+
+ if (!(tsec->jail_flags & GOT_IPV6))
+ return -EPERM;
+
+ inaddr6 = (struct sockaddr_in6 *) address;
+ sin6_addr = &inaddr6->sin6_addr;
+ jailaddr = &tsec->addr6;
+
+ if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0)
+ return 0;
+
+ if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) {
+ ipv6_addr_copy(sin6_addr, jailaddr);
+ return 0;
+ }
+
+ printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__);
+ printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ "j %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ __FUNCTION__,
+ NIP6(*sin6_addr),
+ NIP6(*jailaddr));
+
+ return -EPERM;
+}
+
+static int
+jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (sock->sk->sk_family == AF_UNIX)
+ return jail_socket_unix_bind(sock, address, addrlen);
+
+ if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ /* If we want to be strict, we could just
+ * deny net access when lacking a pseudo ip.
+ * For now we just allow it. */
+ return 0;
+
+ switch(address->sa_family) {
+ case AF_INET:
+ return jail_inet4_bind(sock, address, addrlen, tsec);
+
+ case AF_INET6:
+ return jail_inet6_bind(sock, address, addrlen, tsec);
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * If locked in an ipv6 jail, don't let them use ipv4, and vice versa
+ */
+static int
+jail_socket_create(int family, int type, int protocol, int kern)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return 0;
+
+ switch(family) {
+ case AF_INET:
+ if (tsec->jail_flags & GOT_IPV4)
+ return 0;
+ return -EPERM;
+ case AF_INET6:
+ if (tsec->jail_flags & GOT_IPV6)
+ return 0;
+ return -EPERM;
+ default:
+ return 0;
+ };
+
+ return 0;
+}
+
+static void
+jail_socket_post_create(struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return;
+
+ switch(family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ inet->saddr = tsec->addr4;
+ break;
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ ipv6_addr_copy(&inet6->saddr, &tsec->addr6);
+ break;
+ default:
+ break;
+ };
+
+ return;
+}
+
+static int
+jail_socket_listen(struct socket *sock, int backlog)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return 0;
+
+ switch (sock->sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ if (inet->saddr == tsec->addr4)
+ return 0;
+ return -EPERM;
+
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0)
+ return 0;
+ return -EPERM;
+
+ default:
+ return 0;
+
+ }
+}
+
+static void free_sock_security(struct sock *sk)
+{
+ struct jail_struct *tsec;
+
+ tsec = sk->sk_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ sk->sk_security = NULL;
+}
+
+/*
+ * The next three (socket) hooks prevent a process in a jail from sending
+ * data to a abstract unix domain socket which was bound outside the jail.
+ */
+static int
+jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_un *sunaddr;
+ struct jail_struct *tsec;
+
+ if (sock->sk->sk_family != AF_UNIX)
+ return 0;
+
+ sunaddr = (struct sockaddr_un *) address;
+ if (sunaddr->sun_path[0] != 0)
+ return 0;
+
+ tsec = current->security;
+ sock->sk->sk_security = tsec;
+ if (tsec)
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+/*
+ * Note - we deny sends both from unjailed to jailed, and from jailed
+ * to unjailed. As well as, of course between different jails.
+ */
+static int
+jail_socket_unix_may_send(struct socket *sock, struct socket *other)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_socket_unix_stream_connect(struct socket *sock,
+ struct socket *other, struct sock *newsk)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_mount(char * dev_name, struct nameidata *nd, char * type,
+ unsigned long flags, void * data)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_umount(struct vfsmount *mnt, int flags)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * process in jail may not:
+ * use nice
+ * change network config
+ * load/unload modules
+ */
+static int
+jail_capable (struct task_struct *tsk, int cap)
+{
+ if (in_jail(tsk)) {
+ if (cap == CAP_SYS_NICE)
+ return -EPERM;
+ if (cap == CAP_NET_ADMIN)
+ return -EPERM;
+ if (cap == CAP_SYS_MODULE)
+ return -EPERM;
+ if (cap == CAP_SYS_RAWIO)
+ return -EPERM;
+ }
+
+ if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0)
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * jail_security_task_create:
+ *
+ * If the current process is ina a jail, and that jail is about to exceed a
+ * maximum number of processes, then refuse to fork. If the maximum number
+ * of jails is listed as 0, then there is no limit for this jail, and we allow
+ * all forks.
+ */
+static inline int
+jail_security_task_create (unsigned long clone_flags)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask)
+ return -EPERM;
+ return 0;
+}
+
+/*
+ * The child of a process in a jail belongs in the same jail
+ */
+static int
+jail_task_alloc_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ tsk->security = tsec;
+ kref_get(&tsec->kref);
+ tsec->cur_nrtask++;
+ if (tsec->maxtimeslice) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ }
+ if (tsec->max_data) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_data;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ tsk->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock;
+ tsk->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock;
+ }
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+
+ return 0;
+}
+
+static int
+jail_bprm_alloc_security(struct linux_binprm *bprm)
+{
+ struct jail_struct *tsec;
+ int ret;
+
+ tsec = current->security;
+ if (!tsec)
+ return 0;
+
+ if (tsec->jail_flags & IN_USE)
+ return 0;
+
+ if (tsec->root_pathname) {
+ ret = enable_jail(current);
+ if (ret) {
+ /* if we failed, nix out the root/ip requests */
+ jail_task_free_security(current);
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Process in jail may not create devices
+ * Thanks to Brad Spender for pointing out fifos should be allowed.
+ */
+/* TODO: We may want to allow /dev/log, at least... */
+static int
+jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ if (!in_jail(current))
+ return 0;
+
+ if (S_ISFIFO(mode))
+ return 0;
+
+ return -EPERM;
+}
+
+/* yanked from fs/proc/base.c */
+static unsigned name_to_int(struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ while (len-- > 0) {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ }
+ return n;
+out:
+ return ~0U;
+}
+
+/*
+ * jail_proc_inode_permission:
+ * called only when current is in a jail, and is trying to reach
+ * /proc/<pid>. We check whether <pid> is in the same jail as
+ * current. If not, permission is denied.
+ *
+ * NOTE: On the one hand, the task_to_inode(inode)->i_security
+ * approach seems cleaner, but on the other, this prevents us
+ * from unloading bsdjail for awhile...
+ */
+static int
+jail_proc_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = current->security;
+ struct dentry *dentry = nd->dentry;
+ unsigned pid;
+
+ pid = name_to_int(dentry);
+ if (pid == ~0U) {
+ struct qstr *dname = &dentry->d_name;
+ if (strcmp(dname->name, "scsi") == 0 ||
+ strcmp(dname->name, "sys") == 0 ||
+ strcmp(dname->name, "ide") == 0)
+ return -EPERM;
+ return 0;
+ }
+
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (inode->i_security != tsec)
+ return -ENOENT;
+
+ return 0;
+}
+
+/*
+ * Here is our attempt to prevent chroot escapes.
+ */
+static int
+is_jailroot_parent(struct dentry *candidate, struct dentry *root,
+ struct vfsmount *rootmnt)
+{
+ if (candidate == root)
+ return 0;
+
+ /* simple case: fs->root/.. == candidate */
+ if (root->d_parent == candidate)
+ return 1;
+
+ /*
+ * now more complicated: if fs->root is a mounted directory,
+ * then chdir(..) out of fs->root, at follow_dotdot, will follow
+ * the fs->root mount point. So we must check the parent dir of
+ * the fs->root mount point.
+ */
+ if (rootmnt->mnt_root == root && rootmnt->mnt_mountpoint!=root) {
+ root = rootmnt->mnt_mountpoint;
+ rootmnt = rootmnt->mnt_parent;
+ return is_jailroot_parent(candidate, root, rootmnt);
+ }
+
+ return 0;
+}
+
+/*
+ * A process in a jail may not see that /proc/<pid> exists for
+ * process not in its jail
+ * Unfortunately we can't pretend that pid for the starting process
+ * is 1, as vserver does.
+ */
+static int jail_task_lookup(struct task_struct *p)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec)
+ return 0;
+ if (tsec == p->security)
+ return 0;
+ return -EPERM;
+}
+/*
+ * security_task_to_inode:
+ * Set inode->security = task's jail.
+ */
+static void jail_task_to_inode(struct task_struct *p, struct inode *inode)
+{
+ struct jail_struct *tsec = p->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return;
+ if (inode->i_security)
+ return;
+ kref_get(&tsec->kref);
+ inode->i_security = tsec;
+}
+
+/*
+ * inode_permission:
+ * If we are trying to look into certain /proc files from in a jail, we
+ * may deny permission.
+ * If we are trying to cd(..), but the cwd is the root of our jail, then
+ * permission is denied.
+ */
+static int
+jail_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (!nd)
+ return 0;
+
+ if (nd->dentry &&
+ strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) {
+ return jail_proc_inode_permission(inode, mask, nd);
+
+ }
+
+ if (!(mask&MAY_EXEC))
+ return 0;
+ if (!inode || !S_ISDIR(inode->i_mode))
+ return 0;
+
+ if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) {
+ bsdj_debug(WARN, "Attempt to chdir(..) out of jail!\n"
+ "(%s is a subdir of %s)\n",
+ tsec->dentry->d_name.name,
+ nd->dentry->d_name.name);
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * A function which returns -ENOENT if dentry is the dentry for
+ * a /proc/<pid> directory. It returns 0 otherwise.
+ */
+static inline int
+generic_procpid_check(struct dentry *dentry)
+{
+ struct jail_struct *jail = current->security;
+ unsigned pid = name_to_int(dentry);
+
+ if (!jail || !(jail->jail_flags & IN_USE))
+ return 0;
+ if (pid == ~0U)
+ return 0;
+ if (strcmp(dentry->d_sb->s_type->name, "proc") != 0)
+ return 0;
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (dentry->d_inode->i_security != jail)
+ return -ENOENT;
+ return 0;
+}
+
+/*
+ * We want getattr to fail on /proc/<pid> to prevent leakage through, for
+ * instance, ls -d.
+ */
+static int
+jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* This probably is not necessary - /proc does not support xattrs? */
+static int
+jail_inode_getxattr(struct dentry *dentry, char *name)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* process in jail may not send signal to process not in the same jail */
+static int
+jail_task_kill(struct task_struct *p, struct siginfo *info, int sig)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (tsec == p->security)
+ return 0;
+
+ if (sig==SIGCHLD)
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit jailed process' abilities to muck with resource
+ * limits
+ */
+static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+static int jail_task_setscheduler (struct task_struct *p, int policy,
+ struct sched_param *lp)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit IPC access.
+ */
+
+static inline int
+basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
+{
+ struct jail_struct *tsec = target->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (p->security != tsec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
+{
+ return basic_ipc_security_check(ipcp, current);
+}
+
+static int
+jail_shm_alloc_security (struct shmid_kernel *shp)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ shp->shm_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_shm_free_security (struct shmid_kernel *shp)
+{
+ free_ipc_security(&shp->shm_perm);
+}
+
+static int
+jail_shm_associate (struct shmid_kernel *shp, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SHM_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_msg_queue_alloc(struct msg_queue *msq)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ msq->q_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_msg_queue_free(struct msg_queue *msq)
+{
+ free_ipc_security(&msq->q_perm);
+}
+
+static int jail_msg_queue_associate(struct msg_queue *msq, int flag)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == MSG_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+ struct task_struct *target, long type, int mode)
+
+{
+ return basic_ipc_security_check(&msq->q_perm, target);
+}
+
+static int
+jail_sem_alloc_security(struct sem_array *sma)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ sma->sem_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_sem_free_security(struct sem_array *sma)
+{
+ free_ipc_security(&sma->sem_perm);
+}
+
+static int
+jail_sem_associate(struct sem_array *sma, int semflg)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semctl(struct sem_array *sma, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SEM_INFO)
+ return 0;
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops,
+ int alter)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static struct security_operations bsdjail_security_ops = {
+ .ptrace = jail_ptrace,
+ .capable = jail_capable,
+
+ .task_kill = jail_task_kill,
+ .task_alloc_security = jail_task_alloc_security,
+ .task_free_security = jail_task_free_security,
+ .bprm_alloc_security = jail_bprm_alloc_security,
+ .task_create = jail_security_task_create,
+ .task_to_inode = jail_task_to_inode,
+ .task_lookup = jail_task_lookup,
+
+ .task_setrlimit = jail_task_setrlimit,
+ .task_setscheduler = jail_task_setscheduler,
+
+ .setprocattr = jail_setprocattr,
+ .getprocattr = jail_getprocattr,
+
+ .file_set_fowner = jail_file_set_fowner,
+ .file_send_sigiotask = jail_file_send_sigiotask,
+ .file_free_security = free_file_security,
+
+ .socket_bind = jail_socket_bind,
+ .socket_listen = jail_socket_listen,
+ .socket_create = jail_socket_create,
+ .socket_post_create = jail_socket_post_create,
+ .unix_stream_connect = jail_socket_unix_stream_connect,
+ .unix_may_send = jail_socket_unix_may_send,
+ .sk_free_security = free_sock_security,
+
+ .inode_mknod = jail_inode_mknod,
+ .inode_permission = jail_inode_permission,
+ .inode_free_security = free_inode_security,
+ .inode_getattr = jail_inode_getattr,
+ .inode_getxattr = jail_inode_getxattr,
+ .sb_mount = jail_mount,
+ .sb_umount = jail_umount,
+
+ .ipc_permission = jail_ipc_permission,
+ .shm_alloc_security = jail_shm_alloc_security,
+ .shm_free_security = jail_shm_free_security,
+ .shm_associate = jail_shm_associate,
+ .shm_shmctl = jail_shm_shmctl,
+ .shm_shmat = jail_shm_shmat,
+
+ .msg_queue_alloc_security = jail_msg_queue_alloc,
+ .msg_queue_free_security = jail_msg_queue_free,
+ .msg_queue_associate = jail_msg_queue_associate,
+ .msg_queue_msgctl = jail_msg_queue_msgctl,
+ .msg_queue_msgsnd = jail_msg_queue_msgsnd,
+ .msg_queue_msgrcv = jail_msg_queue_msgrcv,
+
+ .sem_alloc_security = jail_sem_alloc_security,
+ .sem_free_security = jail_sem_free_security,
+ .sem_associate = jail_sem_associate,
+ .sem_semctl = jail_sem_semctl,
+ .sem_semop = jail_sem_semop,
+};
+
+static int __init bsdjail_init (void)
+{
+ int rc = 0;
+
+ if (register_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO
+ "Failure registering BSD Jail module with the kernel\n");
+
+ rc = mod_reg_security(MY_NAME, &bsdjail_security_ops);
+ if (rc < 0) {
+ printk (KERN_INFO "Failure registering BSD Jail "
+ " module with primary security module.\n");
+ return -EINVAL;
+ }
+ secondary = 1;
+ }
+ printk (KERN_INFO "BSD Jail module initialized.\n");
+
+ return 0;
+}
+
+static void __exit bsdjail_exit (void)
+{
+ if (secondary) {
+ if (mod_unreg_security (MY_NAME, &bsdjail_security_ops))
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ " module with primary module.\n");
+ } else {
+ if (unregister_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ "module with the kernel\n");
+ }
+ }
+
+ printk (KERN_INFO "BSD Jail module removed\n");
+}
+
+security_initcall (bsdjail_init);
+module_exit (bsdjail_exit);
+
+MODULE_DESCRIPTION("BSD Jail LSM.");
+MODULE_LICENSE("GPL");
diff -Nrup linux-2.6.9-rc3-bk6/security/Kconfig linux-2.6.9-rc3-bk6-jail/security/Kconfig
--- linux-2.6.9-rc3-bk6/security/Kconfig 2004-10-06 10:08:02.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Kconfig 2004-10-06 10:52:13.000000000 -0500
@@ -46,5 +46,16 @@ config SECURITY_ROOTPLUG

source security/selinux/Kconfig

+config SECURITY_BSDJAIL
+ tristate "BSD Jail LSM"
+ depends on SECURITY
+ select SECURITY_NETWORK
+ help
+ Provides BSD Jail compartmentalization functionality.
+ See Documentation/bsdjail.txt for more information and
+ usage instructions.
+
+ If you are unsure how to answer this question, answer N.
+
endmenu

diff -Nrup linux-2.6.9-rc3-bk6/security/Makefile linux-2.6.9-rc3-bk6-jail/security/Makefile
--- linux-2.6.9-rc3-bk6/security/Makefile 2004-08-14 00:37:26.000000000 -0500
+++ linux-2.6.9-rc3-bk6-jail/security/Makefile 2004-10-06 10:52:13.000000000 -0500
@@ -15,3 +15,4 @@ obj-$(CONFIG_SECURITY) += security.o d
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o
obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
+obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o

2004-10-07 18:46:31

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

Chris Wright <[email protected]> wrote:
>
> * Andrew Morton ([email protected]) wrote:
> > James Morris <[email protected]> wrote:
> > > On Thu, 7 Oct 2004, Serge E. Hallyn wrote:
> > >
> > > > Because it gives Linux a functionality like FreeBSD's jail and Solaris'
> > > > zones in an unobtrusive manner, without impacting users who don't wish
> > > > to use it (except for the extra security_task_lookup function calls).
> > >
> > > Yes, as an LSM module, it can be configured out. I think it's a good use
> > > of the LSM framework, and may be useful for people migrating to Linux from
> > > legacy Solaris and FreeBSD.
> >
> > Sure, but that's a bit speculative for adding a feature to the mainline
> > kernel.
>
> Which feature are you concerned over, the additional hook or the
> new module?

I am concerned about the presence of new code - simple as that.

We need to be able to demonstrate that the new code is sufficiently useful
to a sufficiently large number of people as to warrant the cost of
maintaining it in the tree for the rest of eternity.

> The module is a no-op for anybody who doesn't want it.

It still needs to be maintained.

> I can't vouch for the number of users of this module although I've seen
> some positive feedback from users. One nice bit is that it goes a way
> towards helping vserver which does have quite a few users.

Tell us more.

> This module
> really demonstrates one of the points of LSM...to support multiple
> security models.

Sure. But that doesn't mean that those modules have to live at kernel.org
rather than, say, at bsdjail.sourceforge.net.

2004-10-07 19:51:06

by Chris Wright

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

* Serge E. Hallyn ([email protected]) wrote:
> Attached is a new version of the bsdjail patch with the requested code
> cleanups applied.

I noticed Andrew picked this up in -mm3, but that he had to do some diff
cleanups (see the thread/rlim changes in his tree). If you'd like Andrew
to pick this up, it would be courteous to get the diff clean and
building against his tree.

> --- linux-2.6.9-rc3-bk6/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600
> +++ linux-2.6.9-rc3-bk6-jail/security/bsdjail.c 2004-10-07 11:30:21.000000000 -0500
> @@ -0,0 +1,1495 @@
> +/*
> + * File: linux/security/bsdjail.c
> + * Author: Serge Hallyn ([email protected])
> + * Date: Sep 12, 2004
> + *
> + * (See Documentation/bsdjail.txt for more information)
> + *
> + * Copyright (C) 2004 International Business Machines <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/config.h>
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/security.h>
> +#include <linux/namei.h>
> +#include <linux/namespace.h>
> +#include <linux/proc_fs.h>
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/pagemap.h>
> +#include <linux/ip.h>
> +#include <net/ipv6.h>
> +#include <linux/mount.h>
> +#include <asm/uaccess.h>
> +#include <linux/netdevice.h>
> +#include <linux/inetdevice.h>
> +#include <linux/seq_file.h>
> +#include <linux/un.h>
> +#include <linux/smp_lock.h>
> +#include <linux/kref.h>

asm/ includes after linux/

> +
> +static int jail_debug = 0;

unecessary assignment to 0.

> +MODULE_PARM(jail_debug, "i");

use module_param

> +MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n");
> +
> +#define DBG 0
> +#define WARN 1
> +#define bsdj_debug(how, fmt, arg... ) \
> + do { \
> + if ( how || jail_debug ) \
> + printk(KERN_NOTICE "%s: %s: " fmt, \
> + MY_NAME, __FUNCTION__, \

Andrew has cleanup here (__FUNCTION__ ,). I just use __func__, anyway.

> + ## arg ); \
> + } while ( 0 )
> +
> +#define MY_NAME "bsdjail"
> +
> +/* flag to keep track of how we were registered */
> +static int secondary = 0;

unecessary assignment to 0

> +/*
> + * The task structure holding jail information.
> + * Taskp->security points to one of these (or is null).
> + * There is exactly one jail_struct for each jail. If >1 process
> + * are in the same jail, they share the same jail_struct.
> + */
> +struct jail_struct {
> + struct kref kref;
> +
> + /* these are set on writes to /proc/<pid>/attr/exec */
> + char *root_pathname; /* char * containing path to use as jail / */
> + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
> + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */
> +
> + /* these are set when a jail becomes active */
> + __u32 addr4; /* internal form of ip4_addr_name */
> + struct in6_addr addr6; /* internal form of ip6_addr_name */
> +
> + struct dentry *dentry; /* dentry of fs root */
> + struct vfsmount *mnt; /* vfsmnt of fs root */
> +
> + /* Resource limits. 0 = no limit */
> + int max_nrtask; /* maximum number of tasks within this jail. */
> + int cur_nrtask; /* current number of tasks within this jail. */
> + long maxtimeslice; /* max timeslice in ms for procs in this jail */
> + long nice; /* nice level for processes in this jail */
> + long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */
> +/* values for the jail_flags field */
> +#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */
> +#define GOT_IPV4 2
> +#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */
> + char jail_flags;
> +};

Could go into header. Perhaps not needed if it's all there is, and it's
not shared anywhere though.

> +/*
> + * disable_jail: A jail which was in use, but has no references
> + * left, is disabled - we free up the mountpoint and dentry, and
> + * give up our reference on the module.
> + *
> + * don't need to put namespace, it will be done automatically
> + * when the last process in jail is put.
> + * DO need to put the dentry and vfsmount
> + */
> +static void
> +disable_jail(struct jail_struct *tsec)
> +{
> + dput(tsec->dentry);
> + mntput(tsec->mnt);
> + module_put(THIS_MODULE);
> +}
> +
> +
> +static void free_jail(struct jail_struct *tsec)
> +{
> + if (!tsec)
> + return;
> +
> + kfree(tsec->root_pathname);
> + kfree(tsec->ip4_addr_name);
> + kfree(tsec->ip6_addr_name);
> + kfree(tsec);
> +}
> +
> +/* release_jail:
> + * Callback for kref_put to use for releasing a jail when its
> + * last user exits.
> + */
> +static void release_jail(struct kref *kref)
> +{
> + struct jail_struct *tsec;
> +
> + tsec = container_of(kref, struct jail_struct, kref);
> + disable_jail(tsec);
> + free_jail(tsec);
> +}
> +
> +/*
> + * jail_task_free_security: this is the callback hooked into LSM.
> + * If there was no task->security field for bsdjail, do nothing.
> + * If there was, but it was never put into use, free the jail.
> + * If there was, and the jail is in use, then decrement the usage
> + * count, and disable and free the jail if the usage count hits 0.
> + */
> +static void jail_task_free_security(struct task_struct *task)
> +{
> + struct jail_struct *tsec;
> +
> + tsec = task->security;
> +
> + if (!tsec)
> + return;
> +
> + if (!(tsec->jail_flags & IN_USE)) {
> + /*
> + * someone did 'echo -n x > /proc/<pid>/attr/exec' but
> + * then forked before execing. Nuke the old info.
> + */
> + free_jail(tsec);
> + task->security = NULL;
> + return;
> + }
> + tsec->cur_nrtask--;
> + /* If this was the last process in the jail, delete the jail */
> + kref_put(&tsec->kref, release_jail);
> +}
> +
> +static struct jail_struct *
> +alloc_task_security(struct task_struct *tsk)
> +{
> + struct jail_struct *tsec;
> + tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL);
> + if (!tsec)
> + return ERR_PTR(-ENOMEM);

Just return NULL, that's expected norm, plus you're not using the error
anyway.

> + memset(tsec, 0, sizeof(struct jail_struct));
> + tsk->security = tsec;
> + return tsec;
> +}
> +
> +static inline int
> +in_jail(struct task_struct *t)
> +{
> + struct jail_struct *tsec = t->security;
> +
> + if (tsec && (tsec->jail_flags & IN_USE))
> + return 1;
> +
> + return 0;
> +}
> +
> +/*
> + * If a network address was passed into /proc/<pid>/attr/exec,
> + * then process in its jail will only be allowed to bind/listen
> + * to that address.
> + */
> +static void
> +setup_netaddress(struct jail_struct *tsec)
> +{
> + unsigned int a, b, c, d, i;
> + unsigned int x[8];
> +
> + tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6);
> + tsec->addr4 = 0;
> + ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0);
> +
> + if (tsec->ip4_addr_name) {
> + if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u",
> + &a, &b, &c, &d) != 4)
> + return;
> + if (a>255 || b>255 || c>255 || d>255)
> + return;
> + tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d);
> + tsec->jail_flags |= GOT_IPV4;
> + bsdj_debug(DBG, "Network (ipv4) set up (%s)\n",
> + tsec->ip4_addr_name);
> + }
> +
> + if (tsec->ip6_addr_name) {
> + if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x",
> + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6],
> + &x[7]) != 8) {
> + printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__,
> + tsec->ip6_addr_name);
> + return;
> + }
> + for (i=0; i<8; i++) {
> + if (x[i] > 65535) {
> + printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i);
> + return;
> + }
> + tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]);
> + }
> + tsec->jail_flags |= GOT_IPV6;
> + bsdj_debug(DBG, "Network (ipv6) set up (%s)\n",
> + tsec->ip6_addr_name);
> + }
> +}
> +
> +/*
> + * enable_jail:
> + * Called when a process is placed into a new jail to handle the
> + * actual creation of the jail.
> + * Creates namespace
> + * Sets process root+pwd
> + * Stores the requested ip address
> + * Registers a unique pseudo-proc filesystem for this jail
> + */
> +static int enable_jail(struct task_struct *tsk)
> +{
> + struct nameidata nd;
> + struct jail_struct *tsec = tsk->security;;
^^
generates compile error, kill the extra semi-colon

> + int retval = -EFAULT;
> +
> + if (!tsec || !tsec->root_pathname)
> + goto out;
> +
> + /*
> + * USE_JAIL_NAMESPACE: could be useful, so that future mounts outside
> + * the jail don't affect the jail. But it's not necessary, and
> + * requires exporting copy_namespace from fs/namespace.c
> + *
> + * Actually, it would also be useful for truly hiding
> + * information about mounts which do not exist in this jail.
> +#define USE_JAIL_NAMESPACE
> + */
> +#ifdef USE_JAIL_NAMESPACE
> + bsdj_debug(DBG, "bsdjail: copying namespace.\n");
> + retval = -EPERM;
> + if (copy_namespace(CLONE_NEWNS, tsk))
> + goto out;
> + bsdj_debug(DBG, "bsdjail: copied namespace.\n");
> +#endif
> +
> + /* find our new root directory */
> + bsdj_debug(DBG, "bsdjail: looking up %s\n", tsec->root_pathname);
> + retval = path_lookup(tsec->root_pathname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
> + if (retval)
> + goto out;
> +
> + bsdj_debug(DBG, "bsdjail: got %s, setting root to it\n", tsec->root_pathname);
> +
> + /* and set the fsroot to it */
> + set_fs_root(tsk->fs, nd.mnt, nd.dentry);
> + set_fs_pwd(tsk->fs, nd.mnt, nd.dentry);
> +
> + bsdj_debug(DBG, "bsdjail: root has been set. Have fun.\n");
> +
> + /* set up networking */
> + if (tsec->ip4_addr_name || tsec->ip6_addr_name)
> + setup_netaddress(tsec);
> +
> + tsec->cur_nrtask = 1;
> + if (tsec->nice)
> + set_user_nice(current, tsec->nice);
> + if (tsec->max_data) {
> + current->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data;
> + current->rlim[RLIMIT_DATA].rlim_max = tsec->max_data;
> + }
> + if (tsec->max_memlock) {
> + current->rlim[RLIMIT_MEMLOCK].rlim_cur = tsec->max_memlock;
> + current->rlim[RLIMIT_MEMLOCK].rlim_max = tsec->max_memlock;
> + }
> + if (tsec->maxtimeslice) {
> + current->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
> + current->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
> + }
> + /* success and end */
> + tsec->mnt = mntget(nd.mnt);
> + tsec->dentry = dget(nd.dentry);
> + path_release(&nd);
> + kref_init(&tsec->kref);
> + tsec->jail_flags |= IN_USE;
> +
> + /* won't let ourselves be removed until this jail goes away */
> + try_module_get(THIS_MODULE);
> +
> + return 0;
> +
> +out:
> + return retval;
> +}
> +
> +/*
> + * LSM /proc/<pid>/attr hooks.
> + * You may write into /proc/<pid>/attr/exec:
> + * root /some/path
> + * ip 2.2.2.2
> + * These values will be used on the next exec() to set up your jail
> + * (assuming you're not already in a jail)
> + */
> +static int
> +jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
> +{
> + struct jail_struct *tsec = current->security;
> + long val;
> + int start, len;
> +
> + if (tsec && (tsec->jail_flags & IN_USE))
> + return -EINVAL; /* let them guess why */
> +
> + if (p != current || strcmp(name, "exec"))
> + return -EPERM;
> +
> + if (strncmp(value, "root ", 5) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;

I think encoding error, testing error, then returning hardcoded error is
wasteful. I'd change alloc_task_security api to return NULL on ENOMEM.

> +
> + if (tsec->root_pathname)
> + kfree(tsec->root_pathname);
> + start = 5;
> + len = size-start;
> + tsec->root_pathname = kmalloc(len+1, GFP_KERNEL);
> + if (!tsec->root_pathname)
> + return -ENOMEM;
> + strlcpy(tsec->root_pathname, value+start, len+1);
> + } else if (strncmp(value, "ip ", 3) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + if (tsec->ip4_addr_name)
> + kfree(tsec->ip4_addr_name);
> + start = 3;
> + len = size-start;
> + tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL);
> + if (!tsec->ip4_addr_name)
> + return -ENOMEM;
> + strlcpy(tsec->ip4_addr_name, value+start, len+1);
> + } else if (strncmp(value, "ip6 ", 4) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + if (tsec->ip6_addr_name)
> + kfree(tsec->ip6_addr_name);
> + start = 4;
> + len = size-start;
> + tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL);
> + if (!tsec->ip6_addr_name)
> + return -ENOMEM;
> + strlcpy(tsec->ip6_addr_name, value+start, len+1);
> +
> + /* the next two are equivalent */
> + } else if (strncmp(value, "slice ", 6) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = simple_strtoul(value+6, NULL, 0);
> + tsec->maxtimeslice = val;
> + } else if (strncmp(value, "timeslice ", 10) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = simple_strtoul(value+10, NULL, 0);
> + tsec->maxtimeslice = val;
> + } else if (strncmp(value, "nrtask ", 7) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = (int) simple_strtol(value+7, NULL, 0);
> + if (val < 1)
> + return -EINVAL;
> + tsec->max_nrtask = val;
> + } else if (strncmp(value, "memlock ", 8) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = simple_strtoul(value+8, NULL, 0);
> + tsec->max_memlock = val;
> + } else if (strncmp(value, "data ", 5) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = simple_strtoul(value+5, NULL, 0);
> + tsec->max_data = val;
> + } else if (strncmp(value, "nice ", 5) == 0) {
> + if (!tsec)
> + tsec = alloc_task_security(current);
> + if (IS_ERR(tsec))
> + return -ENOMEM;
> +
> + val = simple_strtoul(value+5, NULL, 0);
> + tsec->nice = val;
> + } else
> + return -EINVAL;

Do you need all those alloc_task_security's in there? Why not just one
at the top? And are you convinced there's no leak on the other kmalloc
failures?

more after lunch.
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2004-10-07 20:08:31

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

Chris Wright <[email protected]> wrote:
>
> * Serge E. Hallyn ([email protected]) wrote:
> > Attached is a new version of the bsdjail patch with the requested code
> > cleanups applied.
>
> I noticed Andrew picked this up in -mm3, but that he had to do some diff
> cleanups (see the thread/rlim changes in his tree). If you'd like Andrew
> to pick this up, it would be courteous to get the diff clean and
> building against his tree.

Nah, that's OK. I can drop the old patch and pick up the new.

It's only when code is settling down into a final state that I get upset
about wholesale replacements. Even then I'll just feed it through
interdiff.

> Andrew has cleanup here (__FUNCTION__ ,). I just use __func__, anyway.

That's a workaround for the gcc-2.95 pasting bug.

__FUNCTION__ is preferred, actually. Just for consistency, and so the
compiler will spit it out if someone tries to do compile-time string
concatenation with it.

2004-10-07 21:21:19

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

> * Andrew Morton ([email protected]) wrote:
> > Chris Wright <[email protected]> wrote:
> > > * Andrew Morton ([email protected]) wrote:
> > > Which feature are you concerned over, the additional hook or the
> > > new module?
> >
> > I am concerned about the presence of new code - simple as that.
>
> Understood.

We do have time allotted for maintenance of LSMs, so not only am I
interested in maintaining bsdjail on my own, but I don't even have to do
it in my free time :)

> > We need to be able to demonstrate that the new code is sufficiently useful
> > to a sufficiently large number of people as to warrant the cost of
> > maintaining it in the tree for the rest of eternity.
>
> That's fine. Serge, can you enlighten us with an idea of the users of
> this code?

I am "just a developer", and don't have ready access to any marketers.
There was no customer demand which we were addressing. We just saw it
as a very useful feature easy to implement. Some people have privately
expressed interested in the patch over the last few months as I've been
sending out patches. And as Chris has mentioned, the vserver community
appears to be thriving, and should be partially (though by no means
fully!) served by this module. If nothing else it should reduce the
size of the patch they need to maintain.

I wish I had a better answer...

-serge

2004-10-08 18:21:44

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

The attached patch is against -mm3, and includes the suggestions Chris
last sent out.

> I think encoding error, testing error, then returning hardcoded error is
> wasteful. I'd change alloc_task_security api to return NULL on ENOMEM.
...
> Do you need all those alloc_task_security's in there? Why not just one
> at the top?

Good points - cleaned these up.

> And are you convinced there's no leak on the other kmalloc
> failures?

Yes, they each get freed if this function is called again on the same
item, and they get freed when the task closes. Unless I'm missing
something...

thanks,
-serge

Changelog:
Sep 10, 2004: original version
Sep 12, 2004: add ipv6 support
Sep 13, 2004: support simultaneous ipv4+ipv6
Oct 6, 2004: move kref release function to kref_put from kref_init
Oct 7, 2004: requested code cleanups (mainly nix #defines)
Oct 8, 2004: more cleanups.

Signed-Off-By: Serge E. Hallyn <[email protected]>

diff -Nrup linux-2.6.9-rc3-mm3/security/bsdjail.c linux-2.6.9-rc3-mm3-jail/security/bsdjail.c
--- linux-2.6.9-rc3-mm3/security/bsdjail.c 2004-10-08 13:56:38.851128096 -0500
+++ linux-2.6.9-rc3-mm3-jail/security/bsdjail.c 2004-10-08 12:59:41.000000000 -0500
@@ -27,16 +27,16 @@
#include <linux/ip.h>
#include <net/ipv6.h>
#include <linux/mount.h>
-#include <asm/uaccess.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/seq_file.h>
#include <linux/un.h>
#include <linux/smp_lock.h>
#include <linux/kref.h>
+#include <asm/uaccess.h>

-static int jail_debug = 0;
-MODULE_PARM(jail_debug, "i");
+static int jail_debug;
+module_param(jail_debug, int, 0);
MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n");

#define DBG 0
@@ -52,7 +52,7 @@ MODULE_PARM_DESC(jail_debug, "Print bsd
#define MY_NAME "bsdjail"

/* flag to keep track of how we were registered */
-static int secondary = 0;
+static int secondary;

/*
* The task structure holding jail information.
@@ -80,7 +80,7 @@ struct jail_struct {
int cur_nrtask; /* current number of tasks within this jail. */
long maxtimeslice; /* max timeslice in ms for procs in this jail */
long nice; /* nice level for processes in this jail */
- long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */
+ long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */
/* values for the jail_flags field */
#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */
#define GOT_IPV4 2
@@ -88,29 +88,6 @@ struct jail_struct {
char jail_flags;
};

-#define in_use(x) (x->jail_flags & IN_USE)
-#define set_in_use(x) (x->jail_flags |= IN_USE)
-
-#define got_network(x) (x->jail_flags & (GOT_IPV4 | GOT_IPV6))
-#define got_ipv4(x) (x->jail_flags & (GOT_IPV4))
-#define got_ipv6(x) (x->jail_flags & (GOT_IPV6))
-#define set_ipv4(x) (x->jail_flags |= GOT_IPV4)
-#define set_ipv6(x) (x->jail_flags |= GOT_IPV6)
-#define unset_got_ipv4(x) (x->jail_flags &= ~GOT_IPV4)
-#define unset_got_ipv6(x) (x->jail_flags &= ~GOT_IPV6)
-
-/*
- * structs, defines, and functions to cope with stacking
- */
-
-#define get_task_security(task) (task->security)
-#define get_inode_security(inode) (inode->i_security)
-#define get_sock_security(sock) (sock->sk_security)
-#define get_file_security(file) (file->f_security)
-#define get_ipc_security(ipc) (ipc->security)
-
-#define jail_of(proc) (get_task_security(proc))
-
/*
* disable_jail: A jail which was in use, but has no references
* left, is disabled - we free up the mountpoint and dentry, and
@@ -134,12 +111,9 @@ static void free_jail(struct jail_struct
if (!tsec)
return;

- if (tsec->root_pathname)
- kfree(tsec->root_pathname);
- if (tsec->ip4_addr_name)
- kfree(tsec->ip4_addr_name);
- if (tsec->ip6_addr_name)
- kfree(tsec->ip6_addr_name);
+ kfree(tsec->root_pathname);
+ kfree(tsec->ip4_addr_name);
+ kfree(tsec->ip6_addr_name);
kfree(tsec);
}

@@ -151,17 +125,11 @@ static void release_jail(struct kref *kr
{
struct jail_struct *tsec;

- tsec = container_of(kref,struct jail_struct,kref);
+ tsec = container_of(kref, struct jail_struct, kref);
disable_jail(tsec);
free_jail(tsec);
}

-#define set_task_security(task,data) task->security = data
-#define set_inode_security(inode,data) inode->i_security = data
-#define set_sock_security(sock,data) sock->sk_security = data
-#define set_file_security(file,data) file->f_security = data
-#define set_ipc_security(ipc,data) ipc.security = data
-
/*
* jail_task_free_security: this is the callback hooked into LSM.
* If there was no task->security field for bsdjail, do nothing.
@@ -171,20 +139,18 @@ static void release_jail(struct kref *kr
*/
static void jail_task_free_security(struct task_struct *task)
{
- struct jail_struct *tsec;
-
- tsec = get_task_security(task);
+ struct jail_struct *tsec = task->security;

if (!tsec)
return;

- if (!in_use(tsec)) {
+ if (!(tsec->jail_flags & IN_USE)) {
/*
* someone did 'echo -n x > /proc/<pid>/attr/exec' but
* then forked before execing. Nuke the old info.
*/
free_jail(tsec);
- set_task_security(task,NULL);
+ task->security = NULL;
return;
}
tsec->cur_nrtask--;
@@ -196,20 +162,21 @@ static struct jail_struct *
alloc_task_security(struct task_struct *tsk)
{
struct jail_struct *tsec;
+
tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL);
- if (!tsec)
- return ERR_PTR(-ENOMEM);
- memset(tsec, 0, sizeof(struct jail_struct));
- set_task_security(tsk, tsec);
+ if (tsec) {
+ memset(tsec, 0, sizeof(struct jail_struct));
+ tsk->security = tsec;
+ }
return tsec;
}

static inline int
in_jail(struct task_struct *t)
{
- struct jail_struct *tsec = jail_of(t);
+ struct jail_struct *tsec = t->security;

- if (tsec && in_use(tsec))
+ if (tsec && (tsec->jail_flags & IN_USE))
return 1;

return 0;
@@ -223,27 +190,27 @@ in_jail(struct task_struct *t)
static void
setup_netaddress(struct jail_struct *tsec)
{
- unsigned int a,b,c,d, i;
+ unsigned int a, b, c, d, i;
unsigned int x[8];

- unset_got_ipv4(tsec);
+ tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6);
tsec->addr4 = 0;
- unset_got_ipv6(tsec);
ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0);

if (tsec->ip4_addr_name) {
- if (sscanf(tsec->ip4_addr_name,"%u.%u.%u.%u",&a,&b,&c,&d)!=4)
+ if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u",
+ &a, &b, &c, &d) != 4)
return;
if (a>255 || b>255 || c>255 || d>255)
return;
- tsec->addr4 = htonl((a<<24)|(b<<16)|(c<<8)|d);
- set_ipv4(tsec);
+ tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d);
+ tsec->jail_flags |= GOT_IPV4;
bsdj_debug(DBG, "Network (ipv4) set up (%s)\n",
tsec->ip4_addr_name);
}

if (tsec->ip6_addr_name) {
- if (sscanf(tsec->ip6_addr_name,"%x:%x:%x:%x:%x:%x:%x:%x",
+ if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x",
&x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6],
&x[7]) != 8) {
printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__,
@@ -257,7 +224,7 @@ setup_netaddress(struct jail_struct *tse
}
tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]);
}
- set_ipv6(tsec);
+ tsec->jail_flags |= GOT_IPV6;
bsdj_debug(DBG, "Network (ipv6) set up (%s)\n",
tsec->ip6_addr_name);
}
@@ -275,10 +242,9 @@ setup_netaddress(struct jail_struct *tse
static int enable_jail(struct task_struct *tsk)
{
struct nameidata nd;
- struct jail_struct *tsec;
+ struct jail_struct *tsec = tsk->security;
int retval = -EFAULT;

- tsec = jail_of(tsk);
if (!tsec || !tsec->root_pathname)
goto out;

@@ -339,7 +305,7 @@ static int enable_jail(struct task_struc
tsec->dentry = dget(nd.dentry);
path_release(&nd);
kref_init(&tsec->kref);
- set_in_use(tsec);
+ tsec->jail_flags |= IN_USE;

/* won't let ourselves be removed until this jail goes away */
try_module_get(THIS_MODULE);
@@ -361,108 +327,66 @@ out:
static int
jail_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;
long val;
int start, len;

- if (tsec && in_use(tsec))
+ if (tsec && (tsec->jail_flags & IN_USE))
return -EINVAL; /* let them guess why */

if (p != current || strcmp(name, "exec"))
return -EPERM;

- if (strncmp(value, "root ", 5)==0) {
+ if (!tsec) {
+ tsec = alloc_task_security(current);
if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
return -ENOMEM;
+ }

- if (tsec->root_pathname)
- kfree(tsec->root_pathname);
+ if (strncmp(value, "root ", 5) == 0) {
+ kfree(tsec->root_pathname);
start = 5;
- len = size-start;
- tsec->root_pathname = kmalloc(len+1, GFP_KERNEL);
+ len = size - start + 1;
+ tsec->root_pathname = kmalloc(len, GFP_KERNEL);
if (!tsec->root_pathname)
return -ENOMEM;
- strlcpy(tsec->root_pathname, value+start, len+1);
- } else if (strncmp(value, "ip ", 3)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
- if (tsec->ip4_addr_name)
- kfree(tsec->ip4_addr_name);
+ strlcpy(tsec->root_pathname, value+start, len);
+ } else if (strncmp(value, "ip ", 3) == 0) {
+ kfree(tsec->ip4_addr_name);
start = 3;
- len = size-start;
- tsec->ip4_addr_name = kmalloc(len+1, GFP_KERNEL);
+ len = size - start + 1;
+ tsec->ip4_addr_name = kmalloc(len, GFP_KERNEL);
if (!tsec->ip4_addr_name)
return -ENOMEM;
- strlcpy(tsec->ip4_addr_name, value+start, len+1);
+ strlcpy(tsec->ip4_addr_name, value+start, len);
} else if (strncmp(value, "ip6 ", 4) == 0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
- if (tsec->ip6_addr_name)
- kfree(tsec->ip6_addr_name);
+ kfree(tsec->ip6_addr_name);
start = 4;
- len = size-start;
- tsec->ip6_addr_name = kmalloc(len+1, GFP_KERNEL);
+ len = size - start + 1;
+ tsec->ip6_addr_name = kmalloc(len, GFP_KERNEL);
if (!tsec->ip6_addr_name)
return -ENOMEM;
- strlcpy(tsec->ip6_addr_name, value+start, len+1);
+ strlcpy(tsec->ip6_addr_name, value+start, len);

/* the next two are equivalent */
- } else if (strncmp(value, "slice ", 6)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "slice ", 6) == 0) {
val = simple_strtoul(value+6, NULL, 0);
tsec->maxtimeslice = val;
- } else if (strncmp(value, "timeslice ", 10)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "timeslice ", 10) == 0) {
val = simple_strtoul(value+10, NULL, 0);
tsec->maxtimeslice = val;
- } else if (strncmp(value, "nrtask ", 7)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "nrtask ", 7) == 0) {
val = (int) simple_strtol(value+7, NULL, 0);
if (val < 1)
return -EINVAL;
tsec->max_nrtask = val;
- } else if (strncmp(value, "memlock ", 8)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "memlock ", 8) == 0) {
val = simple_strtoul(value+8, NULL, 0);
tsec->max_memlock = val;
- } else if (strncmp(value, "data ", 5)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "data ", 5) == 0) {
val = simple_strtoul(value+5, NULL, 0);
tsec->max_data = val;
- } else if (strncmp(value, "nice ", 5)==0) {
- if (!tsec)
- tsec = alloc_task_security(current);
- if (IS_ERR(tsec))
- return -ENOMEM;
-
+ } else if (strncmp(value, "nice ", 5) == 0) {
val = simple_strtoul(value+5, NULL, 0);
tsec->nice = val;
} else
@@ -510,9 +434,9 @@ jail_getprocattr(struct task_struct *p,
int err = 0;

if (in_jail(current)) {
- if (strcmp(name, "current")==0) {
+ if (strcmp(name, "current") == 0) {
/* provide network info */
- err = print_jail_net_info(jail_of(current), value,
+ err = print_jail_net_info(current->security, value,
size);
return err;
}
@@ -537,8 +461,8 @@ jail_getprocattr(struct task_struct *p,
if (strcmp(name, "current"))
return -EPERM;

- tsec = jail_of(p);
- if (!tsec || !in_use(tsec)) {
+ tsec = p->security;
+ if (!tsec || !(tsec->jail_flags & IN_USE)) {
err = snprintf(value, size, "Not Jailed\n");
} else {
err = snprintf(value, size,
@@ -570,16 +494,12 @@ jail_file_send_sigiotask(struct task_str
int fd, int reason)
{
struct file *file;
- struct jail_struct *tsec, *fsec;

if (!in_jail(current))
return 0;

- file = (struct file *)((long)fown - offsetof(struct file,f_owner));
- tsec = jail_of(tsk);
- fsec = get_file_security(file);
-
- if (fsec != tsec)
+ file = (struct file *) ((long)fown - offsetof(struct file, f_owner));
+ if (file->f_security != tsk->security)
return -EPERM;

return 0;
@@ -590,8 +510,8 @@ jail_file_set_fowner(struct file *file)
{
struct jail_struct *tsec;

- tsec = jail_of(current);
- set_file_security(file, tsec);
+ tsec = current->security;
+ file->f_security = tsec;
if (tsec)
kref_get(&tsec->kref);

@@ -602,33 +522,33 @@ static void free_ipc_security(struct ker
{
struct jail_struct *tsec;

- tsec = get_ipc_security(ipc);
+ tsec = ipc->security;
if (!tsec)
return;
kref_put(&tsec->kref, release_jail);
- set_ipc_security((*ipc), NULL);
+ ipc->security = NULL;
}

static void free_file_security(struct file *file)
{
struct jail_struct *tsec;

- tsec = get_file_security(file);
+ tsec = file->f_security;
if (!tsec)
return;
kref_put(&tsec->kref, release_jail);
- set_file_security(file, NULL);
+ file->f_security = NULL;
}

static void free_inode_security(struct inode *inode)
{
struct jail_struct *tsec;

- tsec = get_inode_security(inode);
+ tsec = inode->i_security;
if (!tsec)
return;
kref_put(&tsec->kref, release_jail);
- set_inode_security(inode, NULL);
+ inode->i_security = NULL;
}

/*
@@ -638,10 +558,10 @@ static void free_inode_security(struct i
static int
jail_ptrace (struct task_struct *tracer, struct task_struct *tracee)
{
- struct jail_struct *tsec = jail_of(tracer);
+ struct jail_struct *tsec = tracer->security;

- if (tsec && in_use(tsec)) {
- if (tsec == jail_of(tracee))
+ if (tsec && (tsec->jail_flags & IN_USE)) {
+ if (tsec == tracee->security)
return 0;
return -EPERM;
}
@@ -664,10 +584,10 @@ static inline int jail_inet4_bind(struct
struct sockaddr_in *inaddr;
__u32 sin_addr, jailaddr;

- if (!got_ipv4(tsec))
+ if (!(tsec->jail_flags & GOT_IPV4))
return -EPERM;

- inaddr = (struct sockaddr_in *)address;
+ inaddr = (struct sockaddr_in *) address;
sin_addr = inaddr->sin_addr.s_addr;
jailaddr = tsec->addr4;

@@ -692,17 +612,17 @@ jail_inet6_bind(struct socket *sock, str
struct sockaddr_in6 *inaddr6;
struct in6_addr *sin6_addr, *jailaddr;

- if (!got_ipv6(tsec))
+ if (!(tsec->jail_flags & GOT_IPV6))
return -EPERM;

- inaddr6 = (struct sockaddr_in6 *)address;
+ inaddr6 = (struct sockaddr_in6 *) address;
sin6_addr = &inaddr6->sin6_addr;
jailaddr = &tsec->addr6;

- if (ipv6_addr_cmp(jailaddr, sin6_addr)==0)
+ if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0)
return 0;

- if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback)==0) {
+ if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) {
ipv6_addr_copy(sin6_addr, jailaddr);
return 0;
}
@@ -720,15 +640,15 @@ jail_inet6_bind(struct socket *sock, str
static int
jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

if (sock->sk->sk_family == AF_UNIX)
return jail_socket_unix_bind(sock, address, addrlen);

- if (!got_network(tsec))
+ if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
/* If we want to be strict, we could just
* deny net access when lacking a pseudo ip.
* For now we just allow it. */
@@ -752,18 +672,19 @@ jail_socket_bind(struct socket *sock, st
static int
jail_socket_create(int family, int type, int protocol, int kern)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec) || kern || !got_network(tsec))
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
return 0;

switch(family) {
case AF_INET:
- if (got_ipv4(tsec))
+ if (tsec->jail_flags & GOT_IPV4)
return 0;
return -EPERM;
case AF_INET6:
- if (got_ipv6(tsec))
+ if (tsec->jail_flags & GOT_IPV6)
return 0;
return -EPERM;
default:
@@ -779,9 +700,10 @@ jail_socket_post_create(struct socket *s
{
struct inet_opt *inet;
struct ipv6_pinfo *inet6;
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec) || kern || !got_network(tsec))
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
return;

switch(family) {
@@ -805,9 +727,10 @@ jail_socket_listen(struct socket *sock,
{
struct inet_opt *inet;
struct ipv6_pinfo *inet6;
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec) || !got_network(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
return 0;

switch (sock->sk->sk_family) {
@@ -819,7 +742,7 @@ jail_socket_listen(struct socket *sock,

case AF_INET6:
inet6 = inet6_sk(sock->sk);
- if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6)==0)
+ if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0)
return 0;
return -EPERM;

@@ -833,11 +756,11 @@ static void free_sock_security(struct so
{
struct jail_struct *tsec;

- tsec = get_sock_security(sk);
+ tsec = sk->sk_security;
if (!tsec)
return;
kref_put(&tsec->kref, release_jail);
- set_sock_security(sk, NULL);
+ sk->sk_security = NULL;
}

/*
@@ -854,12 +777,12 @@ jail_socket_unix_bind(struct socket *soc
if (sock->sk->sk_family != AF_UNIX)
return 0;

- sunaddr = (struct sockaddr_un *)address;
+ sunaddr = (struct sockaddr_un *) address;
if (sunaddr->sun_path[0] != 0)
return 0;

- tsec = jail_of(current);
- set_sock_security(sock->sk, tsec);
+ tsec = current->security;
+ sock->sk->sk_security = tsec;
if (tsec)
kref_get(&tsec->kref);
return 0;
@@ -874,8 +797,8 @@ jail_socket_unix_may_send(struct socket
{
struct jail_struct *tsec, *ssec;

- tsec = jail_of(current); /* jail of sending process */
- ssec = get_sock_security(other->sk); /* jail of receiver */
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */

if (tsec != ssec)
return -EPERM;
@@ -889,8 +812,8 @@ jail_socket_unix_stream_connect(struct s
{
struct jail_struct *tsec, *ssec;

- tsec = jail_of(current); /* jail of sending process */
- ssec = get_sock_security(other->sk); /* jail of receiver */
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */

if (tsec != ssec)
return -EPERM;
@@ -953,9 +876,9 @@ jail_capable (struct task_struct *tsk, i
static inline int
jail_security_task_create (unsigned long clone_flags)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask)
@@ -969,12 +892,12 @@ jail_security_task_create (unsigned long
static int
jail_task_alloc_security(struct task_struct *tsk)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

- set_task_security(tsk, tsec);
+ tsk->security = tsec;
kref_get(&tsec->kref);
tsec->cur_nrtask++;
if (tsec->maxtimeslice) {
@@ -998,14 +921,13 @@ jail_task_alloc_security(struct task_str
static int
jail_bprm_alloc_security(struct linux_binprm *bprm)
{
- struct jail_struct *tsec;
+ struct jail_struct *tsec = current->security;
int ret;

- tsec = jail_of(current);
if (!tsec)
return 0;

- if (in_use(tsec))
+ if (tsec->jail_flags & IN_USE)
return 0;

if (tsec->root_pathname) {
@@ -1073,23 +995,23 @@ static int
jail_proc_inode_permission(struct inode *inode, int mask,
struct nameidata *nd)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;
struct dentry *dentry = nd->dentry;
unsigned pid;

pid = name_to_int(dentry);
if (pid == ~0U) {
struct qstr *dname = &dentry->d_name;
- if (strcmp(dname->name, "scsi")==0 ||
- strcmp(dname->name, "sys")==0 ||
- strcmp(dname->name, "ide")==0)
+ if (strcmp(dname->name, "scsi") == 0 ||
+ strcmp(dname->name, "sys") == 0 ||
+ strcmp(dname->name, "ide") == 0)
return -EPERM;
return 0;
}

if (dentry->d_parent != dentry->d_sb->s_root)
return 0;
- if (get_inode_security(inode) != tsec)
+ if (inode->i_security != tsec)
return -ENOENT;

return 0;
@@ -1132,11 +1054,11 @@ is_jailroot_parent(struct dentry *candid
*/
static int jail_task_lookup(struct task_struct *p)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

if (!tsec)
return 0;
- if (tsec == jail_of(p))
+ if (tsec == p->security)
return 0;
return -EPERM;
}
@@ -1146,14 +1068,14 @@ static int jail_task_lookup(struct task_
*/
static void jail_task_to_inode(struct task_struct *p, struct inode *inode)
{
- struct jail_struct *tsec = jail_of(p);
+ struct jail_struct *tsec = p->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return;
- if (get_inode_security(inode))
+ if (inode->i_security)
return;
kref_get(&tsec->kref);
- set_inode_security(inode, tsec);
+ inode->i_security = tsec;
}

/*
@@ -1167,16 +1089,16 @@ static int
jail_inode_permission(struct inode *inode, int mask,
struct nameidata *nd)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

if (!nd)
return 0;

if (nd->dentry &&
- strcmp(nd->dentry->d_sb->s_type->name, "proc")==0) {
+ strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) {
return jail_proc_inode_permission(inode, mask, nd);

}
@@ -1187,10 +1109,10 @@ jail_inode_permission(struct inode *inod
return 0;

if (is_jailroot_parent(nd->dentry, tsec->dentry, tsec->mnt)) {
- bsdj_debug(WARN,"Attempt to chdir(..) out of jail!\n"
- "(%s is a subdir of %s)\n",
- tsec->dentry->d_name.name,
- nd->dentry->d_name.name);
+ bsdj_debug(WARN, "Attempt to chdir(..) out of jail!\n"
+ "(%s is a subdir of %s)\n",
+ tsec->dentry->d_name.name,
+ nd->dentry->d_name.name);
return -EPERM;
}

@@ -1204,18 +1126,18 @@ jail_inode_permission(struct inode *inod
static inline int
generic_procpid_check(struct dentry *dentry)
{
- struct jail_struct *jail = jail_of(current);
+ struct jail_struct *jail = current->security;
unsigned pid = name_to_int(dentry);

- if (!jail || !in_use(jail))
+ if (!jail || !(jail->jail_flags & IN_USE))
return 0;
if (pid == ~0U)
return 0;
- if (strcmp(dentry->d_sb->s_type->name, "proc")!=0)
+ if (strcmp(dentry->d_sb->s_type->name, "proc") != 0)
return 0;
if (dentry->d_parent != dentry->d_sb->s_root)
return 0;
- if (get_inode_security(dentry->d_inode) != jail)
+ if (dentry->d_inode->i_security != jail)
return -ENOENT;
return 0;
}
@@ -1241,12 +1163,12 @@ jail_inode_getxattr(struct dentry *dentr
static int
jail_task_kill(struct task_struct *p, struct siginfo *info, int sig)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

- if (tsec == jail_of(p))
+ if (tsec == p->security)
return 0;

if (sig==SIGCHLD)
@@ -1283,12 +1205,12 @@ static int jail_task_setscheduler (struc
static inline int
basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
{
- struct jail_struct *tsec = jail_of(target);
+ struct jail_struct *tsec = target->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;

- if (get_ipc_security(p) != tsec)
+ if (p->security != tsec)
return -EPERM;

return 0;
@@ -1303,11 +1225,11 @@ jail_ipc_permission(struct kern_ipc_perm
static int
jail_shm_alloc_security (struct shmid_kernel *shp)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;
- set_ipc_security(shp->shm_perm, tsec);
+ shp->shm_perm.security = tsec;
kref_get(&tsec->kref);
return 0;
}
@@ -1342,11 +1264,11 @@ jail_shm_shmat(struct shmid_kernel *shp,
static int
jail_msg_queue_alloc(struct msg_queue *msq)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;
- set_ipc_security(msq->q_perm, tsec);
+ msq->q_perm.security = tsec;
kref_get(&tsec->kref);
return 0;
}
@@ -1388,11 +1310,11 @@ jail_msg_queue_msgrcv(struct msg_queue *
static int
jail_sem_alloc_security(struct sem_array *sma)
{
- struct jail_struct *tsec = jail_of(current);
+ struct jail_struct *tsec = current->security;

- if (!tsec || !in_use(tsec))
+ if (!tsec || !(tsec->jail_flags & IN_USE))
return 0;
- set_ipc_security(sma->sem_perm, tsec);
+ sma->sem_perm.security = tsec;
kref_get(&tsec->kref);
return 0;
}

2004-10-10 06:24:21

by Herbert Poetzl

[permalink] [raw]
Subject: Re: [patch 1/3] lsm: add bsdjail module

On Thu, Oct 07, 2004 at 11:52:40AM -0700, Chris Wright wrote:
> * Andrew Morton ([email protected]) wrote:
> > Chris Wright <[email protected]> wrote:
> > > * Andrew Morton ([email protected]) wrote:
> > > Which feature are you concerned over, the additional hook or the
> > > new module?
> >
> > I am concerned about the presence of new code - simple as that.
>
> Understood.
>
> > We need to be able to demonstrate that the new code is sufficiently useful
> > to a sufficiently large number of people as to warrant the cost of
> > maintaining it in the tree for the rest of eternity.
>
> That's fine. Serge, can you enlighten us with an idea of the users of
> this code?
>
> > > The module is a no-op for anybody who doesn't want it.
> >
> > It still needs to be maintained.
>
> Absolutely.
>
> > > I can't vouch for the number of users of this module although I've seen
> > > some positive feedback from users. One nice bit is that it goes a way
> > > towards helping vserver which does have quite a few users.
> >
> > Tell us more.
>
> One portion of the vserver project (that which has to do with security
> and isolation) could be largely covered by this work. And vserver
> is an active project with many users AFAICT. The vserver maintainer
> has expressed some interest in this as well. The other portion of the
> project, which does the resource limiting has a decent chance of working
> well with something like CKRM or similar.

well, as 'the vserver project' probably means the
'linux-vserver project', I would like to point out
why and where the bsdjail LSM, in it's current form
is flawed from the linux-vserver point of view ...

Serge, don't get me wrong, this is neither against
you nor against the bsdjail LSM, which I consider
an interesting approach, and I'm still confident
that we find some way of cooperation ...

(copied the jail struct here to comment it)

| struct jail_struct {
| struct kref kref;
|
| /* these are set on writes to /proc/<pid>/attr/exec */
| char *root_pathname; /* char * containing path to use as jail / */

linux-vserver uses namespaces to create the vservers,
only the legacy method uses a simple chroot() to
setup the vserver environment ...

| char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
| char *ip6_addr_name; /* char * containing ip6 addr to use for jail */

linux-vserver is slowly moving from chbind (which
restricts a process and it's children to a set of
IPs to an iptable (marking) based approach, which
is much more flexible

| /* these are set when a jail becomes active */
| __u32 addr4; /* internal form of ip4_addr_name */
| struct in6_addr addr6; /* internal form of ip6_addr_name */

up to 16 addresses are currently allowed in this set
in the future the limit will go away (network code is
actually the oldest piece) by using 'markings'
(network is virtualized to allow binding to 0.0.0.0)

| struct dentry *dentry; /* dentry of fs root */
| struct vfsmount *mnt; /* vfsmnt of fs root */

| /* Resource limits. 0 = no limit */
| int max_nrtask; /* maximum number of tasks within this jail. */
| int cur_nrtask; /* current number of tasks within this jail. */

linux-vserver already has a nice and usable resource
management system for most resources, supporting
much more limits than those ...

| long maxtimeslice; /* max timeslice in ms for procs in this jail */
| long nice; /* nice level for processes in this jail */
| long max_data, max_memlock; /* equivalent to RLIMIT_{DATA,MEMLOCK} */

the resource limitations should not be part of a
security module, and the scheduler slice would be
a step in the wrong redirection, as linux-vserver
already uses token buckets to control the scheduler

| char jail_flags;
|};

also many distributions (and distribution hosting
is _the_ main application area for linux-vserver)
require the 'jail' to be as similar as possible
to a real host (like a separate init process, or
the ability to renice services) so some of the
'features' of that LSM are contra productive here
not to mention that linux-vserver's security is
mainly based on linux capabilities which are not
handled by this LSM at all ...

aside from that, without the notion of a security
context, which can be controlled and entered from
outside (the host) the 'jail' can not be used for
typical hosting purposes

> > > This module
> > > really demonstrates one of the points of LSM...to support multiple
> > > security models.
> >
> > Sure. But that doesn't mean that those modules have to live at kernel.org
> > rather than, say, at bsdjail.sourceforge.net.
>
> I agree, some userbase does wonders to justify mainlining the code.

I'm pretty confident the fact that a big company
seems interested in this LSM will help with the
integration into mainline, but I can not say that
'linux-vserver' users or developers will have any
immediate benefit from it's inclusion ... why?

- the linux-vserver user would have to apply an
additional patch anyway (and install special
tools to control the vservers)

- the LSM does not provide what linux-vserver
requires and would need heavy modification
(missing context, namespaces, network, most
virtualization, resource isolation)

- once CKRM will be able to replace the resource
management currently present in linux-vserver,
it (CKRM) will collide with the resource stuff
done in this LSM

so while I'm fine with the idea to move a part of
linux-vserver to the LSM framework (once the LSM
stackering issues are resolved), this part would
not be usable without a decent part of kernel
modifications to do the virtualization and the
resource isolation

of course if there _is_ interest to include
linux-vserver like features into mainline, then
there should be some commitment to do it properly
and this includes not limiting it to a security
module, where it requires much more to be useful
to anyone ...

finally I have no problem to maintain the 'vserver'
patches outside the kernel tree, as they are
probably only of limited interest for the typical
linux desktop user ...

best,
Herbert
(linux-vserver maintainer)

PS: for details see the linux-vserver paper at
http://www.linux-vserver.org/index.php?page=Linux-VServer-Paper

> thanks,
> -chris
> --
> Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2004-10-10 10:41:29

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

Your filesystem handling code is completely superflous (and buggy). Please
remove all the code dealing with chroot-lookalikes. In your userland script
you simpl have to clone(.., CLONE_NEWNS) to detach your namespace from your
parent, then you can lazly unmount all filesystems and setup your new namespace
before starting the jail. The added advantage is that you don't need any
cludges to keep the user from exiting the chroot.

> +#include <linux/ip.h>
> +#include <net/ipv6.h>
> +#include <linux/mount.h>
> +#include <asm/uaccess.h>

Please always include <asm/*.h> headers after <linux/*.h>

> +#include <linux/smp_lock.h>

I don't see you using the BKL anywhere.

>
>
>
>
> +#include <linux/kref.h>

Why that many blank lines?

> +static int jail_debug = 0;

no need to initialize to 0

> +MODULE_PARM(jail_debug, "i");

please user module_param

> +static int secondary = 0;

again no need to itnialize.

> + char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
> + char *ip6_addr_name; /* char * containing ip6 addr to use for jail */

How do you habdle non-ip networking? This really needs to be handled
more generally.

> + /* won't let ourselves be removed until this jail goes away */
> + try_module_get(THIS_MODULE);

must be __module_get

> +/*
> + * LSM /proc/<pid>/attr hooks.
> + * You may write into /proc/<pid>/attr/exec:
> + * root /some/path
> + * ip 2.2.2.2
> + * These values will be used on the next exec() to set up your jail
> + * (assuming you're not already in a jail)

That's a really awkward interface.

> +jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown,
> + int fd, int reason)
> +{
> + struct file *file;
> + struct jail_struct *tsec, *fsec;
> +
> + if (!in_jail(current))
> + return 0;
> +
> + file = (struct file *) ((long)fown - offsetof(struct file, f_owner));

bah. Please use container_of or better get lsm folks to just pass you
a struct file *

> +jail_proc_inode_permission(struct inode *inode, int mask,
> + struct nameidata *nd)
> +{
> + struct jail_struct *tsec = current->security;
> + struct dentry *dentry = nd->dentry;
> + unsigned pid;
> +
> + pid = name_to_int(dentry);
> + if (pid == ~0U) {
> + struct qstr *dname = &dentry->d_name;
> + if (strcmp(dname->name, "scsi") == 0 ||
> + strcmp(dname->name, "sys") == 0 ||
> + strcmp(dname->name, "ide") == 0)
> + return -EPERM;
> + return 0;

oh, please. Don't submit such a crap.

if you want to disable sysctl access do it on the sysctl, not procfs level.
And disabling access to /proc/ide and /proc/scsi as two very special cases
(what about /proc/md, /proc/cciss or /proc/cpqarray?) is totally bullocks,
if they allow hardware interaction without checking for capabailities
fix them in the driver code.

This half-aided security by obscurity crap _is_ going to bite later on.

2004-10-10 11:32:28

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

> Your filesystem handling code is completely superflous (and buggy). Please
> remove all the code dealing with chroot-lookalikes. In your userland script
> you simpl have to clone(.., CLONE_NEWNS) to detach your namespace from your
> parent, then you can lazly unmount all filesystems and setup your new namespace
> before starting the jail. The added advantage is that you don't need any
> cludges to keep the user from exiting the chroot.

I definately would prefer to use namespaces. I had originally wanted to
do a copy_namespace() in the module. That function is not exported,
though. Is doing that in user-space really the right way to do it?

thanks,
-serge

2004-10-10 11:34:20

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Sun, Oct 10, 2004 at 07:31:52AM -0400, Serge E. Hallyn wrote:
> > Your filesystem handling code is completely superflous (and buggy). Please
> > remove all the code dealing with chroot-lookalikes. In your userland script
> > you simpl have to clone(.., CLONE_NEWNS) to detach your namespace from your
> > parent, then you can lazly unmount all filesystems and setup your new namespace
> > before starting the jail. The added advantage is that you don't need any
> > cludges to keep the user from exiting the chroot.
>
> I definately would prefer to use namespaces. I had originally wanted to
> do a copy_namespace() in the module. That function is not exported,
> though. Is doing that in user-space really the right way to do it?

If something can be done in userspace nicely that's preferable over doing it in
kernelspace, yes.

2004-10-11 14:53:04

by Alan

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Sul, 2004-10-10 at 11:41, Christoph Hellwig wrote:
> Your filesystem handling code is completely superflous (and buggy). Please
> remove all the code dealing with chroot-lookalikes. In your userland script
> you simpl have to clone(.., CLONE_NEWNS) to detach your namespace from your
> parent, then you can lazly unmount all filesystems and setup your new namespace
> before starting the jail. The added advantage is that you don't need any
> cludges to keep the user from exiting the chroot.

AF_UNIX socket and fchdir().

That however requires a co-operator outside the chroot so doesn't seem
to be a problem. I like the CLONE approach, its a lot cleaner.

2004-10-12 06:55:11

by Herbert Poetzl

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Mon, Oct 11, 2004 at 02:47:29PM +0100, Alan Cox wrote:
> On Sul, 2004-10-10 at 11:41, Christoph Hellwig wrote:
> > Your filesystem handling code is completely superflous (and buggy). Please
> > remove all the code dealing with chroot-lookalikes. In your userland script
> > you simpl have to clone(.., CLONE_NEWNS) to detach your namespace from your
> > parent, then you can lazly unmount all filesystems and setup your new namespace
> > before starting the jail. The added advantage is that you don't need any
> > cludges to keep the user from exiting the chroot.
>
> AF_UNIX socket and fchdir().
>
> That however requires a co-operator outside the chroot so doesn't seem
> to be a problem. I like the CLONE approach, its a lot cleaner.

and it works well, because we use it for almost
a year now on linux-vserver ;)

best,
Herbert

> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2004-10-12 09:03:44

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote:
> and it works well, because we use it for almost
> a year now on linux-vserver ;)

Btw, could anyone explain the exact differences between linux-vserver
and this jail module?

2004-10-12 12:21:45

by Herbert Poetzl

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote:
> On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote:
> > and it works well, because we use it for almost
> > a year now on linux-vserver ;)
>
> Btw, could anyone explain the exact differences between linux-vserver
> and this jail module?

hmm, okay I'll try ...

linux-vserver is a combination of kernel patch and
userspace tools to create 'virtual servers' similar
to UML, but sharing the resources (and kernel).

to do this, it uses process isolation, network
isolation and disk space separation (tagging).
in addition it does resource management (accounting
and limits) for various aspects (CPU, memory,
processes, sockets, filehandles, ...)

the jail module is recreating a limited subset of
the isolation aspect via LSM (similar to the BSD
jail) which allows to confine a process (and it's
children) to a chroot() environment under certain
limitations (resources)

best,
Herbert

> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2004-10-12 13:12:43

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

> That however requires a co-operator outside the chroot so doesn't seem
> to be a problem. I like the CLONE approach, its a lot cleaner.

The attached patch (against -rc4-mm1) moves the responsibility for
filesystem containment entirely to userspace. The Documentation/bsdjail.txt
file reflects the new usage. It also incorporates Christoph's cleanups.

I still need to see about generalizing the networking confinement. I
certainly like the concept (as I understand it at least) behind the new
vserver networking, but am not sure it can be done without patching.

-serge


diff -Nrup linux-2.6.9-rc4-mm1/Documentation/bsdjail.txt linux-2.6.9-rc4-mm1-jail/Documentation/bsdjail.txt
--- linux-2.6.9-rc4-mm1/Documentation/bsdjail.txt 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.9-rc4-mm1-jail/Documentation/bsdjail.txt 2004-10-11 16:22:12.845891208 -0500
@@ -0,0 +1,135 @@
+BSD Jail Linux Security Module
+Serge E. Hallyn <[email protected]>
+
+Description:
+
+Used in conjunction with per-process namespaces, this implements
+a subset of the BSD Jail functionality as a Linux LSM. What is
+currently implemented:
+
+ If a proces is in a jail, it:
+
+ 1. Cannot mount or umount
+ 2. Cannot send signals outside of jail
+ 3. Cannot ptrace processes outside of jail
+ 4. Cannot create devices
+ 5. Cannot renice processes
+ 6. Cannot load or unload modules
+ 7. Cannot change network settings
+ 8. May be assigned a specific ip address which will be used
+ for all it's socket binds.
+ 9. Cannot see contents of /proc/<pid> entries of processes not in the
+ same jail. (We hide their existence for convenience's sake, but
+ their existance can still be detected using, for instance, statfs)
+ 10. Has no CAP_SYS_RAWIO capability (no ioperm/iopl)
+ 11. May not share IPC resources with processes outside its own jail.
+ 12. May find it's valid network address (if restricted) under
+ /proc/$$/attr/current.
+
+ If properly locked into its own namespace, processes will not be able
+ to escape to parts of the system's filesystem which were made
+ unavailable (without outside help).
+
+WARNINGS:
+The security of this module is very much dependent on the security
+of the rest of the system. You must carefully think through your
+use of the system.
+
+Some examples:
+ 1. If you leave /dev/hda1 in the jail, processes in the
+ jail can access that filesystem (i.e. /sbin/debugfs).
+ 2. If you provide root access within a jail, this can of
+ course be used to setuid binaries in the jail. Combined
+ with an unjailed regular user account, this gives jailed
+ users unjailed root access. (thanks to Brad Spender for
+ pointing this out).
+
+How to use:
+ 1. Load the bsdjail module if not already loaded or compiled in:
+
+ modprobe bsdjail
+
+ 3. (Optional) Set up an ipv4 alias for the jail
+
+ # /sbin/ifconfig eth0:0 192.168.1.101
+ # /sbin/route add -host 192.168.1.101 dev eth0:0
+
+ 3. Execute a shell under a new namespace:
+
+ exec clone_ns
+
+ (see http://www.win.tue.nl/~aeb/linux/lk/lk-6.html#6.3)
+
+ 4. If not already done, set up the filesystem for the jail. in our
+ example, we will set it up under /opt.
+
+ mount /dev/hdc5 /opt
+ mount -t proc proc /opt/proc
+
+ 5. Make sure there is an empty directory to put the old root in. We
+ will just use /opt/mnt
+
+ mkdir /opt/mnt
+
+ 6. Pivot the old and new roots:
+
+ cd /opt
+ /sbin/pivot_root . mnt
+ /usr/sbin/chroot . /bin/sh
+
+ 7. Unmount the old root
+
+ umount -l /mnt
+
+ 6. Give the desired arguments for the jail. If no arguments are
+ necessary, just say:
+
+ echo lock > /proc/$$/attr/exec
+
+ To lock the process into an ip alias, say:
+
+ echo "ip 192.168.1.101" > /proc/$$/attr/exec
+
+ 7. Execute a new shell. The shell will be under the new jail, and in
+ the private namespace you've been setting up.
+
+ exec /bin/sh
+
+ 8. To allow friends/customers/whoever to use this system, you might start
+ start some services.
+
+ sshd
+
+ 9. Ssh is now running under the jail, so you no longer need the original
+ shell:
+
+ exit
+
+The new shell runs in a private jail on the filesystem on /dev/hdc5. If proc
+has been mounted under /dev/hdc5, then a "ps -auxw" under the jailed shell
+will show only entries for processes started under that jail.
+
+If a private IP was specified for the jail, then
+ cat /proc/$$/attr/current
+will show the address for the private network device. Other network
+devices will be visible through /sbin/ifconfig -a, but not usable.
+
+If the reading process is not in a jail, then
+ cat /proc/$$/attr/current
+returns information about the root and ip * for the target process,
+or "Not Jailed" if the target process is not jailed.
+
+Cat /proc/$$/attr/exec gives a list of the valid keywords to cat into
+/proc/$$/attr/exec when starting a jail.
+
+Current valid keywords for creating a jail are:
+
+ lock: specifies the next exec should land us in a jail. (only needed
+ if you don't want to give any other keywords)
+ ip: IPV4 addr for this jail
+ ip6: IPV6 addr for this jail
+ nrtask: Number of tasks in this jail
+ nice: The nice level for this jail. (maybe should be min/max?)
+ slice: Max timeslice per process
+ data: Max size of DATA segment per process
+ memlock: Max size of memory which can be locked per process
diff -Nrup linux-2.6.9-rc4-mm1/fs/proc/base.c linux-2.6.9-rc4-mm1-jail/fs/proc/base.c
--- linux-2.6.9-rc4-mm1/fs/proc/base.c 2004-10-11 17:02:19.612007144 -0500
+++ linux-2.6.9-rc4-mm1-jail/fs/proc/base.c 2004-10-11 10:00:36.000000000 -0500
@@ -1706,6 +1706,8 @@ static int get_tgid_list(int index, unsi
int tgid = p->pid;
if (!pid_alive(p))
continue;
+ if (security_task_lookup(p))
+ continue;
if (--index >= 0)
continue;
tgids[nr_tgids] = tgid;
diff -Nrup linux-2.6.9-rc4-mm1/include/linux/security.h linux-2.6.9-rc4-mm1-jail/include/linux/security.h
--- linux-2.6.9-rc4-mm1/include/linux/security.h 2004-10-11 17:02:21.888661040 -0500
+++ linux-2.6.9-rc4-mm1-jail/include/linux/security.h 2004-10-11 10:00:36.000000000 -0500
@@ -630,6 +630,11 @@ struct swap_info_struct;
* Set the security attributes in @p->security for a kernel thread that
* is being reparented to the init task.
* @p contains the task_struct for the kernel thread.
+ * @task_lookup:
+ * Check permission to see the /proc/<pid> entry for process @p.
+ * @p contains the task_struct for task <pid> which is being looked
+ * up under /proc
+ * return 0 if permission is granted.
* @task_to_inode:
* Set the security attributes for an inode based on an associated task's
* security attributes, e.g. for /proc/pid inodes.
@@ -1162,6 +1167,7 @@ struct security_operations {
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
void (*task_reparent_to_init) (struct task_struct * p);
+ int (*task_lookup)(struct task_struct *p);
void (*task_to_inode)(struct task_struct *p, struct inode *inode);

int (*ipc_permission) (struct kern_ipc_perm * ipcp, short flag);
@@ -1767,6 +1773,11 @@ static inline void security_task_reparen
security_ops->task_reparent_to_init (p);
}

+static inline int security_task_lookup(struct task_struct *p)
+{
+ return security_ops->task_lookup(p);
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{
security_ops->task_to_inode(p, inode);
@@ -2407,6 +2418,11 @@ static inline void security_task_reparen
cap_task_reparent_to_init (p);
}

+static inline int security_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static inline void security_task_to_inode(struct task_struct *p, struct inode *inode)
{ }

diff -Nrup linux-2.6.9-rc4-mm1/security/bsdjail.c linux-2.6.9-rc4-mm1-jail/security/bsdjail.c
--- linux-2.6.9-rc4-mm1/security/bsdjail.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.9-rc4-mm1-jail/security/bsdjail.c 2004-10-11 16:55:33.967674456 -0500
@@ -0,0 +1,1365 @@
+/*
+ * File: linux/security/bsdjail.c
+ * Author: Serge Hallyn ([email protected])
+ * Date: Sep 12, 2004
+ *
+ * (See Documentation/bsdjail.txt for more information)
+ *
+ * Copyright (C) 2004 International Business Machines <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/pagemap.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
+#include <linux/mount.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/seq_file.h>
+#include <linux/un.h>
+#include <linux/smp_lock.h>
+#include <linux/kref.h>
+#include <asm/uaccess.h>
+
+static int jail_debug;
+module_param(jail_debug, int, 0);
+MODULE_PARM_DESC(jail_debug, "Print bsd jail debugging messages.\n");
+
+#define DBG 0
+#define WARN 1
+#define bsdj_debug(how, fmt, arg... ) \
+ do { \
+ if ( how || jail_debug ) \
+ printk(KERN_NOTICE "%s: %s: " fmt, \
+ MY_NAME, __FUNCTION__ , \
+ ## arg ); \
+ } while ( 0 )
+
+#define MY_NAME "bsdjail"
+
+/* flag to keep track of how we were registered */
+static int secondary;
+
+/*
+ * The task structure holding jail information.
+ * Taskp->security points to one of these (or is null).
+ * There is exactly one jail_struct for each jail. If >1 process
+ * are in the same jail, they share the same jail_struct.
+ */
+struct jail_struct {
+ struct kref kref;
+
+ /* these are set on writes to /proc/<pid>/attr/exec */
+ char *ip4_addr_name; /* char * containing ip4 addr to use for jail */
+ char *ip6_addr_name; /* char * containing ip6 addr to use for jail */
+
+ /* these are set when a jail becomes active */
+ __u32 addr4; /* internal form of ip4_addr_name */
+ struct in6_addr addr6; /* internal form of ip6_addr_name */
+
+ /* Resource limits. 0 = no limit */
+ int max_nrtask; /* maximum number of tasks within this jail. */
+ int cur_nrtask; /* current number of tasks within this jail. */
+ long maxtimeslice; /* max timeslice in ms for procs in this jail */
+ long nice; /* nice level for processes in this jail */
+ long max_data, max_memlock; /* equivalent to RLIMIT_{DATA, MEMLOCK} */
+/* values for the jail_flags field */
+#define IN_USE 1 /* if 0, task is setting up jail, not yet in it */
+#define GOT_IPV4 2
+#define GOT_IPV6 4 /* if 0, ipv4, else ipv6 */
+ char jail_flags;
+};
+
+/*
+ * disable_jail: A jail which was in use, but has no references
+ * left, is disabled - we free up the mountpoint and dentry, and
+ * give up our reference on the module.
+ *
+ * don't need to put namespace, it will be done automatically
+ * when the last process in jail is put.
+ * DO need to put the dentry and vfsmount
+ */
+static void
+disable_jail(struct jail_struct *tsec)
+{
+ module_put(THIS_MODULE);
+}
+
+
+static void free_jail(struct jail_struct *tsec)
+{
+ if (!tsec)
+ return;
+
+ kfree(tsec->ip4_addr_name);
+ kfree(tsec->ip6_addr_name);
+ kfree(tsec);
+}
+
+/* release_jail:
+ * Callback for kref_put to use for releasing a jail when its
+ * last user exits.
+ */
+static void release_jail(struct kref *kref)
+{
+ struct jail_struct *tsec;
+
+ tsec = container_of(kref, struct jail_struct, kref);
+ disable_jail(tsec);
+ free_jail(tsec);
+}
+
+/*
+ * jail_task_free_security: this is the callback hooked into LSM.
+ * If there was no task->security field for bsdjail, do nothing.
+ * If there was, but it was never put into use, free the jail.
+ * If there was, and the jail is in use, then decrement the usage
+ * count, and disable and free the jail if the usage count hits 0.
+ */
+static void jail_task_free_security(struct task_struct *task)
+{
+ struct jail_struct *tsec = task->security;
+
+ if (!tsec)
+ return;
+
+ if (!(tsec->jail_flags & IN_USE)) {
+ /*
+ * someone did 'echo -n x > /proc/<pid>/attr/exec' but
+ * then forked before execing. Nuke the old info.
+ */
+ free_jail(tsec);
+ task->security = NULL;
+ return;
+ }
+ tsec->cur_nrtask--;
+ /* If this was the last process in the jail, delete the jail */
+ kref_put(&tsec->kref, release_jail);
+}
+
+static struct jail_struct *
+alloc_task_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec;
+
+ tsec = kmalloc(sizeof(struct jail_struct), GFP_KERNEL);
+ if (tsec) {
+ memset(tsec, 0, sizeof(struct jail_struct));
+ tsk->security = tsec;
+ }
+ return tsec;
+}
+
+static inline int
+in_jail(struct task_struct *t)
+{
+ struct jail_struct *tsec = t->security;
+
+ if (tsec && (tsec->jail_flags & IN_USE))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * If a network address was passed into /proc/<pid>/attr/exec,
+ * then process in its jail will only be allowed to bind/listen
+ * to that address.
+ */
+static void
+setup_netaddress(struct jail_struct *tsec)
+{
+ unsigned int a, b, c, d, i;
+ unsigned int x[8];
+
+ tsec->jail_flags &= ~(GOT_IPV4 | GOT_IPV6);
+ tsec->addr4 = 0;
+ ipv6_addr_set(&tsec->addr6, 0, 0, 0, 0);
+
+ if (tsec->ip4_addr_name) {
+ if (sscanf(tsec->ip4_addr_name, "%u.%u.%u.%u",
+ &a, &b, &c, &d) != 4)
+ return;
+ if (a>255 || b>255 || c>255 || d>255)
+ return;
+ tsec->addr4 = htonl((a<<24) | (b<<16) | (c<<8) | d);
+ tsec->jail_flags |= GOT_IPV4;
+ bsdj_debug(DBG, "Network (ipv4) set up (%s)\n",
+ tsec->ip4_addr_name);
+ }
+
+ if (tsec->ip6_addr_name) {
+ if (sscanf(tsec->ip6_addr_name, "%x:%x:%x:%x:%x:%x:%x:%x",
+ &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6],
+ &x[7]) != 8) {
+ printk(KERN_INFO "%s: bad ipv6 addr %s\n", __FUNCTION__,
+ tsec->ip6_addr_name);
+ return;
+ }
+ for (i=0; i<8; i++) {
+ if (x[i] > 65535) {
+ printk("%s: %x > 65535 at %d\n", __FUNCTION__, x[i], i);
+ return;
+ }
+ tsec->addr6.in6_u.u6_addr16[i] = htons(x[i]);
+ }
+ tsec->jail_flags |= GOT_IPV6;
+ bsdj_debug(DBG, "Network (ipv6) set up (%s)\n",
+ tsec->ip6_addr_name);
+ }
+}
+
+/*
+ * enable_jail:
+ * Called when a process is placed into a new jail to handle the
+ * actual creation of the jail.
+ * Creates namespace
+ * Stores the requested ip address
+ * Registers a unique pseudo-proc filesystem for this jail
+ */
+static int enable_jail(struct task_struct *tsk)
+{
+ struct jail_struct *tsec = tsk->security;
+ int retval = -EFAULT;
+
+ if (!tsec)
+ goto out;
+
+ /* set up networking */
+ if (tsec->ip4_addr_name || tsec->ip6_addr_name)
+ setup_netaddress(tsec);
+
+ tsec->cur_nrtask = 1;
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+ if (tsec->max_data) {
+ current->signal->rlim[RLIMIT_DATA].rlim_cur = tsec->max_data;
+ current->signal->rlim[RLIMIT_DATA].rlim_max = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur =
+ tsec->max_memlock;
+ current->signal->rlim[RLIMIT_MEMLOCK].rlim_max =
+ tsec->max_memlock;
+ }
+ if (tsec->maxtimeslice) {
+ current->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ current->signal->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ }
+ /* success and end */
+ kref_init(&tsec->kref);
+ tsec->jail_flags |= IN_USE;
+
+ /* won't let ourselves be removed until this jail goes away */
+ try_module_get(THIS_MODULE);
+
+ return 0;
+
+out:
+ return retval;
+}
+
+/*
+ * LSM /proc/<pid>/attr hooks.
+ * You may write into /proc/<pid>/attr/exec:
+ * lock (no value, just to specify a jail)
+ * ip 2.2.2.2
+ etc...
+ * These values will be used on the next exec() to set up your jail
+ * (assuming you're not already in a jail)
+ */
+static int
+jail_setprocattr(struct task_struct *p, char *name, void *value, size_t rsize)
+{
+ struct jail_struct *tsec = current->security;
+ long val;
+ char *v = value;
+ int start, len;
+ size_t size = rsize;
+
+ if (tsec && (tsec->jail_flags & IN_USE))
+ return -EINVAL; /* let them guess why */
+
+ if (p != current || strcmp(name, "exec"))
+ return -EPERM;
+
+ if (!tsec) {
+ tsec = alloc_task_security(current);
+ if (!tsec)
+ return -ENOMEM;
+ }
+
+ if (v[size-1] == '\n')
+ size--;
+
+ if (strncmp(value, "ip ", 3) == 0) {
+ kfree(tsec->ip4_addr_name);
+ start = 3;
+ len = size - start + 1;
+ tsec->ip4_addr_name = kmalloc(len, GFP_KERNEL);
+ if (!tsec->ip4_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip4_addr_name, value+start, len);
+ } else if (strncmp(value, "ip6 ", 4) == 0) {
+ kfree(tsec->ip6_addr_name);
+ start = 4;
+ len = size - start + 1;
+ tsec->ip6_addr_name = kmalloc(len, GFP_KERNEL);
+ if (!tsec->ip6_addr_name)
+ return -ENOMEM;
+ strlcpy(tsec->ip6_addr_name, value+start, len);
+
+ /* the next two are equivalent */
+ } else if (strncmp(value, "slice ", 6) == 0) {
+ val = simple_strtoul(value+6, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "timeslice ", 10) == 0) {
+ val = simple_strtoul(value+10, NULL, 0);
+ tsec->maxtimeslice = val;
+ } else if (strncmp(value, "nrtask ", 7) == 0) {
+ val = (int) simple_strtol(value+7, NULL, 0);
+ if (val < 1)
+ return -EINVAL;
+ tsec->max_nrtask = val;
+ } else if (strncmp(value, "memlock ", 8) == 0) {
+ val = simple_strtoul(value+8, NULL, 0);
+ tsec->max_memlock = val;
+ } else if (strncmp(value, "data ", 5) == 0) {
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->max_data = val;
+ } else if (strncmp(value, "nice ", 5) == 0) {
+ val = simple_strtoul(value+5, NULL, 0);
+ tsec->nice = val;
+ } else if (strncmp(value, "lock", 4) != 0)
+ return -EINVAL;
+
+ return rsize;
+}
+
+static int print_jail_net_info(struct jail_struct *j, char *buf, int maxcnt)
+{
+ int len = 0;
+
+ if (j->ip4_addr_name)
+ len += snprintf(buf, maxcnt, "%s\n", j->ip4_addr_name);
+ if (j->ip6_addr_name)
+ len += snprintf(buf, maxcnt-len, "%s\n", j->ip6_addr_name);
+
+ return snprintf(buf, maxcnt, "No network information\n");
+}
+
+/*
+ * LSM /proc/<pid>/attr read hook.
+ *
+ * /proc/$$/attr/current output:
+ * If the reading process, say process 1001, is in a jail, then
+ * cat /proc/999/attr/current
+ * will print networking information.
+ * If the reading process, say process 1001, is not in a jail, then
+ * cat /proc/999/attr/current
+ * will return
+ * ip: (ip address of jail)
+ * if 999 is in a jail, or
+ * -EINVAL
+ * if 999 is not in a jail.
+ *
+ * /proc/$$/attr/exec output:
+ * A process in a jail gets -EINVAL for /proc/$$/attr/exec.
+ * A process not in a jail gets hints on starting a jail.
+ */
+static int
+jail_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+{
+ struct jail_struct *tsec;
+ int err = 0;
+
+ if (in_jail(current)) {
+ if (strcmp(name, "current") == 0) {
+ /* provide network info */
+ err = print_jail_net_info(current->security, value,
+ size);
+ return err;
+ }
+ return -EINVAL; /* let them guess why */
+ }
+
+ if (strcmp(name, "exec") == 0) {
+ /* Print usage some help */
+ err = snprintf(value, size,
+ "Valid keywords:\n"
+ "lock\n"
+ "ip <ip4-addr>\n"
+ "ip6 <ip6-addr>\n"
+ "nrtask <max number of tasks in this jail>\n"
+ "nice <nice level for processes in this jail>\n"
+ "slice <max timeslice per process in msecs>\n"
+ "data <max data size per process in bytes>\n"
+ "memlock <max lockable memory per process in bytes>\n");
+ return err;
+ }
+
+ if (strcmp(name, "current"))
+ return -EPERM;
+
+ tsec = p->security;
+ if (!tsec || !(tsec->jail_flags & IN_USE)) {
+ err = snprintf(value, size, "Not Jailed\n");
+ } else {
+ err = snprintf(value, size,
+ "IPv4: %s\nIPv6: %s\n"
+ "max_nrtask %d current nrtask %d max_timeslice %lu "
+ "nice %lu\n"
+ "max_memlock %lu max_data %lu\n",
+ tsec->ip4_addr_name ? tsec->ip4_addr_name : "(none)",
+ tsec->ip6_addr_name ? tsec->ip6_addr_name : "(none)",
+ tsec->max_nrtask, tsec->cur_nrtask, tsec->maxtimeslice,
+ tsec->nice, tsec->max_data, tsec->max_memlock);
+ }
+
+ return err;
+}
+
+/*
+ * Forbid a process in a jail from sending a signal to a process in another
+ * (or no) jail through file sigio.
+ *
+ * We consider the process which set the fowner to be the one sending the
+ * signal, rather than the one writing to the file. Therefore we store the
+ * jail of a process during jail_file_set_fowner, then check that against
+ * the jail of the process receiving the signal.
+ */
+static int
+jail_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown,
+ int fd, int reason)
+{
+ struct file *file;
+
+ if (!in_jail(current))
+ return 0;
+
+ file = container_of(fown, struct file, f_owner);
+ if (file->f_security != tsk->security)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_file_set_fowner(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = current->security;
+ file->f_security = tsec;
+ if (tsec)
+ kref_get(&tsec->kref);
+
+ return 0;
+}
+
+static void free_ipc_security(struct kern_ipc_perm *ipc)
+{
+ struct jail_struct *tsec;
+
+ tsec = ipc->security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ ipc->security = NULL;
+}
+
+static void free_file_security(struct file *file)
+{
+ struct jail_struct *tsec;
+
+ tsec = file->f_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ file->f_security = NULL;
+}
+
+static void free_inode_security(struct inode *inode)
+{
+ struct jail_struct *tsec;
+
+ tsec = inode->i_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ inode->i_security = NULL;
+}
+
+/*
+ * LSM ptrace hook:
+ * process in jail may not ptrace process not in the same jail
+ */
+static int
+jail_ptrace (struct task_struct *tracer, struct task_struct *tracee)
+{
+ struct jail_struct *tsec = tracer->security;
+
+ if (tsec && (tsec->jail_flags & IN_USE)) {
+ if (tsec == tracee->security)
+ return 0;
+ return -EPERM;
+ }
+ return 0;
+}
+
+/*
+ * process in jail may only use one (aliased) ip address. If they try to
+ * attach to 127.0.0.1, that is remapped to their own address. If some
+ * other address (and not their own), deny permission
+ */
+static int jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen);
+
+#define loopbackaddr htonl((127 << 24) | 1)
+
+static inline int jail_inet4_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen, struct jail_struct *tsec)
+{
+ struct sockaddr_in *inaddr;
+ __u32 sin_addr, jailaddr;
+
+ if (!(tsec->jail_flags & GOT_IPV4))
+ return -EPERM;
+
+ inaddr = (struct sockaddr_in *) address;
+ sin_addr = inaddr->sin_addr.s_addr;
+ jailaddr = tsec->addr4;
+
+ if (sin_addr == jailaddr)
+ return 0;
+
+ if (sin_addr == loopbackaddr || !sin_addr) {
+ bsdj_debug(DBG, "Got a loopback or 0 address\n");
+ sin_addr = jailaddr;
+ bsdj_debug(DBG, "Converted to: %u.%u.%u.%u\n",
+ NIPQUAD(sin_addr));
+ return 0;
+ }
+
+ return -EPERM;
+}
+
+static inline int
+jail_inet6_bind(struct socket *sock, struct sockaddr *address, int addrlen,
+ struct jail_struct *tsec)
+{
+ struct sockaddr_in6 *inaddr6;
+ struct in6_addr *sin6_addr, *jailaddr;
+
+ if (!(tsec->jail_flags & GOT_IPV6))
+ return -EPERM;
+
+ inaddr6 = (struct sockaddr_in6 *) address;
+ sin6_addr = &inaddr6->sin6_addr;
+ jailaddr = &tsec->addr6;
+
+ if (ipv6_addr_cmp(jailaddr, sin6_addr) == 0)
+ return 0;
+
+ if (ipv6_addr_cmp(sin6_addr, &in6addr_loopback) == 0) {
+ ipv6_addr_copy(sin6_addr, jailaddr);
+ return 0;
+ }
+
+ printk(KERN_NOTICE "%s: DENYING\n", __FUNCTION__);
+ printk(KERN_NOTICE "%s: a %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ "j %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ __FUNCTION__,
+ NIP6(*sin6_addr),
+ NIP6(*jailaddr));
+
+ return -EPERM;
+}
+
+static int
+jail_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (sock->sk->sk_family == AF_UNIX)
+ return jail_socket_unix_bind(sock, address, addrlen);
+
+ if (!(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ /* If we want to be strict, we could just
+ * deny net access when lacking a pseudo ip.
+ * For now we just allow it. */
+ return 0;
+
+ switch(address->sa_family) {
+ case AF_INET:
+ return jail_inet4_bind(sock, address, addrlen, tsec);
+
+ case AF_INET6:
+ return jail_inet6_bind(sock, address, addrlen, tsec);
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * If locked in an ipv6 jail, don't let them use ipv4, and vice versa
+ */
+static int
+jail_socket_create(int family, int type, int protocol, int kern)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return 0;
+
+ switch(family) {
+ case AF_INET:
+ if (tsec->jail_flags & GOT_IPV4)
+ return 0;
+ return -EPERM;
+ case AF_INET6:
+ if (tsec->jail_flags & GOT_IPV6)
+ return 0;
+ return -EPERM;
+ default:
+ return 0;
+ };
+
+ return 0;
+}
+
+static void
+jail_socket_post_create(struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || kern || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return;
+
+ switch(family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ inet->saddr = tsec->addr4;
+ break;
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ ipv6_addr_copy(&inet6->saddr, &tsec->addr6);
+ break;
+ default:
+ break;
+ };
+
+ return;
+}
+
+static int
+jail_socket_listen(struct socket *sock, int backlog)
+{
+ struct inet_opt *inet;
+ struct ipv6_pinfo *inet6;
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE) ||
+ !(tsec->jail_flags & (GOT_IPV4 | GOT_IPV6)))
+ return 0;
+
+ switch (sock->sk->sk_family) {
+ case AF_INET:
+ inet = inet_sk(sock->sk);
+ if (inet->saddr == tsec->addr4)
+ return 0;
+ return -EPERM;
+
+ case AF_INET6:
+ inet6 = inet6_sk(sock->sk);
+ if (ipv6_addr_cmp(&inet6->saddr, &tsec->addr6) == 0)
+ return 0;
+ return -EPERM;
+
+ default:
+ return 0;
+
+ }
+}
+
+static void free_sock_security(struct sock *sk)
+{
+ struct jail_struct *tsec;
+
+ tsec = sk->sk_security;
+ if (!tsec)
+ return;
+ kref_put(&tsec->kref, release_jail);
+ sk->sk_security = NULL;
+}
+
+/*
+ * The next three (socket) hooks prevent a process in a jail from sending
+ * data to a abstract unix domain socket which was bound outside the jail.
+ */
+static int
+jail_socket_unix_bind(struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_un *sunaddr;
+ struct jail_struct *tsec;
+
+ if (sock->sk->sk_family != AF_UNIX)
+ return 0;
+
+ sunaddr = (struct sockaddr_un *) address;
+ if (sunaddr->sun_path[0] != 0)
+ return 0;
+
+ tsec = current->security;
+ sock->sk->sk_security = tsec;
+ if (tsec)
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+/*
+ * Note - we deny sends both from unjailed to jailed, and from jailed
+ * to unjailed. As well as, of course between different jails.
+ */
+static int
+jail_socket_unix_may_send(struct socket *sock, struct socket *other)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_socket_unix_stream_connect(struct socket *sock,
+ struct socket *other, struct sock *newsk)
+{
+ struct jail_struct *tsec, *ssec;
+
+ tsec = current->security; /* jail of sending process */
+ ssec = other->sk->sk_security; /* jail of receiver */
+
+ if (tsec != ssec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_mount(char * dev_name, struct nameidata *nd, char * type,
+ unsigned long flags, void * data)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_umount(struct vfsmount *mnt, int flags)
+{
+ if (in_jail(current))
+ return -EPERM;
+
+ return 0;
+}
+
+/*
+ * process in jail may not:
+ * use nice
+ * change network config
+ * load/unload modules
+ */
+static int
+jail_capable (struct task_struct *tsk, int cap)
+{
+ if (in_jail(tsk)) {
+ if (cap == CAP_SYS_NICE)
+ return -EPERM;
+ if (cap == CAP_NET_ADMIN)
+ return -EPERM;
+ if (cap == CAP_SYS_MODULE)
+ return -EPERM;
+ if (cap == CAP_SYS_RAWIO)
+ return -EPERM;
+ }
+
+ if (cap_is_fs_cap (cap) ? tsk->fsuid == 0 : tsk->euid == 0)
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * jail_security_task_create:
+ *
+ * If the current process is ina a jail, and that jail is about to exceed a
+ * maximum number of processes, then refuse to fork. If the maximum number
+ * of jails is listed as 0, then there is no limit for this jail, and we allow
+ * all forks.
+ */
+static inline int
+jail_security_task_create (unsigned long clone_flags)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (tsec->max_nrtask && tsec->cur_nrtask >= tsec->max_nrtask)
+ return -EPERM;
+ return 0;
+}
+
+/*
+ * The child of a process in a jail belongs in the same jail
+ */
+static int
+jail_task_alloc_security(struct task_struct *tsk)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ tsk->security = tsec;
+ kref_get(&tsec->kref);
+ tsec->cur_nrtask++;
+ if (tsec->maxtimeslice) {
+ tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->maxtimeslice;
+ tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->maxtimeslice;
+ }
+ if (tsec->max_data) {
+ tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->max_data;
+ tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->max_data;
+ }
+ if (tsec->max_memlock) {
+ tsk->signal->rlim[RLIMIT_CPU].rlim_max = tsec->max_memlock;
+ tsk->signal->rlim[RLIMIT_CPU].rlim_cur = tsec->max_memlock;
+ }
+ if (tsec->nice)
+ set_user_nice(current, tsec->nice);
+
+ return 0;
+}
+
+static int
+jail_bprm_alloc_security(struct linux_binprm *bprm)
+{
+ struct jail_struct *tsec = current->security;
+ int ret;
+
+ if (!tsec)
+ return 0;
+
+ if (tsec->jail_flags & IN_USE)
+ return 0;
+
+ ret = enable_jail(current);
+ if (ret) {
+ /* if we failed, nix out the ip requests */
+ jail_task_free_security(current);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Process in jail may not create devices
+ * Thanks to Brad Spender for pointing out fifos should be allowed.
+ */
+/* TODO: We may want to allow /dev/log, at least... */
+static int
+jail_inode_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+{
+ if (!in_jail(current))
+ return 0;
+
+ if (S_ISFIFO(mode))
+ return 0;
+
+ return -EPERM;
+}
+
+/* yanked from fs/proc/base.c */
+static unsigned name_to_int(struct dentry *dentry)
+{
+ const char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ unsigned n = 0;
+
+ if (len > 1 && *name == '0')
+ goto out;
+ while (len-- > 0) {
+ unsigned c = *name++ - '0';
+ if (c > 9)
+ goto out;
+ if (n >= (~0U-9)/10)
+ goto out;
+ n *= 10;
+ n += c;
+ }
+ return n;
+out:
+ return ~0U;
+}
+
+/*
+ * jail_proc_inode_permission:
+ * called only when current is in a jail, and is trying to reach
+ * /proc/<pid>. We check whether <pid> is in the same jail as
+ * current. If not, permission is denied.
+ *
+ * NOTE: On the one hand, the task_to_inode(inode)->i_security
+ * approach seems cleaner, but on the other, this prevents us
+ * from unloading bsdjail for awhile...
+ */
+static int
+jail_proc_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = current->security;
+ struct dentry *dentry = nd->dentry;
+ unsigned pid;
+
+ pid = name_to_int(dentry);
+ if (pid == ~0U) {
+ return 0;
+ }
+
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (inode->i_security != tsec)
+ return -ENOENT;
+
+ return 0;
+}
+
+/*
+ * A process in a jail may not see that /proc/<pid> exists for
+ * process not in its jail
+ * Unfortunately we can't pretend that pid for the starting process
+ * is 1, as vserver does.
+ */
+static int jail_task_lookup(struct task_struct *p)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec)
+ return 0;
+ if (tsec == p->security)
+ return 0;
+ return -EPERM;
+}
+/*
+ * security_task_to_inode:
+ * Set inode->security = task's jail.
+ */
+static void jail_task_to_inode(struct task_struct *p, struct inode *inode)
+{
+ struct jail_struct *tsec = p->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return;
+ if (inode->i_security)
+ return;
+ kref_get(&tsec->kref);
+ inode->i_security = tsec;
+}
+
+/*
+ * inode_permission:
+ * If we are trying to look into certain /proc files from in a jail, we
+ * may deny permission.
+ */
+static int
+jail_inode_permission(struct inode *inode, int mask,
+ struct nameidata *nd)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (!nd)
+ return 0;
+
+ if (nd->dentry &&
+ strcmp(nd->dentry->d_sb->s_type->name, "proc") == 0) {
+ return jail_proc_inode_permission(inode, mask, nd);
+
+ }
+
+ return 0;
+}
+
+/*
+ * A function which returns -ENOENT if dentry is the dentry for
+ * a /proc/<pid> directory. It returns 0 otherwise.
+ */
+static inline int
+generic_procpid_check(struct dentry *dentry)
+{
+ struct jail_struct *jail = current->security;
+ unsigned pid = name_to_int(dentry);
+
+ if (!jail || !(jail->jail_flags & IN_USE))
+ return 0;
+ if (pid == ~0U)
+ return 0;
+ if (strcmp(dentry->d_sb->s_type->name, "proc") != 0)
+ return 0;
+ if (dentry->d_parent != dentry->d_sb->s_root)
+ return 0;
+ if (dentry->d_inode->i_security != jail)
+ return -ENOENT;
+ return 0;
+}
+
+/*
+ * We want getattr to fail on /proc/<pid> to prevent leakage through, for
+ * instance, ls -d.
+ */
+static int
+jail_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* This probably is not necessary - /proc does not support xattrs? */
+static int
+jail_inode_getxattr(struct dentry *dentry, char *name)
+{
+ return generic_procpid_check(dentry);
+}
+
+/* process in jail may not send signal to process not in the same jail */
+static int
+jail_task_kill(struct task_struct *p, struct siginfo *info, int sig)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (tsec == p->security)
+ return 0;
+
+ if (sig==SIGCHLD)
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit jailed process' abilities to muck with resource
+ * limits
+ */
+static int jail_task_setrlimit (unsigned int resource, struct rlimit *new_rlim)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+static int jail_task_setscheduler (struct task_struct *p, int policy,
+ struct sched_param *lp)
+{
+ if (!in_jail(current))
+ return 0;
+
+ return -EPERM;
+}
+
+/*
+ * LSM hooks to limit IPC access.
+ */
+
+static inline int
+basic_ipc_security_check(struct kern_ipc_perm *p, struct task_struct *target)
+{
+ struct jail_struct *tsec = target->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+
+ if (p->security != tsec)
+ return -EPERM;
+
+ return 0;
+}
+
+static int
+jail_ipc_permission(struct kern_ipc_perm *ipcp, short flag)
+{
+ return basic_ipc_security_check(ipcp, current);
+}
+
+static int
+jail_shm_alloc_security (struct shmid_kernel *shp)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ shp->shm_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_shm_free_security (struct shmid_kernel *shp)
+{
+ free_ipc_security(&shp->shm_perm);
+}
+
+static int
+jail_shm_associate (struct shmid_kernel *shp, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmctl(struct shmid_kernel *shp, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SHM_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_shm_shmat(struct shmid_kernel *shp, char *shmaddr, int shmflg)
+{
+ return basic_ipc_security_check(&shp->shm_perm, current);
+}
+
+static int
+jail_msg_queue_alloc(struct msg_queue *msq)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ msq->q_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_msg_queue_free(struct msg_queue *msq)
+{
+ free_ipc_security(&msq->q_perm);
+}
+
+static int jail_msg_queue_associate(struct msg_queue *msq, int flag)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgctl(struct msg_queue *msq, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == MSG_INFO)
+ return 0;
+
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg)
+{
+ return basic_ipc_security_check(&msq->q_perm, current);
+}
+
+static int
+jail_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg,
+ struct task_struct *target, long type, int mode)
+
+{
+ return basic_ipc_security_check(&msq->q_perm, target);
+}
+
+static int
+jail_sem_alloc_security(struct sem_array *sma)
+{
+ struct jail_struct *tsec = current->security;
+
+ if (!tsec || !(tsec->jail_flags & IN_USE))
+ return 0;
+ sma->sem_perm.security = tsec;
+ kref_get(&tsec->kref);
+ return 0;
+}
+
+static void
+jail_sem_free_security(struct sem_array *sma)
+{
+ free_ipc_security(&sma->sem_perm);
+}
+
+static int
+jail_sem_associate(struct sem_array *sma, int semflg)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semctl(struct sem_array *sma, int cmd)
+{
+ if (cmd == IPC_INFO || cmd == SEM_INFO)
+ return 0;
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops,
+ int alter)
+{
+ return basic_ipc_security_check(&sma->sem_perm, current);
+}
+
+static int
+jail_sysctl(struct ctl_table *table, int op)
+{
+ if (!in_jail(current))
+ return 0;
+
+ if (op & 002)
+ return -EPERM;
+
+ return 0;
+}
+
+static struct security_operations bsdjail_security_ops = {
+ .ptrace = jail_ptrace,
+ .capable = jail_capable,
+
+ .task_kill = jail_task_kill,
+ .task_alloc_security = jail_task_alloc_security,
+ .task_free_security = jail_task_free_security,
+ .bprm_alloc_security = jail_bprm_alloc_security,
+ .task_create = jail_security_task_create,
+ .task_to_inode = jail_task_to_inode,
+ .task_lookup = jail_task_lookup,
+
+ .task_setrlimit = jail_task_setrlimit,
+ .task_setscheduler = jail_task_setscheduler,
+
+ .setprocattr = jail_setprocattr,
+ .getprocattr = jail_getprocattr,
+
+ .file_set_fowner = jail_file_set_fowner,
+ .file_send_sigiotask = jail_file_send_sigiotask,
+ .file_free_security = free_file_security,
+
+ .socket_bind = jail_socket_bind,
+ .socket_listen = jail_socket_listen,
+ .socket_create = jail_socket_create,
+ .socket_post_create = jail_socket_post_create,
+ .unix_stream_connect = jail_socket_unix_stream_connect,
+ .unix_may_send = jail_socket_unix_may_send,
+ .sk_free_security = free_sock_security,
+
+ .inode_mknod = jail_inode_mknod,
+ .inode_permission = jail_inode_permission,
+ .inode_free_security = free_inode_security,
+ .inode_getattr = jail_inode_getattr,
+ .inode_getxattr = jail_inode_getxattr,
+ .sb_mount = jail_mount,
+ .sb_umount = jail_umount,
+
+ .ipc_permission = jail_ipc_permission,
+ .shm_alloc_security = jail_shm_alloc_security,
+ .shm_free_security = jail_shm_free_security,
+ .shm_associate = jail_shm_associate,
+ .shm_shmctl = jail_shm_shmctl,
+ .shm_shmat = jail_shm_shmat,
+
+ .msg_queue_alloc_security = jail_msg_queue_alloc,
+ .msg_queue_free_security = jail_msg_queue_free,
+ .msg_queue_associate = jail_msg_queue_associate,
+ .msg_queue_msgctl = jail_msg_queue_msgctl,
+ .msg_queue_msgsnd = jail_msg_queue_msgsnd,
+ .msg_queue_msgrcv = jail_msg_queue_msgrcv,
+
+ .sem_alloc_security = jail_sem_alloc_security,
+ .sem_free_security = jail_sem_free_security,
+ .sem_associate = jail_sem_associate,
+ .sem_semctl = jail_sem_semctl,
+ .sem_semop = jail_sem_semop,
+
+ .sysctl = jail_sysctl,
+};
+
+static int __init bsdjail_init (void)
+{
+ int rc = 0;
+
+ if (register_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO
+ "Failure registering BSD Jail module with the kernel\n");
+
+ rc = mod_reg_security(MY_NAME, &bsdjail_security_ops);
+ if (rc < 0) {
+ printk (KERN_INFO "Failure registering BSD Jail "
+ " module with primary security module.\n");
+ return -EINVAL;
+ }
+ secondary = 1;
+ }
+ printk (KERN_INFO "BSD Jail module initialized.\n");
+
+ return 0;
+}
+
+static void __exit bsdjail_exit (void)
+{
+ if (secondary) {
+ if (mod_unreg_security (MY_NAME, &bsdjail_security_ops))
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ " module with primary module.\n");
+ } else {
+ if (unregister_security (&bsdjail_security_ops)) {
+ printk (KERN_INFO "Failure unregistering BSD Jail "
+ "module with the kernel\n");
+ }
+ }
+
+ printk (KERN_INFO "BSD Jail module removed\n");
+}
+
+security_initcall (bsdjail_init);
+module_exit (bsdjail_exit);
+
+MODULE_DESCRIPTION("BSD Jail LSM.");
+MODULE_LICENSE("GPL");
diff -Nrup linux-2.6.9-rc4-mm1/security/dummy.c linux-2.6.9-rc4-mm1-jail/security/dummy.c
--- linux-2.6.9-rc4-mm1/security/dummy.c 2004-10-11 17:02:22.265603736 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/dummy.c 2004-10-11 10:00:36.000000000 -0500
@@ -623,6 +623,11 @@ static void dummy_task_reparent_to_init
return;
}

+static int dummy_task_lookup(struct task_struct *p)
+{
+ return 0;
+}
+
static void dummy_task_to_inode(struct task_struct *p, struct inode *inode)
{ }

@@ -986,6 +991,7 @@ void security_fixup_ops (struct security
set_to_dummy_if_null(ops, task_kill);
set_to_dummy_if_null(ops, task_prctl);
set_to_dummy_if_null(ops, task_reparent_to_init);
+ set_to_dummy_if_null(ops, task_lookup);
set_to_dummy_if_null(ops, task_to_inode);
set_to_dummy_if_null(ops, ipc_permission);
set_to_dummy_if_null(ops, msg_msg_alloc_security);
diff -Nrup linux-2.6.9-rc4-mm1/security/Kconfig linux-2.6.9-rc4-mm1-jail/security/Kconfig
--- linux-2.6.9-rc4-mm1/security/Kconfig 2004-10-11 17:02:22.265603736 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/Kconfig 2004-10-11 10:00:51.000000000 -0500
@@ -86,5 +86,16 @@ config SECURITY_SECLVL

source security/selinux/Kconfig

+config SECURITY_BSDJAIL
+ tristate "BSD Jail LSM"
+ depends on SECURITY
+ select SECURITY_NETWORK
+ help
+ Provides BSD Jail compartmentalization functionality.
+ See Documentation/bsdjail.txt for more information and
+ usage instructions.
+
+ If you are unsure how to answer this question, answer N.
+
endmenu

diff -Nrup linux-2.6.9-rc4-mm1/security/Makefile linux-2.6.9-rc4-mm1-jail/security/Makefile
--- linux-2.6.9-rc4-mm1/security/Makefile 2004-10-11 17:02:22.287600392 -0500
+++ linux-2.6.9-rc4-mm1-jail/security/Makefile 2004-10-11 10:00:51.000000000 -0500
@@ -17,3 +17,4 @@ obj-$(CONFIG_SECURITY_SELINUX) += selin
obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o
obj-$(CONFIG_SECURITY_ROOTPLUG) += commoncap.o root_plug.o
obj-$(CONFIG_SECURITY_SECLVL) += seclvl.o
+obj-$(CONFIG_SECURITY_BSDJAIL) += bsdjail.o

2004-10-12 14:15:54

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, Oct 12, 2004 at 08:11:24AM -0500, Serge E. Hallyn wrote:
> > That however requires a co-operator outside the chroot so doesn't seem
> > to be a problem. I like the CLONE approach, its a lot cleaner.
>
> The attached patch (against -rc4-mm1) moves the responsibility for
> filesystem containment entirely to userspace. The Documentation/bsdjail.txt
> file reflects the new usage. It also incorporates Christoph's cleanups.
>
> I still need to see about generalizing the networking confinement. I
> certainly like the concept (as I understand it at least) behind the new
> vserver networking, but am not sure it can be done without patching.

Please remember that linux kernel work is not about "not needing patching".
If a concept makes sense changing code is a good thing.

2004-10-12 22:38:47

by Ulrich Drepper

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Serge E. Hallyn wrote:

> +If a private IP was specified for the jail, then
> + cat /proc/$$/attr/current

How is this going to interact with SELinux? Currently SELinux uses
/proc/*/attr/current to report the current security context of the
process. libselinux expects the file to contain one string (not even a
newline) which is the textual representation of the context. Now with
your changes you want to change this. libselinux as-is would break
miserably.

I don't know the history of the file and who is hijacking the file.
Fact is that the file content is currently unstructured and libselinux
couldn't possibly determine what part is of interest to itself.

So, either you use another file, SELinux uses another file, or the file
gets tagged lines like

selinux: user_u:user_r:user_t

I guess you couldn't even start the userlevel code in FC3 in such a jail
in the moment since the libselinux startup tests would fail.

- --
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)

iD8DBQFBbFwm2ijCOnn/RHQRAvimAJ9W3bIil5Yi1Ex/CX1FpUjzxyheIQCeNKRu
RHv5SGG0iQSEsmbIWfHmwAA=
=HZM3
-----END PGP SIGNATURE-----

2004-10-13 00:58:56

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

> > +If a private IP was specified for the jail, then
> > + cat /proc/$$/attr/current
>
> How is this going to interact with SELinux? Currently SELinux uses

The first problem is that to use jail with selinux you'll need to use
a stacking infrastructure (which is still being developed) anyway, in
order to get around the multiplexing of task->security, file->f_security,
and sk->sk_security.

But you're right, this is a problem I've had to address with the stacker:

> /proc/*/attr/current to report the current security context of the
> process. libselinux expects the file to contain one string (not even a

...

> selinux: user_u:user_r:user_t

This is exactly what my current stacker does, to the byte :-)

-serge

2004-10-13 01:10:09

by Ulrich Drepper

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Serge E. Hallyn wrote:

>> selinux: user_u:user_r:user_t
>
>
> This is exactly what my current stacker does, to the byte :-)

This is all nice and good, but you have to bring this up with the
SELinux people _now_ since, as I said before, the current
SELinux-enabled userland code might not even start with this change of
the format even if SELinux is not enabled. If it is decided that
/proc/*/attr/current does not belong to SELinux alone, then the guys
should be told about it now so that all the relevant code (libselinux,
kernel without your "stacker" stuff, ...) can be changed before the
current use spreads too far.

- --
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.6 (GNU/Linux)

iD8DBQFBbIBI2ijCOnn/RHQRAqXMAJ96lsdsTsZf3jI+8UXLAziK1iKC2QCfZyZT
zewSIJsYVpIFK2lG0lFcrgY=
=SGiv
-----END PGP SIGNATURE-----

2004-10-13 01:12:21

by Chris Wright

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

* Ulrich Drepper ([email protected]) wrote:
> Serge E. Hallyn wrote:
> > +If a private IP was specified for the jail, then
> > + cat /proc/$$/attr/current
>
> How is this going to interact with SELinux?

Poorly. It's not expected to work with SELinux. There's no good
stacking yet.

> Currently SELinux uses
> /proc/*/attr/current to report the current security context of the
> process. libselinux expects the file to contain one string (not even a
> newline) which is the textual representation of the context. Now with
> your changes you want to change this. libselinux as-is would break
> miserably.

Maybe libselinux should not look around in there unless SELinux is
enabled in kernel.

> I don't know the history of the file and who is hijacking the file.
> Fact is that the file content is currently unstructured and libselinux
> couldn't possibly determine what part is of interest to itself.
>
> So, either you use another file, SELinux uses another file, or the file
> gets tagged lines like
>
> selinux: user_u:user_r:user_t

Yeah, that's workable. Other options would probably look like putting
stuff in module specific locations, which is more painful.

> I guess you couldn't even start the userlevel code in FC3 in such a jail
> in the moment since the libselinux startup tests would fail.

Userspace won't start in a jail, and once it's up, jailing works (on
rawhide for example). Admittedly, the label looks a bit funny.

# in jail
$ ps -eM
LABEL PID TTY TIME CMD
No 16933 ? 00:00:00 bash
No 17010 ? 00:00:00 ps

# unconfined
$ ps -eM
<snip>
Not 5714 pts/5 00:00:00 ssh
Not 12027 pts/6 00:00:00 bash
Not 12046 pts/6 00:00:00 vim
Not 16823 pts/4 00:00:00 vim
Not 16911 pts/8 00:00:00 bash
Root: 16933 pts/7 00:00:00 bash
Not 17016 pts/8 00:00:00 ps

thanks,
-chris
--
Linux Security Modules http://lsm.immunix.org http://lsm.bkbits.net

2004-10-13 01:22:15

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, Oct 12, 2004 at 06:09:28PM -0700, Ulrich Drepper wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Serge E. Hallyn wrote:
>
> >> selinux: user_u:user_r:user_t
> >
> >
> > This is exactly what my current stacker does, to the byte :-)
>
> This is all nice and good, but you have to bring this up with the
> SELinux people _now_ since, as I said before, the current
> SELinux-enabled userland code might not even start with this change of
> the format even if SELinux is not enabled. If it is decided that
> /proc/*/attr/current does not belong to SELinux alone, then the guys
> should be told about it now so that all the relevant code (libselinux,
> kernel without your "stacker" stuff, ...) can be changed before the
> current use spreads too far.

Then they would have to check for an optional "selinux: " at the front
of each security_setprocattr entry read in the kernel, in order to handle
an lsm infrastructure change which might never be accepted into the kernel
anyway. I suppose it's pretty trivial anyway, but then why would they
bother...

-serge

2004-10-13 14:29:33

by Stephen Smalley

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, 2004-10-12 at 18:35, Ulrich Drepper wrote:
> Serge E. Hallyn wrote:
>
> > +If a private IP was specified for the jail, then
> > + cat /proc/$$/attr/current
>
> How is this going to interact with SELinux? Currently SELinux uses
> /proc/*/attr/current to report the current security context of the
> process. libselinux expects the file to contain one string (not even a
> newline) which is the textual representation of the context. Now with
> your changes you want to change this. libselinux as-is would break
> miserably.

libselinux is_selinux_enabled() checks /proc/filesystems for selinuxfs
first, and SELinux userland checks is_selinux_enabled(). As security
modules cannot presently be stacked if they both use the security
fields, this is sufficient. There were patches floated on
rhselinux-list circa Oct 2003 to add a "selinux/" prefix to the
/proc/pid/attr values to explicitly identify the security module, ala
the "security.selinux" attribute name for the file extended attribute,
but the consensus at that time was that it was sufficient to test for
the presence of SELinux via /proc/filesystems.

> I don't know the history of the file and who is hijacking the file.
> Fact is that the file content is currently unstructured and libselinux
> couldn't possibly determine what part is of interest to itself.

The /proc/pid/attr interface was submitted by us based on Al Viro's
recommendations when the SELinux API was overhauled. We attempted to
keep it sufficiently general that other security modules could also use
it, but not at the same time, as shared use of LSM security fields
wasn't supported anyway. We had earlier proposed [gs]etprocattr calls
ala [gs]etxattr calls with distinguished attribute names, but were
directed to use /proc instead.

> So, either you use another file, SELinux uses another file, or the file
> gets tagged lines like
>
> selinux: user_u:user_r:user_t

One value per file seems preferred, but /proc/pid doesn't lend itself to
dynamic extension by modules. [gs]etprocattr calls ala [gs]etxattr
calls would be simpler if we want to export multiple attribute names,
but that was also suggested earlier and rejected.

Side bar: Any change here also affects upstream procps, which presently
directly takes the /proc/pid/attr/current value and displays it as a
single field.

--
Stephen Smalley <[email protected]>
National Security Agency

2004-10-13 15:29:47

by Stephen Smalley

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, 2004-10-12 at 21:22, Serge E. Hallyn wrote:
> Then they would have to check for an optional "selinux: " at the front
> of each security_setprocattr entry read in the kernel, in order to handle
> an lsm infrastructure change which might never be accepted into the kernel
> anyway. I suppose it's pretty trivial anyway, but then why would they
> bother...

The changes to libselinux and procps and any scripts that directly
access /proc/pid/attr to deal with multi-entry values would be more
important; changing the kernel to prepend "selinux: " on getprocattr and
to strip it on setprocattr would indeed be trivial (but one wonders
whether we can be confident that userspace will never try to pass one of
these multi-entry values read from /proc/pid/attr to another interface
that expects a single context, e.g. selinuxfs or
setxattr("security.selinux")).

--
Stephen Smalley <[email protected]>
National Security Agency

2004-10-20 15:47:07

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Tue, Oct 12, 2004 at 02:27:33PM +0200, Herbert Poetzl wrote:
> On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote:
> > On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote:
> > > and it works well, because we use it for almost
> > > a year now on linux-vserver ;)
> >
> > Btw, could anyone explain the exact differences between linux-vserver
> > and this jail module?
>
> hmm, okay I'll try ...
>
> linux-vserver is a combination of kernel patch and
> userspace tools to create 'virtual servers' similar
> to UML, but sharing the resources (and kernel).
>
> to do this, it uses process isolation, network
> isolation and disk space separation (tagging).
> in addition it does resource management (accounting
> and limits) for various aspects (CPU, memory,
> processes, sockets, filehandles, ...)
>
> the jail module is recreating a limited subset of
> the isolation aspect via LSM (similar to the BSD
> jail) which allows to confine a process (and it's
> children) to a chroot() environment under certain
> limitations (resources)

So why

a) can't linux-vserver use LSM hooks where applicable
b) can't the two projects share code so we don't only have a crippled
version in mainline

2004-10-20 19:25:19

by Herbert Poetzl

[permalink] [raw]
Subject: Re: [patch 2/3] lsm: add bsdjail module

On Wed, Oct 20, 2004 at 04:36:21PM +0100, Christoph Hellwig wrote:
> On Tue, Oct 12, 2004 at 02:27:33PM +0200, Herbert Poetzl wrote:
> > On Tue, Oct 12, 2004 at 10:00:57AM +0100, Christoph Hellwig wrote:
> > > On Tue, Oct 12, 2004 at 09:00:55AM +0200, Herbert Poetzl wrote:
> > > > and it works well, because we use it for almost
> > > > a year now on linux-vserver ;)
> > >
> > > Btw, could anyone explain the exact differences between linux-vserver
> > > and this jail module?
> >
> > hmm, okay I'll try ...
> >
> > linux-vserver is a combination of kernel patch and
> > userspace tools to create 'virtual servers' similar
> > to UML, but sharing the resources (and kernel).
> >
> > to do this, it uses process isolation, network
> > isolation and disk space separation (tagging).
> > in addition it does resource management (accounting
> > and limits) for various aspects (CPU, memory,
> > processes, sockets, filehandles, ...)
> >
> > the jail module is recreating a limited subset of
> > the isolation aspect via LSM (similar to the BSD
> > jail) which allows to confine a process (and it's
> > children) to a chroot() environment under certain
> > limitations (resources)
>
> So why
>
> a) can't linux-vserver use LSM hooks where applicable

well, it could, and probably in future it will do so,
but currently there are three reasons which keep me
from doing that:

1) some folks want to use LSM for other things, and
proper stackering of LSM was broken/missing last
time I looked at the code

2) performance: I'm not convinced that the LSM
hooks are a good choice, where a single check
of a flag (in current) is more than sufficient

3) why move 20% of linux vserver to LSM, where
those 20% can not do anything useful without the
remaining 80% (or at least some part of it)
which can not be done with LSM for various
reasons.

> b) can't the two projects share code so we don't only have a crippled
> version in mainline

I'm sure the projects can share code, and IMHO the
best solution would be to create a 'cripled' version
of linux-vserver and to include it in mainline (if
that is what kernel folks want) and to slowly extend
this version where possible, moving existing code
from linux-vserver into mainline ...

once CKRM is working and included, and LSM provides
the 'security' features, linux-vserver might become
a simple compile time option ...

best,
Herbert

> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/