2008-03-06 09:20:13

by Pavel Emelyanov

[permalink] [raw]
Subject: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

Current /proc/net is done with so called "shadows", but current
implementation is broken and has little chances to get fixed.

The problem is that dentries subtree of /proc/net directory has
fancy revalidation rules to make processes living in different
net namespaces see different entries in /proc/net subtree, but
currently, tasks see in the /proc/net subdir the contents of any
other namespace, depending on who opened the file first.

The proposed fix is to turn /proc/net into a symlink, which points
to /proc/self/net, which in turn shows what previously was in
/proc/net - the network-related info, from the net namespace the
appropriate task lives in.

# ls -l /proc/net
lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net

In other words - this behaves like /proc/mounts, but unlike
"mounts", "net" is not a file, but a directory.

Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.

Signed-off-by: Pavel Emelyanov <[email protected]>

---

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 96ee899..cc43cf0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, task),
DIR("fd", S_IRUSR|S_IXUSR, fd),
DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
+ DIR("net", S_IRUGO|S_IXUSR, net),
REG("environ", S_IRUSR, environ),
INF("auxv", S_IRUSR, pid_auxv),
ONE("status", S_IRUGO, pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 68971e6..a36ad3c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
* Don't create negative dentries here, return -ENOENT by hand
* instead.
*/
-struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
+ struct dentry *dentry)
{
struct inode *inode = NULL;
- struct proc_dir_entry * de;
int error = -ENOENT;

lock_kernel();
spin_lock(&proc_subdir_lock);
- de = PDE(dir);
if (de) {
for (de = de->subdir; de ; de = de->next) {
if (de->namelen != dentry->d_name.len)
@@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
unsigned int ino;

- if (de->shadow_proc)
- de = de->shadow_proc(current, de);
ino = de->low_ino;
de_get(de);
spin_unlock(&proc_subdir_lock);
@@ -417,6 +414,12 @@ out_unlock:
return ERR_PTR(error);
}

+struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ return proc_lookup_de(PDE(dir), dir, dentry);
+}
+
/*
* This returns non-zero if at EOF, so that the /proc
* root directory can use this and check if it should
@@ -426,10 +429,9 @@ out_unlock:
* value of the readdir() call, as long as it's non-negative
* for success..
*/
-int proc_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+ filldir_t filldir)
{
- struct proc_dir_entry * de;
unsigned int ino;
int i;
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
lock_kernel();

ino = inode->i_ino;
- de = PDE(inode);
if (!de) {
ret = -EINVAL;
goto out;
@@ -499,6 +500,13 @@ out: unlock_kernel();
return ret;
}

+int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+ return proc_readdir_de(PDE(inode), filp, dirent, filldir);
+}
+
/*
* These are the generic /proc directory operations. They
* use the in-memory "struct proc_dir_entry" tree to parse
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1c81c8f..bc72f5c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
extern const struct file_operations proc_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
+extern const struct file_operations proc_net_operations;
+extern const struct inode_operations proc_net_inode_operations;

void free_proc_entry(struct proc_dir_entry *de);

@@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
{
return PROC_I(inode)->fd;
}
+
+struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
+ struct dentry *dentry);
+int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+ filldir_t filldir);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 14e9b5a..18d413c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -63,6 +63,63 @@ int seq_release_net(struct inode *ino, struct file *f)
}
EXPORT_SYMBOL_GPL(seq_release_net);

+static struct net *get_proc_task_net(struct inode *dir)
+{
+ struct task_struct *task;
+ struct nsproxy *ns;
+ struct net *net = NULL;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(dir), PIDTYPE_PID);
+ if (task != NULL) {
+ ns = task_nsproxy(task);
+ if (ns != NULL)
+ net = get_net(ns->net_ns);
+ }
+ rcu_read_unlock();
+
+ return net;
+}
+
+static struct dentry *proc_tgid_net_lookup(struct inode *dir,
+ struct dentry *dentry, struct nameidata *nd)
+{
+ struct dentry *de;
+ struct net *net;
+
+ de = ERR_PTR(-ENOENT);
+ net = get_proc_task_net(dir);
+ if (net != NULL) {
+ de = proc_lookup_de(net->proc_net, dir, dentry);
+ put_net(net);
+ }
+ return de;
+}
+
+const struct inode_operations proc_net_inode_operations = {
+ .lookup = proc_tgid_net_lookup,
+};
+
+static int proc_tgid_net_readdir(struct file *filp, void *dirent,
+ filldir_t filldir)
+{
+ int ret;
+ struct net *net;
+
+ ret = -EINVAL;
+ net = get_proc_task_net(filp->f_path.dentry->d_inode);
+ if (net != NULL) {
+ ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
+ put_net(net);
+ }
+ return ret;
+}
+
+const struct file_operations proc_net_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_tgid_net_readdir,
+};
+

struct proc_dir_entry *proc_net_fops_create(struct net *net,
const char *name, mode_t mode, const struct file_operations *fops)
@@ -83,14 +140,6 @@ struct net *get_proc_net(const struct inode *inode)
}
EXPORT_SYMBOL_GPL(get_proc_net);

-static struct proc_dir_entry *shadow_pde;
-
-static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
- struct proc_dir_entry *de)
-{
- return task->nsproxy->net_ns->proc_net;
-}
-
struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent)
{
@@ -104,45 +153,35 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);

static __net_init int proc_net_ns_init(struct net *net)
{
- struct proc_dir_entry *root, *netd, *net_statd;
+ struct proc_dir_entry *netd, *net_statd;
int err;

err = -ENOMEM;
- root = kzalloc(sizeof(*root), GFP_KERNEL);
- if (!root)
+ netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+ if (!netd)
goto out;

- err = -EEXIST;
- netd = proc_net_mkdir(net, "net", root);
- if (!netd)
- goto free_root;
+ netd->data = net;

err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);
if (!net_statd)
goto free_net;

- root->data = net;
-
- net->proc_net_root = root;
net->proc_net = netd;
net->proc_net_stat = net_statd;
- err = 0;
+ return 0;

+free_net:
+ kfree(netd);
out:
return err;
-free_net:
- remove_proc_entry("net", root);
-free_root:
- kfree(root);
- goto out;
}

static __net_exit void proc_net_ns_exit(struct net *net)
{
remove_proc_entry("stat", net->proc_net);
- remove_proc_entry("net", net->proc_net_root);
- kfree(net->proc_net_root);
+ kfree(net->proc_net);
}

static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -152,8 +191,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {

int __init proc_net_init(void)
{
- shadow_pde = proc_mkdir("net", NULL);
- shadow_pde->shadow_proc = proc_net_shadow;
+ proc_symlink("net", NULL, "self/net");

return register_pernet_subsys(&proc_net_ns_ops);
}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d9a9e71..9b6c935 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -50,8 +50,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off,
typedef int (write_proc_t)(struct file *file, const char __user *buffer,
unsigned long count, void *data);
typedef int (get_info_t)(char *, char **, off_t, int);
-typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
- struct proc_dir_entry *pde);

struct proc_dir_entry {
unsigned int low_ino;
@@ -82,7 +80,6 @@ struct proc_dir_entry {
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
- shadow_proc_t *shadow_proc;
};

struct kcore_list {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 28738b7..923f2b8 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -31,7 +31,6 @@ struct net {

struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
- struct proc_dir_entry *proc_net_root;

struct list_head sysctl_table_headers;


2008-03-06 12:25:04

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

On Thu, Mar 06, 2008 at 12:18:02PM +0300, Pavel Emelyanov wrote:
> Current /proc/net is done with so called "shadows", but current
> implementation is broken and has little chances to get fixed.
>
> The problem is that dentries subtree of /proc/net directory has
> fancy revalidation rules to make processes living in different
> net namespaces see different entries in /proc/net subtree, but
> currently, tasks see in the /proc/net subdir the contents of any
> other namespace, depending on who opened the file first.
>
> The proposed fix is to turn /proc/net into a symlink, which points
> to /proc/self/net, which in turn shows what previously was in
> /proc/net - the network-related info, from the net namespace the
> appropriate task lives in.

Yes, that's a much better way to get this functionality.

2008-03-06 15:08:35

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

Pavel Emelyanov <[email protected]> writes:

> Current /proc/net is done with so called "shadows", but current
> implementation is broken and has little chances to get fixed.
>
> The problem is that dentries subtree of /proc/net directory has
> fancy revalidation rules to make processes living in different
> net namespaces see different entries in /proc/net subtree, but
> currently, tasks see in the /proc/net subdir the contents of any
> other namespace, depending on who opened the file first.
>
> The proposed fix is to turn /proc/net into a symlink, which points
> to /proc/self/net, which in turn shows what previously was in
> /proc/net - the network-related info, from the net namespace the
> appropriate task lives in.
>
> # ls -l /proc/net
> lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
>
> In other words - this behaves like /proc/mounts, but unlike
> "mounts", "net" is not a file, but a directory.
>
> Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
>
> Signed-off-by: Pavel Emelyanov <[email protected]>

Acked-by: "Eric W. Biederman" <[email protected]>

2008-03-06 16:03:55

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

Pavel Emelyanov <[email protected]> writes:

> Current /proc/net is done with so called "shadows", but current
> implementation is broken and has little chances to get fixed.
>
> The problem is that dentries subtree of /proc/net directory has
> fancy revalidation rules to make processes living in different
> net namespaces see different entries in /proc/net subtree, but
> currently, tasks see in the /proc/net subdir the contents of any
> other namespace, depending on who opened the file first.
>
> The proposed fix is to turn /proc/net into a symlink, which points
> to /proc/self/net, which in turn shows what previously was in
> /proc/net - the network-related info, from the net namespace the
> appropriate task lives in.
>
> # ls -l /proc/net
> lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
>
> In other words - this behaves like /proc/mounts, but unlike
> "mounts", "net" is not a file, but a directory.
>
> Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
>
> Signed-off-by: Pavel Emelyanov <[email protected]>

Before I forget.

As a future patch we need to implement a d_revalidate rule for
/proc/<pid>/task/net that will notice we have unshared a network
namespace and flush all cached dentries for the old network namespace.

Since unsharing happens rarely if at all this d_revalidate should
be relatively cheap, and since the change is real and for everyone
it will not suffer from the inconsistencies that plagued us when
working with d_revalidate on /proc/net.

Eric

2008-03-06 16:38:15

by Stephen Smalley

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)


On Thu, 2008-03-06 at 12:18 +0300, Pavel Emelyanov wrote:
> Current /proc/net is done with so called "shadows", but current
> implementation is broken and has little chances to get fixed.
>
> The problem is that dentries subtree of /proc/net directory has
> fancy revalidation rules to make processes living in different
> net namespaces see different entries in /proc/net subtree, but
> currently, tasks see in the /proc/net subdir the contents of any
> other namespace, depending on who opened the file first.
>
> The proposed fix is to turn /proc/net into a symlink, which points
> to /proc/self/net, which in turn shows what previously was in
> /proc/net - the network-related info, from the net namespace the
> appropriate task lives in.
>
> # ls -l /proc/net
> lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
>
> In other words - this behaves like /proc/mounts, but unlike
> "mounts", "net" is not a file, but a directory.
>
> Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.

Does this break SELinux labeling of /proc/net inodes and thus its access
controls on them?

> Signed-off-by: Pavel Emelyanov <[email protected]>
>
> ---
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 96ee899..cc43cf0 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> DIR("task", S_IRUGO|S_IXUGO, task),
> DIR("fd", S_IRUSR|S_IXUSR, fd),
> DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
> + DIR("net", S_IRUGO|S_IXUSR, net),
> REG("environ", S_IRUSR, environ),
> INF("auxv", S_IRUSR, pid_auxv),
> ONE("status", S_IRUGO, pid_status),
> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
> index 68971e6..a36ad3c 100644
> --- a/fs/proc/generic.c
> +++ b/fs/proc/generic.c
> @@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
> * Don't create negative dentries here, return -ENOENT by hand
> * instead.
> */
> -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
> + struct dentry *dentry)
> {
> struct inode *inode = NULL;
> - struct proc_dir_entry * de;
> int error = -ENOENT;
>
> lock_kernel();
> spin_lock(&proc_subdir_lock);
> - de = PDE(dir);
> if (de) {
> for (de = de->subdir; de ; de = de->next) {
> if (de->namelen != dentry->d_name.len)
> @@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
> if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
> unsigned int ino;
>
> - if (de->shadow_proc)
> - de = de->shadow_proc(current, de);
> ino = de->low_ino;
> de_get(de);
> spin_unlock(&proc_subdir_lock);
> @@ -417,6 +414,12 @@ out_unlock:
> return ERR_PTR(error);
> }
>
> +struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
> + struct nameidata *nd)
> +{
> + return proc_lookup_de(PDE(dir), dir, dentry);
> +}
> +
> /*
> * This returns non-zero if at EOF, so that the /proc
> * root directory can use this and check if it should
> @@ -426,10 +429,9 @@ out_unlock:
> * value of the readdir() call, as long as it's non-negative
> * for success..
> */
> -int proc_readdir(struct file * filp,
> - void * dirent, filldir_t filldir)
> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
> + filldir_t filldir)
> {
> - struct proc_dir_entry * de;
> unsigned int ino;
> int i;
> struct inode *inode = filp->f_path.dentry->d_inode;
> @@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
> lock_kernel();
>
> ino = inode->i_ino;
> - de = PDE(inode);
> if (!de) {
> ret = -EINVAL;
> goto out;
> @@ -499,6 +500,13 @@ out: unlock_kernel();
> return ret;
> }
>
> +int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
> +{
> + struct inode *inode = filp->f_path.dentry->d_inode;
> +
> + return proc_readdir_de(PDE(inode), filp, dirent, filldir);
> +}
> +
> /*
> * These are the generic /proc directory operations. They
> * use the in-memory "struct proc_dir_entry" tree to parse
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 1c81c8f..bc72f5c 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
> extern const struct file_operations proc_smaps_operations;
> extern const struct file_operations proc_clear_refs_operations;
> extern const struct file_operations proc_pagemap_operations;
> +extern const struct file_operations proc_net_operations;
> +extern const struct inode_operations proc_net_inode_operations;
>
> void free_proc_entry(struct proc_dir_entry *de);
>
> @@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
> {
> return PROC_I(inode)->fd;
> }
> +
> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
> + struct dentry *dentry);
> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
> + filldir_t filldir);
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 14e9b5a..18d413c 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -63,6 +63,63 @@ int seq_release_net(struct inode *ino, struct file *f)
> }
> EXPORT_SYMBOL_GPL(seq_release_net);
>
> +static struct net *get_proc_task_net(struct inode *dir)
> +{
> + struct task_struct *task;
> + struct nsproxy *ns;
> + struct net *net = NULL;
> +
> + rcu_read_lock();
> + task = pid_task(proc_pid(dir), PIDTYPE_PID);
> + if (task != NULL) {
> + ns = task_nsproxy(task);
> + if (ns != NULL)
> + net = get_net(ns->net_ns);
> + }
> + rcu_read_unlock();
> +
> + return net;
> +}
> +
> +static struct dentry *proc_tgid_net_lookup(struct inode *dir,
> + struct dentry *dentry, struct nameidata *nd)
> +{
> + struct dentry *de;
> + struct net *net;
> +
> + de = ERR_PTR(-ENOENT);
> + net = get_proc_task_net(dir);
> + if (net != NULL) {
> + de = proc_lookup_de(net->proc_net, dir, dentry);
> + put_net(net);
> + }
> + return de;
> +}
> +
> +const struct inode_operations proc_net_inode_operations = {
> + .lookup = proc_tgid_net_lookup,
> +};
> +
> +static int proc_tgid_net_readdir(struct file *filp, void *dirent,
> + filldir_t filldir)
> +{
> + int ret;
> + struct net *net;
> +
> + ret = -EINVAL;
> + net = get_proc_task_net(filp->f_path.dentry->d_inode);
> + if (net != NULL) {
> + ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
> + put_net(net);
> + }
> + return ret;
> +}
> +
> +const struct file_operations proc_net_operations = {
> + .read = generic_read_dir,
> + .readdir = proc_tgid_net_readdir,
> +};
> +
>
> struct proc_dir_entry *proc_net_fops_create(struct net *net,
> const char *name, mode_t mode, const struct file_operations *fops)
> @@ -83,14 +140,6 @@ struct net *get_proc_net(const struct inode *inode)
> }
> EXPORT_SYMBOL_GPL(get_proc_net);
>
> -static struct proc_dir_entry *shadow_pde;
> -
> -static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
> - struct proc_dir_entry *de)
> -{
> - return task->nsproxy->net_ns->proc_net;
> -}
> -
> struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
> struct proc_dir_entry *parent)
> {
> @@ -104,45 +153,35 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
>
> static __net_init int proc_net_ns_init(struct net *net)
> {
> - struct proc_dir_entry *root, *netd, *net_statd;
> + struct proc_dir_entry *netd, *net_statd;
> int err;
>
> err = -ENOMEM;
> - root = kzalloc(sizeof(*root), GFP_KERNEL);
> - if (!root)
> + netd = kzalloc(sizeof(*netd), GFP_KERNEL);
> + if (!netd)
> goto out;
>
> - err = -EEXIST;
> - netd = proc_net_mkdir(net, "net", root);
> - if (!netd)
> - goto free_root;
> + netd->data = net;
>
> err = -EEXIST;
> net_statd = proc_net_mkdir(net, "stat", netd);
> if (!net_statd)
> goto free_net;
>
> - root->data = net;
> -
> - net->proc_net_root = root;
> net->proc_net = netd;
> net->proc_net_stat = net_statd;
> - err = 0;
> + return 0;
>
> +free_net:
> + kfree(netd);
> out:
> return err;
> -free_net:
> - remove_proc_entry("net", root);
> -free_root:
> - kfree(root);
> - goto out;
> }
>
> static __net_exit void proc_net_ns_exit(struct net *net)
> {
> remove_proc_entry("stat", net->proc_net);
> - remove_proc_entry("net", net->proc_net_root);
> - kfree(net->proc_net_root);
> + kfree(net->proc_net);
> }
>
> static struct pernet_operations __net_initdata proc_net_ns_ops = {
> @@ -152,8 +191,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
>
> int __init proc_net_init(void)
> {
> - shadow_pde = proc_mkdir("net", NULL);
> - shadow_pde->shadow_proc = proc_net_shadow;
> + proc_symlink("net", NULL, "self/net");
>
> return register_pernet_subsys(&proc_net_ns_ops);
> }
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index d9a9e71..9b6c935 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -50,8 +50,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off,
> typedef int (write_proc_t)(struct file *file, const char __user *buffer,
> unsigned long count, void *data);
> typedef int (get_info_t)(char *, char **, off_t, int);
> -typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
> - struct proc_dir_entry *pde);
>
> struct proc_dir_entry {
> unsigned int low_ino;
> @@ -82,7 +80,6 @@ struct proc_dir_entry {
> int pde_users; /* number of callers into module in progress */
> spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
> struct completion *pde_unload_completion;
> - shadow_proc_t *shadow_proc;
> };
>
> struct kcore_list {
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 28738b7..923f2b8 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -31,7 +31,6 @@ struct net {
>
> struct proc_dir_entry *proc_net;
> struct proc_dir_entry *proc_net_stat;
> - struct proc_dir_entry *proc_net_root;
>
> struct list_head sysctl_table_headers;
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Stephen Smalley
National Security Agency

2008-03-06 16:47:37

by Pavel Emelyanov

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

Stephen Smalley wrote:
> On Thu, 2008-03-06 at 12:18 +0300, Pavel Emelyanov wrote:
>> Current /proc/net is done with so called "shadows", but current
>> implementation is broken and has little chances to get fixed.
>>
>> The problem is that dentries subtree of /proc/net directory has
>> fancy revalidation rules to make processes living in different
>> net namespaces see different entries in /proc/net subtree, but
>> currently, tasks see in the /proc/net subdir the contents of any
>> other namespace, depending on who opened the file first.
>>
>> The proposed fix is to turn /proc/net into a symlink, which points
>> to /proc/self/net, which in turn shows what previously was in
>> /proc/net - the network-related info, from the net namespace the
>> appropriate task lives in.
>>
>> # ls -l /proc/net
>> lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
>>
>> In other words - this behaves like /proc/mounts, but unlike
>> "mounts", "net" is not a file, but a directory.
>>
>> Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
>
> Does this break SELinux labeling of /proc/net inodes and thus its access
> controls on them?

It should not, since the proc_dir_etries are still organized in
a required hierarchy.

>> Signed-off-by: Pavel Emelyanov <[email protected]>
>>
>> ---
>>
>> diff --git a/fs/proc/base.c b/fs/proc/base.c
>> index 96ee899..cc43cf0 100644
>> --- a/fs/proc/base.c
>> +++ b/fs/proc/base.c
>> @@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
>> DIR("task", S_IRUGO|S_IXUGO, task),
>> DIR("fd", S_IRUSR|S_IXUSR, fd),
>> DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
>> + DIR("net", S_IRUGO|S_IXUSR, net),
>> REG("environ", S_IRUSR, environ),
>> INF("auxv", S_IRUSR, pid_auxv),
>> ONE("status", S_IRUGO, pid_status),
>> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
>> index 68971e6..a36ad3c 100644
>> --- a/fs/proc/generic.c
>> +++ b/fs/proc/generic.c
>> @@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
>> * Don't create negative dentries here, return -ENOENT by hand
>> * instead.
>> */
>> -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
>> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
>> + struct dentry *dentry)
>> {
>> struct inode *inode = NULL;
>> - struct proc_dir_entry * de;
>> int error = -ENOENT;
>>
>> lock_kernel();
>> spin_lock(&proc_subdir_lock);
>> - de = PDE(dir);
>> if (de) {
>> for (de = de->subdir; de ; de = de->next) {
>> if (de->namelen != dentry->d_name.len)
>> @@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
>> if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
>> unsigned int ino;
>>
>> - if (de->shadow_proc)
>> - de = de->shadow_proc(current, de);
>> ino = de->low_ino;
>> de_get(de);
>> spin_unlock(&proc_subdir_lock);
>> @@ -417,6 +414,12 @@ out_unlock:
>> return ERR_PTR(error);
>> }
>>
>> +struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
>> + struct nameidata *nd)
>> +{
>> + return proc_lookup_de(PDE(dir), dir, dentry);
>> +}
>> +
>> /*
>> * This returns non-zero if at EOF, so that the /proc
>> * root directory can use this and check if it should
>> @@ -426,10 +429,9 @@ out_unlock:
>> * value of the readdir() call, as long as it's non-negative
>> * for success..
>> */
>> -int proc_readdir(struct file * filp,
>> - void * dirent, filldir_t filldir)
>> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
>> + filldir_t filldir)
>> {
>> - struct proc_dir_entry * de;
>> unsigned int ino;
>> int i;
>> struct inode *inode = filp->f_path.dentry->d_inode;
>> @@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
>> lock_kernel();
>>
>> ino = inode->i_ino;
>> - de = PDE(inode);
>> if (!de) {
>> ret = -EINVAL;
>> goto out;
>> @@ -499,6 +500,13 @@ out: unlock_kernel();
>> return ret;
>> }
>>
>> +int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
>> +{
>> + struct inode *inode = filp->f_path.dentry->d_inode;
>> +
>> + return proc_readdir_de(PDE(inode), filp, dirent, filldir);
>> +}
>> +
>> /*
>> * These are the generic /proc directory operations. They
>> * use the in-memory "struct proc_dir_entry" tree to parse
>> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
>> index 1c81c8f..bc72f5c 100644
>> --- a/fs/proc/internal.h
>> +++ b/fs/proc/internal.h
>> @@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
>> extern const struct file_operations proc_smaps_operations;
>> extern const struct file_operations proc_clear_refs_operations;
>> extern const struct file_operations proc_pagemap_operations;
>> +extern const struct file_operations proc_net_operations;
>> +extern const struct inode_operations proc_net_inode_operations;
>>
>> void free_proc_entry(struct proc_dir_entry *de);
>>
>> @@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
>> {
>> return PROC_I(inode)->fd;
>> }
>> +
>> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
>> + struct dentry *dentry);
>> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
>> + filldir_t filldir);
>> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
>> index 14e9b5a..18d413c 100644
>> --- a/fs/proc/proc_net.c
>> +++ b/fs/proc/proc_net.c
>> @@ -63,6 +63,63 @@ int seq_release_net(struct inode *ino, struct file *f)
>> }
>> EXPORT_SYMBOL_GPL(seq_release_net);
>>
>> +static struct net *get_proc_task_net(struct inode *dir)
>> +{
>> + struct task_struct *task;
>> + struct nsproxy *ns;
>> + struct net *net = NULL;
>> +
>> + rcu_read_lock();
>> + task = pid_task(proc_pid(dir), PIDTYPE_PID);
>> + if (task != NULL) {
>> + ns = task_nsproxy(task);
>> + if (ns != NULL)
>> + net = get_net(ns->net_ns);
>> + }
>> + rcu_read_unlock();
>> +
>> + return net;
>> +}
>> +
>> +static struct dentry *proc_tgid_net_lookup(struct inode *dir,
>> + struct dentry *dentry, struct nameidata *nd)
>> +{
>> + struct dentry *de;
>> + struct net *net;
>> +
>> + de = ERR_PTR(-ENOENT);
>> + net = get_proc_task_net(dir);
>> + if (net != NULL) {
>> + de = proc_lookup_de(net->proc_net, dir, dentry);
>> + put_net(net);
>> + }
>> + return de;
>> +}
>> +
>> +const struct inode_operations proc_net_inode_operations = {
>> + .lookup = proc_tgid_net_lookup,
>> +};
>> +
>> +static int proc_tgid_net_readdir(struct file *filp, void *dirent,
>> + filldir_t filldir)
>> +{
>> + int ret;
>> + struct net *net;
>> +
>> + ret = -EINVAL;
>> + net = get_proc_task_net(filp->f_path.dentry->d_inode);
>> + if (net != NULL) {
>> + ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
>> + put_net(net);
>> + }
>> + return ret;
>> +}
>> +
>> +const struct file_operations proc_net_operations = {
>> + .read = generic_read_dir,
>> + .readdir = proc_tgid_net_readdir,
>> +};
>> +
>>
>> struct proc_dir_entry *proc_net_fops_create(struct net *net,
>> const char *name, mode_t mode, const struct file_operations *fops)
>> @@ -83,14 +140,6 @@ struct net *get_proc_net(const struct inode *inode)
>> }
>> EXPORT_SYMBOL_GPL(get_proc_net);
>>
>> -static struct proc_dir_entry *shadow_pde;
>> -
>> -static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
>> - struct proc_dir_entry *de)
>> -{
>> - return task->nsproxy->net_ns->proc_net;
>> -}
>> -
>> struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
>> struct proc_dir_entry *parent)
>> {
>> @@ -104,45 +153,35 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
>>
>> static __net_init int proc_net_ns_init(struct net *net)
>> {
>> - struct proc_dir_entry *root, *netd, *net_statd;
>> + struct proc_dir_entry *netd, *net_statd;
>> int err;
>>
>> err = -ENOMEM;
>> - root = kzalloc(sizeof(*root), GFP_KERNEL);
>> - if (!root)
>> + netd = kzalloc(sizeof(*netd), GFP_KERNEL);
>> + if (!netd)
>> goto out;
>>
>> - err = -EEXIST;
>> - netd = proc_net_mkdir(net, "net", root);
>> - if (!netd)
>> - goto free_root;
>> + netd->data = net;
>>
>> err = -EEXIST;
>> net_statd = proc_net_mkdir(net, "stat", netd);
>> if (!net_statd)
>> goto free_net;
>>
>> - root->data = net;
>> -
>> - net->proc_net_root = root;
>> net->proc_net = netd;
>> net->proc_net_stat = net_statd;
>> - err = 0;
>> + return 0;
>>
>> +free_net:
>> + kfree(netd);
>> out:
>> return err;
>> -free_net:
>> - remove_proc_entry("net", root);
>> -free_root:
>> - kfree(root);
>> - goto out;
>> }
>>
>> static __net_exit void proc_net_ns_exit(struct net *net)
>> {
>> remove_proc_entry("stat", net->proc_net);
>> - remove_proc_entry("net", net->proc_net_root);
>> - kfree(net->proc_net_root);
>> + kfree(net->proc_net);
>> }
>>
>> static struct pernet_operations __net_initdata proc_net_ns_ops = {
>> @@ -152,8 +191,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
>>
>> int __init proc_net_init(void)
>> {
>> - shadow_pde = proc_mkdir("net", NULL);
>> - shadow_pde->shadow_proc = proc_net_shadow;
>> + proc_symlink("net", NULL, "self/net");
>>
>> return register_pernet_subsys(&proc_net_ns_ops);
>> }
>> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
>> index d9a9e71..9b6c935 100644
>> --- a/include/linux/proc_fs.h
>> +++ b/include/linux/proc_fs.h
>> @@ -50,8 +50,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off,
>> typedef int (write_proc_t)(struct file *file, const char __user *buffer,
>> unsigned long count, void *data);
>> typedef int (get_info_t)(char *, char **, off_t, int);
>> -typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
>> - struct proc_dir_entry *pde);
>>
>> struct proc_dir_entry {
>> unsigned int low_ino;
>> @@ -82,7 +80,6 @@ struct proc_dir_entry {
>> int pde_users; /* number of callers into module in progress */
>> spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
>> struct completion *pde_unload_completion;
>> - shadow_proc_t *shadow_proc;
>> };
>>
>> struct kcore_list {
>> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
>> index 28738b7..923f2b8 100644
>> --- a/include/net/net_namespace.h
>> +++ b/include/net/net_namespace.h
>> @@ -31,7 +31,6 @@ struct net {
>>
>> struct proc_dir_entry *proc_net;
>> struct proc_dir_entry *proc_net_stat;
>> - struct proc_dir_entry *proc_net_root;
>>
>> struct list_head sysctl_table_headers;
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/

2008-03-06 19:45:33

by Stephen Smalley

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)


On Thu, 2008-03-06 at 19:45 +0300, Pavel Emelyanov wrote:
> Stephen Smalley wrote:
> > On Thu, 2008-03-06 at 12:18 +0300, Pavel Emelyanov wrote:
> >> Current /proc/net is done with so called "shadows", but current
> >> implementation is broken and has little chances to get fixed.
> >>
> >> The problem is that dentries subtree of /proc/net directory has
> >> fancy revalidation rules to make processes living in different
> >> net namespaces see different entries in /proc/net subtree, but
> >> currently, tasks see in the /proc/net subdir the contents of any
> >> other namespace, depending on who opened the file first.
> >>
> >> The proposed fix is to turn /proc/net into a symlink, which points
> >> to /proc/self/net, which in turn shows what previously was in
> >> /proc/net - the network-related info, from the net namespace the
> >> appropriate task lives in.
> >>
> >> # ls -l /proc/net
> >> lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net
> >>
> >> In other words - this behaves like /proc/mounts, but unlike
> >> "mounts", "net" is not a file, but a directory.
> >>
> >> Fixed a task_struct leak in get_proc_task_net, pointed out by Paul.
> >
> > Does this break SELinux labeling of /proc/net inodes and thus its access
> > controls on them?
>
> It should not, since the proc_dir_etries are still organized in
> a required hierarchy.

Unfortunately, it does break selinux labeling of /proc/net inodes.
Easily seen by running ls -ZR /proc/net before and after the patch is
applied and comparing the results.

Also, as a separate issue, a "find /proc/self/net ..." will show that
the hard link count is wrong for it.

Currently the inodes get labeled by selinux_proc_get_sid() by walking up
the proc_dir_entry tree to generate a name and prefix matching that name
in policy. Before the patch, we'd get names like "//net/tcp", i.e.
relative to the root of proc (the extra leading slash is due to the
earlier proc net rewrite which also broke selinux until we adjusted it
to ignore it). After the patch, we just get "//tcp", which is ambiguous
since we would also get that for a /proc/tcp inode if one existed.

Not trying to obstruct your proc net improvements, but just want to make
sure that selinux doesn't get broken in the process.

>
> >> Signed-off-by: Pavel Emelyanov <[email protected]>
> >>
> >> ---
> >>
> >> diff --git a/fs/proc/base.c b/fs/proc/base.c
> >> index 96ee899..cc43cf0 100644
> >> --- a/fs/proc/base.c
> >> +++ b/fs/proc/base.c
> >> @@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = {
> >> DIR("task", S_IRUGO|S_IXUGO, task),
> >> DIR("fd", S_IRUSR|S_IXUSR, fd),
> >> DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo),
> >> + DIR("net", S_IRUGO|S_IXUSR, net),
> >> REG("environ", S_IRUSR, environ),
> >> INF("auxv", S_IRUSR, pid_auxv),
> >> ONE("status", S_IRUGO, pid_status),
> >> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
> >> index 68971e6..a36ad3c 100644
> >> --- a/fs/proc/generic.c
> >> +++ b/fs/proc/generic.c
> >> @@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations =
> >> * Don't create negative dentries here, return -ENOENT by hand
> >> * instead.
> >> */
> >> -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
> >> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
> >> + struct dentry *dentry)
> >> {
> >> struct inode *inode = NULL;
> >> - struct proc_dir_entry * de;
> >> int error = -ENOENT;
> >>
> >> lock_kernel();
> >> spin_lock(&proc_subdir_lock);
> >> - de = PDE(dir);
> >> if (de) {
> >> for (de = de->subdir; de ; de = de->next) {
> >> if (de->namelen != dentry->d_name.len)
> >> @@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam
> >> if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
> >> unsigned int ino;
> >>
> >> - if (de->shadow_proc)
> >> - de = de->shadow_proc(current, de);
> >> ino = de->low_ino;
> >> de_get(de);
> >> spin_unlock(&proc_subdir_lock);
> >> @@ -417,6 +414,12 @@ out_unlock:
> >> return ERR_PTR(error);
> >> }
> >>
> >> +struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
> >> + struct nameidata *nd)
> >> +{
> >> + return proc_lookup_de(PDE(dir), dir, dentry);
> >> +}
> >> +
> >> /*
> >> * This returns non-zero if at EOF, so that the /proc
> >> * root directory can use this and check if it should
> >> @@ -426,10 +429,9 @@ out_unlock:
> >> * value of the readdir() call, as long as it's non-negative
> >> * for success..
> >> */
> >> -int proc_readdir(struct file * filp,
> >> - void * dirent, filldir_t filldir)
> >> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
> >> + filldir_t filldir)
> >> {
> >> - struct proc_dir_entry * de;
> >> unsigned int ino;
> >> int i;
> >> struct inode *inode = filp->f_path.dentry->d_inode;
> >> @@ -438,7 +440,6 @@ int proc_readdir(struct file * filp,
> >> lock_kernel();
> >>
> >> ino = inode->i_ino;
> >> - de = PDE(inode);
> >> if (!de) {
> >> ret = -EINVAL;
> >> goto out;
> >> @@ -499,6 +500,13 @@ out: unlock_kernel();
> >> return ret;
> >> }
> >>
> >> +int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
> >> +{
> >> + struct inode *inode = filp->f_path.dentry->d_inode;
> >> +
> >> + return proc_readdir_de(PDE(inode), filp, dirent, filldir);
> >> +}
> >> +
> >> /*
> >> * These are the generic /proc directory operations. They
> >> * use the in-memory "struct proc_dir_entry" tree to parse
> >> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> >> index 1c81c8f..bc72f5c 100644
> >> --- a/fs/proc/internal.h
> >> +++ b/fs/proc/internal.h
> >> @@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations;
> >> extern const struct file_operations proc_smaps_operations;
> >> extern const struct file_operations proc_clear_refs_operations;
> >> extern const struct file_operations proc_pagemap_operations;
> >> +extern const struct file_operations proc_net_operations;
> >> +extern const struct inode_operations proc_net_inode_operations;
> >>
> >> void free_proc_entry(struct proc_dir_entry *de);
> >>
> >> @@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode)
> >> {
> >> return PROC_I(inode)->fd;
> >> }
> >> +
> >> +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
> >> + struct dentry *dentry);
> >> +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
> >> + filldir_t filldir);
> >> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> >> index 14e9b5a..18d413c 100644
> >> --- a/fs/proc/proc_net.c
> >> +++ b/fs/proc/proc_net.c
> >> @@ -63,6 +63,63 @@ int seq_release_net(struct inode *ino, struct file *f)
> >> }
> >> EXPORT_SYMBOL_GPL(seq_release_net);
> >>
> >> +static struct net *get_proc_task_net(struct inode *dir)
> >> +{
> >> + struct task_struct *task;
> >> + struct nsproxy *ns;
> >> + struct net *net = NULL;
> >> +
> >> + rcu_read_lock();
> >> + task = pid_task(proc_pid(dir), PIDTYPE_PID);
> >> + if (task != NULL) {
> >> + ns = task_nsproxy(task);
> >> + if (ns != NULL)
> >> + net = get_net(ns->net_ns);
> >> + }
> >> + rcu_read_unlock();
> >> +
> >> + return net;
> >> +}
> >> +
> >> +static struct dentry *proc_tgid_net_lookup(struct inode *dir,
> >> + struct dentry *dentry, struct nameidata *nd)
> >> +{
> >> + struct dentry *de;
> >> + struct net *net;
> >> +
> >> + de = ERR_PTR(-ENOENT);
> >> + net = get_proc_task_net(dir);
> >> + if (net != NULL) {
> >> + de = proc_lookup_de(net->proc_net, dir, dentry);
> >> + put_net(net);
> >> + }
> >> + return de;
> >> +}
> >> +
> >> +const struct inode_operations proc_net_inode_operations = {
> >> + .lookup = proc_tgid_net_lookup,
> >> +};
> >> +
> >> +static int proc_tgid_net_readdir(struct file *filp, void *dirent,
> >> + filldir_t filldir)
> >> +{
> >> + int ret;
> >> + struct net *net;
> >> +
> >> + ret = -EINVAL;
> >> + net = get_proc_task_net(filp->f_path.dentry->d_inode);
> >> + if (net != NULL) {
> >> + ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
> >> + put_net(net);
> >> + }
> >> + return ret;
> >> +}
> >> +
> >> +const struct file_operations proc_net_operations = {
> >> + .read = generic_read_dir,
> >> + .readdir = proc_tgid_net_readdir,
> >> +};
> >> +
> >>
> >> struct proc_dir_entry *proc_net_fops_create(struct net *net,
> >> const char *name, mode_t mode, const struct file_operations *fops)
> >> @@ -83,14 +140,6 @@ struct net *get_proc_net(const struct inode *inode)
> >> }
> >> EXPORT_SYMBOL_GPL(get_proc_net);
> >>
> >> -static struct proc_dir_entry *shadow_pde;
> >> -
> >> -static struct proc_dir_entry *proc_net_shadow(struct task_struct *task,
> >> - struct proc_dir_entry *de)
> >> -{
> >> - return task->nsproxy->net_ns->proc_net;
> >> -}
> >> -
> >> struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
> >> struct proc_dir_entry *parent)
> >> {
> >> @@ -104,45 +153,35 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir);
> >>
> >> static __net_init int proc_net_ns_init(struct net *net)
> >> {
> >> - struct proc_dir_entry *root, *netd, *net_statd;
> >> + struct proc_dir_entry *netd, *net_statd;
> >> int err;
> >>
> >> err = -ENOMEM;
> >> - root = kzalloc(sizeof(*root), GFP_KERNEL);
> >> - if (!root)
> >> + netd = kzalloc(sizeof(*netd), GFP_KERNEL);
> >> + if (!netd)
> >> goto out;
> >>
> >> - err = -EEXIST;
> >> - netd = proc_net_mkdir(net, "net", root);
> >> - if (!netd)
> >> - goto free_root;
> >> + netd->data = net;
> >>
> >> err = -EEXIST;
> >> net_statd = proc_net_mkdir(net, "stat", netd);
> >> if (!net_statd)
> >> goto free_net;
> >>
> >> - root->data = net;
> >> -
> >> - net->proc_net_root = root;
> >> net->proc_net = netd;
> >> net->proc_net_stat = net_statd;
> >> - err = 0;
> >> + return 0;
> >>
> >> +free_net:
> >> + kfree(netd);
> >> out:
> >> return err;
> >> -free_net:
> >> - remove_proc_entry("net", root);
> >> -free_root:
> >> - kfree(root);
> >> - goto out;
> >> }
> >>
> >> static __net_exit void proc_net_ns_exit(struct net *net)
> >> {
> >> remove_proc_entry("stat", net->proc_net);
> >> - remove_proc_entry("net", net->proc_net_root);
> >> - kfree(net->proc_net_root);
> >> + kfree(net->proc_net);
> >> }
> >>
> >> static struct pernet_operations __net_initdata proc_net_ns_ops = {
> >> @@ -152,8 +191,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
> >>
> >> int __init proc_net_init(void)
> >> {
> >> - shadow_pde = proc_mkdir("net", NULL);
> >> - shadow_pde->shadow_proc = proc_net_shadow;
> >> + proc_symlink("net", NULL, "self/net");
> >>
> >> return register_pernet_subsys(&proc_net_ns_ops);
> >> }
> >> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> >> index d9a9e71..9b6c935 100644
> >> --- a/include/linux/proc_fs.h
> >> +++ b/include/linux/proc_fs.h
> >> @@ -50,8 +50,6 @@ typedef int (read_proc_t)(char *page, char **start, off_t off,
> >> typedef int (write_proc_t)(struct file *file, const char __user *buffer,
> >> unsigned long count, void *data);
> >> typedef int (get_info_t)(char *, char **, off_t, int);
> >> -typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
> >> - struct proc_dir_entry *pde);
> >>
> >> struct proc_dir_entry {
> >> unsigned int low_ino;
> >> @@ -82,7 +80,6 @@ struct proc_dir_entry {
> >> int pde_users; /* number of callers into module in progress */
> >> spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
> >> struct completion *pde_unload_completion;
> >> - shadow_proc_t *shadow_proc;
> >> };
> >>
> >> struct kcore_list {
> >> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> >> index 28738b7..923f2b8 100644
> >> --- a/include/net/net_namespace.h
> >> +++ b/include/net/net_namespace.h
> >> @@ -31,7 +31,6 @@ struct net {
> >>
> >> struct proc_dir_entry *proc_net;
> >> struct proc_dir_entry *proc_net_stat;
> >> - struct proc_dir_entry *proc_net_root;
> >>
> >> struct list_head sysctl_table_headers;
> >>
> >> --
> >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >> the body of a message to [email protected]
> >> More majordomo info at http://vger.kernel.org/majordomo-info.html
> >> Please read the FAQ at http://www.tux.org/lkml/
--
Stephen Smalley
National Security Agency

2008-03-07 12:30:32

by Pavel Emelyanov

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)

[snip]

>>> Does this break SELinux labeling of /proc/net inodes and thus its access
>>> controls on them?
>> It should not, since the proc_dir_etries are still organized in
>> a required hierarchy.
>
> Unfortunately, it does break selinux labeling of /proc/net inodes.
> Easily seen by running ls -ZR /proc/net before and after the patch is
> applied and comparing the results.
>
> Also, as a separate issue, a "find /proc/self/net ..." will show that
> the hard link count is wrong for it.

Oops... I've checked this thing explicitly before sending, but it looks
like I used too old find version. I.e. 4.1.20 worked OK, but 4.2.23
reported the nlink discrepancy.

Anyway, this is fixed now (see below). I override the getattr method for
/proc/net directory and update one from the net->proc_net entry, which
is calculated correctly.

> Currently the inodes get labeled by selinux_proc_get_sid() by walking up
> the proc_dir_entry tree to generate a name and prefix matching that name
> in policy. Before the patch, we'd get names like "//net/tcp", i.e.
> relative to the root of proc (the extra leading slash is due to the
> earlier proc net rewrite which also broke selinux until we adjusted it
> to ignore it). After the patch, we just get "//tcp", which is ambiguous
> since we would also get that for a /proc/tcp inode if one existed.

OK, got it. Is it good if we used to get the //net/tcp, but will /net/tcp
(without the double slash)? The patch below does exactly this. It initializes
the name for the net->proc_net entry and makes its parent be proc_root.

Can you Ack this patch, so that I can merge it with the original one and
re-post the combined v3?

> Not trying to obstruct your proc net improvements, but just want to make
> sure that selinux doesn't get broken in the process.

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 18d413c..4caa5f7 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -96,8 +96,27 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
return de;
}

+static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ struct net *net;
+
+ net = get_proc_task_net(inode);
+
+ generic_fillattr(inode, stat);
+
+ if (net != NULL) {
+ stat->nlink = net->proc_net->nlink;
+ put_net(net);
+ }
+
+ return 0;
+}
+
const struct inode_operations proc_net_inode_operations = {
.lookup = proc_tgid_net_lookup,
+ .getattr = proc_tgid_net_getattr,
};

static int proc_tgid_net_readdir(struct file *filp, void *dirent,
@@ -162,6 +181,10 @@ static __net_init int proc_net_ns_init(struct net *net)
goto out;

netd->data = net;
+ netd->nlink = 2;
+ netd->name = "net";
+ netd->namelen = 3;
+ netd->parent = &proc_root;

err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);

2008-03-07 13:28:35

by Stephen Smalley

[permalink] [raw]
Subject: Re: [PATCH] Make /proc/net a symlink on /proc/self/net (v2)


On Fri, 2008-03-07 at 15:03 +0300, Pavel Emelyanov wrote:
> [snip]
>
> >>> Does this break SELinux labeling of /proc/net inodes and thus its access
> >>> controls on them?
> >> It should not, since the proc_dir_etries are still organized in
> >> a required hierarchy.
> >
> > Unfortunately, it does break selinux labeling of /proc/net inodes.
> > Easily seen by running ls -ZR /proc/net before and after the patch is
> > applied and comparing the results.
> >
> > Also, as a separate issue, a "find /proc/self/net ..." will show that
> > the hard link count is wrong for it.
>
> Oops... I've checked this thing explicitly before sending, but it looks
> like I used too old find version. I.e. 4.1.20 worked OK, but 4.2.23
> reported the nlink discrepancy.
>
> Anyway, this is fixed now (see below). I override the getattr method for
> /proc/net directory and update one from the net->proc_net entry, which
> is calculated correctly.
>
> > Currently the inodes get labeled by selinux_proc_get_sid() by walking up
> > the proc_dir_entry tree to generate a name and prefix matching that name
> > in policy. Before the patch, we'd get names like "//net/tcp", i.e.
> > relative to the root of proc (the extra leading slash is due to the
> > earlier proc net rewrite which also broke selinux until we adjusted it
> > to ignore it). After the patch, we just get "//tcp", which is ambiguous
> > since we would also get that for a /proc/tcp inode if one existed.
>
> OK, got it. Is it good if we used to get the //net/tcp, but will /net/tcp
> (without the double slash)? The patch below does exactly this. It initializes
> the name for the net->proc_net entry and makes its parent be proc_root.
>
> Can you Ack this patch, so that I can merge it with the original one and
> re-post the combined v3?

Thanks, ls -ZR /proc/net now yields the same output before and after the
patch.

Acked-by: Stephen Smalley <[email protected]>

>
> > Not trying to obstruct your proc net improvements, but just want to make
> > sure that selinux doesn't get broken in the process.
>
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 18d413c..4caa5f7 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -96,8 +96,27 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
> return de;
> }
>
> +static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
> + struct kstat *stat)
> +{
> + struct inode *inode = dentry->d_inode;
> + struct net *net;
> +
> + net = get_proc_task_net(inode);
> +
> + generic_fillattr(inode, stat);
> +
> + if (net != NULL) {
> + stat->nlink = net->proc_net->nlink;
> + put_net(net);
> + }
> +
> + return 0;
> +}
> +
> const struct inode_operations proc_net_inode_operations = {
> .lookup = proc_tgid_net_lookup,
> + .getattr = proc_tgid_net_getattr,
> };
>
> static int proc_tgid_net_readdir(struct file *filp, void *dirent,
> @@ -162,6 +181,10 @@ static __net_init int proc_net_ns_init(struct net *net)
> goto out;
>
> netd->data = net;
> + netd->nlink = 2;
> + netd->name = "net";
> + netd->namelen = 3;
> + netd->parent = &proc_root;
>
> err = -EEXIST;
> net_statd = proc_net_mkdir(net, "stat", netd);
--
Stephen Smalley
National Security Agency