Generalize the proc inode allocation so that it can be
used without having to create a proc_dir_entry.
This will allow namespace file descriptors to remain
lightweight entities but still have the same inode number
when the backing namespace is the same.
Signed-off-by: Eric W. Biederman <[email protected]>
---
Barring problems in review, I plan to merge these patches
via my linux-2.6-nsfd tree.
fs/proc/generic.c | 26 +++++++++++++-------------
include/linux/proc_fs.h | 10 ++++++++++
2 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1637f1..65416a1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff, or zero on failure.
*/
-static unsigned int get_inode_number(void)
+int proc_alloc_inum(unsigned int *inum)
{
unsigned int i;
int error;
retry:
- if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
- return 0;
+ if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
+ return -ENOMEM;
spin_lock(&proc_inum_lock);
error = ida_get_new(&proc_inum_ida, &i);
@@ -365,18 +365,19 @@ retry:
if (error == -EAGAIN)
goto retry;
else if (error)
- return 0;
+ return error;
if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, i);
spin_unlock(&proc_inum_lock);
- return 0;
+ return -ENOSPC;
}
- return PROC_DYNAMIC_FIRST + i;
+ *inum = PROC_DYNAMIC_FIRST + i;
+ return 0;
}
-static void release_inode_number(unsigned int inum)
+void proc_free_inum(unsigned int inum)
{
spin_lock(&proc_inum_lock);
ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
@@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
- unsigned int i;
struct proc_dir_entry *tmp;
+ int ret;
- i = get_inode_number();
- if (i == 0)
- return -EAGAIN;
- dp->low_ino = i;
+ ret = proc_alloc_inum(&dp->low_ino);
+ if (ret)
+ return ret;
if (S_ISDIR(dp->mode)) {
if (dp->proc_iops == NULL) {
@@ -766,7 +766,7 @@ EXPORT_SYMBOL(proc_create_data);
static void free_proc_entry(struct proc_dir_entry *de)
{
- release_inode_number(de->low_ino);
+ proc_free_inum(de->low_ino);
if (S_ISLNK(de->mode))
kfree(de->data);
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index e7576cf..3067b44 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -175,6 +175,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
extern struct file *proc_ns_fget(int fd);
+extern int proc_alloc_inum(unsigned int *pino);
+extern void proc_free_inum(unsigned int inum);
#else
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -229,6 +231,14 @@ static inline struct file *proc_ns_fget(int fd)
return ERR_PTR(-EINVAL);
}
+static inline int proc_alloc_inum(unsigned int *inum)
+{
+ *inum = 1;
+ return 0;
+}
+static inline void proc_free_inum(unsigned int inum)
+{
+}
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
--
1.7.5.1.217.g4e3aa
Assign a unique proc inode to each namespace, yielding an
identifier that userspace can use for identifying a namespace.
This has been a long-requested feature, blocked only because
a naive implementation would put the id in a global space and
would ultimately require having a namespace for the names of
namespaces, making migration and certain virtualization tricks
impossible.
We still don't have per-superblock inode numbers for proc, which
appear to be necessary for application-unaware checkpoint/restart and
migration (if the application is using namespace file descriptors),
but that is now allowed by the design if it becomes important.
I have preallocated the ipc and uts initial proc inode numbers so
their structures can be statically initialized.
Signed-off-by: Eric W. Biederman <[email protected]>
---
fs/proc/namespaces.c | 1 +
include/linux/ipc_namespace.h | 2 ++
include/linux/proc_fs.h | 4 ++++
include/linux/utsname.h | 1 +
include/net/net_namespace.h | 2 ++
init/version.c | 2 ++
ipc/msgutil.c | 2 ++
ipc/namespace.c | 16 ++++++++++++++++
kernel/utsname.c | 17 ++++++++++++++++-
net/core/net_namespace.c | 24 ++++++++++++++++++++++++
10 files changed, 70 insertions(+), 1 deletions(-)
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f7..ddc2bb4 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -54,6 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
ei->ns_ops = ns_ops;
ei->ns = ns;
+ inode->i_ino = ns_ops->inum(ei->ns);
dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a6d1655..22a4dc4 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -60,6 +60,8 @@ struct ipc_namespace {
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
+
+ unsigned int proc_inum;
};
extern struct ipc_namespace init_ipc_ns;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 3067b44..1aee7f0 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -29,8 +29,11 @@ struct mm_struct;
enum {
PROC_ROOT_INO = 1,
+ PROC_IPC_INIT_INO = 2,
+ PROC_UTS_INIT_INO = 3,
};
+
/*
* This is not completely implemented yet. The idea is to
* create an in-memory tree (like the actual /proc filesystem
@@ -257,6 +260,7 @@ struct proc_ns_operations {
void *(*get)(struct task_struct *task);
void (*put)(void *ns);
int (*install)(struct nsproxy *nsproxy, void *ns);
+ unsigned int (*inum)(void *ns);
};
extern const struct proc_ns_operations netns_operations;
extern const struct proc_ns_operations utsns_operations;
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 4e5b021..03db764 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -44,6 +44,7 @@ struct uts_namespace {
struct kref kref;
struct new_utsname name;
struct user_namespace *user_ns;
+ unsigned int proc_inum;
};
extern struct uts_namespace init_uts_ns;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2bf9ed9..4b85be2 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -49,6 +49,8 @@ struct net {
struct list_head cleanup_list; /* namespaces on death row */
struct list_head exit_list; /* Use only net_mutex */
+ unsigned int proc_inum;
+
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
diff --git a/init/version.c b/init/version.c
index 86fe0cc..58170f1 100644
--- a/init/version.c
+++ b/init/version.c
@@ -12,6 +12,7 @@
#include <linux/utsname.h>
#include <generated/utsrelease.h>
#include <linux/version.h>
+#include <linux/proc_fs.h>
#ifndef CONFIG_KALLSYMS
#define version(a) Version_ ## a
@@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
.domainname = UTS_DOMAINNAME,
},
.user_ns = &init_user_ns,
+ .proc_inum = PROC_UTS_INIT_INO,
};
EXPORT_SYMBOL_GPL(init_uts_ns);
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 8b5ce5d..f7da485 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/ipc.h>
#include <linux/ipc_namespace.h>
+#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include "util.h"
@@ -33,6 +34,7 @@ struct ipc_namespace init_ipc_ns = {
.mq_msgsize_max = DFLT_MSGSIZEMAX,
#endif
.user_ns = &init_user_ns,
+ .proc_inum = PROC_IPC_INIT_INO,
};
atomic_t nr_ipc_ns = ATOMIC_INIT(1);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index ce0a647..cd7f733 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
if (ns == NULL)
return ERR_PTR(-ENOMEM);
+ err = proc_alloc_inum(&ns->proc_inum);
+ if (err) {
+ kfree(ns);
+ return ERR_PTR(err);
+ }
+
atomic_set(&ns->count, 1);
err = mq_init_ns(ns);
if (err) {
+ proc_free_inum(ns->proc_inum);
kfree(ns);
return ERR_PTR(err);
}
@@ -113,6 +120,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
*/
ipcns_notify(IPCNS_REMOVED);
put_user_ns(ns->user_ns);
+ proc_free_inum(ns->proc_inum);
kfree(ns);
}
@@ -170,10 +178,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
+static unsigned int ipcns_inum(void *vp)
+{
+ struct ipc_namespace *ns = vp;
+
+ return ns->proc_inum;
+}
+
const struct proc_ns_operations ipcns_operations = {
.name = "ipc",
.type = CLONE_NEWIPC,
.get = ipcns_get,
.put = ipcns_put,
.install = ipcns_install,
+ .inum = ipcns_inum,
};
diff --git a/kernel/utsname.c b/kernel/utsname.c
index bff131b..3ab6a08 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
struct uts_namespace *old_ns)
{
struct uts_namespace *ns;
+ int err;
ns = create_uts_ns();
if (!ns)
return ERR_PTR(-ENOMEM);
+ err = proc_alloc_inum(&ns->proc_inum);
+ if (err) {
+ kfree(ns);
+ return ERR_PTR(err);
+ }
+
down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
@@ -78,6 +85,7 @@ void free_uts_ns(struct kref *kref)
ns = container_of(kref, struct uts_namespace, kref);
put_user_ns(ns->user_ns);
+ proc_free_inum(ns->proc_inum);
kfree(ns);
}
@@ -110,11 +118,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
+static unsigned int utsns_inum(void *vp)
+{
+ struct uts_namespace *ns = vp;
+
+ return ns->proc_inum;
+}
+
const struct proc_ns_operations utsns_operations = {
.name = "uts",
.type = CLONE_NEWUTS,
.get = utsns_get,
.put = utsns_put,
.install = utsns_install,
+ .inum = utsns_inum,
};
-
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index e41e511..6199ec2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -358,6 +358,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
+static __net_init int net_ns_net_init(struct net *net)
+{
+ return proc_alloc_inum(&net->proc_inum);
+}
+
+static __net_exit void net_ns_net_exit(struct net *net)
+{
+ proc_free_inum(net->proc_inum);
+}
+
+static struct pernet_operations __net_initdata net_ns_ops = {
+ .init = net_ns_net_init,
+ .exit = net_ns_net_exit,
+};
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -389,6 +404,8 @@ static int __init net_ns_init(void)
mutex_unlock(&net_mutex);
+ register_pernet_subsys(&net_ns_ops);
+
return 0;
}
@@ -616,11 +633,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
return 0;
}
+static unsigned int netns_inum(void *ns)
+{
+ struct net *net = ns;
+ return net->proc_inum;
+}
+
const struct proc_ns_operations netns_operations = {
.name = "net",
.type = CLONE_NEWNET,
.get = netns_get,
.put = netns_put,
.install = netns_install,
+ .inum = netns_inum,
};
#endif
--
1.7.5.1.217.g4e3aa
Quoting Eric W. Biederman ([email protected]):
>
> Generalize the proc inode allocation so that it can be
> used without having to having to create a proc_dir_entry.
>
> This will allow namespace file descriptors to remain light
> weight entitities but still have the same inode number
> when the backing namespace is the same.
>
> Signed-off-by: Eric W. Biederman <[email protected]>
Acked-by: Serge Hallyn <[email protected]>
> ---
>
> Baring problems in review I plan to merge these patches
> via my linux-2.6-nsfd tree.
>
> fs/proc/generic.c | 26 +++++++++++++-------------
> include/linux/proc_fs.h | 10 ++++++++++
> 2 files changed, 23 insertions(+), 13 deletions(-)
>
> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
> index f1637f1..65416a1 100644
> --- a/fs/proc/generic.c
> +++ b/fs/proc/generic.c
> @@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
> * Return an inode number between PROC_DYNAMIC_FIRST and
> * 0xffffffff, or zero on failure.
> */
> -static unsigned int get_inode_number(void)
> +int proc_alloc_inum(unsigned int *inum)
> {
> unsigned int i;
> int error;
>
> retry:
> - if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
> - return 0;
> + if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL))
> + return -ENOMEM;
>
> spin_lock(&proc_inum_lock);
> error = ida_get_new(&proc_inum_ida, &i);
> @@ -365,18 +365,19 @@ retry:
> if (error == -EAGAIN)
> goto retry;
> else if (error)
> - return 0;
> + return error;
>
> if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
> spin_lock(&proc_inum_lock);
> ida_remove(&proc_inum_ida, i);
> spin_unlock(&proc_inum_lock);
> - return 0;
> + return -ENOSPC;
> }
> - return PROC_DYNAMIC_FIRST + i;
> + *inum = PROC_DYNAMIC_FIRST + i;
> + return 0;
> }
>
> -static void release_inode_number(unsigned int inum)
> +void proc_free_inum(unsigned int inum)
> {
> spin_lock(&proc_inum_lock);
> ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
> @@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = {
>
> static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
> {
> - unsigned int i;
> struct proc_dir_entry *tmp;
> + int ret;
>
> - i = get_inode_number();
> - if (i == 0)
> - return -EAGAIN;
> - dp->low_ino = i;
> + ret = proc_alloc_inum(&dp->low_ino);
> + if (ret)
> + return ret;
>
> if (S_ISDIR(dp->mode)) {
> if (dp->proc_iops == NULL) {
> @@ -766,7 +766,7 @@ EXPORT_SYMBOL(proc_create_data);
>
> static void free_proc_entry(struct proc_dir_entry *de)
> {
> - release_inode_number(de->low_ino);
> + proc_free_inum(de->low_ino);
>
> if (S_ISLNK(de->mode))
> kfree(de->data);
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index e7576cf..3067b44 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -175,6 +175,8 @@ extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
>
> extern struct file *proc_ns_fget(int fd);
>
> +extern int proc_alloc_inum(unsigned int *pino);
> +extern void proc_free_inum(unsigned int inum);
> #else
>
> #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
> @@ -229,6 +231,14 @@ static inline struct file *proc_ns_fget(int fd)
> return ERR_PTR(-EINVAL);
> }
>
> +static inline int proc_alloc_inum(unsigned int *inum)
> +{
> + *inum = 1;
> + return 0;
> +}
> +static inline void proc_free_inum(unsigned int inum)
> +{
> +}
> #endif /* CONFIG_PROC_FS */
>
> #if !defined(CONFIG_PROC_KCORE)
> --
> 1.7.5.1.217.g4e3aa
>
From: [email protected] (Eric W. Biederman)
Date: Fri, 17 Jun 2011 16:33:19 -0700
>
> Assign a unique proc inode to each namespace, yielding an
> identifier that userspace can use for identifying a namespace.
>
> This has been a long requested feature and only blocked because
> a naive implementation would put the id in a global space and
> would ultimately require having a namespace for the names of
> namespaces, making migration and certain virtualization tricks
> impossible.
>
> We still don't have per superblock inode numbers for proc, which
> appears necessary for application unaware checkpoint/restart and
> migrations (if the application is using namespace filedescriptors)
> but that is now allowd by the design if it becomes important.
>
> I have preallocated the ipc and uts initial proc inode numbers so
> their structures can be statically initialized.
>
> Signed-off-by: Eric W. Biederman <[email protected]>
For networking bits:
Acked-by: David S. Miller <[email protected]>
Quoting Eric W. Biederman ([email protected]):
>
> Assign a unique proc inode to each namespace, yielding an
> identifier that userspace can use for identifying a namespace.
>
> This has been a long requested feature and only blocked because
> a naive implementation would put the id in a global space and
> would ultimately require having a namespace for the names of
> namespaces, making migration and certain virtualization tricks
> impossible.
>
> We still don't have per superblock inode numbers for proc, which
> appears necessary for application unaware checkpoint/restart and
> migrations (if the application is using namespace filedescriptors)
> but that is now allowd by the design if it becomes important.
>
> I have preallocated the ipc and uts initial proc inode numbers so
> their structures can be statically initialized.
>
> Signed-off-by: Eric W. Biederman <[email protected]>
I've not looked at the setns patches enough, but from what I can see
here it looks good.
Acked-by: Serge Hallyn <[email protected]>
thanks,
-serge
> ---
> fs/proc/namespaces.c | 1 +
> include/linux/ipc_namespace.h | 2 ++
> include/linux/proc_fs.h | 4 ++++
> include/linux/utsname.h | 1 +
> include/net/net_namespace.h | 2 ++
> init/version.c | 2 ++
> ipc/msgutil.c | 2 ++
> ipc/namespace.c | 16 ++++++++++++++++
> kernel/utsname.c | 17 ++++++++++++++++-
> net/core/net_namespace.c | 24 ++++++++++++++++++++++++
> 10 files changed, 70 insertions(+), 1 deletions(-)
>
> diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
> index be177f7..ddc2bb4 100644
> --- a/fs/proc/namespaces.c
> +++ b/fs/proc/namespaces.c
> @@ -54,6 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
> ei->ns_ops = ns_ops;
> ei->ns = ns;
>
> + inode->i_ino = ns_ops->inum(ei->ns);
> dentry->d_op = &pid_dentry_operations;
> d_add(dentry, inode);
> /* Close the race of the process dying before we return the dentry */
> diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
> index a6d1655..22a4dc4 100644
> --- a/include/linux/ipc_namespace.h
> +++ b/include/linux/ipc_namespace.h
> @@ -60,6 +60,8 @@ struct ipc_namespace {
>
> /* user_ns which owns the ipc ns */
> struct user_namespace *user_ns;
> +
> + unsigned int proc_inum;
> };
>
> extern struct ipc_namespace init_ipc_ns;
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 3067b44..1aee7f0 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -29,8 +29,11 @@ struct mm_struct;
>
> enum {
> PROC_ROOT_INO = 1,
> + PROC_IPC_INIT_INO = 2,
> + PROC_UTS_INIT_INO = 3,
> };
>
> +
> /*
> * This is not completely implemented yet. The idea is to
> * create an in-memory tree (like the actual /proc filesystem
> @@ -257,6 +260,7 @@ struct proc_ns_operations {
> void *(*get)(struct task_struct *task);
> void (*put)(void *ns);
> int (*install)(struct nsproxy *nsproxy, void *ns);
> + unsigned int (*inum)(void *ns);
> };
> extern const struct proc_ns_operations netns_operations;
> extern const struct proc_ns_operations utsns_operations;
> diff --git a/include/linux/utsname.h b/include/linux/utsname.h
> index 4e5b021..03db764 100644
> --- a/include/linux/utsname.h
> +++ b/include/linux/utsname.h
> @@ -44,6 +44,7 @@ struct uts_namespace {
> struct kref kref;
> struct new_utsname name;
> struct user_namespace *user_ns;
> + unsigned int proc_inum;
> };
> extern struct uts_namespace init_uts_ns;
>
> diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
> index 2bf9ed9..4b85be2 100644
> --- a/include/net/net_namespace.h
> +++ b/include/net/net_namespace.h
> @@ -49,6 +49,8 @@ struct net {
> struct list_head cleanup_list; /* namespaces on death row */
> struct list_head exit_list; /* Use only net_mutex */
>
> + unsigned int proc_inum;
> +
> struct proc_dir_entry *proc_net;
> struct proc_dir_entry *proc_net_stat;
>
> diff --git a/init/version.c b/init/version.c
> index 86fe0cc..58170f1 100644
> --- a/init/version.c
> +++ b/init/version.c
> @@ -12,6 +12,7 @@
> #include <linux/utsname.h>
> #include <generated/utsrelease.h>
> #include <linux/version.h>
> +#include <linux/proc_fs.h>
>
> #ifndef CONFIG_KALLSYMS
> #define version(a) Version_ ## a
> @@ -34,6 +35,7 @@ struct uts_namespace init_uts_ns = {
> .domainname = UTS_DOMAINNAME,
> },
> .user_ns = &init_user_ns,
> + .proc_inum = PROC_UTS_INIT_INO,
> };
> EXPORT_SYMBOL_GPL(init_uts_ns);
>
> diff --git a/ipc/msgutil.c b/ipc/msgutil.c
> index 8b5ce5d..f7da485 100644
> --- a/ipc/msgutil.c
> +++ b/ipc/msgutil.c
> @@ -14,6 +14,7 @@
> #include <linux/slab.h>
> #include <linux/ipc.h>
> #include <linux/ipc_namespace.h>
> +#include <linux/proc_fs.h>
> #include <asm/uaccess.h>
>
> #include "util.h"
> @@ -33,6 +34,7 @@ struct ipc_namespace init_ipc_ns = {
> .mq_msgsize_max = DFLT_MSGSIZEMAX,
> #endif
> .user_ns = &init_user_ns,
> + .proc_inum = PROC_IPC_INIT_INO,
> };
>
> atomic_t nr_ipc_ns = ATOMIC_INIT(1);
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index ce0a647..cd7f733 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -26,9 +26,16 @@ static struct ipc_namespace *create_ipc_ns(struct task_struct *tsk,
> if (ns == NULL)
> return ERR_PTR(-ENOMEM);
>
> + err = proc_alloc_inum(&ns->proc_inum);
> + if (err) {
> + kfree(ns);
> + return ERR_PTR(err);
> + }
> +
> atomic_set(&ns->count, 1);
> err = mq_init_ns(ns);
> if (err) {
> + proc_free_inum(ns->proc_inum);
> kfree(ns);
> return ERR_PTR(err);
> }
> @@ -113,6 +120,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
> */
> ipcns_notify(IPCNS_REMOVED);
> put_user_ns(ns->user_ns);
> + proc_free_inum(ns->proc_inum);
> kfree(ns);
> }
>
> @@ -170,10 +178,18 @@ static int ipcns_install(struct nsproxy *nsproxy, void *ns)
> return 0;
> }
>
> +static unsigned int ipcns_inum(void *vp)
> +{
> + struct ipc_namespace *ns = vp;
> +
> + return ns->proc_inum;
> +}
> +
> const struct proc_ns_operations ipcns_operations = {
> .name = "ipc",
> .type = CLONE_NEWIPC,
> .get = ipcns_get,
> .put = ipcns_put,
> .install = ipcns_install,
> + .inum = ipcns_inum,
> };
> diff --git a/kernel/utsname.c b/kernel/utsname.c
> index bff131b..3ab6a08 100644
> --- a/kernel/utsname.c
> +++ b/kernel/utsname.c
> @@ -36,11 +36,18 @@ static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
> struct uts_namespace *old_ns)
> {
> struct uts_namespace *ns;
> + int err;
>
> ns = create_uts_ns();
> if (!ns)
> return ERR_PTR(-ENOMEM);
>
> + err = proc_alloc_inum(&ns->proc_inum);
> + if (err) {
> + kfree(ns);
> + return ERR_PTR(err);
> + }
> +
> down_read(&uts_sem);
> memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
> ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
> @@ -78,6 +85,7 @@ void free_uts_ns(struct kref *kref)
>
> ns = container_of(kref, struct uts_namespace, kref);
> put_user_ns(ns->user_ns);
> + proc_free_inum(ns->proc_inum);
> kfree(ns);
> }
>
> @@ -110,11 +118,18 @@ static int utsns_install(struct nsproxy *nsproxy, void *ns)
> return 0;
> }
>
> +static unsigned int utsns_inum(void *vp)
> +{
> + struct uts_namespace *ns = vp;
> +
> + return ns->proc_inum;
> +}
> +
> const struct proc_ns_operations utsns_operations = {
> .name = "uts",
> .type = CLONE_NEWUTS,
> .get = utsns_get,
> .put = utsns_put,
> .install = utsns_install,
> + .inum = utsns_inum,
> };
> -
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index e41e511..6199ec2 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -358,6 +358,21 @@ struct net *get_net_ns_by_pid(pid_t pid)
> }
> EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
>
> +static __net_init int net_ns_net_init(struct net *net)
> +{
> + return proc_alloc_inum(&net->proc_inum);
> +}
> +
> +static __net_exit void net_ns_net_exit(struct net *net)
> +{
> + proc_free_inum(net->proc_inum);
> +}
> +
> +static struct pernet_operations __net_initdata net_ns_ops = {
> + .init = net_ns_net_init,
> + .exit = net_ns_net_exit,
> +};
> +
> static int __init net_ns_init(void)
> {
> struct net_generic *ng;
> @@ -389,6 +404,8 @@ static int __init net_ns_init(void)
>
> mutex_unlock(&net_mutex);
>
> + register_pernet_subsys(&net_ns_ops);
> +
> return 0;
> }
>
> @@ -616,11 +633,18 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
> return 0;
> }
>
> +static unsigned int netns_inum(void *ns)
> +{
> + struct net *net = ns;
> + return net->proc_inum;
> +}
> +
> const struct proc_ns_operations netns_operations = {
> .name = "net",
> .type = CLONE_NEWNET,
> .get = netns_get,
> .put = netns_put,
> .install = netns_install,
> + .inum = netns_inum,
> };
> #endif
> --
> 1.7.5.1.217.g4e3aa
>
"Serge E. Hallyn" <[email protected]> writes:
> Quoting Eric W. Biederman ([email protected]):
>>
>> Assign a unique proc inode to each namespace, yielding an
>> identifier that userspace can use for identifying a namespace.
>>
>> This has been a long requested feature and only blocked because
>> a naive implementation would put the id in a global space and
>> would ultimately require having a namespace for the names of
>> namespaces, making migration and certain virtualization tricks
>> impossible.
>>
>> We still don't have per superblock inode numbers for proc, which
>> appears necessary for application unaware checkpoint/restart and
>> migrations (if the application is using namespace filedescriptors)
>> but that is now allowd by the design if it becomes important.
>>
>> I have preallocated the ipc and uts initial proc inode numbers so
>> their structures can be statically initialized.
>>
>> Signed-off-by: Eric W. Biederman <[email protected]>
>
> I've not looked at the setns patches enough, but from what I can see
> here it looks good.
>
> Acked-by: Serge Hallyn <[email protected]>
Thanks.
There are bugs in my existing proc bits that I am working on fixing, so
this second patch will get a small update before it is merged.
In posting the patches I was looking for a little review, and I was
announcing that I had solved the technical problem of how we talk about
namespaces without needing to introduce another namespace for
namespaces.
Eric