From: Cedric Le Goater <[email protected]>
This patch adds an empty net namespace framework
Signed-off-by: Cedric Le Goater <[email protected]>
---
include/linux/init_task.h | 2 +
include/linux/net_namespace.h | 49 ++++++++++++++++++++++++++++++++++++++++++
include/linux/nsproxy.h | 2 +
kernel/nsproxy.c | 12 ++++++++++
net/Kconfig | 8 ++++++
net/core/Makefile | 2 -
net/core/net_namespace.c | 41 +++++++++++++++++++++++++++++++++++
7 files changed, 115 insertions(+), 1 deletion(-)
Index: 2.6.19-rc5-mm2/include/linux/init_task.h
===================================================================
--- 2.6.19-rc5-mm2.orig/include/linux/init_task.h
+++ 2.6.19-rc5-mm2/include/linux/init_task.h
@@ -8,6 +8,7 @@
#include <linux/lockdep.h>
#include <linux/ipc.h>
#include <linux/pid_namespace.h>
+#include <linux/net_namespace.h>
#define INIT_FDTABLE \
{ \
@@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
.id = 0, \
.uts_ns = &init_uts_ns, \
.mnt_ns = NULL, \
+ INIT_NET_NS(net_ns) \
INIT_IPC_NS(ipc_ns) \
}
Index: 2.6.19-rc5-mm2/include/linux/net_namespace.h
===================================================================
--- /dev/null
+++ 2.6.19-rc5-mm2/include/linux/net_namespace.h
@@ -0,0 +1,49 @@
+#ifndef _LINUX_NET_NAMESPACE_H
+#define _LINUX_NET_NAMESPACE_H
+
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+
+struct net_namespace {
+ struct kref kref;
+};
+
+extern struct net_namespace init_net_ns;
+
+#ifdef CONFIG_NET_NS
+
+#define INIT_NET_NS(net_ns) .net_ns = &init_net_ns,
+
+static inline void get_net_ns(struct net_namespace *ns)
+{
+ kref_get(&ns->kref);
+}
+
+extern int copy_net_ns(int flags, struct task_struct *tsk);
+
+extern void free_net_ns(struct kref *kref);
+
+static inline void put_net_ns(struct net_namespace *ns)
+{
+ kref_put(&ns->kref, free_net_ns);
+}
+
+#else
+
+#define INIT_NET_NS(net_ns)
+
+static inline void get_net_ns(struct net_namespace *ns)
+{
+}
+
+static inline int copy_net_ns(int flags, struct task_struct *tsk)
+{
+ return 0;
+}
+
+static inline void put_net_ns(struct net_namespace *ns)
+{
+}
+#endif
+
+#endif /* _LINUX_NET_NAMESPACE_H */
Index: 2.6.19-rc5-mm2/include/linux/nsproxy.h
===================================================================
--- 2.6.19-rc5-mm2.orig/include/linux/nsproxy.h
+++ 2.6.19-rc5-mm2/include/linux/nsproxy.h
@@ -10,6 +10,7 @@ struct mnt_namespace;
struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
+struct net_namespace;
/*
* namespaces flags
@@ -42,6 +43,7 @@ struct nsproxy {
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
+ struct net_namespace *net_ns;
struct hlist_node ns_hash_node;
};
Index: 2.6.19-rc5-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.19-rc5-mm2.orig/kernel/nsproxy.c
+++ 2.6.19-rc5-mm2/kernel/nsproxy.c
@@ -20,6 +20,7 @@
#include <linux/mnt_namespace.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
+#include <linux/net_namespace.h>
#define NS_HASH_BITS 3 /* this might need some configuration */
#define NS_HASH_SIZE (1 << NS_HASH_BITS)
@@ -89,6 +90,8 @@ struct nsproxy *dup_namespaces(struct ns
get_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
get_pid_ns(ns->pid_ns);
+ if (ns->net_ns)
+ get_net_ns(ns->net_ns);
}
return ns;
@@ -136,10 +139,17 @@ int copy_namespaces(int flags, struct ta
if (err)
goto out_pid;
+ err = copy_net_ns(flags, tsk);
+ if (err)
+ goto out_net;
+
out:
put_nsproxy(old_ns);
return err;
+out_net:
+ if (new_ns->pid_ns)
+ put_pid_ns(new_ns->pid_ns);
out_pid:
if (new_ns->ipc_ns)
put_ipc_ns(new_ns->ipc_ns);
@@ -165,6 +175,8 @@ static void free_nsproxy(struct nsproxy
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
+ if (ns->net_ns)
+ put_net_ns(ns->net_ns);
kfree(ns);
}
Index: 2.6.19-rc5-mm2/net/core/Makefile
===================================================================
--- 2.6.19-rc5-mm2.orig/net/core/Makefile
+++ 2.6.19-rc5-mm2/net/core/Makefile
@@ -3,7 +3,7 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o
+ gen_stats.o gen_estimator.o net_namespace.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
Index: 2.6.19-rc5-mm2/net/core/net_namespace.c
===================================================================
--- /dev/null
+++ 2.6.19-rc5-mm2/net/core/net_namespace.c
@@ -0,0 +1,41 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/net_namespace.h>
+
+struct net_namespace init_net_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+};
+
+#ifdef CONFIG_NET_NS
+
+int copy_net_ns(int flags, struct task_struct *tsk)
+{
+ struct net_namespace *old_ns = tsk->nsproxy->net_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_net_ns(old_ns);
+ return err;
+}
+
+void free_net_ns(struct kref *kref)
+{
+ struct net_namespace *ns;
+
+ ns = container_of(kref, struct net_namespace, kref);
+ kfree(ns);
+}
+
+#endif /* CONFIG_NET_NS */
Index: 2.6.19-rc5-mm2/net/Kconfig
===================================================================
--- 2.6.19-rc5-mm2.orig/net/Kconfig
+++ 2.6.19-rc5-mm2/net/Kconfig
@@ -67,6 +67,14 @@ source "net/netlabel/Kconfig"
endif # if INET
+config NET_NS
+ bool "Network Namespaces"
+ help
+ This option enables multiple independent network namespaces,
+ each having own network devices, IP addresses, routes, and so on.
+ If unsure, answer N.
+
+
config NETWORK_SECMARK
bool "Security Marking"
help
Cedric,
Dmitry Mishin and Daniel Lezcano are working together on the full
network namespace incorporating both needs of OpenVZ and VServer/IBM.
Thanks,
Kirill
> From: Cedric Le Goater <[email protected]>
>
> This patch adds an empty net namespace framework
>
> Signed-off-by: Cedric Le Goater <[email protected]>
> ---
> include/linux/init_task.h | 2 +
> include/linux/net_namespace.h | 49 ++++++++++++++++++++++++++++++++++++++++++
> include/linux/nsproxy.h | 2 +
> kernel/nsproxy.c | 12 ++++++++++
> net/Kconfig | 8 ++++++
> net/core/Makefile | 2 -
> net/core/net_namespace.c | 41 +++++++++++++++++++++++++++++++++++
> 7 files changed, 115 insertions(+), 1 deletion(-)
>
> Index: 2.6.19-rc5-mm2/include/linux/init_task.h
> ===================================================================
> --- 2.6.19-rc5-mm2.orig/include/linux/init_task.h
> +++ 2.6.19-rc5-mm2/include/linux/init_task.h
> @@ -8,6 +8,7 @@
> #include <linux/lockdep.h>
> #include <linux/ipc.h>
> #include <linux/pid_namespace.h>
> +#include <linux/net_namespace.h>
>
> #define INIT_FDTABLE \
> { \
> @@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
> .id = 0, \
> .uts_ns = &init_uts_ns, \
> .mnt_ns = NULL, \
> + INIT_NET_NS(net_ns) \
> INIT_IPC_NS(ipc_ns) \
> }
>
> Index: 2.6.19-rc5-mm2/include/linux/net_namespace.h
> ===================================================================
> --- /dev/null
> +++ 2.6.19-rc5-mm2/include/linux/net_namespace.h
> @@ -0,0 +1,49 @@
> +#ifndef _LINUX_NET_NAMESPACE_H
> +#define _LINUX_NET_NAMESPACE_H
> +
> +#include <linux/kref.h>
> +#include <linux/nsproxy.h>
> +
> +struct net_namespace {
> + struct kref kref;
> +};
> +
> +extern struct net_namespace init_net_ns;
> +
> +#ifdef CONFIG_NET_NS
> +
> +#define INIT_NET_NS(net_ns) .net_ns = &init_net_ns,
> +
> +static inline void get_net_ns(struct net_namespace *ns)
> +{
> + kref_get(&ns->kref);
> +}
> +
> +extern int copy_net_ns(int flags, struct task_struct *tsk);
> +
> +extern void free_net_ns(struct kref *kref);
> +
> +static inline void put_net_ns(struct net_namespace *ns)
> +{
> + kref_put(&ns->kref, free_net_ns);
> +}
> +
> +#else
> +
> +#define INIT_NET_NS(net_ns)
> +
> +static inline void get_net_ns(struct net_namespace *ns)
> +{
> +}
> +
> +static inline int copy_net_ns(int flags, struct task_struct *tsk)
> +{
> + return 0;
> +}
> +
> +static inline void put_net_ns(struct net_namespace *ns)
> +{
> +}
> +#endif
> +
> +#endif /* _LINUX_NET_NAMESPACE_H */
> Index: 2.6.19-rc5-mm2/include/linux/nsproxy.h
> ===================================================================
> --- 2.6.19-rc5-mm2.orig/include/linux/nsproxy.h
> +++ 2.6.19-rc5-mm2/include/linux/nsproxy.h
> @@ -10,6 +10,7 @@ struct mnt_namespace;
> struct uts_namespace;
> struct ipc_namespace;
> struct pid_namespace;
> +struct net_namespace;
>
> /*
> * namespaces flags
> @@ -42,6 +43,7 @@ struct nsproxy {
> struct ipc_namespace *ipc_ns;
> struct mnt_namespace *mnt_ns;
> struct pid_namespace *pid_ns;
> + struct net_namespace *net_ns;
>
> struct hlist_node ns_hash_node;
> };
> Index: 2.6.19-rc5-mm2/kernel/nsproxy.c
> ===================================================================
> --- 2.6.19-rc5-mm2.orig/kernel/nsproxy.c
> +++ 2.6.19-rc5-mm2/kernel/nsproxy.c
> @@ -20,6 +20,7 @@
> #include <linux/mnt_namespace.h>
> #include <linux/utsname.h>
> #include <linux/pid_namespace.h>
> +#include <linux/net_namespace.h>
>
> #define NS_HASH_BITS 3 /* this might need some configuration */
> #define NS_HASH_SIZE (1 << NS_HASH_BITS)
> @@ -89,6 +90,8 @@ struct nsproxy *dup_namespaces(struct ns
> get_ipc_ns(ns->ipc_ns);
> if (ns->pid_ns)
> get_pid_ns(ns->pid_ns);
> + if (ns->net_ns)
> + get_net_ns(ns->net_ns);
> }
>
> return ns;
> @@ -136,10 +139,17 @@ int copy_namespaces(int flags, struct ta
> if (err)
> goto out_pid;
>
> + err = copy_net_ns(flags, tsk);
> + if (err)
> + goto out_net;
> +
> out:
> put_nsproxy(old_ns);
> return err;
>
> +out_net:
> + if (new_ns->pid_ns)
> + put_pid_ns(new_ns->pid_ns);
> out_pid:
> if (new_ns->ipc_ns)
> put_ipc_ns(new_ns->ipc_ns);
> @@ -165,6 +175,8 @@ static void free_nsproxy(struct nsproxy
> put_ipc_ns(ns->ipc_ns);
> if (ns->pid_ns)
> put_pid_ns(ns->pid_ns);
> + if (ns->net_ns)
> + put_net_ns(ns->net_ns);
> kfree(ns);
> }
>
> Index: 2.6.19-rc5-mm2/net/core/Makefile
> ===================================================================
> --- 2.6.19-rc5-mm2.orig/net/core/Makefile
> +++ 2.6.19-rc5-mm2/net/core/Makefile
> @@ -3,7 +3,7 @@
> #
>
> obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
> - gen_stats.o gen_estimator.o
> + gen_stats.o gen_estimator.o net_namespace.o
>
> obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
>
> Index: 2.6.19-rc5-mm2/net/core/net_namespace.c
> ===================================================================
> --- /dev/null
> +++ 2.6.19-rc5-mm2/net/core/net_namespace.c
> @@ -0,0 +1,41 @@
> +/*
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License as
> + * published by the Free Software Foundation, version 2 of the
> + * License.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/version.h>
> +#include <linux/nsproxy.h>
> +#include <linux/net_namespace.h>
> +
> +struct net_namespace init_net_ns = {
> + .kref = {
> + .refcount = ATOMIC_INIT(2),
> + },
> +};
> +
> +#ifdef CONFIG_NET_NS
> +
> +int copy_net_ns(int flags, struct task_struct *tsk)
> +{
> + struct net_namespace *old_ns = tsk->nsproxy->net_ns;
> + int err = 0;
> +
> + if (!old_ns)
> + return 0;
> +
> + get_net_ns(old_ns);
> + return err;
> +}
> +
> +void free_net_ns(struct kref *kref)
> +{
> + struct net_namespace *ns;
> +
> + ns = container_of(kref, struct net_namespace, kref);
> + kfree(ns);
> +}
> +
> +#endif /* CONFIG_NET_NS */
> Index: 2.6.19-rc5-mm2/net/Kconfig
> ===================================================================
> --- 2.6.19-rc5-mm2.orig/net/Kconfig
> +++ 2.6.19-rc5-mm2/net/Kconfig
> @@ -67,6 +67,14 @@ source "net/netlabel/Kconfig"
>
> endif # if INET
>
> +config NET_NS
> + bool "Network Namespaces"
> + help
> + This option enables multiple independent network namespaces,
> + each having own network devices, IP addresses, routes, and so on.
> + If unsure, answer N.
> +
> +
> config NETWORK_SECMARK
> bool "Security Marking"
> help
>
Kirill Korotaev wrote:
> Cedric,
>
> Dmitry Mishin and Daniel Lezcano are working together on the full
> network namespace incorporating both needs of OpenVZ and VServer/IBM.
>
> Thanks,
> Kirill
Kirill,
We will need this framework to move the network isolation code to the
ns_proxy/net_namespace structure. So if Cedric gives us an empty
framework it is fine, unless someone does not agree with it...
-- Daniel.
On Tue, Nov 21, 2006 at 07:01:03PM +0100, Daniel Lezcano wrote:
> Kirill Korotaev wrote:
> >Cedric,
> >
> >Dmitry Mishin and Daniel Lezcano are working together on the full
> >network namespace incorporating both needs of OpenVZ and VServer/IBM.
> >
> >Thanks,
> >Kirill
>
> Kirill,
>
> We will need this framework to move the network isolation code to the
> ns_proxy/net_namespace structure. So if Cedric gives us a empty
> framework it is fine, except if someone does not agree with it...
no problem here, but I think we will need another one,
or some smart way to do the network isolation (layer 3)
for the network namespace (as alternative to the layer 2
approach) ...
as they are both complementary in some way, I'm not sure
a single space will suffice ...
best,
Herbert
> -- Daniel.
On Tuesday 21 November 2006 21:01, Daniel Lezcano wrote:
> Kirill Korotaev wrote:
> > Cedric,
> >
> > Dmitry Mishin and Daniel Lezcano are working together on the full
> > network namespace incorporating both needs of OpenVZ and VServer/IBM.
> >
> > Thanks,
> > Kirill
>
> Kirill,
>
> We will need this framework to move the network isolation code to the
> ns_proxy/net_namespace structure. So if Cedric gives us a empty
> framework it is fine, except if someone does not agree with it...
>
> -- Daniel.
This patch looks acceptable for us.
BTW, Daniel, we agreed to base our work on Andrey's patchset. I do not see a
reason why Cedric should force us to do some unnecessary work and move the
existing patchset over to his interface.
--
Thanks,
Dmitry.
Dmitry Mishin <[email protected]> writes:
> On Tuesday 21 November 2006 21:01, Daniel Lezcano wrote:
>> Kirill Korotaev wrote:
>> > Cedric,
>> >
>> > Dmitry Mishin and Daniel Lezcano are working together on the full
>> > network namespace incorporating both needs of OpenVZ and VServer/IBM.
>> >
>> > Thanks,
>> > Kirill
>>
>> Kirill,
>>
>> We will need this framework to move the network isolation code to the
>> ns_proxy/net_namespace structure. So if Cedric gives us a empty
>> framework it is fine, except if someone does not agree with it...
>>
>> -- Daniel.
> This patch looks acceptable for us.
> BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> reason, why Cedric force us to make some unnecessary work and move existent
> patchset over his interface.
If you are going to take that attitude. Where was this conversation?
It appears several relevant people were not aware of this development
discussion. So when it comes up for general review you can expect your
approach as well as your code to be critiqued.
Eric
Dmitry Mishin wrote:
> This patch looks acceptable for us.
> BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> reason, why Cedric force us to make some unnecessary work and move existent
> patchset over his interface.
I still agree.
Don't blame Cedric, he just wanted to help us. BTW, it is not "his"
interface but the namespace interface.
-- Daniel
>>>We will need this framework to move the network isolation code to the
>>>ns_proxy/net_namespace structure. So if Cedric gives us a empty
>>>framework it is fine, except if someone does not agree with it...
>>>
>>> -- Daniel.
>>
>>This patch looks acceptable for us.
>>BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
>>reason, why Cedric force us to make some unnecessary work and move existent
>>patchset over his interface.
>
>
> If you are going to take that attitude. Where was this conversation?
>
> It appears several relevant people were not aware of this development
> discussion. So when it comes up for general review you can expect your
> approach as well as your code to be critiqued.
Eric,
Dim collected the requirements for all the network virtualization approaches:
http://wiki.openvz.org/Containers/Network_virtualization
This was discussed with Daniel and Herbert.
Dim and Daniel just wanted to prepare the patches for this.
So I hope your critique will be constructive as they do a hard job :)
Thanks,
Kirill
On Wednesday 22 November 2006 11:43, Eric W. Biederman wrote:
> Dmitry Mishin <[email protected]> writes:
>
> > On Tuesday 21 November 2006 21:01, Daniel Lezcano wrote:
> >> Kirill Korotaev wrote:
> >> > Cedric,
> >> >
> >> > Dmitry Mishin and Daniel Lezcano are working together on the full
> >> > network namespace incorporating both needs of OpenVZ and VServer/IBM.
> >> >
> >> > Thanks,
> >> > Kirill
> >>
> >> Kirill,
> >>
> >> We will need this framework to move the network isolation code to the
> >> ns_proxy/net_namespace structure. So if Cedric gives us a empty
> >> framework it is fine, except if someone does not agree with it...
> >>
> >> -- Daniel.
> > This patch looks acceptable for us.
> > BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> > reason, why Cedric force us to make some unnecessary work and move existent
> > patchset over his interface.
>
> If you are going to take that attitude. Where was this conversation?
>
> It appears several relevant people were not aware of this development
> discussion. So when it comes up for general review you can expect your
> approach as well as your code to be critiqued.
Eric,
please read and comment on Daniel's summary:
http://marc.theaimsgroup.com/?l=linux-netdev&m=116352117000763&w=2
It seems that you missed it.
--
Thanks,
Dmitry.
> no problem here, but I think we will need another one,
> or some smart way to do the network isolation (layer 3)
> for the network namespace (as alternative to the layer 2
> approach) ...
My feeling (Dmitry and Daniel can correct me) is that it will be
addressed with an unshare-like flag : NETNS2 and NETNS3.
> as they are both complementary in some way, I'm not sure
> a single space will suffice ...
hmm, so you think there could be two different namespaces
for network to handle layer 2 or 3. Couldn't that just be a sub-part
of net_namespace?
C.
Hello,
Dmitry Mishin wrote:
> This patch looks acceptable for us.
good. shall we merge it then ? see comment below.
> BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> reason, why Cedric force us to make some unnecessary work and move existent
> patchset over his interface.
yeah it's a bit different from andrey's but not that much and it's more in
the spirit of uts and ipc namespace (and user namespace if that reaches the
kernel one day :) so that's why i made the small changes.
It also helps the nsproxy/namespace syscalls to have a similar interface
to manipulate namespaces. who knows, soon we might be able to have a 'struct
namespace' with an ops field to define new namespace types ?
I can also send an empty framework for user namespace ;)
thanks for reacting !
C.
Quoting Cedric Le Goater ([email protected]):
> Hello,
>
> Dmitry Mishin wrote:
>
> > This patch looks acceptable for us.
>
> good. shall we merge it then ? see comment below.
>
> > BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> > reason, why Cedric force us to make some unnecessary work and move existent
> > patchset over his interface.
>
> yeah it's a bit different from andrey's but not that much and it's more in
Where is Andrey's patch?
> the spirit of uts and ipc namespace (and user namespace if that reaches the
> kernel one day :) so that's why i made the small changes.
I agree the namespace frameworks should be consistent, but i don't know
whether Andrey's is or not. I'd like to have the framework included so
we reduce the number of silly rewrites due to clone flag collisions etc.
>
> It also helping the nsproxy/namespace syscalls to have a similar interface
> to manipulate namespaces. who knows, soon we might be able to have a 'struct
> namespace' with a ops field to define new namespace types ?
>
> I can also send a empty framework for user namespace ;)
Please do - then I'll rebase the patchset I sent to the containers list
onto your patch, and resubmit the whole userns.
-serge
> Where is Andrey's patch?
The last I saw was on 2.6.18-rc4-mm1 :
http://marc.theaimsgroup.com/?l=linux-netdev&m=115572448503723&w=2
>> the spirit of uts and ipc namespace (and user namespace if that reaches the
>> kernel one day :) so that's why i made the small changes.
>
> I agree the namespace frameworks should be consistent, but i don't know
> whether Andrey's is or not. I'd like to have the framework included so
> we reduce the number of silly rewrites due to clone flag collisions etc.
yes. it is a pain to maintain.
>> It also helping the nsproxy/namespace syscalls to have a similar interface
>> to manipulate namespaces. who knows, soon we might be able to have a 'struct
>> namespace' with a ops field to define new namespace types ?
>>
>> I can also send a empty framework for user namespace ;)
>
> Please do - then I'll rebase the patchset I sent to the containes list
> onto your patch, and resubmit the whole userns.
I'll send a refreshed version of both in the next round.
C.
On Wednesday 22 November 2006 19:41, Serge E. Hallyn wrote:
> Quoting Cedric Le Goater ([email protected]):
> > Hello,
> >
> > Dmitry Mishin wrote:
> >
> > > This patch looks acceptable for us.
> >
> > good. shall we merge it then ? see comment below.
> >
> > > BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> > > reason, why Cedric force us to make some unnecessary work and move existent
> > > patchset over his interface.
> >
> > yeah it's a bit different from andrey's but not that much and it's more in
>
> Where is Andrey's patch?
This thread - http://thread.gmane.org/gmane.linux.network/42666
>
> > the spirit of uts and ipc namespace (and user namespace if that reaches the
> > kernel one day :) so that's why i made the small changes.
>
> I agree the namespace frameworks should be consistent, but i don't know
> whether Andrey's is or not. I'd like to have the framework included so
> we reduce the number of silly rewrites due to clone flag collisions etc.
>
> >
> > It also helping the nsproxy/namespace syscalls to have a similar interface
> > to manipulate namespaces. who knows, soon we might be able to have a 'struct
> > namespace' with a ops field to define new namespace types ?
> >
> > I can also send a empty framework for user namespace ;)
>
> Please do - then I'll rebase the patchset I sent to the containes list
> onto your patch, and resubmit the whole userns.
>
> -serge
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
Thanks,
Dmitry.
Cedric Le Goater <[email protected]> writes:
>> no problem here, but I think we will need another one,
>> or some smart way to do the network isolation (layer 3)
>> for the network namespace (as alternative to the layer 2
>> approach) ...
>
> My feeling (Dmitry and Daniel can correct me) is that it will be
> addressed with an unshare-like flag : NETNS2 and NETNS3.
>
>> as they are both complementary in some way, I'm not sure
>> a single space will suffice ...
>
> hmm, so you think there could be a 2 differents namespaces
> for network to handle layer 2 or 3. Couldn't that be just a sub part
> of net_namespace.
The justification is performance and a little on the simplicity side.
My personal feeling is still that layer 3 is something easier done
as a new kind of table in an iptables type infrastructure. And in
fact I believe that, if done that way, it would capture 90%+ of what
all of the iptables rules do. So it might be a nice firewalling speed up.
I don't think the layer 3 idea where you just do bind filter fits
the namespace concept very well.
Eric
Eric W. Biederman wrote:
> The justification is performance and a little on the simplicity side.
>
> My personal feel is still that layer 3 is something easier done
> as a new kind of table in an iptables type infrastructure. And in
> fact I believe if done that way would capture do what 90%+ of what
> all of the iptables rules do. So it might be a nice firewalling speed up.
> I don't think the layer 3 idea where you just do bind filter fits
> the namespace concept very well.
The question is why do we need to do isolation/virtualization at the
layer 3 ?
1 - for security
2 - for resource management
3 - for mobility
The last one is not implementable with a netfilter-only solution. The
solution is to have a container id per socket in order to identify them
and to discriminate the sockets owned by the container for
checkpoint/restart and for the quiescent point. If you look closely at the
layer 3 approach with network isolation you will see that if we replace the
container id by the network namespace pointer, the code is the *same*.
So we have common code for layer 4 for namespaces and layer 3
isolation. Pushing the layer 3 isolation a little more into namespaces,
we have reached a common solution for layer 2 and layer 3 with Dmitry and
made the two co-exist.
The next step will be to reach a quiescent point in order to
checkpoint/restart. The quiescent point will be reached using the
namespace identifier, the traffic will be dropped for incoming and
outgoing traffic and network timers will be frozen. Should we again have
two approaches ? One for layer 2 and another one for layer 3,
instead of using the same mechanism for namespaces ?
After that the checkpoint will use the network namespace in order to
find the sockets to be checkpointed. Should we have 2 checkpoint/restart
mechanisms ?
I agree, some parts of the layer 3 approach do not fit the namespaces
concept very well, but this is a conceptual vision of the namespaces. I
can argue that layer 2 does not fit the namespace concept either: the
socket hashtables are not per namespace, the route caches are not per
namespace. Does it mean it does not fit the namespace concept at all ?
No, it does, but it is written to be optimized, it is a question of
performance...
I don't want to enter into the debate, again, about layer 2/3
isolation/virtualization, I did my best to promote layer 2 and to
justify layer 3 on top of that. Now, I'll let the network guys decide...
-- Daniel
Quoting Dmitry Mishin ([email protected]):
> On Wednesday 22 November 2006 19:41, Serge E. Hallyn wrote:
> > Quoting Cedric Le Goater ([email protected]):
> > > Hello,
> > >
> > > Dmitry Mishin wrote:
> > >
> > > > This patch looks acceptable for us.
> > >
> > > good. shall we merge it then ? see comment below.
> > >
> > > > BTW, Daniel, we agreed to be based on the Andrey's patchset. I do not see a
> > > > reason, why Cedric force us to make some unnecessary work and move existent
> > > > patchset over his interface.
> > >
> > > yeah it's a bit different from andrey's but not that much and it's more in
> >
> > Where is Andrey's patch?
> This thread - http://thread.gmane.org/gmane.linux.network/42666
Thanks, Dmitry. Now I do recall seeing that before.
That patchset appears to go part of the way, but not all the way, to fitting in with
the existing namespaces. For instance, you use exit_task_namespaces() for
refcounting, but don't put the net_namespace in the nsproxy and use your
own mechanism for unsharing.
It really seems useful to have all the namespaces be consistent whenever
practical, and I don't think your patchset would need much tweaking to
fit onto Cedric's patch. Am I missing a complicating factor?
thanks,
-serge
On Wednesday 22 November 2006 20:53, Eric W. Biederman wrote:
> Cedric Le Goater <[email protected]> writes:
> >> no problem here, but I think we will need another one,
> >> or some smart way to do the network isolation (layer 3)
> >> for the network namespace (as alternative to the layer 2
> >> approach) ...
> >
> > My feeling (Dmitry and Daniel can correct me) is that it will be
> > addressed with an unshare-like flag : NETNS2 and NETNS3.
> >
> >> as they are both complementary in some way, I'm not sure
> >> a single space will suffice ...
> >
> > hmm, so you think there could be a 2 differents namespaces
> > for network to handle layer 2 or 3. Couldn't that be just a sub part
> > of net_namespace.
>
> The justification is performance and a little on the simplicity side.
>
> My personal feel is still that layer 3 is something easier done
> as a new kind of table in an iptables type infrastructure. And in
> fact I believe if done that way would capture do what 90%+ of what
> all of the iptables rules do. So it might be a nice firewalling speed up.
Two points about solution using netfilter infrastructure:
1) Conntracks and dependent modules are called with the highest priority and
will require that the skb context be the same in the input and output chains,
otherwise it will be a good place for bugs. So, we should change the context
before it is marked by conntracks;
2) This solution has worse performance in comparison with Daniel's solution
due to the additional lookup of the context by IP address.
>
> I don't think the layer 3 idea where you just do bind filter fits
> the namespace concept very well.
>
> Eric
--
Thanks,
Dmitry.
On Thursday 23 November 2006 05:39, Serge E. Hallyn wrote:
> Quoting Dmitry Mishin ([email protected]):
> > On Wednesday 22 November 2006 19:41, Serge E. Hallyn wrote:
> > > Quoting Cedric Le Goater ([email protected]):
> > > > Hello,
> > > >
> > > > Dmitry Mishin wrote:
> > > > > This patch looks acceptable for us.
> > > >
> > > > good. shall we merge it then ? see comment below.
> > > >
> > > > > BTW, Daniel, we agreed to be based on the Andrey's patchset. I do
> > > > > not see a reason, why Cedric force us to make some unnecessary work
> > > > > and move existent patchset over his interface.
> > > >
> > > > yeah it's a bit different from andrey's but not that much and it's
> > > > more in
> > >
> > > Where is Andrey's patch?
> >
> > This thread - http://thread.gmane.org/gmane.linux.network/42666
>
> Thanks, Dmitry. Now I do recall seeing that before.
>
> That patchset appears to go part, but not all the way to fitting in with
> the existing namespaces. For instance, you use exit_task_namespaces() for
> refcounting, but don't put the net_namespace in the nsproxy and use your
> own mechanism for unsharing.
>
> It really seems useful to have all the namespaces be consistent whenever
> practical, and I don't think your patchset would need much tweaking to
> fit onto Cedric's patch. Am I missing a complicating factor?
No. I've already said, Cedric's patch is acceptable for us.
--
Thanks,
Dmitry.
Dmitry Mishin wrote:
> On Thursday 23 November 2006 05:39, Serge E. Hallyn wrote:
>> Quoting Dmitry Mishin ([email protected]):
>>> On Wednesday 22 November 2006 19:41, Serge E. Hallyn wrote:
>>>> Quoting Cedric Le Goater ([email protected]):
>>>>> Hello,
>>>>>
>>>>> Dmitry Mishin wrote:
>>>>>> This patch looks acceptable for us.
>>>>> good. shall we merge it then ? see comment below.
>>>>>
>>>>>> BTW, Daniel, we agreed to be based on the Andrey's patchset. I do
>>>>>> not see a reason, why Cedric force us to make some unnecessary work
>>>>>> and move existent patchset over his interface.
>>>>> yeah it's a bit different from andrey's but not that much and it's
>>>>> more in
>>>> Where is Andrey's patch?
>>> This thread - http://thread.gmane.org/gmane.linux.network/42666
>> Thanks, Dmitry. Now I do recall seeing that before.
>>
>> That patchset appears to go part, but not all the way to fitting in with
>> the existing namespaces. For instance, you use exit_task_namespaces() for
>> refcounting, but don't put the net_namespace in the nsproxy and use your
>> own mechanism for unsharing.
>>
>> It really seems useful to have all the namespaces be consistent whenever
>> practical, and I don't think your patchset would need much tweaking to
>> fit onto Cedric's patch. Am I missing a complicating factor?
> No. I've already said, Cedric's patch is acceptable for us.
Cool, so it should reduce the patchsets of everyone working on layer 3, layer 2,
etc.
Here's a refreshed version for 2.6.19-rc5-mm2. The previous one was a bit fuzzy.
thanks,
C.
From: Cedric Le Goater <[email protected]>
This patch adds an empty net namespace framework
Signed-off-by: Cedric Le Goater <[email protected]>
---
include/linux/init_task.h | 2 +
include/linux/net_namespace.h | 49 ++++++++++++++++++++++++++++++++++++++++++
include/linux/nsproxy.h | 2 +
kernel/nsproxy.c | 12 ++++++++++
net/Kconfig | 8 ++++++
net/core/Makefile | 2 -
net/core/net_namespace.c | 41 +++++++++++++++++++++++++++++++++++
7 files changed, 115 insertions(+), 1 deletion(-)
Index: 2.6.19-rc5-mm2/include/linux/init_task.h
===================================================================
--- 2.6.19-rc5-mm2.orig/include/linux/init_task.h
+++ 2.6.19-rc5-mm2/include/linux/init_task.h
@@ -8,6 +8,7 @@
#include <linux/lockdep.h>
#include <linux/ipc.h>
#include <linux/pid_namespace.h>
+#include <linux/net_namespace.h>
#define INIT_FDTABLE \
{ \
@@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy;
.id = 0, \
.uts_ns = &init_uts_ns, \
.mnt_ns = NULL, \
+ INIT_NET_NS(net_ns) \
INIT_IPC_NS(ipc_ns) \
}
Index: 2.6.19-rc5-mm2/include/linux/net_namespace.h
===================================================================
--- /dev/null
+++ 2.6.19-rc5-mm2/include/linux/net_namespace.h
@@ -0,0 +1,49 @@
+#ifndef _LINUX_NET_NAMESPACE_H
+#define _LINUX_NET_NAMESPACE_H
+
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+
+struct net_namespace {
+ struct kref kref;
+};
+
+extern struct net_namespace init_net_ns;
+
+#ifdef CONFIG_NET_NS
+
+#define INIT_NET_NS(net_ns) .net_ns = &init_net_ns,
+
+static inline void get_net_ns(struct net_namespace *ns)
+{
+ kref_get(&ns->kref);
+}
+
+extern int copy_net_ns(int flags, struct task_struct *tsk);
+
+extern void free_net_ns(struct kref *kref);
+
+static inline void put_net_ns(struct net_namespace *ns)
+{
+ kref_put(&ns->kref, free_net_ns);
+}
+
+#else
+
+#define INIT_NET_NS(net_ns)
+
+static inline void get_net_ns(struct net_namespace *ns)
+{
+}
+
+static inline int copy_net_ns(int flags, struct task_struct *tsk)
+{
+ return 0;
+}
+
+static inline void put_net_ns(struct net_namespace *ns)
+{
+}
+#endif
+
+#endif /* _LINUX_NET_NAMESPACE_H */
Index: 2.6.19-rc5-mm2/include/linux/nsproxy.h
===================================================================
--- 2.6.19-rc5-mm2.orig/include/linux/nsproxy.h
+++ 2.6.19-rc5-mm2/include/linux/nsproxy.h
@@ -8,6 +8,7 @@ struct mnt_namespace;
struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
+struct net_namespace;
/*
* A structure to contain pointers to all per-process
@@ -29,6 +30,7 @@ struct nsproxy {
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
+ struct net_namespace *net_ns;
};
extern struct nsproxy init_nsproxy;
Index: 2.6.19-rc5-mm2/kernel/nsproxy.c
===================================================================
--- 2.6.19-rc5-mm2.orig/kernel/nsproxy.c
+++ 2.6.19-rc5-mm2/kernel/nsproxy.c
@@ -20,6 +20,7 @@
#include <linux/mnt_namespace.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
+#include <linux/net_namespace.h>
struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
@@ -71,6 +72,8 @@ struct nsproxy *dup_namespaces(struct ns
get_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
get_pid_ns(ns->pid_ns);
+ if (ns->net_ns)
+ get_net_ns(ns->net_ns);
}
return ns;
@@ -118,10 +121,17 @@ int copy_namespaces(int flags, struct ta
if (err)
goto out_pid;
+ err = copy_net_ns(flags, tsk);
+ if (err)
+ goto out_net;
+
out:
put_nsproxy(old_ns);
return err;
+out_net:
+ if (new_ns->pid_ns)
+ put_pid_ns(new_ns->pid_ns);
out_pid:
if (new_ns->ipc_ns)
put_ipc_ns(new_ns->ipc_ns);
@@ -147,5 +157,7 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns)
put_pid_ns(ns->pid_ns);
+ if (ns->net_ns)
+ put_net_ns(ns->net_ns);
kfree(ns);
}
Index: 2.6.19-rc5-mm2/net/core/Makefile
===================================================================
--- 2.6.19-rc5-mm2.orig/net/core/Makefile
+++ 2.6.19-rc5-mm2/net/core/Makefile
@@ -3,7 +3,7 @@
#
obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
- gen_stats.o gen_estimator.o
+ gen_stats.o gen_estimator.o net_namespace.o
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
Index: 2.6.19-rc5-mm2/net/core/net_namespace.c
===================================================================
--- /dev/null
+++ 2.6.19-rc5-mm2/net/core/net_namespace.c
@@ -0,0 +1,41 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/net_namespace.h>
+
+struct net_namespace init_net_ns = {
+ .kref = {
+ .refcount = ATOMIC_INIT(2),
+ },
+};
+
+#ifdef CONFIG_NET_NS
+
+int copy_net_ns(int flags, struct task_struct *tsk)
+{
+ struct net_namespace *old_ns = tsk->nsproxy->net_ns;
+ int err = 0;
+
+ if (!old_ns)
+ return 0;
+
+ get_net_ns(old_ns);
+ return err;
+}
+
+void free_net_ns(struct kref *kref)
+{
+ struct net_namespace *ns;
+
+ ns = container_of(kref, struct net_namespace, kref);
+ kfree(ns);
+}
+
+#endif /* CONFIG_NET_NS */
Index: 2.6.19-rc5-mm2/net/Kconfig
===================================================================
--- 2.6.19-rc5-mm2.orig/net/Kconfig
+++ 2.6.19-rc5-mm2/net/Kconfig
@@ -67,6 +67,14 @@ source "net/netlabel/Kconfig"
endif # if INET
+config NET_NS
+ bool "Network Namespaces"
+ help
+ This option enables multiple independent network namespaces,
+ each having its own network devices, IP addresses, routes, and so on.
+ If unsure, answer N.
+
+
config NETWORK_SECMARK
bool "Security Marking"
help
Dmitry Mishin wrote:
> On Thursday 23 November 2006 05:39, Serge E. Hallyn wrote:
>> Quoting Dmitry Mishin ([email protected]):
>>> On Wednesday 22 November 2006 19:41, Serge E. Hallyn wrote:
>>>> Quoting Cedric Le Goater ([email protected]):
>>>>> Hello,
>>>>>
>>>>> Dmitry Mishin wrote:
>>>>>> This patch looks acceptable for us.
>>>>> good. shall we merge it then ? see comment below.
>>>>>
>>>>>> BTW, Daniel, we agreed to be based on the Andrey's patchset. I do
>>>>>> not see a reason, why Cedric force us to make some unnecessary work
>>>>>> and move existent patchset over his interface.
>>>>> yeah it's a bit different from andrey's but not that much and it's
>>>>> more in
>>>> Where is Andrey's patch?
>>> This thread - http://thread.gmane.org/gmane.linux.network/42666
>> Thanks, Dmitry. Now I do recall seeing that before.
>>
>> That patchset appears to go part, but not all the way to fitting in with
>> the existing namespaces. For instance, you use exit_task_namespaces() for
>> refcounting, but don't put the net_namespace in the nsproxy and use your
>> own mechanism for unsharing.
>>
>> It really seems useful to have all the namespaces be consistent whenever
>> practical, and I don't think your patchset would need much tweaking to
>> fit onto Cedric's patch. Am I missing a complicating factor?
> No. I've already said, Cedric's patch is acceptable for us.
Do you mind if we port your patchset on top of this patch ? If you have
no time for that I can do it.
-- Daniel