2008-06-02 13:48:32

by Benjamin Thery

[permalink] [raw]
Subject: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

net: Enable tagging for net_class directories in sysfs

The problem: network devices show up in sysfs, and with network
namespaces active, multiple devices with the same name can show up in
the same directory, ouch!

To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.

By using the network namespace pointers as tags to separate out the
sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.

Signed-off-by: Eric W. Biederman <[email protected]>
Signed-off-by: Benjamin Thery <[email protected]>
---
fs/sysfs/mount.c | 36 ++++++++++++++++++++++++++++++++++++
include/linux/sysfs.h | 2 ++
net/Kconfig | 2 +-
net/core/net-sysfs.c | 20 ++++++++++++++++++++
4 files changed, 59 insertions(+), 1 deletion(-)

Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -16,6 +16,8 @@
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>

#include "sysfs.h"

@@ -78,6 +80,7 @@ static int sysfs_fill_super(struct super
root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
+ info->tag.net_ns = hold_net(current->nsproxy->net_ns);
return 0;

out_err:
@@ -95,6 +98,9 @@ static int sysfs_test_super(struct super
struct sysfs_super_info *info = sysfs_info(sb);
int found = 1;

+ if (task->nsproxy->net_ns != info->tag.net_ns)
+ found = 0;
+
return found;
}

@@ -131,6 +137,8 @@ static void sysfs_kill_sb(struct super_b
struct sysfs_super_info *info = sysfs_info(sb);

kill_anon_super(sb);
+ if (info->tag.net_ns)
+ release_net(info->tag.net_ns);
kfree(info);
}

@@ -181,6 +189,31 @@ restart:
spin_unlock(&sb_lock);
}

+#ifdef CONFIG_NET
+static void sysfs_net_exit(struct net *net)
+{
+ /* Allow the net namespace to go away while sysfs is still mounted. */
+ struct super_block *sb;
+ mutex_lock(&sysfs_rename_mutex);
+ sysfs_grab_supers();
+ mutex_lock(&sysfs_mutex);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ struct sysfs_super_info *info = sysfs_info(sb);
+ if (info->tag.net_ns != net)
+ continue;
+ release_net(info->tag.net_ns);
+ info->tag.net_ns = NULL;
+ }
+ mutex_unlock(&sysfs_mutex);
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+ .exit = sysfs_net_exit,
+};
+#endif
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
@@ -205,6 +238,9 @@ int __init sysfs_init(void)
unregister_filesystem(&sysfs_fs_type);
goto out_err;
}
+#ifdef CONFIG_NET
+ register_pernet_subsys(&sysfs_net_ops);
+#endif
} else
goto out_err;
out:
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -19,6 +19,7 @@

struct kobject;
struct module;
+struct net;

/* FIXME
* The *owner field is no longer used, but leave around
@@ -79,6 +80,7 @@ struct sysfs_ops {
};

struct sysfs_tag_info {
+ struct net *net_ns;
};

struct sysfs_tagged_dir_operations {
Index: linux-mm/net/Kconfig
===================================================================
--- linux-mm.orig/net/Kconfig
+++ linux-mm/net/Kconfig
@@ -30,7 +30,7 @@ menu "Networking options"
config NET_NS
bool "Network namespace support"
default n
- depends on EXPERIMENTAL && !SYSFS && NAMESPACES
+ depends on EXPERIMENTAL && NAMESPACES
help
Allow user space to create what appear to be multiple instances
of the network stack.
Index: linux-mm/net/core/net-sysfs.c
===================================================================
--- linux-mm.orig/net/core/net-sysfs.c
+++ linux-mm/net/core/net-sysfs.c
@@ -13,7 +13,9 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
+#include <linux/nsproxy.h>
#include <net/sock.h>
+#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
@@ -421,6 +423,23 @@ static void netdev_release(struct device
kfree((char *)dev - dev->padded);
}

+static const void *net_sb_tag(struct sysfs_tag_info *info)
+{
+ return info->net_ns;
+}
+
+static const void *net_kobject_tag(struct kobject *kobj)
+{
+ struct net_device *dev;
+ dev = container_of(kobj, struct net_device, dev.kobj);
+ return dev_net(dev);
+}
+
+static const struct sysfs_tagged_dir_operations net_tagged_dir_operations = {
+ .sb_tag = net_sb_tag,
+ .kobject_tag = net_kobject_tag,
+};
+
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
@@ -430,6 +449,7 @@ static struct class net_class = {
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
+ .tag_ops = &net_tagged_dir_operations,
};

/* Delete sysfs entries but hold kobject reference until after all

--


2008-06-03 04:10:20

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

On Mon, Jun 02, 2008 at 03:46:08PM +0200, Benjamin Thery wrote:
> net: Enable tagging for net_class directories in sysfs
>
> The problem. Network devices show up in sysfs and with the network
> namespace active multiple devices with the same name can show up in
> the same directory, ouch!
>
> To avoid that problem and allow existing applications in network namespaces
> to see the same interface that is currently presented in sysfs, this
> patch enables the tagging directory support in sysfs.
>
> By using the network namespace pointers as tags to separate out the
> the sysfs directory entries we ensure that we don't have conflicts
> in the directories and applications only see a limited set of
> the network devices.

I don't like it how the network subsystem is starting to leach into the
sysfs core here. What happens when the next subsystem wants to do the
same thing? And then the next one? Will they all have to do this kind
of intrusive changes to sysfs?

Can't this be done only in the network subsystem?

thanks,

greg k-h

2008-06-03 12:17:09

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

Quoting Greg KH ([email protected]):
> On Mon, Jun 02, 2008 at 03:46:08PM +0200, Benjamin Thery wrote:
> > net: Enable tagging for net_class directories in sysfs
> >
> > The problem. Network devices show up in sysfs and with the network
> > namespace active multiple devices with the same name can show up in
> > the same directory, ouch!
> >
> > To avoid that problem and allow existing applications in network namespaces
> > to see the same interface that is currently presented in sysfs, this
> > patch enables the tagging directory support in sysfs.
> >
> > By using the network namespace pointers as tags to separate out the
> > the sysfs directory entries we ensure that we don't have conflicts
> > in the directories and applications only see a limited set of
> > the network devices.
>
> I don't like it how the network subsystem is starting to leach into the
> sysfs core here. What happens when the next subsystem wants to do the
> same thing? And then the next one? Will they all have to do this kind
> of intrusive changes to sysfs?

At least as far as the tagging goes, each namespace which needs to do
this (network, devices, and user, at least) will add a field to the
tag structure and call sysfs_enable_tagging() on the relevant
directories. So no more core sysfs changes should be needed, as Eric
tried to make this generic enough to be generally useful.

> Can't this be done only in the network subsystem?

The sysfs/kobject layer has to somehow decide what to show for
/sys/class/net contents based on the mountpoint, right? So I don't see
how the network subsystem could do it.

The only non-tagging alternative I'd see would be to keep entirely
separate kobject pools for each namespace. To do that we'd probably
want to break /sys/class/net into a separate fs that can be
remounted, so at least we don't have to keep the rest of the kobject
pools (/sys/firmware, kernel, etc) in sync...

Eric had mentioned before breaking /sys into multiple mountpoints, so
I'll assume the fact that he implemented tagging means that there was
too much cross-linking and whatnot across the /sys tree to make that
feasible.

More importantly, that approach would require more core sysfs changes
for the next namespace, whereas the tagging approach does not!

thanks,
-serge

2008-06-03 15:24:08

by Benjamin Thery

[permalink] [raw]
Subject: Re: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

Greg KH wrote:
> On Mon, Jun 02, 2008 at 03:46:08PM +0200, Benjamin Thery wrote:
>> net: Enable tagging for net_class directories in sysfs
>>
>> The problem. Network devices show up in sysfs and with the network
>> namespace active multiple devices with the same name can show up in
>> the same directory, ouch!
>>
>> To avoid that problem and allow existing applications in network namespaces
>> to see the same interface that is currently presented in sysfs, this
>> patch enables the tagging directory support in sysfs.
>>
>> By using the network namespace pointers as tags to separate out the
>> the sysfs directory entries we ensure that we don't have conflicts
>> in the directories and applications only see a limited set of
>> the network devices.
>
> I don't like it how the network subsystem is starting to leach into the
> sysfs core here. What happens when the next subsystem wants to do the
> same thing? And then the next one? Will they all have to do this kind
> of intrusive changes to sysfs?
>
> Can't this be done only in the network subsystem?

I'm not sure I understand exactly what you mean.

What you don't like is seeing these hunks of network code in
fs/sysfs/mount.c? And you would prefer to see these bits of code reside in
the network subsystem instead, and see only "generic" sysfs services in
fs/sysfs/mount.c?

If that is it, I have an idea for implementing a less intrusive
sysfs_net_exit(), which can be shared with the other namespaces.
Serge introduces the same kind of changes in patch 10 to fix an issue in
the user namespace. I think we can share a bit of code and move the parts
specific to each namespace into their own subsystems.

Benjamin

>
> thanks,
>
> greg k-h
>
>


--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D

http://www.bull.com

2008-06-03 16:37:15

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

On Tue, Jun 03, 2008 at 05:24:21PM +0200, Benjamin Thery wrote:
> Greg KH wrote:
>> On Mon, Jun 02, 2008 at 03:46:08PM +0200, Benjamin Thery wrote:
>>> net: Enable tagging for net_class directories in sysfs
>>>
>>> The problem. Network devices show up in sysfs and with the network
>>> namespace active multiple devices with the same name can show up in
>>> the same directory, ouch!
>>>
>>> To avoid that problem and allow existing applications in network
>>> namespaces
>>> to see the same interface that is currently presented in sysfs, this
>>> patch enables the tagging directory support in sysfs.
>>>
>>> By using the network namespace pointers as tags to separate out the
>>> the sysfs directory entries we ensure that we don't have conflicts
>>> in the directories and applications only see a limited set of
>>> the network devices.
>> I don't like it how the network subsystem is starting to leach into the
>> sysfs core here. What happens when the next subsystem wants to do the
>> same thing? And then the next one? Will they all have to do this kind
>> of intrusive changes to sysfs?
>>
>> Can't this be done only in the network subsystem?
>
> I'm not sure to understand exactly what you mean.
>
> What you don't like is seeing these hunks of network code in
> fs/sysfs/mount.c? And you prefer to see these bits of code resides in
> the network subsystem instead and see only "generic" sysfs services in
> fs/sysfs/mount.c?

Yes, exactly.

I don't want the problem that if more subsystems want to implement
something like this, they too need to modify the sysfs core.

And the mess with the #ifdef, that's not nice either :)

> If this is it, I have some idea to implement a less intrusive
> sysfs_net_exit(), which can be shared with the other namespaces.
> Serge introduces the same kind of changes in patch 10 to fix an issue in
> user namespace. I think we can share a bit of code and move the parts
> specific to each namespace in their own subsystems.

I think that would be a good idea. Care to redo the series?

thanks,

greg k-h

2008-06-03 19:10:31

by Benjamin Thery

[permalink] [raw]
Subject: Re: [PATCH 09/10] netns: Enable tagging for net_class directories in sysfs

Quoting Greg KH <[email protected]>:

> On Tue, Jun 03, 2008 at 05:24:21PM +0200, Benjamin Thery wrote:
>> Greg KH wrote:
>>> On Mon, Jun 02, 2008 at 03:46:08PM +0200, Benjamin Thery wrote:
>>>> net: Enable tagging for net_class directories in sysfs
>>>>
>>>> The problem. Network devices show up in sysfs and with the network
>>>> namespace active multiple devices with the same name can show up in
>>>> the same directory, ouch!
>>>>
>>>> To avoid that problem and allow existing applications in network
>>>> namespaces
>>>> to see the same interface that is currently presented in sysfs, this
>>>> patch enables the tagging directory support in sysfs.
>>>>
>>>> By using the network namespace pointers as tags to separate out the
>>>> the sysfs directory entries we ensure that we don't have conflicts
>>>> in the directories and applications only see a limited set of
>>>> the network devices.
>>> I don't like it how the network subsystem is starting to leach into the
>>> sysfs core here. What happens when the next subsystem wants to do the
>>> same thing? And then the next one? Will they all have to do this kind
>>> of intrusive changes to sysfs?
>>>
>>> Can't this be done only in the network subsystem?
>>
>> I'm not sure to understand exactly what you mean.
>>
>> What you don't like is seeing these hunks of network code in
>> fs/sysfs/mount.c? And you prefer to see these bits of code resides in
>> the network subsystem instead and see only "generic" sysfs services in
>> fs/sysfs/mount.c?
>
> Yes, exactly.
>
> I don't want the problem that if more subsystems want to implement
> something like this, they too need to modify the sysfs core.
>
> And the mess with the #ifdef, that's not nice either :)

OK. This is clear now :)

>> If this is it, I have some idea to implement a less intrusive
>> sysfs_net_exit(), which can be shared with the other namespaces.
>> Serge introduces the same kind of changes in patch 10 to fix an issue in
>> user namespace. I think we can share a bit of code and move the parts
>> specific to each namespace in their own subsystems.
>
> I think that would be a good idea. Care to redo the series?

I'm already working on it.
I'll try to re-post in a couple of days.

Benjamin

>
> thanks,
>
> greg k-h
>
>



----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.