Hi,
A while ago we had talked about adding a kobject to struct module. By
doing this we add support for module paramaters and other module info to
be exported in sysfs. So here's a patch that does this that is against
2.6.0-test4 (it applies with some fuzz, sorry.)
This patch adds basic kobject support to struct module, and it creates a
/sys/module directory which contains all of the individual modules.
Each module currently exports only one file, the module refcount:
$ tree /sys/module/
/sys/module/
|-- ehci_hcd
| `-- refcount
|-- hid
| `-- refcount
|-- parport
| `-- refcount
|-- parport_pc
| `-- refcount
|-- uhci_hcd
| `-- refcount
`-- usbcore
`-- refcount
I used the kobject reference count to add to the module reference count
to handle races if a user has a module owned sysfs file open, but this
reference is not exported to userspace, as that just confuses the
userspace tools a bunch (and I don't want to force people to upgrade
module-init-tools this late in the development cycle...)
Rusty, any comments? If this looks sane, I'll work on adding the
module_paramater support to the individual module directories.
thanks,
greg k-h
# Module: Add a kobject to struct module
#
# This gets us /sys/module to show all modules.
# Module attributes will happen next.
diff -Nru a/include/linux/module.h b/include/linux/module.h
--- a/include/linux/module.h Tue Sep 9 14:58:58 2003
+++ b/include/linux/module.h Tue Sep 9 14:58:58 2003
@@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/elf.h>
#include <linux/stringify.h>
+#include <linux/kobject.h>
#include <asm/local.h>
#include <asm/module.h>
@@ -184,6 +185,8 @@
struct module
{
+ struct kobject kobj;
+
enum module_state state;
/* Member of list of modules */
diff -Nru a/kernel/module.c b/kernel/module.c
--- a/kernel/module.c Tue Sep 9 14:58:58 2003
+++ b/kernel/module.c Tue Sep 9 14:58:58 2003
@@ -613,6 +613,7 @@
for (i = 0; i < NR_CPUS; i++)
total += local_read(&mod->ref[i].count);
+ total += atomic_read(&mod->kobj.refcount);
return total;
}
EXPORT_SYMBOL(module_refcount);
@@ -656,6 +657,8 @@
down(&module_mutex);
}
+static void mod_kobject_remove(struct module *mod);
+
asmlinkage long
sys_delete_module(const char __user *name_user, unsigned int flags)
{
@@ -704,6 +707,10 @@
goto out;
}
}
+
+ /* unregister the kobject in this module */
+ mod_kobject_remove(mod);
+
/* Stop the machine so refcounts can't move: irqs disabled. */
DEBUGP("Stopping refcounts...\n");
ret = stop_refcounts();
@@ -743,7 +750,7 @@
struct module_use *use;
int printed_something = 0;
- seq_printf(m, " %u ", module_refcount(mod));
+ seq_printf(m, " %u ", module_refcount(mod) - atomic_read(&mod->kobj.refcount));
/* Always include a trailing , so userspace can differentiate
between this and the old multi-field proc format. */
@@ -1753,6 +1760,85 @@
else return ptr;
}
+/* sysfs stuff */
+struct module_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct module *mod, char *);
+ ssize_t (*store)(struct module *mod, const char *, size_t);
+};
+#define to_module_attr(n) container_of(n, struct module_attribute, attr);
+#define to_module(n) container_of(n, struct module, kobj)
+
+static ssize_t module_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct module *slot = to_module(kobj);
+ struct module_attribute *attribute = to_module_attr(attr);
+ return attribute->show ? attribute->show(slot, buf) : 0;
+}
+
+static ssize_t module_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len)
+{
+ struct module *slot = to_module(kobj);
+ struct module_attribute *attribute = to_module_attr(attr);
+ return attribute->store ? attribute->store(slot, buf, len) : 0;
+}
+
+static struct sysfs_ops module_sysfs_ops = {
+ .show = module_attr_show,
+ .store = module_attr_store,
+};
+
+/* Huh? A release() function that doesn't do anything?
+ * This is here only because a module has another reference count that
+ * it uses to determine if it should be cleaned up or not. If the
+ * module wants to switch over to use the kobject reference instead of
+ * its own, then this release function needs to do some work.
+ */
+static void module_release(struct kobject *kobj)
+{
+}
+
+static struct kobj_type module_ktype = {
+ .sysfs_ops = &module_sysfs_ops,
+ .release = &module_release,
+};
+static decl_subsys(module, &module_ktype, NULL);
+
+static int __init module_subsys_init(void)
+{
+ return subsystem_register(&module_subsys);
+}
+core_initcall(module_subsys_init);
+
+static ssize_t show_mod_refcount(struct module *mod, char *buf)
+{
+ return sprintf(buf, "%d\n", module_refcount(mod) - atomic_read(&mod->kobj.refcount));
+}
+
+static struct module_attribute mod_refcount = {
+ .attr = {.name = "refcount", .mode = S_IRUGO},
+ .show = show_mod_refcount,
+};
+
+static int mod_kobject_init(struct module *mod)
+{
+ int retval;
+
+ memset(&mod->kobj, 0x00, sizeof(struct kobject));
+ kobject_set_name(&mod->kobj, mod->name);
+ kobj_set_kset_s(mod, module_subsys);
+ retval = kobject_register(&mod->kobj);
+ if (!retval)
+ retval = sysfs_create_file(&mod->kobj, &mod_refcount.attr);
+ return retval;
+}
+
+static void mod_kobject_remove(struct module *mod)
+{
+ sysfs_remove_file(&mod->kobj, &mod_refcount.attr);
+ kobject_unregister(&mod->kobj);
+}
+
/* This is where the real work happens */
asmlinkage long
sys_init_module(void __user *umod,
@@ -1816,6 +1902,8 @@
}
return ret;
}
+
+ mod_kobject_init(mod);
/* Now it's a first class citizen! */
down(&module_mutex);
On Tue, Sep 09, 2003 at 03:24:21PM -0700, Greg KH wrote:
> So here's a patch that does this that is against 2.6.0-test4 (it
> applies with some fuzz, sorry.)
Ugh, that should read "2.6.0-test5"...
greg k-h
In message <[email protected]> you write:
> Hi,
>
> A while ago we had talked about adding a kobject to struct module. By
> doing this we add support for module paramaters and other module info to
> be exported in sysfs. So here's a patch that does this that is against
> 2.6.0-test4 (it applies with some fuzz, sorry.)
I'd just started on the same thing, but I'll use yours as a bae.
> I used the kobject reference count to add to the module reference count
> to handle races if a user has a module owned sysfs file open, but this
> reference is not exported to userspace, as that just confuses the
> userspace tools a bunch (and I don't want to force people to upgrade
> module-init-tools this late in the development cycle...)
I'm not sure if embedding the kobject in the module is the correct
approach in this case, because we can't use the kobject refcount for
modules because it's too slow. This cannot be fixed before 2.7 8(
Because kobject does not have a "struct module *owner", we can't
simply add in the refcount. The module reference count is defined to
never go from zero to one when the module is dying, which means
callers must use try_module_get(). I grab the reference on
read/write, which means opening the file won't hold the module,
either.
Were you intending to put all the info currently in /proc/modules
under sysfs? Makes sense I think. For the options you'll need a
subdir to avoid name clashes.
Cheers!
Rusty.
Name: Modules in sysfs
Author: Greg KH <[email protected]>
Status: Tested on 2.6.0-test5
D: Modified by Rusty to allocate kobject and module separately.
D:
D: This patch adds basic kobject support to struct module, and it creates a
D: /sys/module directory which contains all of the individual modules.
D: Each module currently exports only one file, the module refcount:
D: $ tree /sys/module/
D: /sys/module/
D: |-- ehci_hcd
D: | `-- refcount
D: |-- hid
D: | `-- refcount
D: |-- parport
D: | `-- refcount
D: |-- parport_pc
D: | `-- refcount
D: |-- uhci_hcd
D: | `-- refcount
D: `-- usbcore
D: `-- refcount
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26542-linux-2.6.0-test5/include/linux/module.h .26542-linux-2.6.0-test5.updated/include/linux/module.h
--- .26542-linux-2.6.0-test5/include/linux/module.h 2003-07-31 01:50:19.000000000 +1000
+++ .26542-linux-2.6.0-test5.updated/include/linux/module.h 2003-09-10 11:50:04.000000000 +1000
@@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/elf.h>
#include <linux/stringify.h>
+#include <linux/kobject.h>
#include <asm/local.h>
#include <asm/module.h>
@@ -182,6 +183,12 @@ enum module_state
MODULE_STATE_GOING,
};
+struct module_kobj
+{
+ struct kobject kobj;
+ struct module *mod;
+};
+
struct module
{
enum module_state state;
@@ -192,6 +199,9 @@ struct module
/* Unique handle for this module */
char name[MODULE_NAME_LEN];
+ /* Our presence in sysfs. */
+ struct module_kobj *mkobj;
+
/* Exported symbols */
const struct kernel_symbol *syms;
unsigned int num_syms;
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .26542-linux-2.6.0-test5/kernel/module.c .26542-linux-2.6.0-test5.updated/kernel/module.c
--- .26542-linux-2.6.0-test5/kernel/module.c 2003-09-09 10:35:05.000000000 +1000
+++ .26542-linux-2.6.0-test5.updated/kernel/module.c 2003-09-10 12:19:51.000000000 +1000
@@ -1071,6 +1071,29 @@ static unsigned long resolve_symbol(Elf_
return ret;
}
+static ssize_t show_mod_refcount(struct module *mod, char *buf)
+{
+ /* We hold one temporarily to access this, so sub 1. */
+ return sprintf(buf, "%d\n", module_refcount(mod)-1);
+}
+
+struct module_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct module *mod, char *);
+ ssize_t (*store)(struct module *mod, const char *, size_t);
+};
+
+static struct module_attribute mod_refcount = {
+ .attr = {.name = "refcount", .mode = S_IRUGO},
+ .show = show_mod_refcount,
+};
+
+static void mod_kobject_remove(struct module_kobj *mkobj)
+{
+ sysfs_remove_file(&mkobj->kobj, &mod_refcount.attr);
+ kobject_unregister(&mkobj->kobj);
+}
+
/* Free a module, remove from lists, etc (must hold module mutex). */
static void free_module(struct module *mod)
{
@@ -1079,6 +1103,10 @@ static void free_module(struct module *m
list_del(&mod->list);
spin_unlock_irq(&modlist_lock);
+ /* unregister the kobject in this module */
+ if (mod->mkobj)
+ mod_kobject_remove(mod->mkobj);
+
/* Arch-specific cleanup. */
module_arch_cleanup(mod);
@@ -1677,6 +1705,99 @@ static struct module *load_module(void _
else return ptr;
}
+/* sysfs stuff */
+#define to_module_attr(n) container_of(n, struct module_attribute, attr);
+#define to_module_kobj(n) container_of(n, struct module_kobj, kobj);
+
+static ssize_t module_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct module_attribute *attribute = to_module_attr(attr);
+ struct module_kobj *mkobj = to_module_kobj(kobj);
+ ssize_t ret;
+
+ /* Don't let them in a module unloading *or loading*. */
+ if (!strong_try_module_get(mkobj->mod))
+ return -EIO;
+
+ ret = attribute->show ? attribute->show(mkobj->mod, buf) : 0;
+ module_put(mkobj->mod);
+ return ret;
+}
+
+static ssize_t module_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct module_attribute *attribute = to_module_attr(attr);
+ struct module_kobj *mkobj = to_module_kobj(kobj);
+ ssize_t ret;
+
+ /* Don't let them in a module unloading *or loading*. */
+ if (!strong_try_module_get(mkobj->mod))
+ return -EIO;
+
+ ret = attribute->store ? attribute->store(mkobj->mod, buf, len) : 0;
+ module_put(mkobj->mod);
+ return ret;
+}
+
+static struct sysfs_ops module_sysfs_ops = {
+ .show = module_attr_show,
+ .store = module_attr_store,
+};
+
+static void module_release(struct kobject *kobj)
+{
+ struct module_kobj *mkobj = to_module_kobj(kobj);
+ kfree(mkobj);
+}
+
+static struct kobj_type module_ktype = {
+ .sysfs_ops = &module_sysfs_ops,
+ .release = &module_release,
+};
+static decl_subsys(module, &module_ktype, NULL);
+
+static int __init module_subsys_init(void)
+{
+ return subsystem_register(&module_subsys);
+}
+core_initcall(module_subsys_init);
+
+static int mod_kobject_init(struct module *mod)
+{
+ int retval;
+
+ mod->mkobj = kmalloc(sizeof(*mod->mkobj), GFP_KERNEL);
+ if (!mod->mkobj)
+ return -ENOMEM;
+
+ memset(&mod->mkobj->kobj, 0x00, sizeof(struct kobject));
+ mod->mkobj->mod = mod;
+ retval = kobject_set_name(&mod->mkobj->kobj, "%s", mod->name);
+ if (retval < 0)
+ goto free_kobj;
+ kobj_set_kset_s(mod->mkobj, module_subsys);
+ retval = kobject_register(&mod->mkobj->kobj);
+ if (retval < 0)
+ goto free_kobj;
+ retval = sysfs_create_file(&mod->mkobj->kobj, &mod_refcount.attr);
+ if (retval < 0) {
+ kobject_unregister(&mod->mkobj->kobj); /* Frees for us */
+ goto fail;
+ }
+ return retval;
+
+free_kobj:
+ kfree(mod->mkobj);
+fail:
+ /* Make us idempotent for free_module() */
+ mod->mkobj = NULL;
+ return retval;
+}
+
/* This is where the real work happens */
asmlinkage long
sys_init_module(void __user *umod,
@@ -1715,6 +1836,13 @@ sys_init_module(void __user *umod,
list_add(&mod->list, &modules);
spin_unlock_irq(&modlist_lock);
+ ret = mod_kobject_init(mod);
+ if (ret < 0) {
+ free_module(mod);
+ up(&module_mutex);
+ return ret;
+ }
+
/* Drop lock so they can recurse */
up(&module_mutex);
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
On Wed, Sep 10, 2003 at 01:31:02PM +1000, Rusty Russell wrote:
> In message <[email protected]> you write:
> > Hi,
> >
> > A while ago we had talked about adding a kobject to struct module. By
> > doing this we add support for module paramaters and other module info to
> > be exported in sysfs. So here's a patch that does this that is against
> > 2.6.0-test4 (it applies with some fuzz, sorry.)
>
> I'd just started on the same thing, but I'll use yours as a bae.
>
> > I used the kobject reference count to add to the module reference count
> > to handle races if a user has a module owned sysfs file open, but this
> > reference is not exported to userspace, as that just confuses the
> > userspace tools a bunch (and I don't want to force people to upgrade
> > module-init-tools this late in the development cycle...)
>
> I'm not sure if embedding the kobject in the module is the correct
> approach in this case, because we can't use the kobject refcount for
> modules because it's too slow. This cannot be fixed before 2.7 8(
That's fine, I do not want to use the kobject refcount for modules.
modules have "special" refcount issues that you've already solved. I
don't want to go down that rathole :)
> Because kobject does not have a "struct module *owner", we can't
> simply add in the refcount.
Um, I don't understand. There is no "struct module *owner in struct
kobject. There is one in struct attribute, but I don't set it, so it
doesn't matter for this usage.
> The module reference count is defined to never go from zero to one
> when the module is dying, which means callers must use
> try_module_get(). I grab the reference on read/write, which means
> opening the file won't hold the module, either.
read/write of what? The attribute? Sure, why not set the module
attribute sysfs file to the module that way the reference count will be
incremented if the sysfs file is opened.
I'm not trying to touch the module reference count logic here, besides
adding the kobject reference count to the internal module count logic.
I think I got it all correct and it worked for me :)
But in looking at your patch, I don't see why you want to separate the
module from the kobject? What benefit does it have?
Hey, you're the maintainer, it's your call :)
> Were you intending to put all the info currently in /proc/modules
> under sysfs? Makes sense I think. For the options you'll need a
> subdir to avoid name clashes.
Yes, I was going to add it, this patch was more of a "test" to see how
receptive you were to it.
If you want, I can add the options and other info based off of this
patch.
thanks,
greg k-h
In message <[email protected]> you write:
> On Wed, Sep 10, 2003 at 01:31:02PM +1000, Rusty Russell wrote:
> > Because kobject does not have a "struct module *owner", we can't
> > simply add in the refcount.
>
> Um, I don't understand. There is no "struct module *owner in struct
> kobject. There is one in struct attribute, but I don't set it, so it
> doesn't matter for this usage.
Your parser broke, I think 8)
> > The module reference count is defined to never go from zero to one
> > when the module is dying, which means callers must use
> > try_module_get(). I grab the reference on read/write, which means
> > opening the file won't hold the module, either.
>
> read/write of what? The attribute? Sure, why not set the module
> attribute sysfs file to the module that way the reference count will be
> incremented if the sysfs file is opened.
Hmm, because there's one attribute: which module would own it? You're
going to creation attributes per module later (for module parameters),
so when you do that it might make sense to do this too.
> But in looking at your patch, I don't see why you want to separate the
> module from the kobject? What benefit does it have?
The lifetimes are separate, each controlled by their own reference
count. I *know* this will work even if someone holds a reference to
the kobject (for some reason in the future) even as the module is
removed.
> > Were you intending to put all the info currently in /proc/modules
> > under sysfs? Makes sense I think. For the options you'll need a
> > subdir to avoid name clashes.
>
> Yes, I was going to add it, this patch was more of a "test" to see how
> receptive you were to it.
More more! 8)
Thanks,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
> > read/write of what? The attribute? Sure, why not set the module
> > attribute sysfs file to the module that way the reference count will be
> > incremented if the sysfs file is opened.
>
> Hmm, because there's one attribute: which module would own it? You're
> going to creation attributes per module later (for module parameters),
> so when you do that it might make sense to do this too.
kernel/module.c owns the attribute code. (The same code and attribute
structure is reused for each object its exported for, so the owner field
must be set to the owner of the code itself.)
> > But in looking at your patch, I don't see why you want to separate the
> > module from the kobject? What benefit does it have?
>
> The lifetimes are separate, each controlled by their own reference
> count. I *know* this will work even if someone holds a reference to
> the kobject (for some reason in the future) even as the module is
> removed.
Correct me if I'm wrong, but this sounds similar to the networking
refcount problem. The reference on the containing object is the
interesting one, as far as visibility goes. As long as its positive, the
module is active.
The kobject refcount can still be used for the lifetime of the object. It
can simply be pinned the entire time the module is active. When the module
is deleted, the kobject can be unregistered somewhere around
free_module(). It looks like you might want to split that up into two
parts: "Before unregistering the kobject" that removed it from lists, etc,
and "In kobject release method", which would call module_free() (and
probably free the args then too).
This way you should retain the same semantics and be able to use the
kobject for controlling the lifetime (which will allow you to safely
export attributes through sysfs).
Make sense?
Pat
On Wed, Sep 10, 2003 at 06:07:35PM +1000, Rusty Russell wrote:
> In message <[email protected]> you write:
> > On Wed, Sep 10, 2003 at 01:31:02PM +1000, Rusty Russell wrote:
> > > Because kobject does not have a "struct module *owner", we can't
> > > simply add in the refcount.
> >
> > Um, I don't understand. There is no "struct module *owner in struct
> > kobject. There is one in struct attribute, but I don't set it, so it
> > doesn't matter for this usage.
>
> Your parser broke, I think 8)
Ok, let's try again. :)
Why are you detaching the kobject from struct module?
In my patch I accounted for the kobject's reference count in the module
reference count (just not the count exported to userspace, as to not
break the userspace tools.) So if a user has a module sysfs file open
(like the "refcount" file), the module reference count is incremented
and the module is not allowed to be unloaded until that count drops.
This removes any race condition with the kobject being in use when the
module structure is freed.
Does that make sense?
> > > The module reference count is defined to never go from zero to one
> > > when the module is dying, which means callers must use
> > > try_module_get(). I grab the reference on read/write, which means
> > > opening the file won't hold the module, either.
> >
> > read/write of what? The attribute? Sure, why not set the module
> > attribute sysfs file to the module that way the reference count will be
> > incremented if the sysfs file is opened.
>
> Hmm, because there's one attribute: which module would own it? You're
> going to creation attributes per module later (for module parameters),
> so when you do that it might make sense to do this too.
The attribute "refcount" is "owned" by the module itself. The kobject
count is incremented if the file is opened by the sysfs core, thus
preventing the module from being able to be unloaded.
The same thing will happen for module paramaters. They are owned by the
module structure as well.
It all "just works" :)
> > But in looking at your patch, I don't see why you want to separate the
> > module from the kobject? What benefit does it have?
>
> The lifetimes are separate, each controlled by their own reference
> count. I *know* this will work even if someone holds a reference to
> the kobject (for some reason in the future) even as the module is
> removed.
But my patch prevented that from ever happening. If someone grabbed the
kobject, the module could not be unloaded. That fixes all kinds of
races.
> > > Were you intending to put all the info currently in /proc/modules
> > > under sysfs? Makes sense I think. For the options you'll need a
> > > subdir to avoid name clashes.
> >
> > Yes, I was going to add it, this patch was more of a "test" to see how
> > receptive you were to it.
>
> More more! 8)
But whose patch to build on, mine, or your version? :)
thanks,
greg k-h
On Tue, Sep 09, 2003 at 03:24:21PM -0700, Greg KH wrote:
> A while ago we had talked about adding a kobject to struct module. By
> doing this we add support for module paramaters and other module info to
> be exported in sysfs. So here's a patch that does this that is against
> 2.6.0-test4 (it applies with some fuzz, sorry.)
Please excuse my short-sightedness, but I think the following points
haven't been thought deeply enough:
- modules which use their parameters on initialisation only once.
(eg, 3c59x "full_duplex" parameter)
- Also, what about module parameters which modules aren't expecting to
change beneath themselves? (eg, all the watchdog modules)
- Are we opening the floodgates for another round of races and driver
updates?
--
Russell King ([email protected]) http://www.arm.linux.org.uk/personal/
Linux kernel maintainer of:
2.6 ARM Linux - http://www.arm.linux.org.uk/
2.6 PCMCIA - http://pcmcia.arm.linux.org.uk/
2.6 Serial core
On Wed, Sep 10, 2003 at 04:45:38PM -0700, Greg KH wrote:
> To quote from include/linux/moduleparam.h:
> /* This is the fundamental function for registering boot/module
> parameters. perm sets the visibility in driverfs: 000 means it's
> not there, read bits mean it's readable, write bits mean it's
> writable. */
Any chance to make it always visible and read-only by default with the
option of making it writable?
Exposing the module options would be very helpful.
Also showing its read/write in the sysfs directory listing would be great.
(if it doesn't already do that).
Any chance the parameter defaults (if they're not hard coded...) could be
exposed even if they're not given to the module on the command line? (wish
list...)
On Wed, Sep 10, 2003 at 05:04:29PM -0700, Mike Fedyk wrote:
> On Wed, Sep 10, 2003 at 04:45:38PM -0700, Greg KH wrote:
> > To quote from include/linux/moduleparam.h:
> > /* This is the fundamental function for registering boot/module
> > parameters. perm sets the visibility in driverfs: 000 means it's
> > not there, read bits mean it's readable, write bits mean it's
> > writable. */
>
> Any chance to make it always visible and read-only by default with the
> option of making it writable?
I don't know. Doesn't matter to me.
> Exposing the module options would be very helpful.
>
> Also showing its read/write in the sysfs directory listing would be great.
> (if it doesn't already do that).
Look at the permission bits :)
> Any chance the parameter defaults (if they're not hard coded...) could be
> exposed even if they're not given to the module on the command line? (wish
> list...)
That's probably just a modinfo hack.
thanks,
greg k-h
On Thu, Sep 11, 2003 at 12:32:47AM +0100, Russell King wrote:
> On Tue, Sep 09, 2003 at 03:24:21PM -0700, Greg KH wrote:
> > A while ago we had talked about adding a kobject to struct module. By
> > doing this we add support for module paramaters and other module info to
> > be exported in sysfs. So here's a patch that does this that is against
> > 2.6.0-test4 (it applies with some fuzz, sorry.)
>
> Please excuse my short-sightedness, but I think the following points
> haven't been thought deeply enough:
>
> - modules which use their parameters on initialisation only once.
> (eg, 3c59x "full_duplex" parameter)
>
> - Also, what about module parameters which modules aren't expecting to
> change beneath themselves? (eg, all the watchdog modules)
>
> - Are we opening the floodgates for another round of races and driver
> updates?
If you set the "perm" portion of the module_param() macro to 0, then the
sysfs file will not be created. That will cause all of the old modules
that use the MODULE_PARAM() macro to not have things change for them, as
they will not be expecting it.
To quote from include/linux/moduleparam.h:
/* This is the fundamental function for registering boot/module
parameters. perm sets the visibility in driverfs: 000 means it's
not there, read bits mean it's readable, write bits mean it's
writable. */
Sound ok with you?
thanks,
greg k-h
In message <[email protected]> you write:
> kernel/module.c owns the attribute code. (The same code and attribute
> structure is reused for each object its exported for, so the owner field
> must be set to the owner of the code itself.)
Right, then the current code is correct.
> > > But in looking at your patch, I don't see why you want to separate the
> > > module from the kobject? What benefit does it have?
> >
> > The lifetimes are separate, each controlled by their own reference
> > count. I *know* this will work even if someone holds a reference to
> > the kobject (for some reason in the future) even as the module is
> > removed.
>
> Correct me if I'm wrong, but this sounds similar to the networking
> refcount problem. The reference on the containing object is the
> interesting one, as far as visibility goes. As long as its positive, the
> module is active.
There are basically two choices: ensure that the reference count is
taken using try_module_get() (kobject doesn't have an owner field, so
it does not match this one), or ensure that an object isn't ever
referenced after the module cleanup function is called.
In this context, that means that the module cleanup must pause until
the reference count of the kobject hits zero, so it can be freed.
Implementation below.
BTW, The *real* answer IMHO is (this is 2.7 stuff:)
1) Adopt a faster, smaller implementation of alloc_percpu (this patch
exists, needs some arch-dependent love for ia64).
2) Use it to generalize the current module reference count scheme to
a "bigref_t" (I have a couple of these)
3) Use that in kobjects.
4) Decide that module removal is not as important as it was, and not
all modules need be removable (at least in finite time).
5) Use the kobject reference count everywhere, including modules.
This would make everything faster, except for the case where someone
is actually waiting for a refcount to hit zero: for long-lived objects
like kobjects, this seems the right tradeoff.
Cheers!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
Name: Modules in sysfs
Author: Greg KH <[email protected]>
Status: Tested on 2.6.0-test5-bk1
D: This patch adds basic kobject support to struct module, and it creates a
D: /sys/module directory which contains all of the individual modules.
D: Each module currently exports only one file, the module refcount:
D: $ tree /sys/module/
D: /sys/module/
D: |-- ehci_hcd
D: | `-- refcount
D: |-- hid
D: | `-- refcount
D: |-- parport
D: | `-- refcount
D: |-- parport_pc
D: | `-- refcount
D: |-- uhci_hcd
D: | `-- refcount
D: `-- usbcore
D: `-- refcount
D:
D: Rusty: We ensure that the kobject embedded in the module has a shorter
D: lifetime than the module itself by waiting for its reference count
D: to reach zero before freeing the module structure.
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .28943-linux-2.6.0-test5/include/linux/module.h .28943-linux-2.6.0-test5.updated/include/linux/module.h
--- .28943-linux-2.6.0-test5/include/linux/module.h 2003-07-31 01:50:19.000000000 +1000
+++ .28943-linux-2.6.0-test5.updated/include/linux/module.h 2003-09-11 09:19:42.000000000 +1000
@@ -16,6 +16,7 @@
#include <linux/kmod.h>
#include <linux/elf.h>
#include <linux/stringify.h>
+#include <linux/kobject.h>
#include <asm/local.h>
#include <asm/module.h>
@@ -184,6 +185,8 @@ enum module_state
struct module
{
+ struct kobject kobj;
+
enum module_state state;
/* Member of list of modules */
@@ -230,6 +233,9 @@ struct module
/* Am I GPL-compatible */
int license_gplok;
+ /* Who is waiting for us to be unloaded, or kobject to be unused. */
+ struct task_struct *waiter;
+
#ifdef CONFIG_MODULE_UNLOAD
/* Reference counts */
struct module_ref ref[NR_CPUS];
@@ -237,9 +243,6 @@ struct module
/* What modules depend on me? */
struct list_head modules_which_use_me;
- /* Who is waiting for us to be unloaded */
- struct task_struct *waiter;
-
/* Destruction function. */
void (*exit)(void);
#endif
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .28943-linux-2.6.0-test5/kernel/module.c .28943-linux-2.6.0-test5.updated/kernel/module.c
--- .28943-linux-2.6.0-test5/kernel/module.c 2003-09-09 10:35:05.000000000 +1000
+++ .28943-linux-2.6.0-test5.updated/kernel/module.c 2003-09-11 09:35:51.000000000 +1000
@@ -702,6 +702,7 @@ sys_delete_module(const char __user *nam
goto out;
}
}
+
/* Stop the machine so refcounts can't move: irqs disabled. */
DEBUGP("Stopping refcounts...\n");
ret = stop_refcounts();
@@ -1071,6 +1072,96 @@ static unsigned long resolve_symbol(Elf_
return ret;
}
+/* sysfs stuff */
+struct module_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct module *mod, char *);
+ ssize_t (*store)(struct module *mod, const char *, size_t);
+};
+#define to_module_attr(n) container_of(n, struct module_attribute, attr);
+#define to_module(n) container_of(n, struct module, kobj)
+
+static ssize_t module_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct module *slot = to_module(kobj);
+ struct module_attribute *attribute = to_module_attr(attr);
+ return attribute->show ? attribute->show(slot, buf) : 0;
+}
+
+static ssize_t module_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t len)
+{
+ struct module *slot = to_module(kobj);
+ struct module_attribute *attribute = to_module_attr(attr);
+ return attribute->store ? attribute->store(slot, buf, len) : 0;
+}
+
+static struct sysfs_ops module_sysfs_ops = {
+ .show = module_attr_show,
+ .store = module_attr_store,
+};
+
+/* remove_kobject_wait is waiting for this (called when kobj->refcount
+ * hits zero) */
+static void module_release(struct kobject *kobj)
+{
+ struct module *mod = to_module(kobj);
+ wake_up_process(mod->waiter);
+}
+
+static struct kobj_type module_ktype = {
+ .sysfs_ops = &module_sysfs_ops,
+ .release = &module_release,
+};
+static decl_subsys(module, &module_ktype, NULL);
+
+static int __init module_subsys_init(void)
+{
+ return subsystem_register(&module_subsys);
+}
+core_initcall(module_subsys_init);
+
+static ssize_t show_mod_refcount(struct module *mod, char *buf)
+{
+ return sprintf(buf, "%d\n", module_refcount(mod));
+}
+
+static struct module_attribute mod_refcount = {
+ .attr = {.name = "refcount", .mode = S_IRUGO},
+ .show = show_mod_refcount,
+};
+
+/* Remove kobject and block until refcount hits zero. */
+static void remove_kobject_wait(struct module *mod)
+{
+ mod->waiter = current;
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ kobject_unregister(&mod->kobj);
+ schedule();
+}
+
+static int mod_kobject_init(struct module *mod)
+{
+ int retval;
+
+ retval = kobject_set_name(&mod->kobj, mod->name);
+ if (retval < 0)
+ return retval;
+ kobj_set_kset_s(mod, module_subsys);
+ retval = kobject_register(&mod->kobj);
+ if (retval)
+ return retval;
+ retval = sysfs_create_file(&mod->kobj, &mod_refcount.attr);
+ if (retval < 0)
+ remove_kobject_wait(mod);
+ return retval;
+}
+
+static void mod_kobject_remove(struct module *mod)
+{
+ sysfs_remove_file(&mod->kobj, &mod_refcount.attr);
+ remove_kobject_wait(mod);
+}
+
/* Free a module, remove from lists, etc (must hold module mutex). */
static void free_module(struct module *mod)
{
@@ -1079,6 +1170,8 @@ static void free_module(struct module *m
list_del(&mod->list);
spin_unlock_irq(&modlist_lock);
+ mod_kobject_remove(mod);
+
/* Arch-specific cleanup. */
module_arch_cleanup(mod);
@@ -1655,6 +1748,10 @@ static struct module *load_module(void _
if (err < 0)
goto cleanup;
+ err = mod_kobject_init(mod);
+ if (err < 0)
+ goto cleanup;
+
/* Get rid of temporary copy */
vfree(hdr);
In message <[email protected]> you write:
> On Wed, Sep 10, 2003 at 04:45:38PM -0700, Greg KH wrote:
> > To quote from include/linux/moduleparam.h:
> > /* This is the fundamental function for registering boot/module
> > parameters. perm sets the visibility in driverfs: 000 means it's
> > not there, read bits mean it's readable, write bits mean it's
> > writable. */
>
> Any chance to make it always visible and read-only by default with the
> option of making it writable?
Nope. The author specifies exactly what they want, no default. It's
just safer this way: see RMK's concerns about what would happen if we
did it to unsuspecting module authors...
See include/linux/moduleparam.h, especially the module_param() macro.
> Any chance the parameter defaults (if they're not hard coded...) could be
> exposed even if they're not given to the module on the command line? (wish
> list...)
They should be there. It would be nice to have some way of telling
which ones were modified, so that a userspace util could save just
those ones on shutdown, for example. I can't think of an obvious way
of doing this though (mtime > epoch maybe?).
Cheers,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
In message <[email protected]> you write:
> On Tue, Sep 09, 2003 at 03:24:21PM -0700, Greg KH wrote:
> > A while ago we had talked about adding a kobject to struct module. By
> > doing this we add support for module paramaters and other module info to
> > be exported in sysfs. So here's a patch that does this that is against
> > 2.6.0-test4 (it applies with some fuzz, sorry.)
>
> Please excuse my short-sightedness, but I think the following points
> haven't been thought deeply enough:
Well, obviously not publically enough, at least 8(.
To clarify: this is for new-style module params, which explicitly
control their own visibility with the perm arg.
One more reason for module authors to switch 8)
Hope that helps,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
In message <[email protected]> you write:
> On Wed, Sep 10, 2003 at 06:07:35PM +1000, Rusty Russell wrote:
> > In message <[email protected]> you write:
> Why are you detaching the kobject from struct module?
> In my patch I accounted for the kobject's reference count in the module
> reference count (just not the count exported to userspace, as to not
> break the userspace tools.) So if a user has a module sysfs file open
> (like the "refcount" file), the module reference count is incremented
> and the module is not allowed to be unloaded until that count drops.
> This removes any race condition with the kobject being in use when the
> module structure is freed.
Sorry, my bad. This is related to another bug: you unregistered the
kobject before checking the reference count. This is bad, because the
remove can fail, and you don't re-register the kobject. Even if we
re-register, now there's a spurious failure as the kobject vanishes
for a moment and reappears. And let's not think about what happens if
trying to re-register the kobject fails 8(
So I think we really do want to unregister the kobject as part of the
cleanup, which makes things a little more complicated: my immediately
previous patch which should do what we want. If it's still not clear,
then I'm obviously doing a really crappy job of explaining...
Thanks,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
On Thu, Sep 11, 2003 at 11:13:25AM +1000, Rusty Russell wrote:
> > > > But in looking at your patch, I don't see why you want to separate the
> > > > module from the kobject? What benefit does it have?
> > >
> > > The lifetimes are separate, each controlled by their own reference
> > > count. I *know* this will work even if someone holds a reference to
> > > the kobject (for some reason in the future) even as the module is
> > > removed.
> >
> > Correct me if I'm wrong, but this sounds similar to the networking
> > refcount problem. The reference on the containing object is the
> > interesting one, as far as visibility goes. As long as its positive, the
> > module is active.
>
> There are basically two choices: ensure that the reference count is
> taken using try_module_get() (kobject doesn't have an owner field, so
> it does not match this one), or ensure that an object isn't ever
> referenced after the module cleanup function is called.
>
> In this context, that means that the module cleanup must pause until
> the reference count of the kobject hits zero, so it can be freed.
>
> Implementation below.
Ah, nice catch on that bug. I like this implementation.
On a site note, can't you just use a "struct completion" to use for your
waiting? Or do you need to do something special here?
> BTW, The *real* answer IMHO is (this is 2.7 stuff:)
>
> 1) Adopt a faster, smaller implementation of alloc_percpu (this patch
> exists, needs some arch-dependent love for ia64).
> 2) Use it to generalize the current module reference count scheme to
> a "bigref_t" (I have a couple of these)
> 3) Use that in kobjects.
Hm, I don't know if kobjects really need to get that heavy.
> 4) Decide that module removal is not as important as it was, and not
> all modules need be removable (at least in finite time).
> 5) Use the kobject reference count everywhere, including modules.
>
> This would make everything faster, except for the case where someone
> is actually waiting for a refcount to hit zero: for long-lived objects
> like kobjects, this seems the right tradeoff.
As more people use kobjects, I think we'll see some pretty short
lifespans...
But yes, that's all 2.7 dreams :)
thanks,
greg k-h
In message <[email protected]> you write:
> On a site note, can't you just use a "struct completion" to use for your
> waiting? Or do you need to do something special here?
Hmm, *good* question. Think...
Ah, it's because when someone's waiting for the reference count to hit
zero, we wake them *every* time we decrement. With the reference
count spread across every cpu, it's the only way:
static inline void module_put(struct module *module)
{
if (module) {
unsigned int cpu = get_cpu();
local_dec(&module->ref[cpu].count);
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
put_cpu();
}
}
This doesn't really fit with a completion, unfortunately.
> > 1) Adopt a faster, smaller implementation of alloc_percpu (this patch
> > exists, needs some arch-dependent love for ia64).
> > 2) Use it to generalize the current module reference count scheme to
> > a "bigref_t" (I have a couple of these)
> > 3) Use that in kobjects.
>
> Hm, I don't know if kobjects really need to get that heavy.
I'm not sure either: really depends on kobject usage. I was thinking
struct netdevice. The size for UP is the same, the size for SMP is
ptr + sizeof(int) + sizeof(atomic_t)*NR_CPUs.
> But yes, that's all 2.7 dreams :)
Cheers,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
On Thu, Sep 11, 2003 at 06:18:12PM +1000, Rusty Russell wrote:
> In message <[email protected]> you write:
> > On a site note, can't you just use a "struct completion" to use for your
> > waiting? Or do you need to do something special here?
>
> Hmm, *good* question. Think...
>
> Ah, it's because when someone's waiting for the reference count to hit
> zero, we wake them *every* time we decrement. With the reference
> count spread across every cpu, it's the only way:
>
> static inline void module_put(struct module *module)
> {
> if (module) {
> unsigned int cpu = get_cpu();
> local_dec(&module->ref[cpu].count);
> /* Maybe they're waiting for us to drop reference? */
> if (unlikely(!module_is_live(module)))
> wake_up_process(module->waiter);
> put_cpu();
> }
> }
>
> This doesn't really fit with a completion, unfortunately.
Ah, thanks for the explaination. Makes sense.
greg k-h
On Wed, 2004-02-25 at 10:29, Greg KH wrote:
> So, here's a patch on top of your patch (full patch against 2.6.3 is
> below), that simply makes the module reference file be owned by the
> module that it is assigned to. Yes, this means that when you actually
> read the file value, the reference is 1 greater than when it is closed,
> but that's the breaks. It does solve the "grab the file and then try to
> unload the module" problem, as it simply prevents this from happening.
>
> Comments?
I've rewritten your rewrite again 8)
This patch actually implements module_param in sysfs for modules. You
can see my tests in crypto_null().
What's missing is inbuilt modules. I think /sys/modules/ne2k/debug
should exist whether ne2k is built-in or modular, for example. Since
the only thing we have is a kparam with "ne2k.debug" as its name, there
will have to be some kludgery here, but we can end up with something OK.
Cheers,
Rusty.
--
Anyone who quotes me in their signature is an idiot -- Rusty Russell