2003-01-31 00:11:25

by Rusty Russell

[permalink] [raw]
Subject: [PATCH] Module alias and device table support.

This patch adds MODULE_ALIAS("foo") capability, and uses it to
automatically generate sensible aliases from device tables. The
post-processing is a little rough, but works.

Name: Module alias and device table support
Author: Rusty Russell
Status: Tested on 2.5.59

D: Introduces "MODULE_ALIAS" which modules can use to embed their own
D: aliases for modprobe to use. Also adds a "finishing" step to modules to
D: supplement their aliases based on MODULE_TABLE declarations, eg.
D: 'usb:v0506p4601dl*dh*dc*dsc*dp*ic*isc*ip*' for drivers/usb/net/pegasus.o

diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/include/linux/isapnp.h working-2.5.59-alias/include/linux/isapnp.h
--- linux-2.5.59/include/linux/isapnp.h 2003-01-14 10:13:08.000000000 +1100
+++ working-2.5.59-alias/include/linux/isapnp.h 2003-01-30 16:13:12.000000000 +1100
@@ -69,8 +69,9 @@

/* export used IDs outside module */
#define ISAPNP_CARD_TABLE(name) \
- MODULE_GENERIC_TABLE(isapnp_card, name)
+ MODULE_TABLE(isapnp_card, name)

+/* If you change this, you must update scripts/table2alias.c. */
struct isapnp_card_id {
unsigned long driver_data; /* data private to the driver */
unsigned short card_vendor, card_device;
@@ -85,6 +86,7 @@ struct isapnp_card_id {
#define ISAPNP_DEVICE_SINGLE_END \
.card_vendor = 0, .card_device = 0

+/* If you change this, you must update scripts/table2alias.c. */
struct isapnp_device_id {
unsigned short card_vendor, card_device;
unsigned short vendor, function;
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/include/linux/module.h working-2.5.59-alias/include/linux/module.h
--- linux-2.5.59/include/linux/module.h 2003-01-17 17:01:18.000000000 +1100
+++ working-2.5.59-alias/include/linux/module.h 2003-01-30 17:56:37.000000000 +1100
@@ -50,13 +50,14 @@ search_extable(const struct exception_ta
unsigned long value);

#ifdef MODULE
+#define ___module_cat(a,b) a ## b
+#define __module_cat(a,b) ___module_cat(a,b)
+/* For userspace: you can also call me... */
+#define MODULE_ALIAS(alias) \
+ static const char __module_cat(__alias_,__LINE__)[] \
+ __attribute__((section(".modalias"),unused)) = alias

-/* For replacement modutils, use an alias not a pointer. */
#define MODULE_GENERIC_TABLE(gtype,name) \
-static const unsigned long __module_##gtype##_size \
- __attribute__ ((unused)) = sizeof(struct gtype##_id); \
-static const struct gtype##_id * __module_##gtype##_table \
- __attribute__ ((unused)) = name; \
extern const struct gtype##_id __mod_##gtype##_table \
__attribute__ ((unused, alias(__stringify(name))))

@@ -96,6 +97,7 @@ extern const struct gtype##_id __mod_##g

#else /* !MODULE */

+#define MODULE_ALIAS(alias)
#define MODULE_GENERIC_TABLE(gtype,name)
#define THIS_MODULE ((struct module *)0)
#define MOD_INC_USE_COUNT do { } while (0)
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/include/linux/pci.h working-2.5.59-alias/include/linux/pci.h
--- linux-2.5.59/include/linux/pci.h 2003-01-02 14:48:00.000000000 +1100
+++ working-2.5.59-alias/include/linux/pci.h 2003-01-30 16:13:12.000000000 +1100
@@ -491,6 +491,7 @@ struct pbus_set_ranges_data
unsigned long prefetch_start, prefetch_end;
};

+/* If you change this, you must update scripts/table2alias.c. */
struct pci_device_id {
unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */
unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/include/linux/usb.h working-2.5.59-alias/include/linux/usb.h
--- linux-2.5.59/include/linux/usb.h 2003-01-17 17:01:18.000000000 +1100
+++ working-2.5.59-alias/include/linux/usb.h 2003-01-30 16:13:12.000000000 +1100
@@ -371,6 +371,7 @@ static inline int usb_make_path (struct
* matches towards the beginning of your table, so that driver_info can
* record quirks of specific products.
*/
+/* If you change this, you must update scripts/table2alias.c. */
struct usb_device_id {
/* which fields to match against? */
__u16 match_flags;
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/scripts/Makefile working-2.5.59-alias/scripts/Makefile
--- linux-2.5.59/scripts/Makefile 2003-01-02 12:45:31.000000000 +1100
+++ working-2.5.59-alias/scripts/Makefile 2003-01-30 16:13:12.000000000 +1100
@@ -8,7 +8,7 @@
# docproc: Preprocess .tmpl file in order to generate .sgml documentation
# conmakehash: Create arrays for initializing the kernel console tables

-host-progs := fixdep split-include conmakehash docproc kallsyms
+host-progs := fixdep split-include conmakehash docproc kallsyms table2alias
build-targets := $(host-progs)

# Let clean descend into subdirs
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/scripts/Makefile.build working-2.5.59-alias/scripts/Makefile.build
--- linux-2.5.59/scripts/Makefile.build 2003-01-17 17:01:18.000000000 +1100
+++ working-2.5.59-alias/scripts/Makefile.build 2003-01-30 17:51:31.000000000 +1100
@@ -175,11 +175,16 @@ endif
quiet_cmd_link_multi-y = LD $@
cmd_link_multi-y = $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) -r -o $@ $(filter $(addprefix $(obj)/,$($(subst $(obj)/,,$(@:.o=-objs))) $($(subst $(obj)/,,$(@:.o=-y)))),$^)

+ifdef CONFIG_HOTPLUG
+module_link_hotplug-multi = `$(CONFIG_SHELL) scripts/extract_aliases [email protected] "$(modname_flags)" $(filter $(addprefix $(obj)/,$($(subst $(obj)/,,$(@:.ko=-objs))) $($(subst $(obj)/,,$(@:.ko=-y)))),$^)`
+module_link_hotplug-single = `$(CONFIG_SHELL) scripts/extract_aliases [email protected] "$(modname_flags)" $<`
+endif
+
quiet_cmd_link_multi-m = LD [M] $@
-cmd_link_multi-m = $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) $(LDFLAGS_MODULE) -o $@ $(filter $(addprefix $(obj)/,$($(subst $(obj)/,,$(@:.ko=-objs))) $($(subst $(obj)/,,$(@:.ko=-y)))),$^) init/vermagic.o
+cmd_link_multi-m = $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) $(LDFLAGS_MODULE) -o $@ $(filter $(addprefix $(obj)/,$($(subst $(obj)/,,$(@:.ko=-objs))) $($(subst $(obj)/,,$(@:.ko=-y)))),$^) $(module_link_hotplug-multi) init/vermagic.o

quiet_cmd_link_single-m = LD [M] $@
-cmd_link_single-m = $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) $(LDFLAGS_MODULE) -o $@ $< init/vermagic.o
+cmd_link_single-m = $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) $(LDFLAGS_MODULE) -o $@ $< init/vermagic.o $(module_link_hotplug-single)

# Don't rebuilt vermagic.o unless we actually are in the init/ dir
ifneq ($(obj),init)
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/scripts/extract_aliases working-2.5.59-alias/scripts/extract_aliases
--- linux-2.5.59/scripts/extract_aliases 1970-01-01 10:00:00.000000000 +1000
+++ working-2.5.59-alias/scripts/extract_aliases 2003-01-30 18:06:59.000000000 +1100
@@ -0,0 +1,21 @@
+#! /bin/sh
+
+# Look for module tables, and if found, put them in the object file
+# and print its name.
+set -e
+
+OUTPUT="$1"
+MODNAME_FLAGS="$2"
+shift 2
+
+$NM --no-sort --print-size --radix=d --print-file-name "$@" |
+ grep '__mod_[a-z_]*_table' |
+ while IFS=": " read FILE OFFSET SIZE TYPE NAME; do
+ scripts/table2alias $NAME $FILE $OFFSET $SIZE
+ done > $OUTPUT.c
+
+if [ -s $OUTPUT.c ]; then
+ $CC $CFLAGS $NOSTDINC_FLAGS $EXTRA_CFLAGS $MODNAME_FLAGS -DMODULE -include include/linux/module.h -c -o $OUTPUT.o $OUTPUT.c
+ echo $OUTPUT.o
+fi
+rm $OUTPUT.c
diff -urNp --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.59/scripts/table2alias.c working-2.5.59-alias/scripts/table2alias.c
--- linux-2.5.59/scripts/table2alias.c 1970-01-01 10:00:00.000000000 +1000
+++ working-2.5.59-alias/scripts/table2alias.c 2003-01-30 17:49:45.000000000 +1100
@@ -0,0 +1,244 @@
+/* Simple code to turn various tables into module aliases.
+ This deals with kernel datastructures where they should be
+ dealt with: in the kernel source.
+ (C) 2002 Rusty Russell IBM Corporation.
+*/
+#include <stdint.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* We need __LITTLE_ENDIAN/__BIG_ENDIAN and BITS_PER_LONG */
+#define __KERNEL__
+#include "../include/asm/byteorder.h"
+#include "../include/asm/types.h"
+
+#if BITS_PER_LONG == 32
+typedef uint32_t kernel_long_t;
+#elif BITS_PER_LONG == 64
+typedef uint64_t kernel_long_t;
+#else
+#error Unknown BITS_PER_LONG
+#endif
+
+/* If we're cross-compiling, we could have any wierd endian
+ combination. Keep it simple. */
+static kernel_long_t __to_native(unsigned char *ptr, unsigned int size)
+{
+ unsigned int i;
+ kernel_long_t ret = 0;
+
+#ifdef __LITTLE_ENDIAN
+ for (i = 0; i < size; i++)
+ ret += ((unsigned long long)ptr[size - 1 - i]) << (i * 8);
+#elif defined(__BIG_ENDIAN)
+ for (i = 0; i < size; i++)
+ ret += ((unsigned long long)ptr[i]) << (i * 8);
+#else
+#error Must be big or little endian.
+#endif
+ return ret;
+}
+
+#define TO_NATIVE(x) (x) = __to_native((void *)&(x), sizeof(x))
+
+#define USB_DEVICE_ID_MATCH_VENDOR 0x0001
+#define USB_DEVICE_ID_MATCH_PRODUCT 0x0002
+#define USB_DEVICE_ID_MATCH_DEV_LO 0x0004
+#define USB_DEVICE_ID_MATCH_DEV_HI 0x0008
+#define USB_DEVICE_ID_MATCH_DEV_CLASS 0x0010
+#define USB_DEVICE_ID_MATCH_DEV_SUBCLASS 0x0020
+#define USB_DEVICE_ID_MATCH_DEV_PROTOCOL 0x0040
+#define USB_DEVICE_ID_MATCH_INT_CLASS 0x0080
+#define USB_DEVICE_ID_MATCH_INT_SUBCLASS 0x0100
+#define USB_DEVICE_ID_MATCH_INT_PROTOCOL 0x0200
+
+struct usb_device_id {
+ /* which fields to match against? */
+ uint16_t match_flags;
+
+ /* Used for product specific matches; range is inclusive */
+ uint16_t idVendor;
+ uint16_t idProduct;
+ uint16_t bcdDevice_lo;
+ uint16_t bcdDevice_hi;
+
+ /* Used for device class matches */
+ uint8_t bDeviceClass;
+ uint8_t bDeviceSubClass;
+ uint8_t bDeviceProtocol;
+
+ /* Used for interface class matches */
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+
+ /* not matched against */
+ kernel_long_t driver_info;
+};
+
+#define ADD(str, sep, cond, field) \
+do { \
+ strcat(str, sep); \
+ if (cond) \
+ sprintf(str + strlen(str), \
+ sizeof(field) == 1 ? "%02X" : \
+ sizeof(field) == 2 ? "%04X" : \
+ sizeof(field) == 4 ? "%08X" : "", \
+ field); \
+ else \
+ sprintf(str + strlen(str), "*"); \
+} while(0)
+
+/* Looks like "usb:vNpNdlNdhNdcNdscNdpNicNiscNipN" */
+static void do_usb_table(struct usb_device_id *ids, unsigned int size,
+ const char *filename)
+{
+ unsigned int i;
+ char alias[200];
+
+ /* Should be exact multiple. */
+ if (size % sizeof(ids[0]))
+ fprintf(stderr, "WARNING: %s USB ids size has %u left\n",
+ filename, size % sizeof(ids[0]));
+ for (i = 0; i < size / sizeof(ids[0]); i++) {
+ TO_NATIVE(ids[i].match_flags);
+ TO_NATIVE(ids[i].idVendor);
+ TO_NATIVE(ids[i].idProduct);
+ TO_NATIVE(ids[i].bcdDevice_lo);
+ TO_NATIVE(ids[i].bcdDevice_hi);
+
+ strcpy(alias, "usb:");
+ ADD(alias, "v", ids[i].match_flags&USB_DEVICE_ID_MATCH_VENDOR,
+ ids[i].idVendor);
+ ADD(alias, "p", ids[i].match_flags&USB_DEVICE_ID_MATCH_PRODUCT,
+ ids[i].idProduct);
+ ADD(alias, "dl", ids[i].match_flags&USB_DEVICE_ID_MATCH_DEV_LO,
+ ids[i].bcdDevice_lo);
+ ADD(alias, "dh", ids[i].match_flags&USB_DEVICE_ID_MATCH_DEV_HI,
+ ids[i].bcdDevice_hi);
+ ADD(alias, "dc", ids[i].match_flags&USB_DEVICE_ID_MATCH_DEV_CLASS,
+ ids[i].bDeviceClass);
+ ADD(alias, "dsc",
+ ids[i].match_flags&USB_DEVICE_ID_MATCH_DEV_SUBCLASS,
+ ids[i].bDeviceSubClass);
+ ADD(alias, "dp",
+ ids[i].match_flags&USB_DEVICE_ID_MATCH_DEV_PROTOCOL,
+ ids[i].bDeviceProtocol);
+ ADD(alias, "ic",
+ ids[i].match_flags&USB_DEVICE_ID_MATCH_INT_CLASS,
+ ids[i].bInterfaceClass);
+ ADD(alias, "isc",
+ ids[i].match_flags&USB_DEVICE_ID_MATCH_INT_SUBCLASS,
+ ids[i].bInterfaceSubClass);
+ ADD(alias, "ip",
+ ids[i].match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL,
+ ids[i].bInterfaceProtocol);
+ /* Always end in a wildcard, for future extension */
+ if (alias[strlen(alias)-1] != '*')
+ strcat(alias, "*");
+ printf("MODULE_ALIAS(\"%s\");\n", alias);
+ }
+}
+
+#define PCI_ANY_ID (~0)
+
+struct pci_device_id {
+ unsigned int vendor, device; /* Vendor and device ID or PCI_ANY_ID */
+ unsigned int subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
+ unsigned int class, class_mask; /* (class,subclass,prog-if) triplet */
+ kernel_long_t driver_data; /* Data private to the driver */
+};
+
+/* Looks like: pci:vNdNsvNsdNcN. */
+static void do_pci_table(struct pci_device_id *ids, unsigned int size,
+ const char *filename)
+{
+ unsigned int i;
+ char alias[200];
+
+ /* Should be exact multiple. */
+ if (size % sizeof(ids[0]))
+ fprintf(stderr, "WARNING: %s PCI ids size has %u left\n",
+ filename, size % sizeof(ids[0]));
+ for (i = 0; i < size / sizeof(ids[0]); i++) {
+ TO_NATIVE(ids[i].vendor);
+ TO_NATIVE(ids[i].device);
+ TO_NATIVE(ids[i].subvendor);
+ TO_NATIVE(ids[i].subdevice);
+ TO_NATIVE(ids[i].class);
+ TO_NATIVE(ids[i].class_mask);
+
+ strcpy(alias, "pci:");
+ ADD(alias, "v", ids[i].vendor != PCI_ANY_ID, ids[i].vendor);
+ ADD(alias, "d", ids[i].device != PCI_ANY_ID, ids[i].device);
+ ADD(alias, "sv", ids[i].subvendor != PCI_ANY_ID, ids[i].subvendor);
+ ADD(alias, "sd", ids[i].subdevice != PCI_ANY_ID, ids[i].subdevice);
+ if (ids[i].class_mask != 0 && ids[i].class_mask != ~0) {
+ fprintf(stderr,
+ "Can't handle strange class_mask in %s:%04X\n",
+ filename, ids[i].class_mask);
+ exit(1);
+ }
+ ADD(alias, "c", ids[i].class != PCI_ANY_ID, ids[i].subvendor);
+ /* Always end in a wildcard, for future extension */
+ if (alias[strlen(alias)-1] != '*')
+ strcat(alias, "*");
+ printf("MODULE_ALIAS(\"%s\");\n", alias);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, fd;
+ unsigned int size, offset;
+ void *file;
+
+ if (argc != 5) {
+ fprintf(stderr,
+ "Usage: table2alias <type> <file> <offset> <size>\n"
+ " Where type is __mod_{pci, usb}_device_table.\n");
+ exit(1);
+ }
+
+ /* Suck it in. */
+ offset = atoi(argv[3]);
+ size = atoi(argv[4]);
+ file = malloc(size);
+
+ fd = open(argv[2], O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "opening %s: %s\n", argv[2], strerror(errno));
+ exit(1);
+ }
+ if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
+ fprintf(stderr, "seeking to %u in %s: %s\n",
+ offset, argv[2], strerror(errno));
+ exit(1);
+ }
+
+ offset = 0;
+ while (offset < size) {
+ ret = read(fd, file+offset, size-offset);
+ if (ret < 0) {
+ fprintf(stderr, "reading from %s: %s\n",
+ argv[2], strerror(errno));
+ exit(1);
+ }
+ offset += ret;
+ }
+
+ if (strcmp(argv[1], "__mod_usb_device_table") == 0)
+ do_usb_table(file, size, argv[2]);
+ else if (strcmp(argv[1], "__mod_pci_device_table") == 0)
+ do_pci_table(file, size, argv[2]);
+ else {
+ fprintf(stderr, "table2alias: unknown type %s\n", argv[1]);
+ exit(1);
+ }
+ exit(0);
+}

--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.


2003-01-31 06:14:42

by Kai Germaschewski

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Fri, 31 Jan 2003, Rusty Russell wrote:

> This patch adds MODULE_ALIAS("foo") capability, and uses it to
> automatically generate sensible aliases from device tables. The
> post-processing is a little rough, but works.
>
> Name: Module alias and device table support
> Author: Rusty Russell
> Status: Tested on 2.5.59
>
> D: Introduces "MODULE_ALIAS" which modules can use to embed their own
> D: aliases for modprobe to use. Also adds a "finishing" step to modules to
> D: supplement their aliases based on MODULE_TABLE declarations, eg.
> D: 'usb:v0506p4601dl*dh*dc*dsc*dp*ic*isc*ip*' for drivers/usb/net/pegasus.o

Some comments:
o First of all, we're basically moving depmod functionality into the
kernel tree, which I regard as a good thing, since we have to deal
with actual kernel structures here. (The obvious disadvantage is that
this makes it much easier to change these kernel structures, which
breaks compatibility with other (user space) tools who expect a certain
format)
I was wondering if it's not somewhat kludgy to add info into an ELF
section into a module, just to have it extracted again by modutils
shortly afterwards - the alternative would be to have the kernel
generate modules.*map directly (or rather modules.alias now).
However, I guess I'm convinced now that this is per-module information
and should be kept as such. To avoid all kinds of trouble with separate
files (module.ko and module.alias), it's probably really best to store
it into an elf section directly.
o My nm (RH 7.2 or .3, GNU nm 2.11.90.0.8) doesn't support --print-size.
That'll probably affect many users.
o What about collecting the struct xxx_device_id definitions into some
header which could be included from the userspace code extracting
the info instead of duplicating it. Still not quite fool-proof, but
better than duplicating the info.
o I think it'd be a good time to consider naming these sections e.g.
"__discard.modalias", the license one "__discard.license" and have
the kernel module loader discard "__discard*", so that it doesn't
need to be aware of all that special crap, nor waste space for it.
(Well, it needs to know about the license, anyway, so that's not such
a good example).
o I'm not totally happy with the integration into the build system yet,
but it'll clash with the module versioning changes anyway ;)

The modversions patch introduces a postprocessing stage for modules, which
currently will only be invoked with CONFIG_MODVERSIONS set. However, I'm
considering to make that pass mandatory either way. It basically obtains
the list of all modules from the earlier stage, so it doesn't recurse and
can thus be very fast. I'm currently coding the actual versioning process
in C, since the shell / sed / grep based solution's performance isn't
exactly great. In doing that, I already notice unresolved symbols and warn
about them, which I think is an improvement to the build process, missing
EXPORT_SYMBOL()s tend to go unnoticed quite often otherwise.

Doing this postprocessing unconditionally would allow to generate the
alias tables at this point as well.

And while we're at it, we could add another section which specifies which
other modules this module depends on (a.k.a which symbols it uses), making
depmod kinda obsolete.

--Kai

2003-01-31 09:36:03

by Horst von Brand

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Kai Germaschewski <[email protected]> said:
> On Fri, 31 Jan 2003, Rusty Russell wrote:
>
> > This patch adds MODULE_ALIAS("foo") capability, and uses it to
> > automatically generate sensible aliases from device tables. The
> > post-processing is a little rough, but works.

I fail to see why a module would have to declare aliases for itself.
Aliases are an userspace/after boot problem (i.e., which one is eth0?,
etc), so this means having _two_ (three?) ways of getting the same kind of
info (in-module/in-kernel, /etc/module.somethingortheother). Not nice.

> > Name: Module alias and device table support
> > Author: Rusty Russell
> > Status: Tested on 2.5.59
> >
> > D: Introduces "MODULE_ALIAS" which modules can use to embed their own
> > D: aliases for modprobe to use. Also adds a "finishing" step to modules to
> > D: supplement their aliases based on MODULE_TABLE declarations, eg.
> > D: 'usb:v0506p4601dl*dh*dc*dsc*dp*ic*isc*ip*' for drivers/usb/net/pegasus.o
>
> Some comments:
> o First of all, we're basically moving depmod functionality into the
> kernel tree, which I regard as a good thing, since we have to deal
> with actual kernel structures here. (The obvious disadvantage is that
> this makes it much easier to change these kernel structures, which
> breaks compatibility with other (user space) tools who expect a certain
> format)

It doesn't "move", it "replicates into". Not nice at all.

[...]

> o I think it'd be a good time to consider naming these sections e.g.
> "__discard.modalias", the license one "__discard.license" and have
> the kernel module loader discard "__discard*", so that it doesn't
> need to be aware of all that special crap, nor waste space for it.
> (Well, it needs to know about the license, anyway, so that's not such
> a good example).

Good idea.
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2003-01-31 15:49:45

by Ingo Oeser

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Fri, Jan 31, 2003 at 10:41:36AM +0100, Horst von Brand wrote:
> I fail to see why a module would have to declare aliases for itself.
> Aliases are an userspace/after boot problem (i.e., which one is eth0?,
> etc) Please read the FAQ at http://www.tux.org/lkml/

I second this. A module can declare, what it provides (e.g. ethX,
scsi-host-adapter), but what is loaded for each actual device
should be decided be be user space (/sbin/hotplug?).

Identification, enumeration and classification is fine in the
kernel, but assigning actual devices to each driver
(e.g. host-adapter-A to a request_module("scsi-host-adapter"))
should be done by user space, where important.

Regards

Ingo Oeser
--
Science is what we can tell a computer. Art is everything else. --- D.E.Knuth

2003-01-31 22:25:29

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Hi,

On Fri, 31 Jan 2003, Kai Germaschewski wrote:

> exactly great. In doing that, I already notice unresolved symbols and warn
> about them, which I think is an improvement to the build process, missing
> EXPORT_SYMBOL()s tend to go unnoticed quite often otherwise.

The problem here is that we use System.map, it's not that difficult to
extract the exported symbols:
objcopy -j .kstrtab -O binary vmlinux .export.tmp
tr \\0 \\n < .export.tmp > Export.map

> Doing this postprocessing unconditionally would allow to generate the
> alias tables at this point as well.
>
> And while we're at it, we could add another section which specifies which
> other modules this module depends on (a.k.a which symbols it uses), making
> depmod kinda obsolete.

It makes sense to keep depmod close to the linker, as both need the same
knowledge about resolving symbols, but I still don't know why that would
be a reason to put it into the kernel.
It doesn't really matter if that information is generated during build or
at install, it just has to be at /lib/module/`uname -r` in a way modprobe
understands. BTW for my taste modprobe has too much knowledge about the
module layout, which actually belongs to the linker.

I finally looked a bit closer at the module alias. The possibility of
wildcards is certainly interesting, but besides of this it looks to me as
if we exchange one crutch with another. The alias string is too static
and cryptic. Adding information to it requires changes at too many places
(let alone adding information dynamically).
What I'd really like to see is a really generic but still simple system to
match devices and drivers, e.g. describing properties like this:

bus=usb
vendor=0x1234
product=0x4321
device=1-3,5

Forcing the matching onto modprobe doesn't look like a good idea to me, as
IMO it takes too much away from hotplug. The alias string is not usable
for hotplug, but above properties can be used to trigger other operations
beside module loading.

bye, Roman

2003-02-01 00:38:44

by Kai Germaschewski

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Fri, 31 Jan 2003, Roman Zippel wrote:

> On Fri, 31 Jan 2003, Kai Germaschewski wrote:
>
> > exactly great. In doing that, I already notice unresolved symbols and warn
> > about them, which I think is an improvement to the build process, missing
> > EXPORT_SYMBOL()s tend to go unnoticed quite often otherwise.
>
> The problem here is that we use System.map, it's not that difficult to
> extract the exported symbols:
> objcopy -j .kstrtab -O binary vmlinux .export.tmp
> tr \\0 \\n < .export.tmp > Export.map

What you say is right (except that it misses symbols exported from
modules), but I don't see what you mean the problem is?

> It makes sense to keep depmod close to the linker, as both need the same
> knowledge about resolving symbols, but I still don't know why that would
> be a reason to put it into the kernel.

Well, I hope you mean into the kernel tree, it sure doesn't make sense to
put it into the kernel itself.

Anyway, I think rusty's approach is to deal with the kernel-internal data
structures from inside the kernel tree (during the build, that is) and
generate data in a fixed format (.modalias) for depmod to read. Since
depmod is external, it needs a fixed interface. Makes sense to me.

--Kai


2003-02-01 01:13:55

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Hi,

On Fri, 31 Jan 2003, Kai Germaschewski wrote:

> > > missing
> > > EXPORT_SYMBOL()s tend to go unnoticed quite often otherwise.
> >
> > The problem here is that we use System.map, it's not that difficult to
> > extract the exported symbols:
> > objcopy -j .kstrtab -O binary vmlinux .export.tmp
> > tr \\0 \\n < .export.tmp > Export.map
>
> What you say is right (except that it misses symbols exported from
> modules), but I don't see what you mean the problem is?

See above, maybe I quoted to much. The other exported symbols are
already extracted by depmod, so it had exactly the information it needs
and would give more correct warnings.

> > It makes sense to keep depmod close to the linker, as both need the same
> > knowledge about resolving symbols, but I still don't know why that would
> > be a reason to put it into the kernel.
>
> Well, I hope you mean into the kernel tree, it sure doesn't make sense to
> put it into the kernel itself.
>
> Anyway, I think rusty's approach is to deal with the kernel-internal data
> structures from inside the kernel tree (during the build, that is) and
> generate data in a fixed format (.modalias) for depmod to read. Since
> depmod is external, it needs a fixed interface. Makes sense to me.

You have to define a fixed format somewhere anyway, either you have to do
it for depmod or for modprobe. This only moves the problem around and if
we already break interfaces, we should look at all the possibilities.
What I'm really missing is an analysis of the problem(s) and a description
of how the solution solves it. After reading most of the patches I think I
understand what Rusty is trying to do, but I still think there are better
solutions, unfortunately Rusty doesn't talk with me anymore :(, if anyone
else knows what I'm doing wrong, I'd be really happy to know about it.

bye, Roman

2003-02-01 02:49:41

by Kai Germaschewski

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Sat, 1 Feb 2003, Roman Zippel wrote:

> > > > missing
> > > > EXPORT_SYMBOL()s tend to go unnoticed quite often otherwise.
> > >
> > > The problem here is that we use System.map, it's not that difficult to
> > > extract the exported symbols:
> > > objcopy -j .kstrtab -O binary vmlinux .export.tmp
> > > tr \\0 \\n < .export.tmp > Export.map
> >
> > What you say is right (except that it misses symbols exported from
> > modules), but I don't see what you mean the problem is?
>
> See above, maybe I quoted to much. The other exported symbols are
> already extracted by depmod, so it had exactly the information it needs
> and would give more correct warnings.

The exported symbols can be extracted just as easily from System.map as
from vmlinux, so I think I still don't understand your point. (And chances
are higher that System.map is in /boot than an uncompressed vmlinux).

depmod does give correct warnings, but only at modules install time, not
at modules build time, that's what I was trying to say.

--Kai


2003-02-01 07:12:04

by Kai Germaschewski

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Sat, 1 Feb 2003, Rusty Russell wrote:

> > o First of all, we're basically moving depmod functionality into the
> > kernel tree, which I regard as a good thing, since we have to deal
> > with actual kernel structures here. (The obvious disadvantage is that
> > this makes it much easier to change these kernel structures, which
> > breaks compatibility with other (user space) tools who expect a certain
> > format)
>
> Yes, but people already expect to run depmod at boot, and I haven't
> made depmod safe for cross compiling. It could be done, but is it
> worth it? I don't know.

Well, I don't necessarily mean to kill all of depmod, I guess it still
makes sense to have userspace code extract and contract dependency and
alias information, so that modprobe doesn't need to open all modules just
to find the one which is requested.

It's not mandatory to use depmod though, it's e.g. possible someone comes
up with code which composes an initramfs on the fly, that might be
perfectly happy with extracting the information directly from the modules.

> BTW, the reason for using the alias mechanism is that aliases are
> useful in themselves: consider you write a "new_foo" driver, you can
> do "MODULE_ALIAS("foo")" and so no userspace changes are neccessary.
> module-init-tools 0.9.8 already supported this.

Yup, that's nice.

> > o My nm (RH 7.2 or .3, GNU nm 2.11.90.0.8) doesn't support --print-size.
> > That'll probably affect many users.
>
> OK. Fortunately I have a new version of the table2alias program which
> takes the elf object directly, anyway, which has the benifit of being
> faster, too.

Alright. I think we're heading towards a generic postprocessor here, which
takes the .o, extracts information as necessary and generates some .c file
which contains e.g. checksums for the unresolved symbols (when MODVERSIONS
is selected), a section to record which modules we depend on, an alias
section etc. This .c is then compiled and linked into the final .ko

table2alias would then be just another module in this postprocessor.
(My current version is in C already but calls "nm" to extract symbol info.
If we put in the code from your new table2alias, I suppose we can instead
open the object directly and find the information as necessary)

> I prefer to keep special symbols out of section names, so we can do
> nice tricks later with __start_. So __discard_modalias would be my
> preference if we're going to change it.

Fine with me. Since it's not being actually used yet, if you agree on
changing it, let's do it now ;)
>
> > o I'm not totally happy with the integration into the build system yet,
> > but it'll clash with the module versioning changes anyway ;)
>
> Yeah, I thought you'd say that 8). I consider this to be after
> modversions in the queue, and I don't want to overload you.

Yup, I think modversions should have a little time to settle first.
There's really only one tricky point with modversions (and the other stuff
above), i.e. we need a complete list of all modules. With people
playing tricks with "make SUBDIRS=..." that needs some care to not go
accidentally wrong.

> We can already figure what symbols it uses in depmod: the original
> modprobe did just that, but Adam Richter complained about speed with
> 1200 modules (sure, it's < 1 second for most people, but Debian on an
> old 486 would suck hard).

Well, just reading the symbols from my 100 modules takes about 1.5 secs on
the laptop here, and that's with everything in cache, so I think Adam was
right there ;)

--Kai

2003-02-01 10:22:48

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Hi,

On Fri, 31 Jan 2003, Kai Germaschewski wrote:

> depmod does give correct warnings, but only at modules install time, not
> at modules build time, that's what I was trying to say.

depmod certainly could give correct warnings, but currently it doesn't
because System.map contains not only exported symbols.

bye, Roman

2003-02-01 10:46:47

by Arnd Bergmann

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Kai Germaschewski wrote:

> Alright. I think we're heading towards a generic postprocessor here, which
> takes the .o, extracts information as necessary and generates some .c file
> which contains e.g. checksums for the unresolved symbols (when MODVERSIONS
> is selected), a section to record which modules we depend on, an alias
> section etc. This .c is then compiled and linked into the final .ko
...
> Yup, I think modversions should have a little time to settle first.
> There's really only one tricky point with modversions (and the other stuff
> above), i.e. we need a complete list of all modules. With people
> playing tricks with "make SUBDIRS=..." that needs some care to not go
> accidentally wrong.

Worse that "make SUBDIRS=...", what do you think can be done about third
party modules? After all, I thought they are what modversions are about.
I don't see how you can reliably find the list of required modules when
you build a module outside of the kernel tree.

Arnd <><

2003-02-02 15:19:11

by Horst von Brand

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Rusty Russell <[email protected]> said:

[...]

> BTW, the reason for using the alias mechanism is that aliases are
> useful in themselves: consider you write a "new_foo" driver, you can
> do "MODULE_ALIAS("foo")" and so no userspace changes are neccessary.
> module-init-tools 0.9.8 already supported this.

May I respectfully disagree again?

This is fundamentally broken, as it takes away the possibility of me
(sysadmin) to load foo or old_foo. I end up with an (useless) foo, and a
new_foo that aliases for foo, and soon I'd have even_newer_foo masquerading
as foo too, and all hell breaks loose. The effect is bloat over just
deleting foo in the first place, as it can't be used at all now.

I remember a few cases of renamed modules (yes, annoying like hell) and
very few cases of old_foo and new_foo coexisting. In the last case the
_user_ had to decide if old_foo or new_foo worked better on their machine,
no amount of kbuild ESP would do, no distribution could set this up sanely.

The case where this might be useful are very far in between (if they even
exist), and the potential for subtle, invisible, breakage is just too high
IMVHO.

Please axe it.
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2003-02-03 02:17:39

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> Rusty Russell <[email protected]> said:
>
> [...]
>
> > BTW, the reason for using the alias mechanism is that aliases are
> > useful in themselves: consider you write a "new_foo" driver, you can
> > do "MODULE_ALIAS("foo")" and so no userspace changes are neccessary.
> > module-init-tools 0.9.8 already supported this.
>
> May I respectfully disagree again?

Hi Horst,

Thoughtful and respecful criticism? I didn't think that was
allowed on linux-kernel any more? 8)

> This is fundamentally broken, as it takes away the possibility of me
> (sysadmin) to load foo or old_foo. I end up with an (useless) foo, and a
> new_foo that aliases for foo, and soon I'd have even_newer_foo masquerading
> as foo too, and all hell breaks loose. The effect is bloat over just
> deleting foo in the first place, as it can't be used at all now.

Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
foo driver isn't found, and (2) the new driver author decides that
it's a valid replacement.

Whether (2) is ever justified, I'm happy leaving to the individual
author (I know, that makes me a wimp).

Consider another example: convenience aliases such as char-major-xxx.
Now, I'm not convinced they're a great idea anyway, but if people are
going to do this, I'd rather they did it in the kernel, rather than
some random userspace program.

I think the alias mechanism is valid, but you have a point about the
dangers, too.

Thoughts?
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-03 02:40:17

by John Levon

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

On Mon, Feb 03, 2003 at 11:52:57AM +1100, Rusty Russell wrote:

> Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
> foo driver isn't found, and (2) the new driver author decides that
> it's a valid replacement.

It's not the driver author's decision as to which module an admin would
like to use. This just seems to make things a lot more awkward.

> going to do this, I'd rather they did it in the kernel, rather than
> some random userspace program.

Can you explain why please ?

regards
john

2003-02-03 08:25:59

by Horst von Brand

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Rusty Russell <[email protected]> said:
> Horst von Brand wrote:
> > Rusty Russell <[email protected]> said:

> > [...]
> >
> > > BTW, the reason for using the alias mechanism is that aliases are
> > > useful in themselves: consider you write a "new_foo" driver, you can
> > > do "MODULE_ALIAS("foo")" and so no userspace changes are neccessary.
> > > module-init-tools 0.9.8 already supported this.
> >
> > May I respectfully disagree again?
>
> Hi Horst,
>
> Thoughtful and respecful criticism? I didn't think that was
> allowed on linux-kernel any more? 8)

Sorry about that. Won't happen again, I promise.

> > This is fundamentally broken, as it takes away the possibility of me
> > (sysadmin) to load foo or old_foo. I end up with an (useless) foo, and a
> > new_foo that aliases for foo, and soon I'd have even_newer_foo masquerading
> > as foo too, and all hell breaks loose. The effect is bloat over just
> > deleting foo in the first place, as it can't be used at all now.
>
> Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
> foo driver isn't found, and (2) the new driver author decides that
> it's a valid replacement.

So the alias only works if the original isn't found? Weird... I'd just
rename the dang thing and get over it. A distribution kernel won't be able
to use this anyway, as they'll either build both alternatives or just one
of them and adjust configuration to match.

> Whether (2) is ever justified, I'm happy leaving to the individual
> author (I know, that makes me a wimp).

Don't trust authors too much when it comes to guessing at random individual
installations... ;-)

> Consider another example: convenience aliases such as char-major-xxx.
> Now, I'm not convinced they're a great idea anyway, but if people are
> going to do this, I'd rather they did it in the kernel, rather than
> some random userspace program.

The module munging programs and their configuration are (logically) a part
of the kernel (configuration). So this goes against the current wave of
exporting as much as possible from the kernel. And IMHO it places policy
into the kernel, where it has no place. Plus it enlarges modules, which is
a consideration for installation/rescue media.

> I think the alias mechanism is valid, but you have a point about the
> dangers, too.
>
> Thoughts?

Maybe I'm just being a bit too conservative here. But I still think this is
too dangerous for little (or even no) real gain.

Could you please provide examples of use in generic, distribution kernels?
Contrast with configuration in /etc/modules.conf and/or modprobe (I think
placing this stuff in modprobe is wrong, but that is the way it is today).
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2003-02-03 10:44:15

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> Rusty Russell <[email protected]> said:
> > Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
> > foo driver isn't found, and (2) the new driver author decides that
> > it's a valid replacement.
>
> So the alias only works if the original isn't found?

It's defined to be that way for aliases taken from the modules
themselves, for this very reason: the admin has no control over it.

It's undefined for the "in the config file case" (they curently *do*
override, but that's an implementation detail). It'd be clearer to
explicitly say "you can't override module names with "alias", use
"install" instead, IMHO.

> Weird... I'd just
> rename the dang thing and get over it. A distribution kernel won't be able
> to use this anyway, as they'll either build both alternatives or just one
> of them and adjust configuration to match.

I'm not so sure. There have been several cases where a more than one
driver supports the same card, but the old one is kept around "just in
case". Backwards compatibility during such a transition would be
really nice.

In most cases the admin, not the distribution, is the one setting the
module options: losing them when you upgrade the kernel is not good.

> > Whether (2) is ever justified, I'm happy leaving to the individual
> > author (I know, that makes me a wimp).
>
> Don't trust authors too much when it comes to guessing at random individual
> installations... ;-)

Well, true, but if they don't, there's a deeper problem.

> > Consider another example: convenience aliases such as char-major-xxx.
> > Now, I'm not convinced they're a great idea anyway, but if people are
> > going to do this, I'd rather they did it in the kernel, rather than
> > some random userspace program.
>
> The module munging programs and their configuration are (logically) a part
> of the kernel (configuration). So this goes against the current wave of
> exporting as much as possible from the kernel.

Well, one major point of the module rewrite is that kernel internals
belong in the kernel sources. If you disagree with that, we're
probably not going to make progress.

> And IMHO it places policy into the kernel, where it has no place.

I try to avoid such fuzzy discussions, as they are rarely benificial.

I would point suggest that you grep for "request_module" in order to
understand (1) where policy already is in the kernel, (2) why it is in
the kernel, and (3) why this suggestion merely centralizes it.

> Plus it enlarges modules, which is a consideration for
> installation/rescue media.

Now I think you're really grasping at straws, but you could always use
"strip -R .modinfo" if you want to save ~20 bytes.

> Maybe I'm just being a bit too conservative here. But I still think this is
> too dangerous for little (or even no) real gain.

Possibly. However I beg you to consider how you would introduce a new
cypher into 2.6.3.

> Could you please provide examples of use in generic, distribution kernels?
> Contrast with configuration in /etc/modules.conf and/or modprobe (I think
> placing this stuff in modprobe is wrong, but that is the way it is today).

Ignoring the hotplug stuff which is going to use it, consider adding a
new binary format for XYZ3000 compatibility. For 2.4, you have to do:

1) Write the new binfmt_XYZ3000 module.

2) Write a patch to the modutils to place it in the built-in
modules config. Keith's quite receptive with this.

3) Tell your users to upgrade modutils or place "alias
binfmt-764 binfmt_XYZ3000" in their /etc/modules.conf (or
/etc/modprobe.d/local for Debian).

For 2.5:
1) Write the new binfmt_XYZ3000 module.

2) Place MODULE_ALIAS("binfmt-764") at the bottom.

Hope that clarifies?
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-03 10:44:11

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> On Mon, Feb 03, 2003 at 11:52:57AM +1100, Rusty Russell wrote:
>
> > Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
> > foo driver isn't found, and (2) the new driver author decides that
> > it's a valid replacement.
>
> It's not the driver author's decision as to which module an admin would
> like to use. This just seems to make things a lot more awkward.

I disagree.

"insmod foo" will *always* get foo. The only exception is when "foo"
doesn't exist, in which case modprobe looks for another module which
explicitly says it can serve in the place of foo.

This allows smooth transition when a driver is superceded, *if* the
new author wants it.

> > going to do this, I'd rather they did it in the kernel, rather than
> > some random userspace program.
>
> Can you explain why please ?

Sure, but you cut the vital bit of my mail. Currently we have (1)
request_module() which is used in various cases to request a service,
and (2) aliases like "char-major-36", which modprobe.conf (or the old
modutils' builtin) says is "netlink". If you introduce a new char
major (or, say a new cypher, or new network family, etc), you currenly
have to get everyone to include it in their configuration file.

Now, the netlink module *knows* it provides char-major-36: with
MODULE_ALIAS() it can say so.

Obviously, there is a place for aliases which are configured by the
user: they are definitely not going away. But many are simple
enumerations, which are currently duplicated external to the kernel
sources.

So I think it's a good idea, even if using it to replace drivers is
insane...

Does that clarify?
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-03 13:31:52

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Hi,

> > Consider another example: convenience aliases such as char-major-xxx.
> > Now, I'm not convinced they're a great idea anyway, but if people are
> > going to do this, I'd rather they did it in the kernel, rather than
> > some random userspace program.
>
> The module munging programs and their configuration are (logically) a part
> of the kernel (configuration). So this goes against the current wave of
> exporting as much as possible from the kernel. And IMHO it places policy
> into the kernel, where it has no place. Plus it enlarges modules, which is
> a consideration for installation/rescue media.

Maybe it helps to put this into a larger perspective, because you both
have valid points.
Currently the kernel has two mechanisms to request a module (modprobe and
hotplug) and these also have different ways to map the request to a module
name. modprobe needs a hardcoded list of module names, so it e.g. knows
that it should map net-pf-1 to unix. OTOH we generate such a mapping for
hotplug, but here the mapping is very device specific and requires
knowledge about kernel structures.
If module loading were the only problem, the alias mechanism would be a
good solution. We could remove hotplug and let modprobe do the job.
Unfortunately it's not that easy, as we might want to extend hotplug to a
more generic event mechanism, which e.g. could be used to replace devfs.
This means we not only have "load the driver for this new device on this
bus" events, but also "generate the device nodes for this new driver"
events. In this context the module alias encoding would be very limited.
So what actually has to be discussed/decided, whether it's ok to special
case module loading or if we want a generic kernel event mechanism, which
can map any kind of event to some action. Until this isn't decided, it
makes little sense to discuss details.

bye, Roman

2003-02-04 08:00:13

by Horst von Brand

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Rusty Russell <[email protected]> said:
> In message <[email protected]> you write:
> > Rusty Russell <[email protected]> said:
> > > Well, "modprobe foo" will only give you the "new_foo" driver if (1) the
> > > foo driver isn't found, and (2) the new driver author decides that
> > > it's a valid replacement.
> >
> > So the alias only works if the original isn't found?
>
> It's defined to be that way for aliases taken from the modules
> themselves, for this very reason: the admin has no control over it.

That's exactly what bothers me. This gives several ways to go to the same
place, and the rules of which one applies when _will_ confuse your friendy
local BOFH (who does 237 other things besides messing with modules).

> It's undefined for the "in the config file case" (they curently *do*
> override, but that's an implementation detail). It'd be clearer to
> explicitly say "you can't override module names with "alias", use
> "install" instead, IMHO.

Urgh. What is "alias" then for? It has been used for ages as a way of "call
module foo by name bar, possibly with this further arguments". Why change
that gratuitously?

> > Weird... I'd just
> > rename the dang thing and get over it. A distribution kernel won't be able
> > to use this anyway, as they'll either build both alternatives or just one
> > of them and adjust configuration to match.

> I'm not so sure. There have been several cases where a more than one
> driver supports the same card, but the old one is kept around "just in
> case". Backwards compatibility during such a transition would be
> really nice.

Yep. foo --> old-foo, (new) foo is ... foo! ;-)

Or just like today: foo stays foo, new foo is nfoo. When (if) nfoo ends up
stable enough, and works everywhere, and can be taken as an all-around
replacement for old foo, old foo gets the axe (eventually). AFAIR, there
has never been a case of "new driver works everywhere, period" that was not
just a replacement for the old one. If you have both around, you _don't_
want the driver author(s) deciding which one to call foo.. If there is just
one, no problem.

> In most cases the admin, not the dist ribution, is the one setting the
> module options: losing them when you upgrade the kernel is not good.

Exactly. The admin won't be messing around with MODULE_ALIAS macros for a
random collection of modules everytime she downloads latest-n-greatest, but
she will certainly check /etc/modules.conf (just as today); she won't want
to recompile the distribution kernel just to fix wrong aliases. People
backward compatibility and minimal upgrading pain (to get your random BOFH
to recompile a kernel is _not_ trivial today!) is much more important than
code backward compatibility, IMHO. [I'm speaking from the perspective of
the user/sysadmin, _not_ the kernel hacker here].

> > > Whether (2) is ever justified, I'm happy leaving to the individual
> > > author (I know, that makes me a wimp).

> > Don't trust authors too much when it comes to guessing at random individual
> > installations... ;-)

> Well, true, but if they don't, there's a deeper problem.

Wellcome to the real world. There are literally thnousands of different
configurations out there. I wouldn't dare cast any part of the
configuration in binary for use everywhere.

> > > Consider another example: convenience aliases such as char-major-xxx.
> > > Now, I'm not convinced they're a great idea anyway, but if people are
> > > going to do this, I'd rather they did it in the kernel, rather than
> > > some random userspace program.

> > The module munging programs and their configuration are (logically) a part
> > of the kernel (configuration). So this goes against the current wave of
> > exporting as much as possible from the kernel.

> Well, one major point of the module rewrite is that kernel internals
> belong in the kernel sources. If you disagree with that, we're
> probably not going to make progress.

Kernel internals yes; kernel configuration no. Several different ways of
doing the same thing, with subtle overridings and invisible/out of reach
parts is no-no-no in my book.

[...]

> > Maybe I'm just being a bit too conservative here. But I still think this is
> > too dangerous for little (or even no) real gain.
>
> Possibly. However I beg you to consider how you would introduce a new
> cypher into 2.6.3.

How were new cyphers added in 2.5.x recently? No need for strange aliases
in-kernel, they get integrated via API. Outside the kernel, leave it to the
sysadmin, explicitly. If such an addition is done, I'd want to control the
use of the new cypher myself (because I don't upgrade all machines at the
same time, whatever), not just getting it suddenly used everywhere when I
mention "cypher", and I get to run around in circles trying to find out why
shiny new A isn't talking to old B anymore.

> > Could you please provide examples of use in generic, distribution kernels?
> > Contrast with configuration in /etc/modules.conf and/or modprobe (I think
> > placing this stuff in modprobe is wrong, but that is the way it is today).
>
> Ignoring the hotplug stuff which is going to use it, consider adding a
> new binary format for XYZ3000 compatibility. For 2.4, you have to do:
>
> 1) Write the new binfmt_XYZ3000 module.
>
> 2) Write a patch to the modutils to place it in the built-in
> modules config. Keith's quite receptive with this.
>
> 3) Tell your users to upgrade modutils or place "alias
> binfmt-764 binfmt_XYZ3000" in their /etc/modules.conf (or
> /etc/modprobe.d/local for Debian).
>
> For 2.5:
> 1) Write the new binfmt_XYZ3000 module.
>
> 2) Place MODULE_ALIAS("binfmt-764") at the bottom.
>
> Hope that clarifies?

Yep. But (a) How often does this happen? Which other areas could benefit?
(b) $BIG_DISTRIBUTION_VENDOR is quite capable of distributing
/etc/modules.conf or modutils with the change; kernel hackers are used to
this kind of change (no, I haven't seen Aunt Tillie lately).


Seems we will always disagree :(
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2003-02-04 09:46:46

by Horst von Brand

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Rusty Russell <[email protected]> said:
> "insmod foo" will *always* get foo. The only exception is when "foo"
> doesn't exist, in which case modprobe looks for another module which
> explicitly says it can serve in the place of foo.

OK.

> This allows smooth transition when a driver is superceded, *if* the
> new author wants it.

I would't let this happen, ever. What if foo does exist and Aunt Tillie
just didn't compile it?

[...]

> Sure, but you cut the vital bit of my mail. Currently we have (1)
> request_module() which is used in various cases to request a service,
> and (2) aliases like "char-major-36", which modprobe.conf (or the old
> modutils' builtin) says is "netlink". If you introduce a new char
> major (or, say a new cypher, or new network family, etc), you currenly
> have to get everyone to include it in their configuration file.
>
> Now, the netlink module *knows* it provides char-major-36: with
> MODULE_ALIAS() it can say so.

The "provides" is the missing clue... You are taking about "provides" (and
mixing it up with "alias", something I still can't agree on), I'm talking
about "alias". Maybe they should be separate? In your examples netlink
_provides_ char-major-36, xyz3000 _provides_ binfmt-754, eepro100 _aliases_
to eth0 here. First use is clearly in-kernel, second one is (or should
always be IMVHO) out-of-kernel. Sure, could use the same infrastructure for
simplicity.

Now, what if xyz and zyx both provide foo? This will be the case when a new
driver comes along...

[This is looking more and more like a task for rpm/apt...
/me ducks and runs]

Thanks for your patience!
--
Dr. Horst H. von Brand User #22616 counter.li.org
Departamento de Informatica Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria +56 32 654239
Casilla 110-V, Valparaiso, Chile Fax: +56 32 797513

2003-02-04 14:40:58

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Hi,

On Tue, 4 Feb 2003, Horst von Brand wrote:

> People
> backward compatibility and minimal upgrading pain (to get your random BOFH
> to recompile a kernel is _not_ trivial today!) is much more important than
> code backward compatibility, IMHO. [I'm speaking from the perspective of
> the user/sysadmin, _not_ the kernel hacker here].

I can only agree and I hope more people realize the importance of this.
My main problem with the module fiasco are the complete new user space
tools. I urge anyone who only cares a little bit about modules to compare
modules.conf(5) with modprobe.conf(5) and to tell me whether _all_ the
removed options are really unnecessary? What happened to deprecating
features _first_?

bye, Roman

2003-02-04 17:15:55

by Adam J. Richter

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Roman Zippel wrote:
>Currently the kernel has two mechanisms to request a module (modprobe and
>hotplug) and these also have different ways to map the request to a module
>name.

I don't know if I'm disagreeing with you, but I'd like to
bring up the following point.

The kernel notifying the user level that a new device has been
plugged is a often a separate event from the kernel needing a module
for that device.

When a USB disk is detected, the computer should update its
list of devices to check when an attempt it made to access an
undefined disk, put a new icon on the desktop, and see if there are
any user defined scripts for the event, which would probably include a
default script to update the desktop user interface with this
information. There is not necessarily any need at that point to load
a module at that point as you don't know that the user is going to
actually access the disk (it may just have been attached when the USB
controller was detected, and might not be accessed at all before the
computer is shut down).

When a program attempts to access an undefined disk, including
testing the existence of a partition, then the system should start
loading modules for the unbound devices that potentially may have disk
drives.

It is also possible that the appropriate kernel module is
already compiled in or loaded, but the user interface should be
notified that a new device has been plugged in, say, to pop up a video
window by default whenever a USB camera is plugged in.

Granted, some users may want a policy of immediately loading
all potentially relevant kernel modules when hardware is detected,
just for the user interface benefits of the kernel printk's and devfs
entries, and they should easily be able to set that, and that should
probably be the default policy for the case where a kernel module is
matched, but the hotplug system does not see that the device is of a
class that will automatically be loaded later by some subsequent
event such as a specific devfs lookup or an attempt to access an
undefined networking interface.

For some devices, the events set in motion by hotplug may
never result in a kernel module being loaded. For example, plugging
in a video card might result in invocation of an X server that just
maps in the card's IO registers and a memory window, or some USB devices
may be controlled by user level programs through /proc/bus/usb.

That said, we could perhaps should shave a few lines from the
kernel by unifying the call_usermodehelper clients a bit more
(hotplug, request_module and my mini-devfs if and when that goes in),
but something like hotplug should be the surviving interface rather
than request_module, because hotplug passes other important
information, such as the type of event and the type of facility being
requested.

The additional information in the hotplug interface makes it
much easier to write scripts that can do useful things for event types
or module types that haven't been written yet and can help security by
ensuring that only modules of the appropriate type are loaded (so that
a user cannot do something like "ifconfig scsi_debug" to get the kernel
to load an arbitrary module). As an example of extensibility, imagine
that if we define a new "suspend" hotplug event for device type
"ieee1394", the hotplug handler might know enough to exec
"/usr/libexec/hotplug/drivers/ieee1934 suspend /proc/sys/ieee1394/dev2342",
or the user interface might know enough to recognized the "suspend"
event and change the color of some icon, even though it doesn't know
what ieee1394 is.

Adam J. Richter __ ______________ 575 Oroville Road
[email protected] \ / Milpitas, California 95035
+1 408 309-6081 | g g d r a s i l United States of America
"Free Software For The Rest Of Us."

2003-02-04 17:36:22

by Adam J. Richter

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

Rusty Russell responded to someone else (whom Rusty didn't name, and
whom I didn't immediately find in the archives):
>"insmod foo" will *always* get foo. The only exception is when "foo"
>doesn't exist, in which case modprobe looks for another module which
>explicitly says it can serve in the place of foo.

I think perhaps we should separate the name spaces so that the
kernel never modprobes for an actual module file name. In other
words, there would only be three ways in which a module would
"automatically" be loaded:

(1) it exports an alias like "fs-ext3", for the level helper that
request_module calls (devfs could also use these aliases),
(2) it exports a device ID table for hotplug et al (probably
should not be the same name space as module "aliases" because
of device ID extensibility issues argued by David Brownell),
(3) it exports a symbol needed by some other module.

This would reduce the security attacks based on getting the
kernel to load arbitrary module names.

It would also be straightfoward to add a flaag to depmod to
ask it to detect any modules that export no aliases, device ID tables
or symbols (perhaps they could have flag that says "yes, I really only
want to be loaded manually").

Adam J. Richter __ ______________ 575 Oroville Road
[email protected] \ / Milpitas, California 95035
+1 408 309-6081 | g g d r a s i l United States of America
"Free Software For The Rest Of Us."

2003-02-04 23:23:36

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> > It's undefined for the "in the config file case" (they curently *do*
> > override, but that's an implementation detail). It'd be clearer to
> > explicitly say "you can't override module names with "alias", use
> > "install" instead, IMHO.
>
> Urgh. What is "alias" then for? It has been used for ages as a way of "call
> module foo by name bar, possibly with this further arguments". Why change
> that gratuitously?

I'm going to stop here, since I don't think you understand what I am
proposing, nor how the current system works: this makes is extremely
difficult to describe changes, and time consuming.

Sorry,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-05 00:09:59

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> Rusty Russell responded to someone else (whom Rusty didn't name, and
> whom I didn't immediately find in the archives):
> >"insmod foo" will *always* get foo. The only exception is when "foo"
> >doesn't exist, in which case modprobe looks for another module which
> >explicitly says it can serve in the place of foo.
>
> I think perhaps we should separate the name spaces so that the
> kernel never modprobes for an actual module file name. In other
> words, there would only be three ways in which a module would
> "automatically" be loaded:

This sounds like a good idea to me: the current approach is very
ad-hoc. I think we're headed in the right direction, for example the
alias patch introduces "pci:" and "usb:" prefixes for hotplug (and if
you look very hard, there's already a "symbol:" prefix, unused, in the
tree).

Thanks!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-05 00:10:01

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Module alias and device table support.

In message <[email protected]> you write:
> Rusty Russell <[email protected]> said:
> > "insmod foo" will *always* get foo. The only exception is when "foo"
> > doesn't exist, in which case modprobe looks for another module which
> > explicitly says it can serve in the place of foo.
>
> OK.
>
> > This allows smooth transition when a driver is superceded, *if* the
> > new author wants it.
>
> I would't let this happen, ever. What if foo does exist and Aunt Tillie
> just didn't compile it?

Then they turned the config option off themselves.

> > Now, the netlink module *knows* it provides char-major-36: with
> > MODULE_ALIAS() it can say so.
>
> The "provides" is the missing clue... You are taking about "provides" (and
> mixing it up with "alias", something I still can't agree on), I'm talking
> about "alias". Maybe they should be separate? In your examples netlink
> _provides_ char-major-36, xyz3000 _provides_ binfmt-754, eepro100 _aliases_
> to eth0 here. First use is clearly in-kernel, second one is (or should
> always be IMVHO) out-of-kernel. Sure, could use the same infrastructure for
> simplicity.

That's a different debate.

This is how it works today, and how it has worked since before 2.2.
If you want to argue that another mechanism should be used instead,
that's a completely different issue (and not neccessarily something I
would disagree with, especially since hotplug is now a first class
citizen).

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-06 23:01:42

by Roman Zippel

[permalink] [raw]
Subject: [PATCH] Restore module support.

Hi,

On Tue, 4 Feb 2003, Rusty Russell wrote:

> I'm going to stop here, since I don't think you understand what I am
> proposing, nor how the current system works: this makes is extremely
> difficult to describe changes, and time consuming.

Rusty, if you continue to ignore criticism, I have only one answer left:

http://www.xs4all.nl/~zippel/restore-modules-2.5.59.diff

These numbers are quite interesting:

$ diffstat restore-modules-2.5.59.diff
arch/i386/kernel/Makefile | 1
arch/i386/kernel/cpu/mtrr/if.c | 1
arch/i386/kernel/entry.S | 6
arch/i386/kernel/module.c | 111 -
arch/i386/mm/extable.c | 1
drivers/char/agp/Makefile | 2
drivers/char/misc.c | 1
drivers/eisa/eisa-bus.c | 1
drivers/input/serio/serport.c | 1
fs/filesystems.c | 27
fs/proc/proc_misc.c | 12
include/asm-generic/percpu.h | 1
include/asm-generic/vmlinux.lds.h | 15
include/asm-i386/module.h | 57
include/linux/init.h | 130 -
include/linux/module.h | 817 ++++++------
include/linux/moduleloader.h | 43
include/linux/moduleparam.h | 126 -
init/Kconfig | 40
init/main.c | 109 +
kernel/Makefile | 8
kernel/extable.c | 41
kernel/intermodule.c | 182 --
kernel/kallsyms.c | 5
kernel/kmod.c | 2
kernel/ksyms.c | 7
kernel/module.c | 2448 +++++++++++++++++--------------------
kernel/params.c | 336 -----
net/ipv4/netfilter/ip_nat_helper.c | 2
scripts/Makefile.modinst | 8
sound/sound_core.c | 1
31 files changed, 1805 insertions(+), 2737 deletions(-)

$ size linux-2.5.59-org/vmlinux linux-2.5.59-mod/vmlinux
text data bss dec hex filename
3403915 864229 338052 4606196 4648f4 linux-2.5.59-org/vmlinux
3361448 863393 342020 4566861 45af4d linux-2.5.59-mod/vmlinux

This patch still has modversion disabled, when Kai finishes the new
modversion support, I'll add the support for it to modutils.

bye, Roman

2003-02-06 23:20:11

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, Feb 07, 2003 at 12:09:27AM +0100, Roman Zippel wrote:
> Hi,
>
> On Tue, 4 Feb 2003, Rusty Russell wrote:
>
> > I'm going to stop here, since I don't think you understand what I am
> > proposing, nor how the current system works: this makes is extremely
> > difficult to describe changes, and time consuming.
>
> Rusty, if you continue to ignore criticism, I have only one answer left:
>
> http://www.xs4all.nl/~zippel/restore-modules-2.5.59.diff

But what are the modutils numbers? :)

Come on, what Rusty did was the "right thing to do" and has made life
easier for all of the arch maintainers (or so says the ones that I've
talked to), and has made my life easier with regards to
MODULE_DEVICE_TABLE() logic, which will enable the /sbin/hotplug
scripts/binary to shrink a _lot_.

thanks,

gre gk-h

2003-02-06 23:40:31

by Adam J. Richter

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On 2003-02-06, Greg KH wrote:
>On Fri, Feb 07, 2003 at 12:09:27AM +0100, Roman Zippel wrote:
>> Hi,
>>
>> On Tue, 4 Feb 2003, Rusty Russell wrote:
>>
>> > I'm going to stop here, since I don't think you understand what I am
>> > proposing, nor how the current system works: this makes is extremely
>> > difficult to describe changes, and time consuming.
>>
>> Rusty, if you continue to ignore criticism, I have only one answer left:
>>
>> http://www.xs4all.nl/~zippel/restore-modules-2.5.59.diff
>
>But what are the modutils numbers? :)
>
>Come on, what Rusty did was the "right thing to do" and has made life
>easier for all of the arch maintainers (or so says the ones that I've
>talked to), and has made my life easier with regards to
>MODULE_DEVICE_TABLE() logic, which will enable the /sbin/hotplug
>scripts/binary to shrink a _lot_.

I'd be interested in some elaboration on these two points.

I'd like to understand what problems were solved for other
architectures by putting the module loader into the kernel, so I could
compare what would be involved to delivering the same benefit with a
user-level module loader.

I think the MODULE_DEVICE_TABLE stuff is largely independent
of whether the module loading is done inside the kernel or from user
level, but if this is due to some misunderstanding on my part, please
set me straight.

Although I write this in response to a message by Greg KH, I
would welcome answers from anyone.

Adam J. Richter __ ______________ 575 Oroville Road
[email protected] \ / Milpitas, California 95035
+1 408 309-6081 | g g d r a s i l United States of America
"Free Software For The Rest Of Us."

2003-02-06 23:52:19

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

Hi,

On Thu, 6 Feb 2003, Greg KH wrote:

> But what are the modutils numbers? :)

There should be no real difference as I'd like to integrate Kai's patch too.

> Come on, what Rusty did was the "right thing to do" and has made life
> easier for all of the arch maintainers (or so says the ones that I've
> talked to), and has made my life easier with regards to
> MODULE_DEVICE_TABLE() logic, which will enable the /sbin/hotplug
> scripts/binary to shrink a _lot_.

What was the "right thing to do"?
There were certainly a few interesting changes, but I'd like discuss them
first. For example there is more than one solution to improve the
MODULE_DEVICE_TABLE() logic (*), so how is Rusty's better?

bye, Roman

(*) http://marc.theaimsgroup.com/?l=linux-kernel&m=104405265719327&w=2
http://marc.theaimsgroup.com/?l=linux-kernel&m=104437966220610&w=2

2003-02-07 00:00:51

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Thu, Feb 06, 2003 at 03:25:15PM -0800, Greg KH wrote:
> Come on, what Rusty did was the "right thing to do" and has made life
> easier for all of the arch maintainers (or so says the ones that I've
> talked to)

And I'll promptly provide you with the other view. I'm still trying to
sort out the best thing to do for ARM. We have the choice of:

1. load modules in the vmalloc region and build two jump tables, one for
the init text and one for the core text.

2. fix vmalloc and /proc/kcore to be able to cope with a separate module
region located below PAGE_OFFSET. Currently, neither play well with
this option.

(1) has the advantage that it's all architecture code, its what we've
done with the old modutils, and I've finally managed to implement it.
However, it introduces an extra instruction and data cache line fetch
to branches from modules into the kernel text.

(2) has the disadvantage that its touching non-architecture specific
code, but this is the option I'd prefer due to the obvious performance
advantage. However, I'm afraid that it isn't worth the effort to fix
up vmalloc and /proc/kcore. vmalloc fix appears simple, but /proc/kcore
has issues (anyone know what KCORE_BASE is all about?)

I've not made up my mind which option I'm going to take. If I don't get
around to fixing /proc/kcore by this weekend, I'll probably just throw
option (1) at Linus, which bring Linus' tree back to a buildable state
for some ARM targets again.

--
Russell King ([email protected]) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

2003-02-07 04:01:12

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, Feb 07, 2003 at 01:01:01AM +0100, Roman Zippel wrote:
> Hi,
>
> On Thu, 6 Feb 2003, Greg KH wrote:
>
> > But what are the modutils numbers? :)
>
> There should be no real difference as I'd like to integrate Kai's patch too.

Ok, I'm confused, you're advocating putting back the old modutils
interface, but somehow not using the old modutils code? I don't
understand.


> > Come on, what Rusty did was the "right thing to do" and has made life
> > easier for all of the arch maintainers (or so says the ones that I've
> > talked to), and has made my life easier with regards to
> > MODULE_DEVICE_TABLE() logic, which will enable the /sbin/hotplug
> > scripts/binary to shrink a _lot_.
>
> What was the "right thing to do"?
> There were certainly a few interesting changes, but I'd like discuss them
> first. For example there is more than one solution to improve the
> MODULE_DEVICE_TABLE() logic (*), so how is Rusty's better?

Neither one of those proposals, no any others, were backed with working
examples. Rusty had the only working example of getting rid of the
userspace knowledge of the kernel data structures that I know of so far.

thanks,

greg k-h

2003-02-07 04:44:48

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

In message <[email protected]> you write:
> And I'll promptly provide you with the other view. I'm still trying to
> sort out the best thing to do for ARM. We have the choice of:
>
> 1. load modules in the vmalloc region and build two jump tables, one for
> the init text and one for the core text.

Yes. PPC and PPC64 have the same issues: currently this is done by
(1) putting nothing in the .init sections (on PPC64), and (2) with
stubs when jumping outside the module code.

This gives the same effect as the previous userspace loader: for PPC64
noone cares about discarding init stuff, so it's firmly on the TODO
list. ARM's priorities are obviously different.

> 2. fix vmalloc and /proc/kcore to be able to cope with a separate module
> region located below PAGE_OFFSET. Currently, neither play well with
> this option.

x86_64 has this, as does sparc64: they do their own allocation. Does
ARM require something special in this regard? I'd love to see what
you've got...

Thanks!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2003-02-07 06:02:58

by Kai Germaschewski

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, 7 Feb 2003, Russell King wrote:

> On Thu, Feb 06, 2003 at 03:25:15PM -0800, Greg KH wrote:
> > Come on, what Rusty did was the "right thing to do" and has made life
> > easier for all of the arch maintainers (or so says the ones that I've
> > talked to)
>
> And I'll promptly provide you with the other view. I'm still trying to
> sort out the best thing to do for ARM. We have the choice of:
>
> 1. load modules in the vmalloc region and build two jump tables, one for
> the init text and one for the core text.
>
> 2. fix vmalloc and /proc/kcore to be able to cope with a separate module
> region located below PAGE_OFFSET. Currently, neither play well with
> this option.

So you have the choice of either sticking to the solution which was
previously used (only that it's now done in the kernel, not in modutils),
or doing something new and more efficient.

Now, what's the reason you're not happy with that? You've got more
flexibility than before, and you can even switch between different ways
without having to teach an external package about it, so you avoid the
compatibility issues when kernel and modutils are not in sync.

--Kai


2003-02-07 08:40:22

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

> In message <[email protected]> you write:
> > And I'll promptly provide you with the other view. I'm still trying to
> > sort out the best thing to do for ARM. We have the choice of:

Actually, I must be really confused. I thought ARM was already
complete.

Anyway, here's a version which simply does what the usermode one did,
if you decide to take the "fix it later" approach.

Cheers!
Rusty.
PS. I did this in the usermode test framework, so not live tested.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.59/arch/arm/kernel/module.c working-2.5.59-armmodule/arch/arm/kernel/module.c
--- linux-2.5.59/arch/arm/kernel/module.c 2003-02-07 19:21:51.000000000 +1100
+++ working-2.5.59-armmodule/arch/arm/kernel/module.c 2003-02-07 19:04:12.000000000 +1100
@@ -2,6 +2,7 @@
* linux/arch/arm/kernel/module.c
*
* Copyright (C) 2002 Russell King.
+ * Dumbed down by Rusty Russell.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -17,54 +18,74 @@
#include <linux/fs.h>
#include <linux/string.h>

-#include <asm/pgtable.h>
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt , ...)
+#endif

-void *module_alloc(unsigned long size)
+/* This is a beautiful architecture. --RR */
+struct arm_plt_entry
{
- struct vm_struct *area;
- struct page **pages;
- unsigned int array_size, i;
-
- size = PAGE_ALIGN(size);
- if (!size)
- goto out_null;
+ u32 ldr_pc; /* ldr pc,[pc,#-4] */
+ u32 location; /* sym@ */
+};

- area = __get_vm_area(size, VM_ALLOC, MODULE_START, MODULE_END);
- if (!area)
- goto out_null;
+void *module_alloc(unsigned long size)
+{
+ if (size == 0)
+ return NULL;
+ return vmalloc(size);
+}

- area->nr_pages = size >> PAGE_SHIFT;
- array_size = area->nr_pages * sizeof(struct page *);
- area->pages = pages = kmalloc(array_size, GFP_KERNEL);
- if (!area->pages) {
- remove_vm_area(area->addr);
- kfree(area);
- goto out_null;
- }
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+}

- memset(pages, 0, array_size);
+/* Count how many different PC24 relocations (different symbol) */
+static unsigned int count_relocs(const Elf32_Rel *rel, unsigned int num)
+{
+ unsigned int i, j, ret = 0;

- for (i = 0; i < area->nr_pages; i++) {
- pages[i] = alloc_page(GFP_KERNEL);
- if (unlikely(!pages[i])) {
- area->nr_pages = i;
- goto out_no_pages;
+ /* Sure, this is order(n^2), but it's usually short, and not
+ time critical */
+ for (i = 0; i < num; i++) {
+ if (ELF32_R_TYPE(rel[i].r_info) != R_ARM_PC24)
+ continue;
+ for (j = 0; j < i; j++) {
+ if (ELF32_R_TYPE(rel[j].r_info) != R_ARM_PC24)
+ continue;
+ /* If this addend appeared before, it's
+ already been counted */
+ if (ELF32_R_SYM(rel[i].r_info)
+ == ELF32_R_SYM(rel[j].r_info))
+ break;
}
+ if (j == i) ret++;
}
-
- if (map_vm_area(area, PAGE_KERNEL, &pages))
- goto out_no_pages;
- return area->addr;
-
- out_no_pages:
- vfree(area->addr);
- out_null:
- return NULL;
+ return ret;
}

-void module_free(struct module *module, void *region)
+/* Get the potential trampolines size required sections */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+ const Elf32_Shdr *sechdrs,
+ const char *secstrings)
{
- vfree(region);
+ unsigned long ret = 0;
+ unsigned i;
+
+ /* Everything marked ALLOC (this includes the exported
+ symbols) */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_REL)
+ continue;
+ ret += count_relocs((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size / sizeof(Elf32_Rel));
+ }
+
+ return ret * sizeof(struct arm_plt_entry);
}

int module_frob_arch_sections(Elf_Ehdr *hdr,
@@ -72,9 +93,55 @@ int module_frob_arch_sections(Elf_Ehdr *
char *secstrings,
struct module *mod)
{
+ unsigned int i;
+ char *p;
+
+ /* Find .plt section, and rename .init sections, which we
+ don't handle */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+ mod->arch.plt_section = i;
+ while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
+ p[0] = '_';
+ }
+ if (!mod->arch.plt_section) {
+ printk("Module doesn't contain .plt section.\n");
+ return -ENOEXEC;
+ }
+
+ /* Override its size */
+ sechdrs[mod->arch.plt_section].sh_size
+ = get_plt_size(hdr, sechdrs, secstrings);
+ /* Override its type and flags: in asm statement doesn't work 8( */
+ sechdrs[mod->arch.plt_section].sh_type = SHT_NOBITS;
+ sechdrs[mod->arch.plt_section].sh_flags = (SHF_EXECINSTR | SHF_ALLOC);
return 0;
}

+/* Allocate (or find) the PLT entry for this function. */
+static u32 make_plt(Elf32_Shdr *sechdrs, struct module *module, u32 funcaddr)
+{
+ struct arm_plt_entry *plt;
+ unsigned int i, num_plts;
+
+ plt = (void *)sechdrs[module->arch.plt_section].sh_addr;
+ num_plts = sechdrs[module->arch.plt_section].sh_size / sizeof(*plt);
+
+ for (i = 0; i < num_plts; i++) {
+ if (!plt[i].ldr_pc) {
+ /* New one. Fill in. */
+ plt[i].ldr_pc = 0xe51ff004;
+ plt[i].location = funcaddr;
+ }
+ if (plt[i].location == funcaddr) {
+ DEBUGP("Made plt %u for %p at %p\n",
+ i, (void *)funcaddr, &plt[i]);
+ return (u32)&plt[i];
+ }
+ }
+ BUG();
+}
+
int
apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
unsigned int relindex, struct module *module)
@@ -86,7 +153,7 @@ apply_relocate(Elf32_Shdr *sechdrs, cons
unsigned int i;

for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) {
- unsigned long loc;
+ unsigned long loc, addend;
Elf32_Sym *sym;
s32 offset;

@@ -98,6 +165,11 @@ apply_relocate(Elf32_Shdr *sechdrs, cons
}

sym = ((Elf32_Sym *)symsec->sh_addr) + offset;
+ if (!sym->st_value) {
+ printk(KERN_WARNING "%s: unknown symbol %s\n",
+ module->name, strtab + sym->st_name);
+ return -ENOENT;
+ }

if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) {
printk(KERN_ERR "%s: out of bounds relocation, "
@@ -115,24 +187,26 @@ apply_relocate(Elf32_Shdr *sechdrs, cons
break;

case R_ARM_PC24:
- offset = (*(u32 *)loc & 0x00ffffff) << 2;
- if (offset & 0x02000000)
- offset -= 0x04000000;
+ /* Pull addend from location */
+ addend = (*(u32 *)loc & 0x00ffffff) << 2;
+ if (addend & 0x02000000)
+ addend -= 0x04000000;
+ offset = sym->st_value + addend - loc;

- offset += sym->st_value - loc;
- if (offset & 3 ||
- offset <= (s32)0xfc000000 ||
- offset >= (s32)0x04000000) {
+ /* if the target is too far away, use plt. */
+ if (offset < -0x02000000 || offset >= 0x02000000)
+ offset = make_plt(sechdrs,module,sym->st_value)
+ + addend - loc;
+
+ if (offset & 3) {
printk(KERN_ERR "%s: unable to fixup "
- "relocation: out of range\n",
- module->name);
+ "relocation: %u out of range\n",
+ module->name, offset);
return -ENOEXEC;
}

- offset >>= 2;
-
*(u32 *)loc &= 0xff000000;
- *(u32 *)loc |= offset & 0x00ffffff;
+ *(u32 *)loc |= (offset >> 2) & 0x00ffffff;
break;

default:
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.59/include/asm-arm/module.h working-2.5.59-armmodule/include/asm-arm/module.h
--- linux-2.5.59/include/asm-arm/module.h 2003-02-07 19:16:25.000000000 +1100
+++ working-2.5.59-armmodule/include/asm-arm/module.h 2003-02-07 19:04:03.000000000 +1100
@@ -3,11 +3,16 @@

struct mod_arch_specific
{
- int foo;
+ /* Index of PLT section within module. */
+ unsigned int plt_section;
};

#define Elf_Shdr Elf32_Shdr
#define Elf_Sym Elf32_Sym
#define Elf_Ehdr Elf32_Ehdr

+/* Make empty sections for module_frob_arch_sections to expand. */
+#ifdef MODULE
+asm(".section .plt; .align 3; .previous");
+#endif
#endif /* _ASM_ARM_MODULE_H */

2003-02-07 09:30:54

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

Hi,

On Thu, 6 Feb 2003, Greg KH wrote:

> > There should be no real difference as I'd like to integrate Kai's patch too.
>
> Ok, I'm confused, you're advocating putting back the old modutils
> interface, but somehow not using the old modutils code? I don't
> understand.

No, I'm advocating to break as little as possible. I'm certainly willing
to port any interesting feature from Rusty's patches. If one feature
requires changes to modutils, that's fine.

> Neither one of those proposals, no any others, were backed with working
> examples. Rusty had the only working example of getting rid of the
> userspace knowledge of the kernel data structures that I know of so far.

1. In the past I posted enough example code, which was pretty much
ignored, why should I think it would be any different this time?
2. Is hotplug that broken, that it wouldn't survive 2.6, so it required a
complete new implementation? If that should be case I herewith volunteer
to add module alias support to modutils.

bye, Roman

2003-02-07 09:37:16

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

Hi,

On Fri, 7 Feb 2003, Kai Germaschewski wrote:

> So you have the choice of either sticking to the solution which was
> previously used (only that it's now done in the kernel, not in modutils),
> or doing something new and more efficient.

Where is the problem to do the "new and more efficient" in modutils?

> Now, what's the reason you're not happy with that? You've got more
> flexibility than before, and you can even switch between different ways
> without having to teach an external package about it, so you avoid the
> compatibility issues when kernel and modutils are not in sync.

Where is the problem with updating user space tools? We should certainly
reduce dependencies, but moving everything into the kernel source can't be
the answer either.

bye, Roman

2003-02-07 09:53:45

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, Feb 07, 2003 at 03:53:44PM +1100, Rusty Russell wrote:
> Yes. PPC and PPC64 have the same issues: currently this is done by
> (1) putting nothing in the .init sections (on PPC64), and (2) with
> stubs when jumping outside the module code.
>
> This gives the same effect as the previous userspace loader: for PPC64
> noone cares about discarding init stuff, so it's firmly on the TODO
> list. ARM's priorities are obviously different.

As I say, I have this solution working, but its suboptimal, and I'll
probably push this Linus-wards if I can't resolve (2) soon.

> > 2. fix vmalloc and /proc/kcore to be able to cope with a separate module
> > region located below PAGE_OFFSET. Currently, neither play well with
> > this option.
>
> x86_64 has this, as does sparc64: they do their own allocation. Does
> ARM require something special in this regard? I'd love to see what
> you've got...

There are two problems - one I mentioned during on LKML recently:

Message-ID: <[email protected]>

This seems simple to resolve. We just need to make get_vm_area() ignore
mappings for invalid areas:

--- orig/mm/vmalloc.c Tue Nov 5 12:51:41 2002
+++ linux/mm/vmalloc.c Fri Feb 7 09:48:42 2003
@@ -210,6 +210,8 @@

write_lock(&vmlist_lock);
for (p = &vmlist; (tmp = *p) ;p = &tmp->next) {
+ if (tmp->addr < addr)
+ continue;
if ((size + addr) < addr)
goto out;
if (size + addr <= (unsigned long)tmp->addr)

Since the vmlist is an ordered list, and we place the modules below
VMALLOC_START, this change ensures that we will completely ignore any
vmlist entries below the current minimum address (addr) we're looking
for.

/proc/kcore currently assumes that:

1. all vmlist mappings are above PAGE_OFFSET.
2. all vmlist mappings are within VMALLOC_START to VMALLOC_END

Looking at fs/proc/kcore.c this morning, I have a couple of ideas to
solve this problem. Patch will follow later today, hopefully without
any ifdefs.

--
Russell King ([email protected]) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

2003-02-07 09:56:05

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, Feb 07, 2003 at 07:26:50PM +1100, Rusty Russell wrote:
> Actually, I must be really confused. I thought ARM was already
> complete.
>
> Anyway, here's a version which simply does what the usermode one did,
> if you decide to take the "fix it later" approach.

Rusty, as I said, I already have a patch for this approach. Its the
second approach that I'd prefer to get working.

Also, if you see the message-id I posted in my mail just 5 minutes ago,
you'll see why the existing code does not work.

--
Russell King ([email protected]) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

2003-02-07 17:52:47

by Roman Zippel

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

Hi,

On Thu, 6 Feb 2003, Greg KH wrote:

> Neither one of those proposals, no any others, were backed with working
> examples. Rusty had the only working example of getting rid of the
> userspace knowledge of the kernel data structures that I know of so far.

3. Somehow I hoped we could discuss this on a technical base, I didn't
know we've reached already the "first post" level.
SCNR :)

bye, Roman

2003-02-07 18:33:47

by Luck, Tony

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

> (2) has the disadvantage that its touching non-architecture specific
> code, but this is the option I'd prefer due to the obvious performance
> advantage. However, I'm afraid that it isn't worth the effort to fix
> up vmalloc and /proc/kcore. vmalloc fix appears simple, but /proc/kcore
> has issues (anyone know what KCORE_BASE is all about?)

KCORE_BASE is my fault ... it was an attempt to fix the "modules
below PAGE_OFFSET" problem for the ia64 port. For a few nanoseconds
the code just here looked like this:

#if VMALLOC_START < PAGE_OFFSET
#define KCORE_BASE VMALLOC_START
#else
#define KCORE_BASE PAGE_OFFSET
#endif

Which worked great for ia64, but failed to even compile on i386
(because on i386 VMALLOC_START isn't a simple constant that cpp
can compare against).

Linus kept the bulk of my patch and just replaced the above code with
the "#define KCORE_BASE PAGE_OFFSET" that is there today, maybe
in the hope that I'd come back with a workable #ifdef ... but the only
one I've come up with so far is "#ifdef CONFIG_IA64" which can't be
right as ia64 isn't the only architecture with this issue.

There was some discussion on a better way to do this, by adding the
kernel itself to the vmlist, and eliminating all the special case code.
I took a brief look at this, but realised that there were all sorts
of ugly race conditions with /proc/kcore if a module is loaded/unloaded
after some process has read the Elf header.

-Tony Luck

2003-02-07 19:41:23

by Russell King

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

On Fri, Feb 07, 2003 at 10:43:19AM -0800, Luck, Tony wrote:
> > (2) has the disadvantage that its touching non-architecture specific
> > code, but this is the option I'd prefer due to the obvious performance
> > advantage. However, I'm afraid that it isn't worth the effort to fix
> > up vmalloc and /proc/kcore. vmalloc fix appears simple, but /proc/kcore
> > has issues (anyone know what KCORE_BASE is all about?)
>
> KCORE_BASE is my fault ... it was an attempt to fix the "modules
> below PAGE_OFFSET" problem for the ia64 port. For a few nanoseconds
> the code just here looked like this:
>
> #if VMALLOC_START < PAGE_OFFSET
> #define KCORE_BASE VMALLOC_START
> #else
> #define KCORE_BASE PAGE_OFFSET
> #endif

Ah, ok. What I'm thinking of is something like the following (untested
and probably improperly thought out patch...):

--- orig/fs/proc/kcore.c Sat Nov 2 18:58:18 2002
+++ linux/fs/proc/kcore.c Fri Feb 7 19:48:35 2003
@@ -99,7 +99,10 @@
}
#else /* CONFIG_KCORE_AOUT */

+#ifndef KCORE_BASE
#define KCORE_BASE PAGE_OFFSET
+#define in_vmlist_region(x) ((x) >= VMALLOC_START && (x) < VMALLOC_END)
+#endif

#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))

@@ -394,7 +397,7 @@
tsz = buflen;

while (buflen) {
- if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
+ if (in_vmlist_region(start)) {
char * elf_buf;
struct vm_struct *m;
unsigned long curstart = start;

An architecture could then define KCORE_BASE and in_vmlist_region()
alongside their VMALLOC_START definition if they needed to change
them.

> There was some discussion on a better way to do this, by adding the
> kernel itself to the vmlist, and eliminating all the special case code.
> I took a brief look at this, but realised that there were all sorts
> of ugly race conditions with /proc/kcore if a module is loaded/unloaded
> after some process has read the Elf header.

Well, only root can debug using /proc/kcore, and I'd suggest the best
answer to that problem is "if it hurts, don't do that." I don't think
you should prevent modules from being unloaded just because you have
/proc/kcore open.

--
Russell King ([email protected]) The developer of ARM Linux
http://www.arm.linux.org.uk/personal/aboutme.html

2003-02-07 21:06:22

by Luck, Tony

[permalink] [raw]
Subject: RE: [PATCH] Restore module support.

Russell King wrote:
> On Fri, Feb 07, 2003 at 10:43:19AM -0800, Luck, Tony wrote:
> > > (2) has the disadvantage that its touching
> non-architecture specific
> > > code, but this is the option I'd prefer due to the
> obvious performance
> > > advantage. However, I'm afraid that it isn't worth the
> effort to fix
> > > up vmalloc and /proc/kcore. vmalloc fix appears simple,
> but /proc/kcore
> > > has issues (anyone know what KCORE_BASE is all about?)
> >
> > KCORE_BASE is my fault ... it was an attempt to fix the "modules
> > below PAGE_OFFSET" problem for the ia64 port. For a few nanoseconds
> > the code just here looked like this:
> >
> > #if VMALLOC_START < PAGE_OFFSET
> > #define KCORE_BASE VMALLOC_START
> > #else
> > #define KCORE_BASE PAGE_OFFSET
> > #endif
>
> Ah, ok. What I'm thinking of is something like the following
> (untested
> and probably improperly thought out patch...):
>
> --- orig/fs/proc/kcore.c Sat Nov 2 18:58:18 2002
> +++ linux/fs/proc/kcore.c Fri Feb 7 19:48:35 2003
> @@ -99,7 +99,10 @@
> }
> #else /* CONFIG_KCORE_AOUT */
>
> +#ifndef KCORE_BASE
> #define KCORE_BASE PAGE_OFFSET
> +) < #define in_vmlist_region(x) ((x) >= VMALLOC_START && (x
> VMALLOC_END)
> +#endif
>
> #define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
>
> @@ -394,7 +397,7 @@
> tsz = buflen;
>
> while (buflen) {
> - if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
> + if (in_vmlist_region(start)) {
> char * elf_buf;
> struct vm_struct *m;
> unsigned long curstart = start;
>
> An architecture could then define KCORE_BASE and in_vmlist_region()
> alongside their VMALLOC_START definition if they needed to change
> them.

Looks pretty good. What's the motivation for the in_vmlist_region()?
I don't think that I need that for ia64 ... so it might be better to
have separate #ifdefs:

#ifndef KCORE_BASE
#define KCORE_BASE PAGE_OFFSET
endif
#ifndef in_vmlist_region
#define in_vmlist_region(x) ((x) >= VMALLOC_START && (x < VMALLOC_END))
#endif

-Tony

2003-02-08 06:06:14

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] Restore module support.

In message <[email protected]> you write:
> On Fri, Feb 07, 2003 at 07:26:50PM +1100, Rusty Russell wrote:
> > Actually, I must be really confused. I thought ARM was already
> > complete.
> >
> > Anyway, here's a version which simply does what the usermode one did,
> > if you decide to take the "fix it later" approach.
>
> Rusty, as I said, I already have a patch for this approach. Its the
> second approach that I'd prefer to get working.

Sure, but you complained that I hadn't made life easier for the arch
maintainers. I'm sorry if you feel this way, but I felt that the
least I could do, at the first complaint I was aware of, was to
provide you with a solution.

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.