Hi,
This patch makes use of the character device hash and avoids introducing
unnecessary interfaces. I also moved the tty hack where it belongs.
As an example I converted the misc device to demonstrate how drivers can
make use of it.
Probably the most interesting part is how the open path stays short and
fast.
bye, Roman
Index: drivers/char/misc.c
===================================================================
RCS file: /usr/src/cvsroot/linux-2.5/drivers/char/misc.c,v
retrieving revision 1.1.1.7
diff -u -p -r1.1.1.7 misc.c
--- drivers/char/misc.c 15 Feb 2003 01:28:54 -0000 1.1.1.7
+++ drivers/char/misc.c 20 Mar 2003 00:38:23 -0000
@@ -60,11 +60,8 @@ static DECLARE_MUTEX(misc_sem);
* Assigned numbers, used for dynamic minors
*/
#define DYNAMIC_MINORS 64 /* like dynamic majors */
-static unsigned char misc_minors[DYNAMIC_MINORS / 8];
-#ifdef CONFIG_SGI_NEWPORT_GFX
extern void gfx_register(void);
-#endif
extern void streamable_init(void);
extern int rtc_DP8570A_init(void);
extern int rtc_MK48T08_init(void);
@@ -100,45 +97,21 @@ static int misc_read_proc(char *buf, cha
static int misc_open(struct inode * inode, struct file * file)
{
- int minor = minor(inode->i_rdev);
- struct miscdevice *c;
+ struct char_device *cdev = inode->i_cdev;
+ char modname[32];
int err = -ENODEV;
- struct file_operations *old_fops, *new_fops = NULL;
-
- down(&misc_sem);
-
- c = misc_list.next;
+ struct file_operations *fops;
- while ((c != &misc_list) && (c->minor != minor))
- c = c->next;
- if (c != &misc_list)
- new_fops = fops_get(c->fops);
- if (!new_fops) {
- char modname[20];
- up(&misc_sem);
- sprintf(modname, "char-major-%d-%d", MISC_MAJOR, minor);
- request_module(modname);
- down(&misc_sem);
- c = misc_list.next;
- while ((c != &misc_list) && (c->minor != minor))
- c = c->next;
- if (c == &misc_list || (new_fops = fops_get(c->fops)) == NULL)
- goto fail;
+ sprintf(modname, "char-major-%d-%d", MISC_MAJOR, MINOR(cdev->dev));
+ request_module(modname);
+ fops = fops_get(cdev->fops);
+ if (fops) {
+ err = 0;
+ fops_put(file->f_op);
+ file->f_op = fops;
+ if (fops->open)
+ err = fops->open(inode, file);
}
-
- err = 0;
- old_fops = file->f_op;
- file->f_op = new_fops;
- if (file->f_op->open) {
- err=file->f_op->open(inode,file);
- if (err) {
- fops_put(file->f_op);
- file->f_op = fops_get(old_fops);
- }
- }
- fops_put(old_fops);
-fail:
- up(&misc_sem);
return err;
}
@@ -167,34 +140,35 @@ static struct file_operations misc_fops
int misc_register(struct miscdevice * misc)
{
static devfs_handle_t devfs_handle, dir;
- struct miscdevice *c;
-
- if (misc->next || misc->prev)
- return -EBUSY;
- down(&misc_sem);
- c = misc_list.next;
-
- while ((c != &misc_list) && (c->minor != misc->minor))
- c = c->next;
- if (c != &misc_list) {
- up(&misc_sem);
- return -EBUSY;
- }
+ struct char_device *cdev;
if (misc->minor == MISC_DYNAMIC_MINOR) {
- int i = DYNAMIC_MINORS;
- while (--i >= 0)
- if ( (misc_minors[i>>3] & (1 << (i&7))) == 0)
- break;
- if (i<0)
- {
- up(&misc_sem);
- return -EBUSY;
+ int i;
+
+ for (i = DYNAMIC_MINORS; i > 0; i--) {
+ cdev = cdget(MKDEV(MISC_MAJOR, i));
+ down(&cdev->sem);
+ if (!cdev->fops)
+ goto found;
+ up(&cdev->sem);
+ cdput(cdev);
}
- misc->minor = i;
- }
- if (misc->minor < DYNAMIC_MINORS)
- misc_minors[misc->minor >> 3] |= 1 << (misc->minor & 7);
+ } else {
+ cdev = cdget(MKDEV(MISC_MAJOR, misc->minor));
+ down(&cdev->sem);
+ if (!cdev->fops)
+ goto found;
+ up(&cdev->sem);
+ }
+ return -EBUSY;
+
+found:
+ cdev->fops = misc->fops;
+ cdev->name = misc->name;
+ up(&cdev->sem);
+
+ down(&misc_sem);
+
if (!devfs_handle)
devfs_handle = devfs_mk_dir (NULL, "misc", NULL);
dir = strchr (misc->name, '/') ? NULL : devfs_handle;
@@ -204,14 +178,10 @@ int misc_register(struct miscdevice * mi
S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
misc->fops, NULL);
- /*
- * Add it to the front, so that later devices can "override"
- * earlier defaults
- */
misc->prev = &misc_list;
misc->next = misc_list.next;
- misc->prev->next = misc;
- misc->next->prev = misc;
+ misc_list.next->prev = misc;
+ misc_list.next = misc;
up(&misc_sem);
return 0;
}
@@ -228,18 +198,22 @@ int misc_register(struct miscdevice * mi
int misc_deregister(struct miscdevice * misc)
{
- int i = misc->minor;
- if (!misc->next || !misc->prev)
+ struct char_device *cdev;
+
+ cdev = cdget(MKDEV(MISC_MAJOR, misc->minor));
+ down(&cdev->sem);
+ if (cdev->fops != misc->fops) {
+ up(&cdev->sem);
return -EINVAL;
+ }
+ cdev->fops = NULL;
+ cdev->name = NULL;
+ up(&cdev->sem);
+
down(&misc_sem);
misc->prev->next = misc->next;
misc->next->prev = misc->prev;
- misc->next = NULL;
- misc->prev = NULL;
- devfs_unregister (misc->devfs_handle);
- if (i < DYNAMIC_MINORS && i>0) {
- misc_minors[i>>3] &= ~(1 << (misc->minor & 7));
- }
+ devfs_unregister(misc->devfs_handle);
up(&misc_sem);
return 0;
}
Index: drivers/char/tty_io.c
===================================================================
RCS file: /usr/src/cvsroot/linux-2.5/drivers/char/tty_io.c,v
retrieving revision 1.1.1.32
diff -u -p -r1.1.1.32 tty_io.c
--- drivers/char/tty_io.c 17 Mar 2003 22:56:57 -0000 1.1.1.32
+++ drivers/char/tty_io.c 19 Mar 2003 23:20:07 -0000
@@ -274,7 +274,7 @@ static int tty_set_ldisc(struct tty_stru
/* Eduardo Blanco <[email protected]> */
/* Cyrus Durgin <[email protected]> */
if (!(ldiscs[ldisc].flags & LDISC_FLAG_DEFINED)) {
- char modname [20];
+ char modname [32];
sprintf(modname, "tty-ldisc-%d", ldisc);
request_module (modname);
}
@@ -327,11 +327,15 @@ static int tty_set_ldisc(struct tty_stru
/*
* This routine returns a tty driver structure, given a device number
*/
-struct tty_driver *get_tty_driver(kdev_t device)
+static struct tty_driver *get_tty_driver(kdev_t device)
{
int major, minor;
struct tty_driver *p;
-
+#ifdef CONFIG_KMOD
+ char name[20] = "";
+again:
+#endif
+
minor = minor(device);
major = major(device);
@@ -344,6 +348,13 @@ struct tty_driver *get_tty_driver(kdev_t
continue;
return p;
}
+#ifdef CONFIG_KMOD
+ if (!name[0]) {
+ sprintf(name, "char-major-%d", MAJOR(major));
+ request_module(name);
+ goto again;
+ }
+#endif
return NULL;
}
@@ -2108,22 +2119,33 @@ EXPORT_SYMBOL(tty_unregister_device);
*/
int tty_register_driver(struct tty_driver *driver)
{
- int error;
+ int error = 0;
int i;
if (driver->flags & TTY_DRIVER_INSTALLED)
return 0;
+ down_tty_sem(0);
+ if (driver->major) {
+ struct char_device *cdev = cdget(MKDEV(driver->major, 0));
+ struct file_operations *fops = cdev->fops;
+ cdput(cdev);
+ if (fops == &tty_fops)
+ goto skip;
+ }
error = register_chrdev(driver->major, driver->name, &tty_fops);
- if (error < 0)
+ if (error < 0) {
+ up_tty_sem(0);
return error;
- else if(driver->major == 0)
+ } else if (driver->major == 0)
driver->major = error;
+skip:
if (!driver->put_char)
driver->put_char = tty_default_put_char;
list_add(&driver->tty_drivers, &tty_drivers);
+ up_tty_sem(0);
if ( !(driver->flags & TTY_DRIVER_NO_DEVFS) ) {
for(i = 0; i < driver->num; i++)
@@ -2147,6 +2169,7 @@ int tty_unregister_driver(struct tty_dri
if (*driver->refcount)
return -EBUSY;
+ down_tty_sem(0);
list_for_each_entry(p, &tty_drivers, tty_drivers) {
if (p == driver)
found++;
@@ -2154,17 +2177,22 @@ int tty_unregister_driver(struct tty_dri
othername = p->name;
}
- if (!found)
- return -ENOENT;
-
- if (othername == NULL) {
- retval = unregister_chrdev(driver->major, driver->name);
- if (retval)
- return retval;
+ if (found) {
+ if (othername) {
+ struct char_device *cdev = cdget(MKDEV(driver->major, 0));
+ cdev->name = othername;
+ cdput(cdev);
+ retval = 0;
+ } else
+ retval = unregister_chrdev(driver->major, driver->name);
} else
- register_chrdev(driver->major, othername, &tty_fops);
+ retval = -ENOENT;
- list_del(&driver->tty_drivers);
+ if (!retval)
+ list_del(&driver->tty_drivers);
+ up_tty_sem(0);
+ if (retval)
+ return retval;
/*
* Free the termios and termios_locked structures because
Index: fs/char_dev.c
===================================================================
RCS file: /usr/src/cvsroot/linux-2.5/fs/char_dev.c,v
retrieving revision 1.1.1.7
diff -u -p -r1.1.1.7 char_dev.c
--- fs/char_dev.c 17 Mar 2003 22:53:20 -0000 1.1.1.7
+++ fs/char_dev.c 19 Mar 2003 23:20:07 -0000
@@ -15,17 +15,8 @@
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
-#include <linux/devfs_fs_kernel.h>
-#ifdef CONFIG_KMOD
#include <linux/kmod.h>
-#include <linux/tty.h>
-
-/* serial module kmod load support */
-struct tty_driver *get_tty_driver(kdev_t device);
-#define isa_tty_dev(ma) (ma == TTY_MAJOR || ma == TTYAUX_MAJOR)
-#define need_serial(ma,mi) (get_tty_driver(mk_kdev(ma,mi)) == NULL)
-#endif
#define HASH_BITS 6
#define HASH_SIZE (1UL << HASH_BITS)
@@ -43,8 +34,10 @@ static void init_once(void *foo, kmem_ca
struct char_device *cdev = (struct char_device *) foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
- SLAB_CTOR_CONSTRUCTOR)
+ SLAB_CTOR_CONSTRUCTOR) {
memset(cdev, 0, sizeof(*cdev));
+ sema_init(&cdev->sem, 1);
+ }
}
void __init cdev_cache_init(void)
@@ -127,27 +120,22 @@ void cdput(struct char_device *cdev)
}
}
-struct device_struct {
- const char * name;
- struct file_operations * fops;
-};
-
-static rwlock_t chrdevs_lock = RW_LOCK_UNLOCKED;
-static struct device_struct chrdevs[MAX_CHRDEV];
int get_chrdev_list(char *page)
{
- int i;
- int len;
+ struct char_device *cdev;
+ dev_t dev;
+ int len, i;
len = sprintf(page, "Character devices:\n");
- read_lock(&chrdevs_lock);
+ spin_lock(&cdev_lock);
for (i = 0; i < MAX_CHRDEV ; i++) {
- if (chrdevs[i].fops) {
- len += sprintf(page+len, "%3d %s\n", i, chrdevs[i].name);
- }
+ dev = MKDEV(i, 0);
+ cdev = cdfind(dev, cdev_hashtable + hash(dev));
+ if (cdev && cdev->name)
+ len += sprintf(page+len, "%3d %s\n", i, cdev->name);
}
- read_unlock(&chrdevs_lock);
+ spin_unlock(&cdev_lock);
return len;
}
@@ -156,80 +144,92 @@ int get_chrdev_list(char *page)
Load the driver if needed.
Increment the reference count of module in question.
*/
-static struct file_operations * get_chrfops(unsigned int major, unsigned int minor)
+static struct file_operations *get_chrfops(struct char_device *cdev)
{
- struct file_operations *ret = NULL;
-
- if (!major || major >= MAX_CHRDEV)
- return NULL;
+ struct file_operations *fops;
- read_lock(&chrdevs_lock);
- ret = fops_get(chrdevs[major].fops);
- read_unlock(&chrdevs_lock);
+ down(&cdev->sem);
+ fops = fops_get(cdev->fops);
+ up(&cdev->sem);
+ if (fops || !MINOR(cdev->dev))
+ return fops;
+
+ cdev = cdget(MKDEV(MAJOR(cdev->dev), 0));
+ down(&cdev->sem);
+ fops = fops_get(cdev->fops);
+ up(&cdev->sem);
#ifdef CONFIG_KMOD
- if (ret && isa_tty_dev(major)) {
- lock_kernel();
- if (need_serial(major,minor)) {
- /* Force request_module anyway, but what for? */
- fops_put(ret);
- ret = NULL;
- }
- unlock_kernel();
- }
- if (!ret) {
+ if (!fops) {
char name[20];
- sprintf(name, "char-major-%d", major);
- request_module(name);
- read_lock(&chrdevs_lock);
- ret = fops_get(chrdevs[major].fops);
- read_unlock(&chrdevs_lock);
+ sprintf(name, "char-major-%d", MAJOR(cdev->dev));
+ request_module(name);
+ down(&cdev->sem);
+ fops = fops_get(cdev->fops);
+ up(&cdev->sem);
}
#endif
- return ret;
+ cdput(cdev);
+ return fops;
}
-
+
int register_chrdev(unsigned int major, const char * name, struct file_operations *fops)
{
+ struct char_device *cdev;
+
if (major == 0) {
- write_lock(&chrdevs_lock);
for (major = MAX_CHRDEV-1; major > 0; major--) {
- if (chrdevs[major].fops == NULL) {
- chrdevs[major].name = name;
- chrdevs[major].fops = fops;
- write_unlock(&chrdevs_lock);
+ cdev = cdget(MKDEV(major, 0));
+ down(&cdev->sem);
+ if (!cdev->fops) {
+ cdev->fops = fops;
+ cdev->name = name;
+ up(&cdev->sem);
return major;
}
+ up(&cdev->sem);
+ cdput(cdev);
}
- write_unlock(&chrdevs_lock);
return -EBUSY;
}
+
if (major >= MAX_CHRDEV)
return -EINVAL;
- write_lock(&chrdevs_lock);
- if (chrdevs[major].fops && chrdevs[major].fops != fops) {
- write_unlock(&chrdevs_lock);
+
+ cdev = cdget(MKDEV(major, 0));
+ down(&cdev->sem);
+ if (cdev->fops) {
+ up(&cdev->sem);
+ cdput(cdev);
return -EBUSY;
}
- chrdevs[major].name = name;
- chrdevs[major].fops = fops;
- write_unlock(&chrdevs_lock);
+ cdev->fops = fops;
+ cdev->name = name;
+ up(&cdev->sem);
return 0;
}
int unregister_chrdev(unsigned int major, const char * name)
{
+ struct char_device *cdev;
+ int res;
+
if (major >= MAX_CHRDEV)
return -EINVAL;
- write_lock(&chrdevs_lock);
- if (!chrdevs[major].fops || strcmp(chrdevs[major].name, name)) {
- write_unlock(&chrdevs_lock);
- return -EINVAL;
+
+ cdev = cdget(MKDEV(major, 0));
+ down(&cdev->sem);
+ res = -EINVAL;
+ if (cdev->fops && !strcmp(cdev->name, name)) {
+ cdev->fops = NULL;
+ cdev->name = NULL;
+ cdput(cdev);
+ res = 0;
}
- chrdevs[major].name = NULL;
- chrdevs[major].fops = NULL;
- write_unlock(&chrdevs_lock);
- return 0;
+ up(&cdev->sem);
+
+ cdput(cdev);
+ return res;
}
/*
@@ -237,14 +237,16 @@ int unregister_chrdev(unsigned int major
*/
int chrdev_open(struct inode * inode, struct file * filp)
{
+ struct file_operations *fops;
int ret = -ENODEV;
- filp->f_op = get_chrfops(major(inode->i_rdev), minor(inode->i_rdev));
- if (filp->f_op) {
+ fops = get_chrfops(inode->i_cdev);
+ if (fops) {
ret = 0;
- if (filp->f_op->open != NULL) {
+ filp->f_op = fops;
+ if (fops->open) {
lock_kernel();
- ret = filp->f_op->open(inode,filp);
+ ret = fops->open(inode,filp);
unlock_kernel();
}
}
@@ -262,11 +264,13 @@ struct file_operations def_chr_fops = {
const char * cdevname(kdev_t dev)
{
- static char buffer[32];
- const char * name = chrdevs[major(dev)].name;
+ static char buffer[64];
+ struct char_device *cdev;
+
+ cdev = cdget(kdev_t_to_nr(dev));
+ down(&cdev->sem);
+ sprintf(buffer, "%s(%d,%d)", cdev->name ? cdev->name : "unknown-char", major(dev), minor(dev));
+ up(&cdev->sem);
- if (!name)
- name = "unknown-char";
- sprintf(buffer, "%s(%d,%d)", name, major(dev), minor(dev));
return buffer;
}
Index: include/linux/fs.h
===================================================================
RCS file: /usr/src/cvsroot/linux-2.5/include/linux/fs.h,v
retrieving revision 1.1.1.53
diff -u -p -r1.1.1.53 fs.h
--- include/linux/fs.h 17 Mar 2003 22:54:06 -0000 1.1.1.53
+++ include/linux/fs.h 19 Mar 2003 23:20:07 -0000
@@ -331,8 +331,11 @@ struct address_space {
struct char_device {
struct list_head hash;
+ struct file_operations *fops;
+ const char * name;
atomic_t count;
dev_t dev;
+ struct semaphore sem;
};
struct block_device {
Hi,
On Thu, 20 Mar 2003, Roman Zippel wrote:
> This patch makes use of the character device hash and avoids introducing
> unnecessary interfaces. I also moved the tty hack where it belongs.
> As an example I converted the misc device to demonstrate how drivers can
> make use of it.
> Probably the most interesting part is how the open path stays short and
> fast.
Here is a more detailed explanation of the patch and how it compares to
Andries patch.
The open path for most devices is quite simple and tries to avoid a lookup
by using the cached value in inode->i_cdev, so it simply has to get
inode->i_cdev->fops. Only if that fails, open tries the slow path looking
up the major device. Andries removes all the infrastructure for this with
the first patch and replaces it with a lookup which can be quite expensive
(e.g. when a major device has a lot of minor devices registered).
Further he introduces a new function register_chrdev_region(), which is
only needed by the tty code and rather hides the problem than solves
it. So the complexity should stay in the tty layer (and has to be fixed
there), than forcing it into the generic layer.
The misc device example is interesting that misc_open() now is only called
if no misc device is registered for that minor, so it only needs to
request a module. The remaining functionality is basically to export
information about misc devices to userspace and even part of that could be
moved into the generic part (e.g. part of the driver model, but that
depends a lot on what the tty layer needs).
Overall the generic code is 32bit dev_t safe (+ minor fixes/
optimizations). A flag still needs to be added, whether a driver can
handle more than 256 minor numbers, but this patch helps drivers to manage
them without huge tables (this latter part is also missing in Andries
patch).
bye, Roman
From: Roman Zippel <[email protected]>
> Here is a more detailed explanation of the patch
Thanks!
However, I am unconvinced.
(i)
There was some unused code, and you decide to start using that
in order to speed up the open() of a chardev. Is that urgent?
Is the speed of opening a chardev a bottleneck?
I think something is wrong with this philosophy.
Look at what happens on open("/dev/ttyS1") - there is a hash
lookup of the "dev", then a hash lookup of the "ttyS1", then
we find that this is device (4,65) and do a hash lookup for
(4,65). You want to eliminate this last hash lookup by building
infrastructure to cache it? That is possible of course.
Some extra code, an extra slab cache, a field i_cdev in struct inode,
a semaphore, refcounting..
I have not benchmarked anything but it sure feels like a lot of
machinery to avoid a simple hash lookup.
I very much doubt that Al had speeding up the open() in mind when
he wrote that (so far unused) infrastructure.
(ii)
> Further he introduces a new function register_chrdev_region(),
> which is only needed by the tty code and rather hides the problem
> than solves it.
What does one want? A driver announces the device number regions
that it wants to cover. Simple and straightforward.
Hardly a new idea. How is this done for block devices?
Using blk_register_region(). How is register_chrdev_region()
hiding problems? It eliminates the tty kludges that you only
move to a different file.
[Al muttered for an entirely different reason:
He wants to specify the region like "dev_t dev, unsigned long range",
where I left the parts of dev separate. I plan (eventually, there is
no hurry) to turn the dev_t here into a kdev_t since that is much
faster, and once that is done both blk_register_region() and
register_chrdev_region() can get a kdev_t as first parameter.]
(iii)
> this patch helps drivers to manage them without huge tables
> (this latter part is also missing in Andries patch).
I am not sure I understand. Where are these huge tables?
And how did you remove them?
Andries
Hi,
On Thu, 20 Mar 2003 [email protected] wrote:
> (i)
> There was some unused code, and you decide to start using that
> in order to speed up the open() of a chardev. Is that urgent?
> Is the speed of opening a chardev a bottleneck?
Currently it isn't, but it shouldn't become one?
I'm unsure how your code will scale. It depends on how that code will be
used. If drivers register a lot of devices, your lookup function has to
scan a possibly very long list of minor devices and that function is
difficult to optimize. If drivers want to avoid this they have to
implement their own lookup function.
One other major reason for the "unused" code is to convert the 16/32/64bit
dev_t as early as possible to either struct block_device/struct
char_device, so the kernel mostly doesn't care about the dev_t resolution,
the character device core would match here the behaviour of the block
device core.
> > Further he introduces a new function register_chrdev_region(),
> > which is only needed by the tty code and rather hides the problem
> > than solves it.
>
> What does one want? A driver announces the device number regions
> that it wants to cover. Simple and straightforward.
> Hardly a new idea. How is this done for block devices?
> Using blk_register_region(). How is register_chrdev_region()
> hiding problems? It eliminates the tty kludges that you only
> move to a different file.
char devices don't have partitions, so you hardly need regions. The
problem with the tty layer is that the console and the serial devices
should have different majors.
Even for block devices blk_register_region() is not the preferred
interface, you should use alloc_disk/add_disk instead. This will make it
easier to assign dynamic device numbers later.
> (iii)
> > this patch helps drivers to manage them without huge tables
> > (this latter part is also missing in Andries patch).
>
> I am not sure I understand. Where are these huge tables?
> And how did you remove them?
See the misc device example. It doesn't have a table, but the list is now
only needed to generate /proc/misc. As soon as character devices are
better integrated into the driver model, even this list is not needed
anymore. This means for simple character devices, we can easily add a
alloc_chardev/add_chardev interface similiar to block devices.
bye, Roman
On Fri, Mar 21, 2003 at 12:03:57AM +0100, Roman Zippel wrote:
> I'm unsure how your code will scale. It depends on how that code will be
> used. If drivers register a lot of devices, your lookup function has to
> scan a possibly very long list of minor devices and that function is
> difficult to optimize.
And then we grab the BKL :(
Hint, optimizing the open() path for char devices is not anything we
will probably be doing in 2.6, due to the BKL usage there. It's also
not anything anyone has seen on any known benchmarks as a point of
contention, so I would not really worry about this for now.
For 2.7, when we want to drop the BKL, then we can worry about this.
> char devices don't have partitions, so you hardly need regions. The
> problem with the tty layer is that the console and the serial devices
> should have different majors.
There are a number of char drivers that have "regions". The tty layer
support them, and the usb core supports them as two examples. I'm sure
there are others. Personally, I like the symmetry with the block device
function the way Andries did it.
> Even for block devices blk_register_region() is not the preferred
> interface, you should use alloc_disk/add_disk instead. This will make it
> easier to assign dynamic device numbers later.
True, but dynamic device numbers can be built on top of the *_region()
calls as it is today. Anyway, dynamic numbers are for 2.7 :)
> > I am not sure I understand. Where are these huge tables?
> > And how did you remove them?
>
> See the misc device example. It doesn't have a table, but the list is now
> only needed to generate /proc/misc. As soon as character devices are
> better integrated into the driver model, even this list is not needed
> anymore. This means for simple character devices, we can easily add a
> alloc_chardev/add_chardev interface similiar to block devices.
No, I don't see /proc/misc going away due to the driver model, I imagine
there are too many users of it to disappear. Also, the driver model
doesn't care a thing about major/minor numbers so I don't understand how
you think it can help in this situation.
thanks,
greg k-h
>
> bye, Roman
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
Hi,
On Thu, 20 Mar 2003, Greg KH wrote:
> On Fri, Mar 21, 2003 at 12:03:57AM +0100, Roman Zippel wrote:
> > I'm unsure how your code will scale. It depends on how that code will be
> > used. If drivers register a lot of devices, your lookup function has to
> > scan a possibly very long list of minor devices and that function is
> > difficult to optimize.
>
> And then we grab the BKL :(
This is currently required for either implementation and needs to be moved
to the driver.
> Hint, optimizing the open() path for char devices is not anything we
> will probably be doing in 2.6, due to the BKL usage there. It's also
> not anything anyone has seen on any known benchmarks as a point of
> contention, so I would not really worry about this for now.
The BKL also shouldn't be a reason to make it unnecessary expensive? I
don't understand your argument.
> > char devices don't have partitions, so you hardly need regions. The
> > problem with the tty layer is that the console and the serial devices
> > should have different majors.
>
> There are a number of char drivers that have "regions". The tty layer
> support them, and the usb core supports them as two examples. I'm sure
> there are others. Personally, I like the symmetry with the block device
> function the way Andries did it.
Every single call to usb_register_dev in 2.5.65 uses exactly 1 minor
number. Block device drivers need regions because they have partitions
and we need to find out which device a partition belongs to. Where have
character devices such requirements?
> > See the misc device example. It doesn't have a table, but the list is now
> > only needed to generate /proc/misc. As soon as character devices are
> > better integrated into the driver model, even this list is not needed
> > anymore. This means for simple character devices, we can easily add a
> > alloc_chardev/add_chardev interface similiar to block devices.
>
> No, I don't see /proc/misc going away due to the driver model, I imagine
> there are too many users of it to disappear. Also, the driver model
> doesn't care a thing about major/minor numbers so I don't understand how
> you think it can help in this situation.
I didn't mean that /proc/misc goes away, I meant the misc_list in misc.c.
They could be other ways to generate /proc/misc.
/proc/devices, /proc/misc, /proc/tty/drivers, ... is currently mostly
needed to generate device nodes for dynamic device numbers. This badly
needs a more generic mechanism.
bye, Roman
On Fri, Mar 21, 2003 at 10:08:43AM +0100, Roman Zippel wrote:
> Hi,
>
> On Thu, 20 Mar 2003, Greg KH wrote:
>
> > On Fri, Mar 21, 2003 at 12:03:57AM +0100, Roman Zippel wrote:
> > > I'm unsure how your code will scale. It depends on how that code will be
> > > used. If drivers register a lot of devices, your lookup function has to
> > > scan a possibly very long list of minor devices and that function is
> > > difficult to optimize.
> >
> > And then we grab the BKL :(
>
> This is currently required for either implementation and needs to be moved
> to the driver.
Heh, that is definiatly a 2.7 thing, too many drivers rely on this
functionality :(
> > Hint, optimizing the open() path for char devices is not anything we
> > will probably be doing in 2.6, due to the BKL usage there. It's also
> > not anything anyone has seen on any known benchmarks as a point of
> > contention, so I would not really worry about this for now.
>
> The BKL also shouldn't be a reason to make it unnecessary expensive? I
> don't understand your argument.
I was trying to point out that pre-mature optimiziation of this code
should not be done before we get rid of the most expensive portion, the
bkl. That's all.
> > > char devices don't have partitions, so you hardly need regions. The
> > > problem with the tty layer is that the console and the serial devices
> > > should have different majors.
> >
> > There are a number of char drivers that have "regions". The tty layer
> > support them, and the usb core supports them as two examples. I'm sure
> > there are others. Personally, I like the symmetry with the block device
> > function the way Andries did it.
>
> Every single call to usb_register_dev in 2.5.65 uses exactly 1 minor
> number. Block device drivers need regions because they have partitions
> and we need to find out which device a partition belongs to. Where have
> character devices such requirements?
Oh yeah, I forgot I had cleaned up that api to not reserve minors in
chunks, sorry. It used to do that :)
So only tty drivers currently do this. But that might just be because
it's pretty hard to get a range of minors right now, as the api hasn't
been present. Once we expand the range, I bet it will get quite common
(most character drivers only want from 1-16 minors normally.)
> > > only needed to generate /proc/misc. As soon as character devices are
> > > better integrated into the driver model, even this list is not needed
> > > anymore. This means for simple character devices, we can easily add a
> > > alloc_chardev/add_chardev interface similiar to block devices.
> >
> > No, I don't see /proc/misc going away due to the driver model, I imagine
> > there are too many users of it to disappear. Also, the driver model
> > doesn't care a thing about major/minor numbers so I don't understand how
> > you think it can help in this situation.
>
> I didn't mean that /proc/misc goes away, I meant the misc_list in misc.c.
> They could be other ways to generate /proc/misc.
> /proc/devices, /proc/misc, /proc/tty/drivers, ... is currently mostly
> needed to generate device nodes for dynamic device numbers. This badly
> needs a more generic mechanism.
I agree. But again, 2.7. Remember our feature freeze?
thanks,
greg k-h
Hi,
On Fri, 21 Mar 2003, Greg KH wrote:
> > The BKL also shouldn't be a reason to make it unnecessary expensive? I
> > don't understand your argument.
>
> I was trying to point out that pre-mature optimiziation of this code
> should not be done before we get rid of the most expensive portion, the
> bkl. That's all.
Can we at least note, that my patch has a performance advantage?
We can still deal with the BKL later.
I hope we can agree, that we should avoid adding premature new
interfaces, which can be expensive later?
> So only tty drivers currently do this. But that might just be because
> it's pretty hard to get a range of minors right now, as the api hasn't
> been present. Once we expand the range, I bet it will get quite common
> (most character drivers only want from 1-16 minors normally.)
There are a few options:
1. Drivers can implement that themselves:
a) The driver allocates the major itself and opens the real minor device
in its open function, (e.g. see the misc driver example). Especially tape
drivers have to do this anyway, because they encoded the open mode in
higher bits, so regions won't help you here at all.
b) The driver allocates the major itself and installs the file_operations
directly in the char_device, e.g. that is something you might want to do
in the usb driver:
register_usb_device(...)
{
...
cdev = cdget(dev);
down(&cdev->sem);
if (cdev->fops)
...;
cdev->fops = fops;
up(&cdev->sem);
}
(see the misc driver again for a detailed example.)
2. If it should be really needed, we can add simple region support by
adding a minor_shift argument to the major device, so get_chrfops() would
first try (major, minor & ((1 << shift) - 1)), before it tries directly
(major, 0).
So I really don't see why we should support arbitrary regions, since
currently nobody needs it and if someone should need it in the future, he
can easily do it himself.
> > /proc/devices, /proc/misc, /proc/tty/drivers, ... is currently mostly
> > needed to generate device nodes for dynamic device numbers. This badly
> > needs a more generic mechanism.
>
> I agree. But again, 2.7. Remember our feature freeze?
I agree too and nowhere in my patch did I change something about this.
bye, Roman
On Sat, Mar 22, 2003 at 02:02:00PM +0100, Roman Zippel wrote:
> > So only tty drivers currently do this. But that might just be because
> > it's pretty hard to get a range of minors right now, as the api hasn't
> > been present. Once we expand the range, I bet it will get quite common
> > (most character drivers only want from 1-16 minors normally.)
>
> There are a few options:
> 1. Drivers can implement that themselves:
Yeah, but as I know, it's a big pain in the butt. Let's make it easy to
do this, don't make writing a driver tougher than it has to be (it's
already much harder than it used to be.) Andries's patch makes it easy,
which is a good thing in my book.
> a) The driver allocates the major itself and opens the real minor device
> in its open function, (e.g. see the misc driver example). Especially tape
> drivers have to do this anyway, because they encoded the open mode in
> higher bits, so regions won't help you here at all.
> b) The driver allocates the major itself and installs the file_operations
> directly in the char_device, e.g. that is something you might want to do
> in the usb driver:
>
> register_usb_device(...)
> {
> ...
> cdev = cdget(dev);
> down(&cdev->sem);
> if (cdev->fops)
> ...;
> cdev->fops = fops;
> up(&cdev->sem);
> }
> (see the misc driver again for a detailed example.)
Look at drivers/usb/core/file.c::usb_open(), it does much the same
thing. Well, functionally the same, not identical in any way :)
thanks,
greg k-h
On Sun, Mar 23, 2003 at 12:19:56AM -0800, Greg KH wrote:
> Look at drivers/usb/core/file.c::usb_open(), it does much the same
> thing. Well, functionally the same, not identical in any way :)
And sound (well, not actually ranges), input, v4l, tty, etc..
->i_cdev together with char device probes would have solved that nicely
and without the linear list serach in ->open that we got now, which will
bite us much more once 32bit dev_ts are actually used.
On Fri, Mar 21, 2003 at 12:03:57AM +0100, Roman Zippel wrote:
> char devices don't have partitions, so you hardly need regions. The
> problem with the tty layer is that the console and the serial devices
> should have different majors.
That;s not exactly true. A tradition major device is nothing but a
region with a fixed size, and we need to get rid of this major/minor
thinking. Converting a dev_t to struct char_device * / struct block_device *
early is the way we wan't to go for that. It helped the block layer
a lot..
On Thu, Mar 20, 2003 at 05:24:55PM -0800, Greg KH wrote:
> On Fri, Mar 21, 2003 at 12:03:57AM +0100, Roman Zippel wrote:
> > I'm unsure how your code will scale. It depends on how that code will be
> > used. If drivers register a lot of devices, your lookup function has to
> > scan a possibly very long list of minor devices and that function is
> > difficult to optimize.
>
> And then we grab the BKL :(
You're thinking the wrnog way around. Locking reduction / splitting is
trivial, getting the algorithms right is the hard part. Getting rid
of BKL in chardev open is a matter of simple search and replace and I expect
it to happen before 2.6.
> There are a number of char drivers that have "regions". The tty layer
> support them, and the usb core supports them as two examples. I'm sure
> there are others. Personally, I like the symmetry with the block device
> function the way Andries did it.
No, Andries did not do it symmetric to the block devices.
> > Even for block devices blk_register_region() is not the preferred
> > interface, you should use alloc_disk/add_disk instead. This will make it
> > easier to assign dynamic device numbers later.
>
> True, but dynamic device numbers can be built on top of the *_region()
> calls as it is today. Anyway, dynamic numbers are for 2.7 :)
Dynamic numbers are there today and have been for ages.
> No, I don't see /proc/misc going away due to the driver model,
I do see it going away. Dito for /proc/devices. They already are
inaccurate for those midlevel drivers that partition their major number
themselves and having the ranges properly exported like it's already
done for block devices is the proper replacement. Yes, this breaks
userspace and we'll have to live with it.
Hi,
On Sun, 23 Mar 2003, Greg KH wrote:
> > There are a few options:
> > 1. Drivers can implement that themselves:
>
> Yeah, but as I know, it's a big pain in the butt. Let's make it easy to
> do this, don't make writing a driver tougher than it has to be (it's
> already much harder than it used to be.) Andries's patch makes it easy,
> which is a good thing in my book.
Andries' patch doesn't help at all and only makes things worse. Only very
few drivers should have to deal with the major/minor business. Most
drivers should just do add_serial_device/add_tape_device/... and these
function will do the right thing (e.g. announce the new device via sysfs).
> > register_usb_device(...)
> > {
> > ...
> > cdev = cdget(dev);
> > down(&cdev->sem);
> > if (cdev->fops)
> > ...;
> > cdev->fops = fops;
> > up(&cdev->sem);
> > }
> > (see the misc driver again for a detailed example.)
>
> Look at drivers/usb/core/file.c::usb_open(), it does much the same
> thing. Well, functionally the same, not identical in any way :)
No, with my patch usb_open() and usb_minors[] could go away completely,
you just setup everything in usb_register_dev() and the rest is done for
you.
With Andries' patch you will not get rid of this.
bye, Roman
Hi,
On Sun, 23 Mar 2003, Christoph Hellwig wrote:
> That;s not exactly true. A tradition major device is nothing but a
> region with a fixed size, and we need to get rid of this major/minor
> thinking. Converting a dev_t to struct char_device * / struct block_device *
> early is the way we wan't to go for that. It helped the block layer
> a lot..
I basically agree, I only want to note that the block layer only has to
deal with disks, where we have different types of character devices. So
having one major per serial/tape/tty/... devices might not be necessary,
but it could makes things bit easier. In any case the actual drivers
should not see any of this.
bye, Roman
On Sun, Mar 23, 2003 at 04:05:24PM +0100, Roman Zippel wrote:
> > Yeah, but as I know, it's a big pain in the butt. Let's make it easy to
> > do this, don't make writing a driver tougher than it has to be (it's
> > already much harder than it used to be.) Andries's patch makes it easy,
> > which is a good thing in my book.
>
> Andries' patch doesn't help at all and only makes things worse. Only very
> few drivers should have to deal with the major/minor business. Most
> drivers should just do add_serial_device/add_tape_device/... and these
> function will do the right thing (e.g. announce the new device via sysfs).
Yupp. The add_serial_device/add_tape_device/ would also have the benefit
that they could keept track of devfs like the gendisk handling when
GENHD_FL_DEVFS is set by the drivers - lots of cruft can be remove from
drivers and midlayers incrementally.