Hi,
Here's a simple patch against 2.5.52 that adds the kobject structure to
struct mapped_device. I did this for two reasons:
- kobject provides proper reference counting for a structure
(not saying that struct mapped_device didn't get this correct,
but it's nice to use the core for these things).
- This lets us tie into the block layer better to describe the
dm devices.
I originally hacked something together that added dm attributes to the
block devices themselves, but that wasn't proper, as it forced the dm
code to dig into the gendisk core too intrusively. With this patch, it
is very easy to make a dm object be a child of the proper struct gendisk
object (which it already is logically, but that representation isn't
exported to userspace yet.) Then, the dm specific attributes of the
gendisk object will show up in sysfs properly, under the associated
block device.
Anyway, Joe, please add this to your set of patches to send on to Linus.
Oh, and why isn't struct mapped_device declared in dm.h? If it was,
dm_get and dm_put could be inlined, along with a few other potential
cleanups.
thanks,
greg k-h
# add kobj support to struct mapped_device
diff -Nru a/drivers/block/genhd.c b/drivers/block/genhd.c
--- a/drivers/block/genhd.c Wed Dec 18 10:39:48 2002
+++ b/drivers/block/genhd.c Wed Dec 18 10:39:48 2002
@@ -475,3 +475,4 @@
EXPORT_SYMBOL(bdev_read_only);
EXPORT_SYMBOL(set_device_ro);
EXPORT_SYMBOL(set_disk_ro);
+EXPORT_SYMBOL(block_subsys);
diff -Nru a/drivers/md/dm.c b/drivers/md/dm.c
--- a/drivers/md/dm.c Wed Dec 18 10:39:48 2002
+++ b/drivers/md/dm.c Wed Dec 18 10:39:48 2002
@@ -41,7 +41,7 @@
struct mapped_device {
struct rw_semaphore lock;
- atomic_t holders;
+ struct kobject kobj;
unsigned long flags;
@@ -60,11 +60,14 @@
*/
struct dm_table *map;
};
+#define to_md(obj) container_of(obj, struct mapped_device, kobj)
#define MIN_IOS 256
static kmem_cache_t *_io_cache;
static mempool_t *_io_pool;
+struct subsystem dm_subsys;
+
static __init int local_init(void)
{
int r;
@@ -94,6 +97,7 @@
if (!_major)
_major = r;
+ subsystem_register(&dm_subsys);
return 0;
}
@@ -106,7 +110,8 @@
DMERR("devfs_unregister_blkdev failed");
_major = 0;
-
+ subsystem_unregister(&dm_subsys);
+
DMINFO("cleaned up");
}
@@ -553,7 +558,8 @@
DMWARN("allocating minor %d.", minor);
memset(md, 0, sizeof(*md));
init_rwsem(&md->lock);
- atomic_set(&md->holders, 1);
+ md->kobj.subsys = &dm_subsys;
+ kobject_init(&md->kobj);
md->queue.queuedata = md;
blk_queue_make_request(&md->queue, dm_request);
@@ -637,17 +643,30 @@
void dm_get(struct mapped_device *md)
{
- atomic_inc(&md->holders);
+ kobject_get(&md->kobj);
}
void dm_put(struct mapped_device *md)
{
- if (atomic_dec_and_test(&md->holders)) {
- DMWARN("destroying md");
- __unbind(md);
- free_dev(md);
- }
+ kobject_put(&md->kobj);
+}
+
+static void dm_release(struct kobject *kobj)
+{
+ struct mapped_device *md = to_md(kobj);
+
+ DMWARN("destroying md");
+ __unbind(md);
+ free_dev(md);
}
+
+extern struct subsystem block_subsys;
+
+struct subsystem dm_subsys = {
+ .kobj = { .name = "dm", .parent = &block_subsys.kobj },
+ .release = dm_release,
+};
+
/*
* Requeue the deferred bios by calling generic_make_request.
On Wed, Dec 18, 2002 at 10:43:07AM -0800, Greg KH wrote:
>
> Here's a simple patch against 2.5.52 that adds the kobject structure to
> struct mapped_device.
Sorry, that patch didn't apply against the latest 2.5-bk tree. Here's
an updated version.
thanks,
greg k-h
===== drivers/block/genhd.c 1.61 vs edited =====
--- 1.61/drivers/block/genhd.c Wed Dec 4 16:07:17 2002
+++ edited/drivers/block/genhd.c Wed Dec 18 10:53:26 2002
@@ -475,3 +475,4 @@
EXPORT_SYMBOL(bdev_read_only);
EXPORT_SYMBOL(set_device_ro);
EXPORT_SYMBOL(set_disk_ro);
+EXPORT_SYMBOL(block_subsys);
===== drivers/md/dm.c 1.14 vs edited =====
--- 1.14/drivers/md/dm.c Mon Dec 16 01:42:31 2002
+++ edited/drivers/md/dm.c Wed Dec 18 10:54:10 2002
@@ -40,7 +40,7 @@
struct mapped_device {
struct rw_semaphore lock;
- atomic_t holders;
+ struct kobject kobj;
unsigned long flags;
@@ -65,9 +65,13 @@
mempool_t *io_pool;
};
+#define to_md(obj) container_of(obj, struct mapped_device, kobj)
+
#define MIN_IOS 256
static kmem_cache_t *_io_cache;
+struct subsystem dm_subsys;
+
static __init int local_init(void)
{
int r;
@@ -89,6 +93,7 @@
if (!_major)
_major = r;
+ subsystem_register(&dm_subsys);
return 0;
}
@@ -100,7 +105,8 @@
DMERR("devfs_unregister_blkdev failed");
_major = 0;
-
+ subsystem_unregister(&dm_subsys);
+
DMINFO("cleaned up");
}
@@ -603,7 +609,8 @@
DMWARN("allocating minor %d.", minor);
memset(md, 0, sizeof(*md));
init_rwsem(&md->lock);
- atomic_set(&md->holders, 1);
+ md->kobj.subsys = &dm_subsys;
+ kobject_init(&md->kobj);
md->queue.queuedata = md;
blk_queue_make_request(&md->queue, dm_request);
@@ -696,17 +703,30 @@
void dm_get(struct mapped_device *md)
{
- atomic_inc(&md->holders);
+ kobject_get(&md->kobj);
}
void dm_put(struct mapped_device *md)
{
- if (atomic_dec_and_test(&md->holders)) {
- DMWARN("destroying md");
- __unbind(md);
- free_dev(md);
- }
+ kobject_put(&md->kobj);
}
+
+static void dm_release(struct kobject *kobj)
+{
+ struct mapped_device *md = to_md(kobj);
+
+ DMWARN("destroying md");
+ __unbind(md);
+ free_dev(md);
+}
+
+extern struct subsystem block_subsys;
+
+struct subsystem dm_subsys = {
+ .kobj = { .name = "dm", .parent = &block_subsys.kobj },
+ .release = dm_release,
+};
+
/*
* Requeue the deferred bios by calling generic_make_request.
Greg,
This looks like patch 1 of many, since it doesn't actually export any
attributes through sysfs yet. Can you please give me more of an idea
of what the attributes are that you want to export ? Are you trying
to move the dmfs functionality into sysfs ?
I won't accept this patch on it's own, but am sure what you are trying
to do is the right thing, so will probably have no objections when the
rest of the patches arrive.
On Wed, Dec 18, 2002 at 10:43:07AM -0800, Greg KH wrote:
> Oh, and why isn't struct mapped_device declared in dm.h? If it was,
> dm_get and dm_put could be inlined, along with a few other potential
> cleanups.
I'm try to keep implementation details out of header files. dm_get()
and dm_put() are not performance critical so I see no need to inline them.
> diff -Nru a/drivers/block/genhd.c b/drivers/block/genhd.c
> --- a/drivers/block/genhd.c Wed Dec 18 10:39:48 2002
> +++ b/drivers/block/genhd.c Wed Dec 18 10:39:48 2002
> @@ -475,3 +475,4 @@
> EXPORT_SYMBOL(bdev_read_only);
> EXPORT_SYMBOL(set_device_ro);
> EXPORT_SYMBOL(set_disk_ro);
> +EXPORT_SYMBOL(block_subsys);
> diff -Nru a/drivers/md/dm.c b/drivers/md/dm.c
> --- a/drivers/md/dm.c Wed Dec 18 10:39:48 2002
> +++ b/drivers/md/dm.c Wed Dec 18 10:39:48 2002
...
> +
> +extern struct subsystem block_subsys;
> +
Please declare this in a suitable header like genhd.h rather than in
dm.c. The above two snippets should then be pushed seperately from
the dm patches.
- Joe
On Thu, Dec 19, 2002 at 10:55:30AM +0000, Joe Thornber wrote:
> Greg,
>
> This looks like patch 1 of many, since it doesn't actually export any
> attributes through sysfs yet.
Correct.
> Can you please give me more of an idea
> of what the attributes are that you want to export ? Are you trying
> to move the dmfs functionality into sysfs ?
Only the parts of dmfs that make sense to :)
Right now every struct gendisk shows up in sysfs under the block/
directory, including every struct mapped_device that is created through
the dm code. Now every mapped_device that already exists in sysfs,
exports the major/minor number (in the dev file), and other gendisk
specific attributes. Why not just add the mapped_device specific
attributes of this gendisk object into the same directory (or one lower
to try to partition things a bit.) That would mean taking the files
that were going to be in dmfs, and placing them into this directory
(like status, suspend, and others). Some of them might have to be split
up into multiple files to keep the "one value per file" rule, but that
shouldn't be very difficult.
Here's an ascii picture which probably makes more sense:
/sys/block/
|-- fd0
| |-- dev
| |-- range
| |-- size
| `-- stat
|-- dm-1
| |-- dev
| |-- dm
| | |-- device0 -> ../../devices/pci0/00:02.5/ide0/0.0
| | |-- device1 -> ../../devices/pci0/00:02.5/ide1/1.0
| | |-- status
| | |-- suspend
| | `-- table
| |-- range
| |-- size
| `-- stat
Look reasonable?
And yes, the deviceX files are symlinks to the struct block_device that
are controlled by this mapped_device, which I think is a easy visual
clue as to what is going on.
I know, this doesn't address the issue of creating these mapped_device
structures in the first place with either an ioctl, or a "special file",
but let's try to export the values and relationships that we already
have. One step at a time...
> I won't accept this patch on it's own, but am sure what you are trying
> to do is the right thing, so will probably have no objections when the
> rest of the patches arrive.
That's fair enough, I'll work on getting the rest of the patches out in
a fairly timely manner, considering the holiday season...
> On Wed, Dec 18, 2002 at 10:43:07AM -0800, Greg KH wrote:
> > Oh, and why isn't struct mapped_device declared in dm.h? If it was,
> > dm_get and dm_put could be inlined, along with a few other potential
> > cleanups.
>
> I'm try to keep implementation details out of header files. dm_get()
> and dm_put() are not performance critical so I see no need to inline them.
Ok. I can place all of the sysfs specific functions in dm.c, just like
drivers/block/genhd.c has, or if we place struct mapped_device into
dm.h, they can live in their own file. Doesn't bother me either way.
thanks,
greg k-h
On Fri, Dec 20, 2002 at 12:31:50AM -0800, Greg KH wrote:
> Here's an ascii picture which probably makes more sense:
> /sys/block/
> |-- fd0
> | |-- dev
> | |-- range
> | |-- size
> | `-- stat
> |-- dm-1
> | |-- dev
> | |-- dm
> | | |-- device0 -> ../../devices/pci0/00:02.5/ide0/0.0
> | | |-- device1 -> ../../devices/pci0/00:02.5/ide1/1.0
> | | |-- status
> | | |-- suspend
> | | `-- table
> | |-- range
> | |-- size
> | `-- stat
>
> Look reasonable?
Yes, it looks promising. Some worries:
i) The 'status' and 'table' files do not contain a single value.
Splitting it up into single values would be ungainly to say the least,
eg.
dm
|-- table
|-- target1
| |-- sector_start
| |-- sector_len
| |-- target_type
| `-- target args
|
|-- target2
|-- sector_start
hmm ... maybe that's not too bad.
ii) If the table files are not split up then we have the problem that
they can be larger than a single page, which sysfs can't handle
(is this still true ?).
iii) We need to be able to poll on the status file so that people can
block until there is a change of status. eg, a snapshot uses up
another 5% of its COW storage, a mirror completes its initial
build, a path fails in the multipath target.
> Ok. I can place all of the sysfs specific functions in dm.c, just like
> drivers/block/genhd.c has, or if we place struct mapped_device into
> dm.h, they can live in their own file. Doesn't bother me either way.
Either put it in dm.c, or define some extra access functions (like
dm_suspended() and dm_kdev()) to get the information you need. I
would prefer the latter, but we can always move things later.
- Joe
On Fri, Dec 20, 2002 at 09:44:47AM +0000, Joe Thornber wrote:
> On Fri, Dec 20, 2002 at 12:31:50AM -0800, Greg KH wrote:
> > Look reasonable?
>
> Yes, it looks promising. Some worries:
>
> i) The 'status' and 'table' files do not contain a single value.
> Splitting it up into single values would be ungainly to say the least,
> eg.
>
> dm
> |-- table
> |-- target1
> | |-- sector_start
> | |-- sector_len
> | |-- target_type
> | `-- target args
> |
> |-- target2
> |-- sector_start
>
> hmm ... maybe that's not too bad.
Yeah, it's not that bad, I'll look into it when I get to those files :)
> ii) If the table files are not split up then we have the problem that
> they can be larger than a single page, which sysfs can't handle
> (is this still true ?).
Yes, I think this is still true. And it's also a good way to enforce
the "one value per file" rule :)
> iii) We need to be able to poll on the status file so that people can
> block until there is a change of status. eg, a snapshot uses up
> another 5% of its COW storage, a mirror completes its initial
> build, a path fails in the multipath target.
I saw that logic. I don't think sysfs can handle poll right now, but we
could modify the last access/modify/change time when the status changes
so stat(2) can be used on the file. This is how usbfs currently
notifies userspace of changes to their files. Would this be acceptable?
> > Ok. I can place all of the sysfs specific functions in dm.c, just like
> > drivers/block/genhd.c has, or if we place struct mapped_device into
> > dm.h, they can live in their own file. Doesn't bother me either way.
>
> Either put it in dm.c, or define some extra access functions (like
> dm_suspended() and dm_kdev()) to get the information you need. I
> would prefer the latter, but we can always move things later.
Ok, will do.
thanks for your comments,
greg k-h
On Fri, Dec 20, 2002 at 09:49:52AM -0800, Greg KH wrote:
> I saw that logic. I don't think sysfs can handle poll right now, but we
> could modify the last access/modify/change time when the status changes
> so stat(2) can be used on the file. This is how usbfs currently
> notifies userspace of changes to their files. Would this be acceptable?
Blech. The nice thing about a blocking operation like poll is that
the process doesn't keep getting rescheduled to check if there is
anything to do.
- Joe