Hello.
This is a patch set for fixing bugs on partition handling code which
was found when I read loop block device code as well as some cleanups.
The patch 2/5 and 3/5 are real fixes and should go to the stable tree
as Jens suggested on loop patches. Since partition managing code was
introduced by commit d7853d1f8932 ("brd: modify ramdisk device to be
able to manage partitions") all stable releases would need these fixes.
$ git name-rev --tags d7853d1f8932
d7853d1f8932 tags/v2.6.26-rc1~122
As always, any comments are welcomed.
Thanks.
Namhyung Kim (5):
brd: get rid of unused members from struct brd_device
brd: limit 'max_part' module param to DISK_MAX_PARTS
brd: handle on-demand devices correctly
brd: fix comment on initial device creation
brd: export module parameters
drivers/block/brd.c | 42 ++++++++++++++++++++++++++----------------
1 files changed, 26 insertions(+), 16 deletions(-)
--
1.7.5.2
brd_refcnt, brd_offset, brd_sizelimit and brd_blocksize in struct
brd_device seem to be copied from struct loop_device but they're
not used anywhere. Let get rid of them.
Signed-off-by: Namhyung Kim <[email protected]>
---
drivers/block/brd.c | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b7f51e4594f8..bae9a3d4e15b 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -35,10 +35,6 @@
*/
struct brd_device {
int brd_number;
- int brd_refcnt;
- loff_t brd_offset;
- loff_t brd_sizelimit;
- unsigned brd_blocksize;
struct request_queue *brd_queue;
struct gendisk *brd_disk;
--
1.7.5.2
The 'max_part' parameter controls the number of maximum partition
a brd device can have. However if a user specifies very large
value it would exceed the limitation of device minor number and
can cause a kernel panic (or, at least, produce invalid device
nodes in some cases).
On my desktop system, following command kills the kernel. On qemu,
it triggers similar oops but the kernel was alive:
$ sudo modprobe brd max_part=100000
BUG: unable to handle kernel NULL pointer dereference at 0000000000000058
IP: [<ffffffff81110a9a>] sysfs_create_dir+0x2d/0xae
PGD 7af1067 PUD 7b19067 PMD 0
Oops: 0000 [#1] SMP
last sysfs file:
CPU 0
Modules linked in: brd(+)
Pid: 44, comm: insmod Tainted: G W 2.6.39-qemu+ #158 Bochs Bochs
RIP: 0010:[<ffffffff81110a9a>] [<ffffffff81110a9a>] sysfs_create_dir+0x2d/0xae
RSP: 0018:ffff880007b15d78 EFLAGS: 00000286
RAX: ffff880007b05478 RBX: ffff880007a52760 RCX: ffff880007b15dc8
RDX: ffff880007a4f900 RSI: ffff880007b15e48 RDI: ffff880007a52760
RBP: ffff880007b15da8 R08: 0000000000000002 R09: 0000000000000000
R10: ffff880007b15e48 R11: ffff880007b05478 R12: 0000000000000000
R13: ffff880007b05478 R14: 0000000000400920 R15: 0000000000000063
FS: 0000000002160880(0063) GS:ffff880007c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000058 CR3: 0000000007b1c000 CR4: 00000000000006b0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 0000000000000000 DR7: 0000000000000000
Process insmod (pid: 44, threadinfo ffff880007b14000, task ffff880007acb980)
Stack:
ffff880007b15dc8 ffff880007b05478 ffff880007b15da8 00000000fffffffe
ffff880007a52760 ffff880007b05478 ffff880007b15de8 ffffffff81143c0a
0000000000400920 ffff880007a52760 ffff880007b05478 0000000000000000
Call Trace:
[<ffffffff81143c0a>] kobject_add_internal+0xdf/0x1a0
[<ffffffff81143da1>] kobject_add_varg+0x41/0x50
[<ffffffff81143e6b>] kobject_add+0x64/0x66
[<ffffffff8113bbe7>] blk_register_queue+0x5f/0xb8
[<ffffffff81140f72>] add_disk+0xdf/0x289
[<ffffffffa00040df>] brd_init+0xdf/0x1aa [brd]
[<ffffffffa0004000>] ? 0xffffffffa0003fff
[<ffffffffa0004000>] ? 0xffffffffa0003fff
[<ffffffff8100020a>] do_one_initcall+0x7a/0x12e
[<ffffffff8108516c>] sys_init_module+0x9c/0x1dc
[<ffffffff812ff4bb>] system_call_fastpath+0x16/0x1b
Code: 89 e5 41 55 41 54 53 48 89 fb 48 83 ec 18 48 85 ff 75 04 0f 0b eb fe 48 8b 47 18 49 c7 c4 70 1e 4d 81 48 85 c0 74 04 4c 8b 60 30
8b 44 24 58 45 31 ed 0f b6 c4 85 c0 74 0d 48 8b 43 28 48 89
RIP [<ffffffff81110a9a>] sysfs_create_dir+0x2d/0xae
RSP <ffff880007b15d78>
CR2: 0000000000000058
---[ end trace aebb1175ce1f6739 ]---
Signed-off-by: Namhyung Kim <[email protected]>
Cc: Laurent Vivier <[email protected]>
Cc: [email protected]
---
drivers/block/brd.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index bae9a3d4e15b..e9a19d99f928 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -581,6 +581,9 @@ static int __init brd_init(void)
if (max_part > 0)
part_shift = fls(max_part);
+ if ((1UL << part_shift) > DISK_MAX_PARTS)
+ return -EINVAL;
+
if (rd_nr > 1UL << (MINORBITS - part_shift))
return -EINVAL;
--
1.7.5.2
When finding or allocating a ram disk device, brd_probe() did not take
partition numbers into account so that it can result to a different
device. Consider following example (I set CONFIG_BLK_DEV_RAM_COUNT=4
for simplicity) :
$ sudo modprobe brd max_part=15
$ ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2011-05-25 15:41 /dev/ram0
brw-rw---- 1 root disk 1, 16 2011-05-25 15:41 /dev/ram1
brw-rw---- 1 root disk 1, 32 2011-05-25 15:41 /dev/ram2
brw-rw---- 1 root disk 1, 48 2011-05-25 15:41 /dev/ram3
$ sudo mknod /dev/ram4 b 1 64
$ sudo dd if=/dev/zero of=/dev/ram4 bs=4k count=256
256+0 records in
256+0 records out
1048576 bytes (1.0 MB) copied, 0.00215578 s, 486 MB/s
namhyung@leonhard:linux$ ls -l /dev/ram*
brw-rw---- 1 root disk 1, 0 2011-05-25 15:41 /dev/ram0
brw-rw---- 1 root disk 1, 16 2011-05-25 15:41 /dev/ram1
brw-rw---- 1 root disk 1, 32 2011-05-25 15:41 /dev/ram2
brw-rw---- 1 root disk 1, 48 2011-05-25 15:41 /dev/ram3
brw-r--r-- 1 root root 1, 64 2011-05-25 15:45 /dev/ram4
brw-rw---- 1 root disk 1, 1024 2011-05-25 15:44 /dev/ram64
After this patch, /dev/ram4 - instead of /dev/ram64 - was
accessed correctly.
In addition, 'range' passed to blk_register_region() should
include all range of dev_t that RAMDISK_MAJOR can address.
It does not need to be limited by partition numbers unless
'rd_nr' param was specified.
Signed-off-by: Namhyung Kim <[email protected]>
Cc: Laurent Vivier <[email protected]>
Cc: [email protected]
---
drivers/block/brd.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index e9a19d99f928..b1efa8f9ff42 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -548,7 +548,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
struct kobject *kobj;
mutex_lock(&brd_devices_mutex);
- brd = brd_init_one(dev & MINORMASK);
+ brd = brd_init_one(MINOR(dev) >> part_shift);
kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
mutex_unlock(&brd_devices_mutex);
@@ -589,10 +589,10 @@ static int __init brd_init(void)
if (rd_nr) {
nr = rd_nr;
- range = rd_nr;
+ range = rd_nr << part_shift;
} else {
nr = CONFIG_BLK_DEV_RAM_COUNT;
- range = 1UL << (MINORBITS - part_shift);
+ range = 1UL << MINORBITS;
}
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
@@ -631,7 +631,7 @@ static void __exit brd_exit(void)
unsigned long range;
struct brd_device *brd, *next;
- range = rd_nr ? rd_nr : 1UL << (MINORBITS - part_shift);
+ range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS;
list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
brd_del_one(brd);
--
1.7.5.2
If 'rd_nr' param was not specified, 16 (can be adjusted via
CONFIG_BLK_DEV_RAM_COUNT) devices would be created by default
but comment said 1. Fix it.
Signed-off-by: Namhyung Kim <[email protected]>
---
drivers/block/brd.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index b1efa8f9ff42..d904d0da2928 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -571,10 +571,10 @@ static int __init brd_init(void)
*
* (1) if rd_nr is specified, create that many upfront, and this
* also becomes a hard limit.
- * (2) if rd_nr is not specified, create 1 rd device on module
- * load, user can further extend brd device by create dev node
- * themselves and have kernel automatically instantiate actual
- * device on-demand.
+ * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT
+ * (default 16) rd device on module load, user can further
+ * extend brd device by create dev node themselves and have
+ * kernel automatically instantiate actual device on-demand.
*/
part_shift = 0;
--
1.7.5.2
Export 'rd_nr', 'rd_size' and 'max_part' parameters to sysfs so user can
know that how many devices are allowed, how big each device is and how
many partitions are supported. If 'max_part' is 0, it means simply the
device doesn't support partitioning.
Also note that 'max_part' can be adjusted to power of 2 minus 1 form if
needed. User should check this value after the module loading if he/she
want to use that number correctly (i.e. fdisk, mknod, etc.).
Signed-off-by: Namhyung Kim <[email protected]>
Cc: Laurent Vivier <[email protected]>
---
drivers/block/brd.c | 19 +++++++++++++++----
1 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d904d0da2928..dba1c32e1ddf 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -436,11 +436,11 @@ static int rd_nr;
int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
static int max_part;
static int part_shift;
-module_param(rd_nr, int, 0);
+module_param(rd_nr, int, S_IRUGO);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
-module_param(rd_size, int, 0);
+module_param(rd_size, int, S_IRUGO);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
-module_param(max_part, int, 0);
+module_param(max_part, int, S_IRUGO);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
@@ -578,9 +578,20 @@ static int __init brd_init(void)
*/
part_shift = 0;
- if (max_part > 0)
+ if (max_part > 0) {
part_shift = fls(max_part);
+ /*
+ * Adjust max_part according to part_shift as it is exported
+ * to user space so that user can decide correct minor number
+ * if [s]he want to create more devices.
+ *
+ * Note that -1 is required because partition 0 is reserved
+ * for the whole disk.
+ */
+ max_part = (1UL << part_shift) - 1;
+ }
+
if ((1UL << part_shift) > DISK_MAX_PARTS)
return -EINVAL;
--
1.7.5.2
On 2011-05-26 14:55, Namhyung Kim wrote:
> Hello.
>
> This is a patch set for fixing bugs on partition handling code which
> was found when I read loop block device code as well as some cleanups.
>
> The patch 2/5 and 3/5 are real fixes and should go to the stable tree
> as Jens suggested on loop patches. Since partition managing code was
> introduced by commit d7853d1f8932 ("brd: modify ramdisk device to be
> able to manage partitions") all stable releases would need these fixes.
>
> $ git name-rev --tags d7853d1f8932
> d7853d1f8932 tags/v2.6.26-rc1~122
Looks good, applied all 5. Thanks!
--
Jens Axboe