In brd_init(), rd_nr brd_device structures are first allocated
and added to brd_devices; then brd_devices is traversed to add each
brd_device by calling add_disk(). When allocating a brd_device,
disk->first_minor is set to i * max_part. If rd_nr * max_part
is larger than MINORMASK, two different brd_devices may have the same
devt, and then only one of them can be successfully added.
When brd.ko is later removed with rmmod, this causes an oops in brd_exit().
To reproduce, follow these steps:
# modprobe brd rd_nr=3 rd_size=102400 max_part=1048576
# rmmod brd
Then the oops will appear.
Oops log:
[ 726.613722] Call trace:
[ 726.614175] kernfs_find_ns+0x24/0x130
[ 726.614852] kernfs_find_and_get_ns+0x44/0x68
[ 726.615749] sysfs_remove_group+0x38/0xb0
[ 726.616520] blk_trace_remove_sysfs+0x1c/0x28
[ 726.617320] blk_unregister_queue+0x98/0x100
[ 726.618105] del_gendisk+0x144/0x2b8
[ 726.618759] brd_exit+0x68/0x560 [brd]
[ 726.619501] __arm64_sys_delete_module+0x19c/0x2a0
[ 726.620384] el0_svc_common+0x78/0x130
[ 726.621057] el0_svc_handler+0x38/0x78
[ 726.621738] el0_svc+0x8/0xc
[ 726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260)
Here, we add the brd_check_and_reset_par() function to check and limit the max_part parameter.
--
V2->V3: (suggested by Ming Lei)
- clear .minors when running out of consecutive minor space in brd_alloc
- remove limit of rd_nr
V1->V2: add more checks in brd_check_par_valid as suggested by Ming Lei.
Signed-off-by: Zhiqiang Liu <[email protected]>
---
drivers/block/brd.c | 35 ++++++++++++++++++++++++++++-------
1 file changed, 28 insertions(+), 7 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index df8103dd40ac..2295a0bafb5e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -330,16 +330,16 @@ static const struct block_device_operations brd_fops = {
/*
* And now the modules code and kernel interface.
*/
-static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, 0444);
+static unsigned int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
+module_param(rd_nr, uint, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
-static int max_part = 1;
-module_param(max_part, int, 0444);
+static unsigned int max_part = 1;
+module_param(max_part, uint, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
MODULE_LICENSE("GPL");
@@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
if (!disk)
goto out_free_queue;
disk->major = RAMDISK_MAJOR;
- disk->first_minor = i * max_part;
+ /*
+ * Clear .minors when running out of consecutive minor space since
+ * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
+ */
+ if ((i * disk->minors) & ~MINORMASK)
+ disk->minors = 0;
+ else
+ disk->first_minor = i * disk->minors;
disk->fops = &brd_fops;
disk->private_data = brd;
disk->queue = brd->brd_queue;
@@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
return kobj;
}
+static inline void brd_check_and_reset_par(void)
+{
+ if (unlikely(!rd_nr))
+ rd_nr = 1;
+
+ if (unlikely(!max_part))
+ max_part = 1;
+
+ if (max_part > DISK_MAX_PARTS) {
+ pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
+ DISK_MAX_PARTS, DISK_MAX_PARTS);
+ max_part = DISK_MAX_PARTS;
+ }
+}
+
static int __init brd_init(void)
{
struct brd_device *brd, *next;
@@ -491,8 +513,7 @@ static int __init brd_init(void)
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
return -EIO;
- if (unlikely(!max_part))
- max_part = 1;
+ brd_check_and_reset_par();
for (i = 0; i < rd_nr; i++) {
brd = brd_alloc(i);
--
2.19.1
On Tue, Jan 14, 2020 at 07:56:07PM +0800, Zhiqiang Liu wrote:
>
> In brd_init func, rd_nr num of brd_device are firstly allocated
> and add in brd_devices, then brd_devices are traversed to add each
> brd_device by calling add_disk func. When allocating brd_device,
> the disk->first_minor is set to i * max_part, if rd_nr * max_part
> is larger than MINORMASK, two different brd_device may have the same
> devt, then only one of them can be successfully added.
> when rmmod brd.ko, it will cause oops when calling brd_exit.
>
> Follow those steps:
> # modprobe brd rd_nr=3 rd_size=102400 max_part=1048576
> # rmmod brd
> then, the oops will appear.
>
> Oops log:
> [ 726.613722] Call trace:
> [ 726.614175] kernfs_find_ns+0x24/0x130
> [ 726.614852] kernfs_find_and_get_ns+0x44/0x68
> [ 726.615749] sysfs_remove_group+0x38/0xb0
> [ 726.616520] blk_trace_remove_sysfs+0x1c/0x28
> [ 726.617320] blk_unregister_queue+0x98/0x100
> [ 726.618105] del_gendisk+0x144/0x2b8
> [ 726.618759] brd_exit+0x68/0x560 [brd]
> [ 726.619501] __arm64_sys_delete_module+0x19c/0x2a0
> [ 726.620384] el0_svc_common+0x78/0x130
> [ 726.621057] el0_svc_handler+0x38/0x78
> [ 726.621738] el0_svc+0x8/0xc
> [ 726.622259] Code: aa0203f6 aa0103f7 aa1e03e0 d503201f (7940e260)
>
> Here, we add brd_check_and_reset_par func to check and limit max_part par.
>
> --
> V2->V3: (suggested by Ming Lei)
> - clear .minors when running out of consecutive minor space in brd_alloc
> - remove limit of rd_nr
>
> V1->V2: add more checks in brd_check_par_valid as suggested by Ming Lei.
>
> Signed-off-by: Zhiqiang Liu <[email protected]>
> ---
> drivers/block/brd.c | 35 ++++++++++++++++++++++++++++-------
> 1 file changed, 28 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index df8103dd40ac..2295a0bafb5e 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -330,16 +330,16 @@ static const struct block_device_operations brd_fops = {
> /*
> * And now the modules code and kernel interface.
> */
> -static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
> -module_param(rd_nr, int, 0444);
> +static unsigned int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
> +module_param(rd_nr, uint, 0444);
The above change isn't needed.
> MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
>
> unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
> module_param(rd_size, ulong, 0444);
> MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
>
> -static int max_part = 1;
> -module_param(max_part, int, 0444);
> +static unsigned int max_part = 1;
> +module_param(max_part, uint, 0444);
The above change isn't needed.
> MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>
> MODULE_LICENSE("GPL");
> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
> if (!disk)
> goto out_free_queue;
> disk->major = RAMDISK_MAJOR;
> - disk->first_minor = i * max_part;
> + /*
> + * Clear .minors when running out of consecutive minor space since
> + * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
> + */
> + if ((i * disk->minors) & ~MINORMASK)
> + disk->minors = 0;
> + else
> + disk->first_minor = i * disk->minors;
The above looks a bit ugly; a nicer way could be to change this in
brd_alloc():
disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
0 : max_part);
> disk->fops = &brd_fops;
> disk->private_data = brd;
> disk->queue = brd->brd_queue;
> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
> return kobj;
> }
>
> +static inline void brd_check_and_reset_par(void)
> +{
> + if (unlikely(!rd_nr))
> + rd_nr = 1;
A zero rd_nr should work as expected, given that the user can create the dev
file via mknod, and brd_probe() will be called to populate the brd disk/queue
when the disk file is opened.
> +static inline void brd_check_and_reset_par(void)
> +{
> + if (unlikely(!rd_nr))
> + rd_nr = 1;
> +
> + if (unlikely(!max_part))
> + max_part = 1;
Another limit is that 'max_part' needs to divide (1U << MINORBITS) exactly
(i.e. (1U << MINORBITS) % max_part == 0), something like:
max_part = 1UL << fls(max_part);
Thanks,
Ming
On 2020/1/15 10:27, Ming Lei wrote:
>
>> MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
>>
>> unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
>> module_param(rd_size, ulong, 0444);
>> MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
>>
>> -static int max_part = 1;
>> -module_param(max_part, int, 0444);
>> +static unsigned int max_part = 1;
>> +module_param(max_part, uint, 0444);
>
> The above change isn't needed.
Thanks for your suggestion.
I will remove that in v4 patch.
>
>> MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
>>
>> MODULE_LICENSE("GPL");
>> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
>> if (!disk)
>> goto out_free_queue;
>> disk->major = RAMDISK_MAJOR;
>> - disk->first_minor = i * max_part;
>> + /*
>> + * Clear .minors when running out of consecutive minor space since
>> + * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
>> + */
>> + if ((i * disk->minors) & ~MINORMASK)
>> + disk->minors = 0;
>> + else
>> + disk->first_minor = i * disk->minors;
>
> The above looks a bit ugly, one nice way could be to change in
> brd_alloc():
>
> disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
> 0 : max_part);
I will change it as your suggestion.
>
>> disk->fops = &brd_fops;
>> disk->private_data = brd;
>> disk->queue = brd->brd_queue;
>> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
>> return kobj;
>> }
>>
>> +static inline void brd_check_and_reset_par(void)
>> +{
>> + if (unlikely(!rd_nr))
>> + rd_nr = 1;
>
> zero rd_nr should work as expected, given user can create dev file via
> mknod, and brd_probe() will be called for populate brd disk/queue when
> the disk file is opened.
>
>> +static inline void brd_check_and_reset_par(void)
>> +{
>> + if (unlikely(!rd_nr))
>> + rd_nr = 1;
>> +
>> + if (unlikely(!max_part))
>> + max_part = 1;
>
> Another limit is that 'max_part' needs to be divided exactly by (1U <<
> MINORBITS), something like:
>
> max_part = 1UL << fls(max_part)
Do we have to require that 'max_part' divides (1U << MINORBITS) exactly?
As you suggested, when i * max_part is larger than MINORMASK,
we can allocate from extended devt.
Thanks,
Zhiqiang Liu
On Mon, Jan 20, 2020 at 09:14:50PM +0800, Zhiqiang Liu wrote:
>
>
> On 2020/1/15 10:27, Ming Lei wrote:
>
> >
> >> MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
> >>
> >> unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
> >> module_param(rd_size, ulong, 0444);
> >> MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
> >>
> >> -static int max_part = 1;
> >> -module_param(max_part, int, 0444);
> >> +static unsigned int max_part = 1;
> >> +module_param(max_part, uint, 0444);
> >
> > The above change isn't needed.
> Thanks for your suggestion.
> I will remove that in v4 patch.
> >
> >> MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
> >>
> >> MODULE_LICENSE("GPL");
> >> @@ -393,7 +393,14 @@ static struct brd_device *brd_alloc(int i)
> >> if (!disk)
> >> goto out_free_queue;
> >> disk->major = RAMDISK_MAJOR;
> >> - disk->first_minor = i * max_part;
> >> + /*
> >> + * Clear .minors when running out of consecutive minor space since
> >> + * GENHD_FL_EXT_DEVT is set, and we can allocate from extended devt.
> >> + */
> >> + if ((i * disk->minors) & ~MINORMASK)
> >> + disk->minors = 0;
> >> + else
> >> + disk->first_minor = i * disk->minors;
> >
> > The above looks a bit ugly, one nice way could be to change in
> > brd_alloc():
> >
> > disk = brd->brd_disk = alloc_disk(((i * max_part) & ~MINORMASK) ?
> > 0 : max_part);
>
> I will change it as your suggestion.
>
> >
> >> disk->fops = &brd_fops;
> >> disk->private_data = brd;
> >> disk->queue = brd->brd_queue;
> >> @@ -468,6 +475,21 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
> >> return kobj;
> >> }
> >>
> >> +static inline void brd_check_and_reset_par(void)
> >> +{
> >> + if (unlikely(!rd_nr))
> >> + rd_nr = 1;
> >
> > zero rd_nr should work as expected, given user can create dev file via
> > mknod, and brd_probe() will be called for populate brd disk/queue when
> > the disk file is opened.
> >
> >> +static inline void brd_check_and_reset_par(void)
> >> +{
> >> + if (unlikely(!rd_nr))
> >> + rd_nr = 1;
> >> +
> >> + if (unlikely(!max_part))
> >> + max_part = 1;
> >
> > Another limit is that 'max_part' needs to be divided exactly by (1U <<
> > MINORBITS), something like:
> >
> > max_part = 1UL << fls(max_part)
>
> Do we have to limit that 'max_part' needs to be divided exactly by (1U <<
> MINORBITS)? As your suggestion, the i * max_part is larger than MINORMASK,
> we can allocate from extended devt.
Exact division is for reserving the same number of minors for all disks with
RAMDISK_MAJOR; otherwise there is still a chance of getting the same dev_t when
adding partitions.
Extended devt is for covering more disks and is not related to 'max_part'.
Thanks,
Ming
On 2020/1/21 6:58, Ming Lei wrote:
> On Mon, Jan 20, 2020 at 09:14:50PM +0800, Zhiqiang Liu wrote:
>>>> +static inline void brd_check_and_reset_par(void)
>>>> +{
>>>> + if (unlikely(!rd_nr))
>>>> + rd_nr = 1;
>>>> +
>>>> + if (unlikely(!max_part))
>>>> + max_part = 1;
>>>
>>> Another limit is that 'max_part' needs to be divided exactly by (1U <<
>>> MINORBITS), something like:
>>>
>>> max_part = 1UL << fls(max_part)
>>
>> Do we have to limit that 'max_part' needs to be divided exactly by (1U <<
>> MINORBITS)? As your suggestion, the i * max_part is larger than MINORMASK,
>> we can allocate from extended devt.
>
> Exact dividing is for reserving same minors for all disks with
> RAMDISK_MAJOR, otherwise there is still chance to get same dev_t when
> adding partitions.
>
> Extended devt is for covering more disks, not related with 'max_part'.
>
Thank you very much.
I will change that as you said.