2001-12-27 08:43:36

by Anders Gustafsson

[permalink] [raw]
Subject: lvm in 2.5.1


Hi Jens and other kernelhackers,

I'm now running 2.5.1 with lvm. The following patch makes some minor changes
for bio-support and removes allocation of a lv_t on the stack, which made
the stack overflow and gave me something to spend my last 24 hours
debugging.

I dont garantee that this will trash your disks, but probably it will.
It works for me, running my /var and /home on lvm.

Merry Christmas and A Happy New Year

//anders/g

diff -ru linux-2.5.1-vanilj/drivers/md/lvm.c linux-2.5.1/drivers/md/lvm.c
--- linux-2.5.1-vanilj/drivers/md/lvm.c Thu Dec 27 08:28:39 2001
+++ linux-2.5.1/drivers/md/lvm.c Thu Dec 27 08:03:10 2001
@@ -179,6 +179,9 @@
* (Jens Axboe)
* - Defer writes to an extent that is being moved [JT + AD]
* 28/05/2001 - implemented missing BLKSSZGET ioctl [AD]
+ * 28/12/2001 - buffer_head -> bio
+ * removed huge allocation of a lv_t on stack
+ * (Anders Gusafsson)
*
*/

@@ -731,7 +734,7 @@

case PV_FLUSH:
/* physical volume buffer flush/invalidate */
- return lvm_do_pv_flush(arg);
+ return lvm_do_pv_flush(arg);


default:
@@ -1043,7 +1046,7 @@

memset(&bio,0,sizeof(bio));
bio.bi_dev = inode->i_rdev;
- bio.bi_io_vec.bv_len = lvm_get_blksize(bio.bi_dev);
+ bio.bi_size = lvm_get_blksize(bio.bi_dev); /* NEEDED by bio_sectors */
bio.bi_sector = block * bio_sectors(&bio);
bio.bi_rw = READ;
if ((err=lvm_map(&bio)) < 0) {
@@ -1119,18 +1122,18 @@
return 0;
}

-static int lvm_map(struct bio *bh)
+static int lvm_map(struct bio *bi)
{
- int minor = MINOR(bh->bi_dev);
+ int minor = MINOR(bi->bi_dev);
ulong index;
ulong pe_start;
- ulong size = bio_sectors(bh);
- ulong rsector_org = bh->bi_sector;
+ ulong size = bio_sectors(bi);
+ ulong rsector_org = bi->bi_sector;
ulong rsector_map;
kdev_t rdev_map;
vg_t *vg_this = vg[VG_BLK(minor)];
lv_t *lv = vg_this->lv[LV_BLK(minor)];
- int rw = bio_data_dir(bh);
+ int rw = bio_data_dir(bi);


down_read(&lv->lv_lock);
@@ -1151,7 +1154,7 @@

P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
lvm_name, minor,
- kdevname(bh->bi_dev),
+ kdevname(bi->bi_dev),
rsector_org, size);

if (rsector_org + size > lv->lv_size) {
@@ -1205,7 +1208,7 @@
* we need to queue this request, because this is in the fast path.
*/
if (rw == WRITE || rw == WRITEA) {
- if(_defer_extent(bh, rw, rdev_map,
+ if(_defer_extent(bi, rw, rdev_map,
rsector_map, vg_this->pe_size)) {

up_read(&lv->lv_lock);
@@ -1246,13 +1249,13 @@
}

out:
- bh->bi_dev = rdev_map;
- bh->bi_sector = rsector_map;
+ bi->bi_dev = rdev_map;
+ bi->bi_sector = rsector_map;
up_read(&lv->lv_lock);
return 1;

bad:
- bio_io_error(bh);
+ bio_io_error(bi);
up_read(&lv->lv_lock);
return -1;
} /* lvm_map() */
@@ -1436,9 +1439,9 @@
{
int ret = 0;
ulong l, ls = 0, p, size;
- lv_t lv;
vg_t *vg_ptr;
lv_t **snap_lv_ptr;
+ lv_t *tmplv;

if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
printk(KERN_CRIT
@@ -1446,6 +1449,7 @@
lvm_name, __LINE__);
return -ENOMEM;
}
+
/* get the volume group structure */
if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
@@ -1454,6 +1458,8 @@
return -EFAULT;
}

+
+
/* VG_CREATE now uses minor number in VG structure */
if (minor == -1) minor = vg_ptr->vg_number;

@@ -1513,19 +1519,27 @@
}
memset(snap_lv_ptr, 0, size);

+ if ((tmplv = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {
+ printk(KERN_CRIT
+ "%s -- VG_CREATE: kmalloc error LV at line %d\n",
+ lvm_name, __LINE__);
+ return -ENOMEM;
+ }
+
/* get the logical volume structures */
vg_ptr->lv_cur = 0;
for (l = 0; l < vg_ptr->lv_max; l++) {
lv_t *lvp;
+
/* user space address */
if ((lvp = vg_ptr->lv[l]) != NULL) {
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
lvp, sizeof(lv_t));
lvm_do_vg_remove(minor);
return -EFAULT;
}
- if ( lv.lv_access & LV_SNAPSHOT) {
+ if ( tmplv->lv_access & LV_SNAPSHOT) {
snap_lv_ptr[ls] = lvp;
vg_ptr->lv[l] = NULL;
ls++;
@@ -1533,7 +1547,7 @@
}
vg_ptr->lv[l] = NULL;
/* only create original logical volumes for now */
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
return -EFAULT;
}
@@ -1544,18 +1558,18 @@
in place during first path above */
for (l = 0; l < ls; l++) {
lv_t *lvp = snap_lv_ptr[l];
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
lvm_do_vg_remove(minor);
return -EFAULT;
}
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
return -EFAULT;
}
}

vfree(snap_lv_ptr);
-
+ kfree(tmplv);
vg_count++;


@@ -1566,7 +1580,6 @@

return 0;
} /* lvm_do_vg_create() */
-

/*
* character device support function VGDA extend


2001-12-27 09:36:29

by Andrew Morton

[permalink] [raw]
Subject: Re: lvm in 2.5.1

[email protected] wrote:
>
> Hi Jens and other kernelhackers,
>
> I'm now running 2.5.1 with lvm. The following patch makes some minor changes
> for bio-support and removes allocation of a lv_t on the stack, which made
> the stack overflow and gave me something to spend my last 24 hours
> debugging.

That's a worry, because an lv_t is only 420 bytes. If that's triggering
a stack overflow then we're way too close. Think interrupts....

There must be other sources of stack bloat.

lvm_chr_ioctl() calls lvm_do_vg_create(), and it has has another lv_t
on the stack. That's 840 bytes - still not enough. Maybe lvm_do_vg_create()
is calling something which uses lots of stack? Can't see it. Odd.

> ...
> /* get the logical volume structures */
> vg_ptr->lv_cur = 0;
> for (l = 0; l < vg_ptr->lv_max; l++) {
> lv_t *lvp;
> +
> /* user space address */
> if ((lvp = vg_ptr->lv[l]) != NULL) {
> - if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
> + if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
> P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
> lvp, sizeof(lv_t));
> lvm_do_vg_remove(minor);
> return -EFAULT;

Seems that in various places here you've forgotten to free the lv_t storage
on error paths?

Which means you must painfully go through and put a kfree() in front
of each and every `return' statement, hope you don't miss any, hope
you don't break anything, and hope that people don't forget to do this
next time they change the code. Or you need to go through and restructure
the code so you need only one kfree.

Which is a fine and shining example of why one should never, ever, ever,
ever put more than one `return' statement in a C function. Dammit. It
is the single worst feature of C and should not be used.

BTW, it appears that lvm_do_vg_create() leaks the vmalloced snap_lv_ptr
on an error path...

-

2001-12-27 12:25:54

by Anders Gustafsson

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27, 2001 at 01:33:15AM -0800, Andrew Morton wrote:

> > I'm now running 2.5.1 with lvm. The following patch makes some minor changes
> > for bio-support and removes allocation of a lv_t on the stack, which made
> > the stack overflow and gave me something to spend my last 24 hours
> > debugging.
>
> That's a worry, because an lv_t is only 420 bytes. If that's triggering
> a stack overflow then we're way too close. Think interrupts....
>
> There must be other sources of stack bloat.
>
> lvm_chr_ioctl() calls lvm_do_vg_create(), and it has has another lv_t
> on the stack. That's 840 bytes - still not enough. Maybe lvm_do_vg_create()
> is calling something which uses lots of stack? Can't see it. Odd.

did a calltrace in lvm_do_vg_create and it contains 48 symbols between

Trace; c013c786 <sys_ioctl+16a/184>

and

Trace; c01a78f8 <lvm_chr_ioctl+2b8/670>

which looks like they comes from an old system call as it just contiains
lots of unrelated symbols. That would suggest that lvm_char_ioctl allcates a
big object on the stack that it havn't touched?

Removing these symbols makes the calltrace look like:

>>EIP; c01a8cf2 <lvm_do_vg_create+22/498> <=====
Trace; c01a78f8 <lvm_chr_ioctl+2b8/670>
Trace; c013c786 <sys_ioctl+16a/184>
Trace; c010856a <system_call+32/38>

not many symbols that could allcate stackspace, lets have a look how they
allocates:

0x938 <lvm_blk_ioctl>: sub $0x8,%esp

not much... lets have a look at lvm_do_vg_create then:

with my patch:
0x1830 <lvm_do_vg_create>: sub $0x20,%esp

without my patch:
0x1830 <lvm_do_vg_create>: sub $0x11c4,%esp

whoa! 0x11c4

thats a LOT! much more than sizeof(lv_t)

> Seems that in various places here you've forgotten to free the lv_t storage
> on error paths?

of course, how could i forget.. will put together a new patch with that
fixed in a minute.

--

//anders/g

2001-12-27 13:57:29

by Anders Gustafsson

[permalink] [raw]
Subject: Re: lvm in 2.5.1

> > Seems that in various places here you've forgotten to free the lv_t storage
> > on error paths?
>
> of course, how could i forget.. will put together a new patch with that
> fixed in a minute.

here comes an updated patch...

//anders/g

diff -ru linux-2.5.2-pre2/drivers/md/lvm.c linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c
--- linux-2.5.2-pre2/drivers/md/lvm.c Thu Dec 27 08:28:39 2001
+++ linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c Thu Dec 27 14:08:40 2001
@@ -179,6 +179,9 @@
* (Jens Axboe)
* - Defer writes to an extent that is being moved [JT + AD]
* 28/05/2001 - implemented missing BLKSSZGET ioctl [AD]
+ * 28/12/2001 - buffer_head -> bio
+ * removed huge allocation of a lv_t on stack
+ * (Anders Gustafsson)
*
*/

@@ -1043,7 +1046,7 @@

memset(&bio,0,sizeof(bio));
bio.bi_dev = inode->i_rdev;
- bio.bi_io_vec.bv_len = lvm_get_blksize(bio.bi_dev);
+ bio.bi_size = lvm_get_blksize(bio.bi_dev); /* NEEDED by bio_sectors */
bio.bi_sector = block * bio_sectors(&bio);
bio.bi_rw = READ;
if ((err=lvm_map(&bio)) < 0) {
@@ -1119,18 +1122,18 @@
return 0;
}

-static int lvm_map(struct bio *bh)
+static int lvm_map(struct bio *bi)
{
- int minor = MINOR(bh->bi_dev);
+ int minor = MINOR(bi->bi_dev);
ulong index;
ulong pe_start;
- ulong size = bio_sectors(bh);
- ulong rsector_org = bh->bi_sector;
+ ulong size = bio_sectors(bi);
+ ulong rsector_org = bi->bi_sector;
ulong rsector_map;
kdev_t rdev_map;
vg_t *vg_this = vg[VG_BLK(minor)];
lv_t *lv = vg_this->lv[LV_BLK(minor)];
- int rw = bio_data_dir(bh);
+ int rw = bio_data_dir(bi);


down_read(&lv->lv_lock);
@@ -1151,7 +1154,7 @@

P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
lvm_name, minor,
- kdevname(bh->bi_dev),
+ kdevname(bi->bi_dev),
rsector_org, size);

if (rsector_org + size > lv->lv_size) {
@@ -1205,7 +1208,7 @@
* we need to queue this request, because this is in the fast path.
*/
if (rw == WRITE || rw == WRITEA) {
- if(_defer_extent(bh, rw, rdev_map,
+ if(_defer_extent(bi, rw, rdev_map,
rsector_map, vg_this->pe_size)) {

up_read(&lv->lv_lock);
@@ -1246,13 +1249,13 @@
}

out:
- bh->bi_dev = rdev_map;
- bh->bi_sector = rsector_map;
+ bi->bi_dev = rdev_map;
+ bi->bi_sector = rsector_map;
up_read(&lv->lv_lock);
return 1;

bad:
- bio_io_error(bh);
+ bio_io_error(bi);
up_read(&lv->lv_lock);
return -1;
} /* lvm_map() */
@@ -1436,9 +1439,9 @@
{
int ret = 0;
ulong l, ls = 0, p, size;
- lv_t lv;
vg_t *vg_ptr;
lv_t **snap_lv_ptr;
+ lv_t *tmplv;

if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
printk(KERN_CRIT
@@ -1446,6 +1449,7 @@
lvm_name, __LINE__);
return -ENOMEM;
}
+
/* get the volume group structure */
if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
@@ -1454,6 +1458,8 @@
return -EFAULT;
}

+
+
/* VG_CREATE now uses minor number in VG structure */
if (minor == -1) minor = vg_ptr->vg_number;

@@ -1513,19 +1519,30 @@
}
memset(snap_lv_ptr, 0, size);

+ if ((tmplv = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {
+ printk(KERN_CRIT
+ "%s -- VG_CREATE: kmalloc error LV at line %d\n",
+ lvm_name, __LINE__);
+ vfree(snap_lv_ptr);
+ return -ENOMEM;
+ }
+
/* get the logical volume structures */
vg_ptr->lv_cur = 0;
for (l = 0; l < vg_ptr->lv_max; l++) {
lv_t *lvp;
+
/* user space address */
if ((lvp = vg_ptr->lv[l]) != NULL) {
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
lvp, sizeof(lv_t));
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
- if ( lv.lv_access & LV_SNAPSHOT) {
+ if ( tmplv->lv_access & LV_SNAPSHOT) {
snap_lv_ptr[ls] = lvp;
vg_ptr->lv[l] = NULL;
ls++;
@@ -1533,8 +1550,10 @@
}
vg_ptr->lv[l] = NULL;
/* only create original logical volumes for now */
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
}
@@ -1544,18 +1563,22 @@
in place during first path above */
for (l = 0; l < ls; l++) {
lv_t *lvp = snap_lv_ptr[l];
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
}

vfree(snap_lv_ptr);
-
+ kfree(tmplv);
vg_count++;


2001-12-27 15:20:50

by Jens Axboe

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27 2001, [email protected] wrote:
> - int rw = bio_data_dir(bh);
> + int rw = bio_data_dir(bi);
>
>
> down_read(&lv->lv_lock);
> @@ -1151,7 +1154,7 @@
>
> P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
> lvm_name, minor,
> - kdevname(bh->bi_dev),
> + kdevname(bi->bi_dev),
> rsector_org, size);
>
> if (rsector_org + size > lv->lv_size) {
> @@ -1205,7 +1208,7 @@
> * we need to queue this request, because this is in the fast path.
> */
> if (rw == WRITE || rw == WRITEA) {
> - if(_defer_extent(bh, rw, rdev_map,
> + if(_defer_extent(bi, rw, rdev_map,
> rsector_map, vg_this->pe_size)) {

You are tossing out read-ahead info here, you want to use bio_rw and not
bio_data_dir. The former will pass back xA bits too, while bio_data_dir
is strictly the data direction (strangely :-)

--
Jens Axboe

2001-12-27 16:03:12

by Anders Gustafsson

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27, 2001 at 04:20:19PM +0100, Jens Axboe wrote:
> You are tossing out read-ahead info here, you want to use bio_rw and not
> bio_data_dir. The former will pass back xA bits too, while bio_data_dir
> is strictly the data direction (strangely :-)

you mean like this?

--

//anders/g

diff -ru linux-2.5.2-pre2/drivers/md/lvm.c linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c
--- linux-2.5.2-pre2/drivers/md/lvm.c Thu Dec 27 08:28:39 2001
+++ linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c Thu Dec 27 16:43:52 2001
@@ -179,6 +179,9 @@
* (Jens Axboe)
* - Defer writes to an extent that is being moved [JT + AD]
* 28/05/2001 - implemented missing BLKSSZGET ioctl [AD]
+ * 28/12/2001 - buffer_head -> bio
+ * removed huge allocation of a lv_t on stack
+ * (Anders Gustafsson)
*
*/

@@ -1043,7 +1046,7 @@

memset(&bio,0,sizeof(bio));
bio.bi_dev = inode->i_rdev;
- bio.bi_io_vec.bv_len = lvm_get_blksize(bio.bi_dev);
+ bio.bi_size = lvm_get_blksize(bio.bi_dev); /* NEEDED by bio_sectors */
bio.bi_sector = block * bio_sectors(&bio);
bio.bi_rw = READ;
if ((err=lvm_map(&bio)) < 0) {
@@ -1119,19 +1122,18 @@
return 0;
}

-static int lvm_map(struct bio *bh)
+static int lvm_map(struct bio *bi)
{
- int minor = MINOR(bh->bi_dev);
+ int minor = MINOR(bi->bi_dev);
ulong index;
ulong pe_start;
- ulong size = bio_sectors(bh);
- ulong rsector_org = bh->bi_sector;
+ ulong size = bio_sectors(bi);
+ ulong rsector_org = bi->bi_sector;
ulong rsector_map;
kdev_t rdev_map;
vg_t *vg_this = vg[VG_BLK(minor)];
lv_t *lv = vg_this->lv[LV_BLK(minor)];
- int rw = bio_data_dir(bh);
-
+ int rw = bio_rw(bi);

down_read(&lv->lv_lock);
if (!(lv->lv_status & LV_ACTIVE)) {
@@ -1151,7 +1153,7 @@

P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
lvm_name, minor,
- kdevname(bh->bi_dev),
+ kdevname(bi->bi_dev),
rsector_org, size);

if (rsector_org + size > lv->lv_size) {
@@ -1205,7 +1207,7 @@
* we need to queue this request, because this is in the fast path.
*/
if (rw == WRITE || rw == WRITEA) {
- if(_defer_extent(bh, rw, rdev_map,
+ if(_defer_extent(bi, rw, rdev_map,
rsector_map, vg_this->pe_size)) {

up_read(&lv->lv_lock);
@@ -1246,13 +1248,13 @@
}

out:
- bh->bi_dev = rdev_map;
- bh->bi_sector = rsector_map;
+ bi->bi_dev = rdev_map;
+ bi->bi_sector = rsector_map;
up_read(&lv->lv_lock);
return 1;

bad:
- bio_io_error(bh);
+ bio_io_error(bi);
up_read(&lv->lv_lock);
return -1;
} /* lvm_map() */
@@ -1436,9 +1438,9 @@
{
int ret = 0;
ulong l, ls = 0, p, size;
- lv_t lv;
vg_t *vg_ptr;
lv_t **snap_lv_ptr;
+ lv_t *tmplv;

if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
printk(KERN_CRIT
@@ -1446,6 +1448,7 @@
lvm_name, __LINE__);
return -ENOMEM;
}
+
/* get the volume group structure */
if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
@@ -1454,6 +1457,8 @@
return -EFAULT;
}

+
+
/* VG_CREATE now uses minor number in VG structure */
if (minor == -1) minor = vg_ptr->vg_number;

@@ -1513,19 +1518,30 @@
}
memset(snap_lv_ptr, 0, size);

+ if ((tmplv = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {
+ printk(KERN_CRIT
+ "%s -- VG_CREATE: kmalloc error LV at line %d\n",
+ lvm_name, __LINE__);
+ vfree(snap_lv_ptr);
+ return -ENOMEM;
+ }
+
/* get the logical volume structures */
vg_ptr->lv_cur = 0;
for (l = 0; l < vg_ptr->lv_max; l++) {
lv_t *lvp;
+
/* user space address */
if ((lvp = vg_ptr->lv[l]) != NULL) {
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
lvp, sizeof(lv_t));
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
- if ( lv.lv_access & LV_SNAPSHOT) {
+ if ( tmplv->lv_access & LV_SNAPSHOT) {
snap_lv_ptr[ls] = lvp;
vg_ptr->lv[l] = NULL;
ls++;
@@ -1533,8 +1549,10 @@
}
vg_ptr->lv[l] = NULL;
/* only create original logical volumes for now */
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
}
@@ -1544,18 +1562,22 @@
in place during first path above */
for (l = 0; l < ls; l++) {
lv_t *lvp = snap_lv_ptr[l];
- if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
+ if (copy_from_user(tmplv, lvp, sizeof(lv_t)) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
- if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
+ if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
lvm_do_vg_remove(minor);
+ vfree(snap_lv_ptr);
+ kfree(tmplv);
return -EFAULT;
}
}

vfree(snap_lv_ptr);
-
+ kfree(tmplv);
vg_count++;

2001-12-27 16:12:12

by Jens Axboe

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27 2001, [email protected] wrote:
> On Thu, Dec 27, 2001 at 04:20:19PM +0100, Jens Axboe wrote:
> > You are tossing out read-ahead info here, you want to use bio_rw and not
> > bio_data_dir. The former will pass back xA bits too, while bio_data_dir
> > is strictly the data direction (strangely :-)
>
> you mean like this?

yes

--
Jens Axboe

2001-12-27 17:20:46

by Andreas Dilger

[permalink] [raw]
Subject: Re: [lvm-devel] Re: lvm in 2.5.1

On Dec 27, 2001 17:02 +0100, [email protected] wrote:
> you mean like this?
>
> --
>
> //anders/g
>
> diff -ru linux-2.5.2-pre2/drivers/md/lvm.c linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c
> --- linux-2.5.2-pre2/drivers/md/lvm.c Thu Dec 27 08:28:39 2001
> +++ linux-2.5.2-pre2-lvmfix/drivers/md/lvm.c Thu Dec 27 16:43:52 2001
> @@ -1533,8 +1549,10 @@
> }
> vg_ptr->lv[l] = NULL;
> /* only create original logical volumes for now */
> - if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) {
> + if (lvm_do_lv_create(minor, tmplv->lv_name, tmplv) != 0) {
> lvm_do_vg_remove(minor);
> + vfree(snap_lv_ptr);
> + kfree(tmplv);
> return -EFAULT;
> }
> }

How about re-doing this to look more like:

lvm_do_vg_remove(minor);
ret = -EFAULT;
goto exit_lv;
}
}
.
.
.
vg_count++;

MOD_INC_USE_COUNT;

/* let's go active */
vg_ptr->vg_status |= VG_ACTIVE;

exit_lv:
kfree(tmplv);
exit_snap:
vfree(snap_lv_ptr);

return ret;
}

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/

2001-12-27 19:28:43

by Andrew Morton

[permalink] [raw]
Subject: Re: lvm in 2.5.1

[email protected] wrote:
>
> not much... lets have a look at lvm_do_vg_create then:
>
> with my patch:
> 0x1830 <lvm_do_vg_create>: sub $0x20,%esp
>
> without my patch:
> 0x1830 <lvm_do_vg_create>: sub $0x11c4,%esp
>
> whoa! 0x11c4
>
> thats a LOT! much more than sizeof(lv_t)
>

With egcs-1.1.2:

0xc02546c7 <lvm_do_vg_create+3>: sub $0x1d4,%esp

So perhaps we have a compiler problem. Which version of the
compiler are you using? Have you verified that sizeof(lv_t)
is really around 420 bytes in your setup?


-

2001-12-27 19:37:41

by Anders Gustafsson

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27, 2001 at 11:25:39AM -0800, Andrew Morton wrote:
> 0xc02546c7 <lvm_do_vg_create+3>: sub $0x1d4,%esp
>
> So perhaps we have a compiler problem. Which version of the
> compiler are you using? Have you verified that sizeof(lv_t)
> is really around 420 bytes in your setup?

gcc version 2.95.4 20011223 (Debian prerelease)

i didn't check the exact amount. i dont know where the 420 bytes comes from?
but (as Mike Galbraith pointed out) a lv_t contains:

sector_t blocks[LVM_MAX_SECTORS];

with:

#define LVM_MAX_ATOMIC_IO 512
#define LVM_MAX_SECTORS (LVM_MAX_ATOMIC_IO * 2)

and
typedef unsigned long sector_t;

unsigned long beeing 4bytes => the blocks-member of lv_t should then be 4096
by it self...

--

//anders/g

2001-12-27 19:49:56

by Andrew Morton

[permalink] [raw]
Subject: Re: lvm in 2.5.1

[email protected] wrote:
>
> On Thu, Dec 27, 2001 at 11:25:39AM -0800, Andrew Morton wrote:
> > 0xc02546c7 <lvm_do_vg_create+3>: sub $0x1d4,%esp
> >
> > So perhaps we have a compiler problem. Which version of the
> > compiler are you using? Have you verified that sizeof(lv_t)
> > is really around 420 bytes in your setup?
>
> gcc version 2.95.4 20011223 (Debian prerelease)
>
> i didn't check the exact amount. i dont know where the 420 bytes comes from?
> but (as Mike Galbraith pointed out) a lv_t contains:
>
> sector_t blocks[LVM_MAX_SECTORS];
>
> with:
>
> #define LVM_MAX_ATOMIC_IO 512
> #define LVM_MAX_SECTORS (LVM_MAX_ATOMIC_IO * 2)
>
> and
> typedef unsigned long sector_t;
>
> unsigned long beeing 4bytes => the blocks-member of lv_t should then be 4096
> by it self...

Ah. Right you are. I was looking at the 2.4.17 source. That array
was added in 2.5.x.

So 2.4.x is OK.

Thanks ;)

-

2001-12-27 20:25:15

by Anders Gustafsson

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27, 2001 at 11:45:02AM -0800, Andrew Morton wrote:

> Ah. Right you are. I was looking at the 2.4.17 source. That array
> was added in 2.5.x.
>
> So 2.4.x is OK.

this means the userspace and kernelspace lv_t differ in size.. so a
copy_to_user(..,sizeof(lv_t)) copies to much data and corrupts userspace..

hmm, enlarging the dummy[200] in the userspace version of lv_t seems to be a
nice quickndirty solution.

--

//anders/g

2001-12-28 16:45:46

by Jan Niehusmann

[permalink] [raw]
Subject: Re: lvm in 2.5.1

On Thu, Dec 27, 2001 at 09:24:51PM +0100, [email protected] wrote:
> hmm, enlarging the dummy[200] in the userspace version of lv_t seems to be a
> nice quickndirty solution.

Please do not change the kernel / userspace interface easily. Past
experience has shown that this leads to significant update problems,
because kernel and userspace tools need to be updated at the same time.

Jan

2001-12-28 20:20:00

by Andreas Dilger

[permalink] [raw]
Subject: Re: [lvm-devel] Re: lvm in 2.5.1

On Dec 28, 2001 17:45 +0100, Jan Niehusmann wrote:
> On Thu, Dec 27, 2001 at 09:24:51PM +0100, [email protected] wrote:
> > hmm, enlarging the dummy[200] in the userspace version of lv_t seems to be a
> > nice quickndirty solution.
>
> Please do not change the kernel / userspace interface easily. Past
> experience has shown that this leads to significant update problems,
> because kernel and userspace tools need to be updated at the same time.

But since this is a 2.5 kernel, now is the time to change it, but you must
_also_ change the IOP version in lvm.h to 12 (not 11!) if you change the
struct sizes.

Sadly, I think the work that was done in the past to support user tools
for multiple IOP versions was dropped from (or never added to) the LVM
build process, so it will not just be a matter of "install the latest
user tools and all will be well", sigh. We had this problem with the
change from IOP6 to IOP10, and had fixed it all up, but we are doomed
to repeat the same problem over again.

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/

2001-12-28 20:43:50

by Anders Gustafsson

[permalink] [raw]
Subject: Re: [lvm-devel] Re: lvm in 2.5.1

On Fri, Dec 28, 2001 at 01:17:57PM -0700, Andreas Dilger wrote:
> On Dec 28, 2001 17:45 +0100, Jan Niehusmann wrote:
> > On Thu, Dec 27, 2001 at 09:24:51PM +0100, [email protected] wrote:
> > > hmm, enlarging the dummy[200] in the userspace version of lv_t seems to be a
> > > nice quickndirty solution.
> >
> > Please do not change the kernel / userspace interface easily. Past
> > experience has shown that this leads to significant update problems,
> > because kernel and userspace tools need to be updated at the same time.
>
> But since this is a 2.5 kernel, now is the time to change it, but you must
> _also_ change the IOP version in lvm.h to 12 (not 11!) if you change the
> struct sizes.
>
> Sadly, I think the work that was done in the past to support user tools
> for multiple IOP versions was dropped from (or never added to) the LVM
> build process, so it will not just be a matter of "install the latest
> user tools and all will be well", sigh. We had this problem with the
> change from IOP6 to IOP10, and had fixed it all up, but we are doomed
> to repeat the same problem over again.

but there is no need to change the kernel-userspace interface. it is just
the __KERNEL__ part that has grown, having different kernel and userspace
lv_t and just converting between them should be enough. this will hopefully
make the interface more stable too. anyhow its broken as it is in 2.5 right
now, with (sizeof(lv_t) in kernel != sizeof(lv_t) in userspace) when the
tools expects it to be.

it would be nice to be able to use the same tools in 2.4 and 2.5 and i don't
see any reason it shouldn't be possible.

i'm thinking about something like this:

userspace uses lv_t which is about 240 bytes, kern space uses its own _huge_
klv_t and the ioctls converts between them. then we can go adding stuff into
the klv_t without worry.. klv_t could begin with the lv_t so converting it
for sending it to user is simply a cast. getting it from user involves
validating it the convertion almost be in place already.

good that we have uml in 2.5 again so i don't have to be afraid to trash my
laptop disk.

--

//anders/g