2003-06-25 06:37:39

by Lou Langholtz

[permalink] [raw]
Subject: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

diff -urN linux-2.5.73-p5/drivers/block/nbd.c linux-2.5.73-p6/drivers/block/nbd.c
--- linux-2.5.73-p5/drivers/block/nbd.c 2003-06-24 21:44:14.114372240 -0600
+++ linux-2.5.73-p6/drivers/block/nbd.c 2003-06-24 21:43:09.565848344 -0600
@@ -36,6 +36,11 @@
* 03-06-24 Remove unneeded blksize_bits field from nbd_device struct.
* <[email protected]>
* 03-06-24 Cleanup PARANOIA usage & code. <[email protected]>
+ * 03-06-24 Fix resource locking issues with ioctl user interface. Note that
+ * this change is incompatible with existing user tools (nbd-client) and
+ * require them to be updated. Actually, the linux 2.5 block layer redisign
+ * already required existing user tools to be updated to properly set the
+ * correct device size. <[email protected]>
*
* possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
* why not: would need verify_area and friends, would share yet another
@@ -137,21 +142,26 @@
}
#endif /* NDEBUG */

-static void nbd_end_request(struct request *req)
+static void request_end_while_locked(struct request *req)
{
int uptodate = (req->errors == 0) ? 1 : 0;
- request_queue_t *q = req->q;
- unsigned long flags;

dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
req, uptodate? "done": "failed");
+ if (!end_that_request_first(req, uptodate, req->nr_sectors))
+ end_that_request_last(req);
#ifdef PARANOIA
requests_out++;
#endif
+}
+
+static void request_end(struct request *req)
+{
+ unsigned long flags;
+ request_queue_t *q = req->q;
+
spin_lock_irqsave(q->queue_lock, flags);
- if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
- end_that_request_last(req);
- }
+ request_end_while_locked(req);
spin_unlock_irqrestore(q->queue_lock, flags);
}

@@ -239,7 +249,7 @@
return result;
}

-void nbd_send_req(struct nbd_device *lo, struct request *req)
+static void nbd_send_req(struct nbd_device *lo, struct request *req)
{
int result, i, flags;
struct nbd_request request;
@@ -252,13 +262,7 @@
request.len = htonl(size);
memcpy(request.handle, &req, sizeof(req));

- down(&lo->tx_lock);
-
- if (!sock || !lo->sock) {
- printk(KERN_ERR "%s: Attempted send on closed socket\n",
- lo->disk->disk_name);
- goto error_out;
- }
+ BUG_ON(sock == NULL);

dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
lo->disk->disk_name, req,
@@ -297,11 +301,9 @@
}
}
}
- up(&lo->tx_lock);
return;

- error_out:
- up(&lo->tx_lock);
+error_out:
req->errors++;
}

@@ -398,28 +400,15 @@
return req;
}

-void nbd_do_it(struct nbd_device *lo)
+static int nbd_clear_que(struct nbd_device *lo)
{
struct request *req;

-#ifdef PARANOIA
- BUG_ON(lo->magic != LO_MAGIC);
-#endif
- while ((req = nbd_read_stat(lo)) != NULL)
- nbd_end_request(req);
- printk(KERN_NOTICE "%s: req should never be null\n",
- lo->disk->disk_name);
- return;
-}
-
-void nbd_clear_que(struct nbd_device *lo)
-{
- struct request *req;
-
-#ifdef PARANOIA
- BUG_ON(lo->magic != LO_MAGIC);
-#endif
-
+ down(&lo->tx_lock);
+ if (lo->sock) {
+ up(&lo->tx_lock);
+ return -EBUSY;
+ }
do {
req = NULL;
spin_lock(&lo->queue_lock);
@@ -430,16 +419,18 @@
spin_unlock(&lo->queue_lock);
if (req) {
req->errors++;
- nbd_end_request(req);
+ request_end(req);
}
} while (req);
+ up(&lo->tx_lock);
+ return 0;
}

/*
* We always wait for result of write, for now. It would be nice to make it optional
* in future
* if ((req->cmd == WRITE) && (lo->flags & NBD_WRITE_NOCHK))
- * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
+ * { printk( "Warning: Ignoring result!\n"); request_end( req ); }
*/

static void do_nbd_request(request_queue_t * q)
@@ -448,75 +439,69 @@

while ((req = elv_next_request(q)) != NULL) {
struct nbd_device *lo;
+ int notready;

blkdev_dequeue_request(req);
+#ifdef PARANOIA
+ requests_in++;
+#endif
+ req->errors = 0;
dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
req->rq_disk->disk_name, req, req->flags);

if (!(req->flags & REQ_CMD))
- goto error_out;
+ goto fail_request;

lo = req->rq_disk->private_data;
#ifdef PARANOIA
BUG_ON(lo->magic != LO_MAGIC);
#endif
- if (!lo->file) {
- printk(KERN_ERR "%s: Request when not-ready\n",
- lo->disk->disk_name);
- goto error_out;
- }
+
nbd_cmd(req) = NBD_CMD_READ;
if (rq_data_dir(req) == WRITE) {
nbd_cmd(req) = NBD_CMD_WRITE;
if (lo->flags & NBD_READ_ONLY) {
printk(KERN_ERR "%s: Write on read-only\n",
lo->disk->disk_name);
- goto error_out;
+ goto fail_request;
}
}
-#ifdef PARANOIA
- requests_in++;
-#endif

- req->errors = 0;
spin_unlock_irq(q->queue_lock);
-
- spin_lock(&lo->queue_lock);
-
- if (!lo->file) {
- spin_unlock(&lo->queue_lock);
- printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n",
- lo->disk->disk_name);
- req->errors++;
- nbd_end_request(req);
- spin_lock_irq(q->queue_lock);
- continue;
- }
-
- list_add(&req->queuelist, &lo->queue_head);
- spin_unlock(&lo->queue_lock);
-
- nbd_send_req(lo, req);
-
- if (req->errors) {
- printk(KERN_ERR "%s: Request send failed\n",
- lo->disk->disk_name);
+ down(&lo->tx_lock);
+ if (lo->sock) {
+ notready = 0;
spin_lock(&lo->queue_lock);
- list_del_init(&req->queuelist);
+ list_add(&req->queuelist, &lo->queue_head);
spin_unlock(&lo->queue_lock);
- nbd_end_request(req);
- spin_lock_irq(q->queue_lock);
- continue;
+ nbd_send_req(lo, req);
+ if (req->errors) {
+ printk(KERN_ERR "%s: Request send failed\n",
+ lo->disk->disk_name);
+ spin_lock(&lo->queue_lock);
+ list_del_init(&req->queuelist);
+ spin_unlock(&lo->queue_lock);
+ }
}
-
+ else
+ notready = 1;
+ up(&lo->tx_lock);
spin_lock_irq(q->queue_lock);
+ if (notready) {
+ printk(KERN_ERR "%s: Request when not-ready\n",
+ lo->disk->disk_name);
+ req->errors++;
+ request_end_while_locked(req);
+ }
continue;

- error_out:
+fail_request:
+ /*
+ * Fail the request: anyone waiting on a read or write gets
+ * an error and can move on to their close() call.
+ */
req->errors++;
- spin_unlock(q->queue_lock);
- nbd_end_request(req);
- spin_lock(q->queue_lock);
+ request_end_while_locked(req);
}
return;
}
@@ -548,12 +533,86 @@
return 0;
}

+static int nbd_do_it(struct nbd_device *lo, unsigned int fd)
+{
+ struct file *file;
+ struct inode *inode;
+ struct socket *sock;
+ struct request *req;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+ inode = file->f_dentry->d_inode;
+ if (!inode->i_sock) {
+ fput(file);
+ return -ENOTSOCK;
+ }
+ sock = SOCKET_I(inode);
+ if (sock->type != SOCK_STREAM) {
+ fput(file);
+ return -EPROTONOSUPPORT;
+ }
+
+ down(&lo->tx_lock);
+ if (lo->sock) {
+ up(&lo->tx_lock);
+ fput(file);
+ return -EBUSY;
+ }
+ lo->sock = sock;
+ up(&lo->tx_lock);
+
+ /* no need for rx lock since here is the rx... */
+ dprintk(DBG_IOCTL, "%s: nbd_do_it: commencing read loop\n",
+ lo->disk->disk_name);
+ while ((req = nbd_read_stat(lo)) != NULL)
+ request_end(req);
+ dprintk(DBG_IOCTL, "%s: nbd_do_it: read loop finished\n",
+ lo->disk->disk_name);
+
+ down(&lo->tx_lock);
+ lo->sock = NULL;
+ up(&lo->tx_lock);
+
+ fput(file);
+ return lo->harderror;
+}
+
+static int nbd_disconnect(struct nbd_device *lo)
+{
+ int error;
+ struct request sreq;
+
+ printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
+
+ sreq.flags = REQ_SPECIAL;
+ nbd_cmd(&sreq) = NBD_CMD_DISC;
+
+ /*
+ * Set sreq to sane values in case server implementation
+ * fails to check the request type first and also to keep
+ * debugging output cleaner.
+ */
+ sreq.sector = 0;
+ sreq.nr_sectors = 0;
+
+ down(&lo->tx_lock);
+ if (lo->sock) {
+ error = 0;
+ nbd_send_req(lo, &sreq);
+ }
+ else {
+ error = -ENOTCONN;
+ }
+ up(&lo->tx_lock);
+ return error;
+}
+
static int nbd_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
struct nbd_device *lo = inode->i_bdev->bd_disk->private_data;
- int error;
- struct request sreq ;

#ifdef PARANOIA
BUG_ON(lo->magic != LO_MAGIC);
@@ -565,57 +624,15 @@
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
switch (cmd) {
+ case NBD_CLEAR_SOCK: /* deprecated! */
+ case NBD_SET_SOCK: /* deprecated! */
+ printk(KERN_WARNING "%s: %s[%d] called deprecated ioctl %s\n",
+ lo->disk->disk_name,
+ current->comm, current->pid,
+ ioctl_cmd_to_ascii(cmd));
+ return -EINVAL;
case NBD_DISCONNECT:
- printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
- sreq.flags = REQ_SPECIAL;
- nbd_cmd(&sreq) = NBD_CMD_DISC;
- /*
- * Set these to sane values in case server implementation
- * fails to check the request type first and also to keep
- * debugging output cleaner.
- */
- sreq.sector = 0;
- sreq.nr_sectors = 0;
- if (!lo->sock)
- return -EINVAL;
- nbd_send_req(lo, &sreq);
- return 0 ;
-
- case NBD_CLEAR_SOCK:
- nbd_clear_que(lo);
- spin_lock(&lo->queue_lock);
- if (!list_empty(&lo->queue_head)) {
- spin_unlock(&lo->queue_lock);
- printk(KERN_ERR "%s: Some requests are in progress -> can not turn off.\n",
- lo->disk->disk_name);
- return -EBUSY;
- }
- file = lo->file;
- if (!file) {
- spin_unlock(&lo->queue_lock);
- return -EINVAL;
- }
- lo->file = NULL;
- lo->sock = NULL;
- spin_unlock(&lo->queue_lock);
- fput(file);
- return 0;
- case NBD_SET_SOCK:
- if (lo->file)
- return -EBUSY;
- error = -EINVAL;
- file = fget(arg);
- if (file) {
- inode = file->f_dentry->d_inode;
- if (inode->i_sock) {
- lo->file = file;
- lo->sock = SOCKET_I(inode);
- error = 0;
- } else {
- fput(file);
- }
- }
- return error;
+ return nbd_disconnect(lo);
case NBD_SET_BLKSIZE:
if ((arg & (arg-1)) || (arg < 512) || (arg > PAGE_SIZE))
return -EINVAL;
@@ -632,34 +649,9 @@
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
case NBD_DO_IT:
- if (!lo->file)
- return -EINVAL;
- nbd_do_it(lo);
- /* on return tidy up in case we have a signal */
- /* Forcibly shutdown the socket causing all listeners
- * to error
- *
- * FIXME: This code is duplicated from sys_shutdown, but
- * there should be a more generic interface rather than
- * calling socket ops directly here */
- down(&lo->tx_lock);
- printk(KERN_WARNING "%s: shutting down socket\n",
- lo->disk->disk_name);
- lo->sock->ops->shutdown(lo->sock, SEND_SHUTDOWN|RCV_SHUTDOWN);
- lo->sock = NULL;
- up(&lo->tx_lock);
- spin_lock(&lo->queue_lock);
- file = lo->file;
- lo->file = NULL;
- spin_unlock(&lo->queue_lock);
- nbd_clear_que(lo);
- printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
- if (file)
- fput(file);
- return lo->harderror;
+ return nbd_do_it(lo, arg);
case NBD_CLEAR_QUE:
- nbd_clear_que(lo);
- return 0;
+ return nbd_clear_que(lo);
case NBD_PRINT_DEBUG:
#ifdef PARANOIA
printk(KERN_INFO "%s: next = %p, prev = %p. Global: in %d, out %d\n",
@@ -731,7 +723,6 @@
for (i = 0; i < MAX_NBD; i++) {
struct gendisk *disk = nbd_dev[i].disk;
nbd_dev[i].refcnt = 0;
- nbd_dev[i].file = NULL;
#ifdef PARANOIA
nbd_dev[i].magic = LO_MAGIC;
#endif
diff -urN linux-2.5.73-p5/include/linux/nbd.h linux-2.5.73-p6/include/linux/nbd.h
--- linux-2.5.73-p5/include/linux/nbd.h 2003-06-24 21:48:44.697043654 -0600
+++ linux-2.5.73-p6/include/linux/nbd.h 2003-06-24 21:48:29.595035591 -0600
@@ -8,6 +8,7 @@
* 2003/06/24 Louis D. Langholtz <[email protected]>
* Removed unneeded blksize_bits field from nbd_device struct.
* Cleanup PARANOIA usage & code.
+ * Removed unneeded file field from nbd_device struct.
*/

#ifndef LINUX_NBD_H
@@ -42,7 +43,6 @@
#define NBD_READ_ONLY 0x0001
#define NBD_WRITE_NOCHK 0x0002
struct socket * sock;
- struct file * file; /* If == NULL, device is not ready, yet */
#ifdef PARANOIA
int magic; /* FIXME: not if debugging is off */
#endif


Attachments:
nbd-patch6 (11.75 kB)

2003-06-25 07:04:47

by Andrew Morton

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Lou Langholtz <[email protected]> wrote:
>
> That said, this seems like an opportunistic time to further break
> compatibility with the existing nbd-client user tool and do away with
> the problematic components of the ioctl user interface.

Is a suitably modified userspace tool available?

2003-06-25 14:10:14

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Andrew Morton wrote:

>Lou Langholtz <[email protected]> wrote:
>
>
>>That said, this seems like an opportunistic time to further break
>> compatibility with the existing nbd-client user tool and do away with
>> the problematic components of the ioctl user interface.
>>
>>
>
>Is a suitably modified userspace tool available?
>
>
Seems like a pandora's box (and/or catch-22)... A modified userspace
tool is not yet available that I know of. I have a hacked version that
I've been using to test things with of course. But it's not in a very
nice state to release. And then Pavel's distro of the tool is probably
what most people use anyway. I have a seperate distro in part because I
wanted to just play with things at first and thought sharing the sources
might be useful to someone. Etc. Anyways, I wanted to get a feeling
first from the kernel hacking community how important back compatibility
was to them w.r.t. this driver. Since the original jumbo patch also got
criticized against for supporting multiple linux version in the driver
sources (via the LINUX_VERSION macros), seems like there's some split in
what people may prefer. I wanted to generate comments on these specific
changes to solidify what changes would really be needed in the user
space tool before also distributing a changed nbd-client. It would also
help to have something like a version ioctl or something that could be
keyed on to provide the correct support. On the other hand, I'm not sure
that Pavel even realized that the block layer changes impacted the
semantics of the sizing ioctl (such that the user-space tool needs to
re-open the file descriptor to get the proper size in). I didn't realize
this myself even till only a few days ago but I had been wondering all
along why sizing hadn't been working right. Sigh. Just to many other
things to also do. The driver could also fix this sizing issue but
coding the change their feels more like a hack than in the client tool.
On the bright side, Pavel's nbd-client could probably be ifdef'd quite
easily to work-around/fix the sizing issue and also this new proposed
ioctl interface. Let me catch up w/ LKML mail and see what others have
to say so far. I can probably produce a diff for Pavel easily enough
(more easily than for my distro of the same) so that we won't have these
issues anymore. But then full circle, it all depends on how the
community would like to see these issues resolved (ie. maybe they want
the sizing fixed in the driver instead and personally it's a close balance).

2003-06-25 15:23:12

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

diff -urN nbd/nbd-client.c nbd-patched/nbd-client.c
--- nbd/nbd-client.c 2002-03-25 04:44:18.000000000 -0700
+++ nbd-patched/nbd-client.c 2003-06-25 08:57:49.280300439 -0600
@@ -22,6 +22,7 @@
#include <fcntl.h>
#include <syslog.h>
#include <stdlib.h>
+#include <linux/version.h>

#define MY_NAME "nbd_client"
#ifndef __GNUC__
@@ -62,7 +63,7 @@
unsigned long size;
char buf[256] = "\0\0\0\0\0\0\0\0\0";
int blocksize=1024;
- char *hostname;
+ char *hostname, *nbdname;
int swap=0;

logging();
@@ -117,10 +118,11 @@

if (argc==0) goto errmsg;
sock = opennet(hostname, port);
- nbd = open(argv[0], O_RDWR);
+ nbdname = argv[0];
+ nbd = open(nbdname, O_RDWR);
if (nbd < 0)
err("Can not open NBD: %m");
- ++argv; --argc; /* skip device */
+ ++argv; --argc; /* skip device command line argument */

if (argc>1) goto errmsg;
if (argc!=0) swap=1;
@@ -182,10 +184,18 @@
err("Ioctl/1 failed: %m\n");
}
#endif
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ close(nbd);
+ nbd = open(nbdname, O_RDWR);
+ if (nbd < 0)
+ err("Can not re-open NBD: %m");
+#endif

+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74))
ioctl(nbd, NBD_CLEAR_SOCK);
if (ioctl(nbd, NBD_SET_SOCK, sock) < 0)
err("Ioctl/2 failed: %m\n");
+#endif

#ifndef SO_SWAPPING
if (swap)
@@ -202,14 +212,23 @@
if (fork())
exit(0);

+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74))
if (ioctl(nbd, NBD_DO_IT) < 0)
fprintf(stderr, "Kernel call returned: %m");
else
fprintf(stderr, "Kernel call returned.");
+#else
+ if (ioctl(nbd, NBD_DO_IT, sock) < 0)
+ fprintf(stderr, "Kernel call returned: %m");
+ else
+ fprintf(stderr, "Kernel call returned.");
+#endif
printf("Closing: que, ");
ioctl(nbd, NBD_CLEAR_QUE);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74))
printf("sock, ");
ioctl(nbd, NBD_CLEAR_SOCK);
+#endif
printf("done\n");
return 0;
}


Attachments:
nbd-patch6.1 (14.74 kB)
nbd-tools.patch (1.84 kB)
Download all attachments

2003-06-25 15:41:08

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, Jun 25, 2003 at 09:36:50AM -0600, Lou Langholtz wrote:
> I have also attached a patch to Pavel's nbd-2.0 release nbd tools that
> updates the nbd-client to work with linux 2.5 as well as 2.5.74
> (assuming the aforementioned patch 6.1 made it into 2.5.74). Handling is
> switched at compile time however and uses <linux/version.h> to do the
> switching. This will have problems of course if the builder doesn't pay
> close attention to where there header file are coming from or tries to
> run the same binary on a different kernel release. Etc.

That's broken. You must make sure that a binary works with different
kernels or at least make it fail gracefully. Using <linux/version.h>
from userspace is absolutely not acceptable, just don't use kernel headers
at all but a local copy of <linux/nbd.h>.

2003-06-25 17:24:43

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Christoph Hellwig wrote:

>On Wed, Jun 25, 2003 at 09:36:50AM -0600, Lou Langholtz wrote:
>
>
>>I have also attached a patch to Pavel's nbd-2.0 release nbd tools that
>>updates the nbd-client to work with linux 2.5 as well as 2.5.74
>>(assuming the aforementioned patch 6.1 made it into 2.5.74). Handling is
>>switched at compile time however and uses <linux/version.h> to do the
>>switching. This will have problems of course if the builder doesn't pay
>>close attention to where there header file are coming from or tries to
>>run the same binary on a different kernel release. Etc.
>>
>>
>
>That's broken. You must make sure that a binary works with different
>kernels or at least make it fail gracefully. Using <linux/version.h>
>from userspace is absolutely not acceptable, just don't use kernel headers
>at all but a local copy of <linux/nbd.h>.
>
>
Yes. To be fair though, the binary (and the driver too) was broken on
linux 2.5 kernels long before I even proposed any changes to the nbd
driver. I'm trying to fix that. But it's a puzzle that has to have
pieces moved out of the way first. With constraints like making one
patch per fundemental change, it's more of a challenge trying to keep
things in sync with user space. I'd like to see binary (runtime)
compatibility too, but it's a bigger step to implement that. In the
meantime, the hack/patch I submitted to nbd-client seems like a step
forward. At least it works its way around several of the
incompatibilities and lets people find out what other problem may lie
ahead. I just found another problem for example with the disconnect
function in nbd-client that will need to be fixed in order to be able to
unload the module. A future step will be to change the compile time
switching to a runtime switch, but I'm not sold on any one way to
implement this yet. If you have something in mind for this, let me know.
For example, should the ioctls be used to somehow notify the user
process of the differing implementation. Like returning EINVAL for
NBD_SET_SOCK. That'd tell nbd-tool that the nbd driver thinks something
about the ioctl was invalid but not what. I wanted to return EDEPRECATED
instead but I haven't found that errno yet. I could overload an errno
but that seems ugly too. Or the driver could have a NBD_GET_VERSION
ioctl. Is there precedence for that? I haven't come accross it yet.

How would you propose these issues be solved? Keep in touch!! Thanks!!

2003-06-25 17:30:18

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, Jun 25, 2003 at 11:38:43AM -0600, Lou Langholtz wrote:
> Yes. To be fair though, the binary (and the driver too) was broken on
> linux 2.5 kernels long before I even proposed any changes to the nbd
> driver. I'm trying to fix that.

Hey, I didn't say changing the interface is wrong. But if possible
do it in a way that the new userspace can still support the old kernel
driver.

> NBD_SET_SOCK. That'd tell nbd-tool that the nbd driver thinks something
> about the ioctl was invalid but not what. I wanted to return EDEPRECATED
> instead but I haven't found that errno yet. I could overload an errno
> but that seems ugly too. Or the driver could have a NBD_GET_VERSION
> ioctl. Is there precedence for that? I haven't come accross it yet.

That's one choice. At least md and device mapper do that.

2003-06-25 17:35:43

by Paul Clements

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Lou,

a few comments on the patch...


On Wed, 25 Jun 2003, Lou Langholtz wrote:
> Please review and comment...
>
> This is the sixth patch to the NBD driver and it fixes a variety of
> outstanding locking issues with the ioctl user interface. This patch

This patch introduces a locking hierarchy between the lo->tx_lock and
lo->queue_lock. The existing driver does not have this limitation.
I would feel a lot better about this patch if you were to recode it
to avoid this.

Also, I noticed that you've removed the forcible shutdown of the
socket at the end of NBD_DO_IT. Was there a particular reason for
that?


> ... the new NBD_DO_IT style interface
> could be introduced instead as another ioctl completely and these 3
> ioctls could be maintained for backward compatibility for a while
> longer.

It would be really nice if the driver remained (as much as
possible) compatible with the 2.4 version...unless you have
a really good reason to break things... :)

--
Paul

2003-06-25 17:43:26

by Al Viro

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, Jun 25, 2003 at 01:48:16PM -0400, Paul Clements wrote:

> This patch introduces a locking hierarchy between the lo->tx_lock and
> lo->queue_lock. The existing driver does not have this limitation.
> I would feel a lot better about this patch if you were to recode it
> to avoid this.

?????

One of them is a semaphore, another - a spinlock. Of course there always
had been a hierarchy - you can't call down() under a spinlock, the damn
thing can schedule.

2003-06-25 17:45:30

by Pavel Machek

[permalink] [raw]
Subject: Anyone for NBD maintainer [was Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI]

Hi!

> >>That said, this seems like an opportunistic time to further break
> >>compatibility with the existing nbd-client user tool and do away with
> >>the problematic components of the ioctl user interface.
> >>
> >>
> >Is a suitably modified userspace tool available?

> Here is an updated patch 6 (I called it 6.1) that fixes some additional
> locking issues as well as fixing the header file so it can be used
> by

...

BTW Anyone wanting to become nbd maintainer? I'm not much interested
in nbd these days...
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]

2003-06-25 18:02:13

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Christoph Hellwig wrote:

>On Wed, Jun 25, 2003 at 11:38:43AM -0600, Lou Langholtz wrote:
>
>
>>Yes. To be fair though, the binary (and the driver too) was broken on
>>linux 2.5 kernels long before I even proposed any changes to the nbd
>>driver. I'm trying to fix that.
>>
>>
>Hey, I didn't say changing the interface is wrong. But if possible
>do it in a way that the new userspace can still support the old kernel
>driver.
>
Agreed! Will do, but how???

>>NBD_SET_SOCK. That'd tell nbd-tool that the nbd driver thinks something
>>about the ioctl was invalid but not what. I wanted to return EDEPRECATED
>>instead but I haven't found that errno yet. I could overload an errno
>>but that seems ugly too. Or the driver could have a NBD_GET_VERSION
>>ioctl. Is there precedence for that? I haven't come accross it yet.
>>
>>
>
>That's one choice. At least md and device mapper do that.
>
>
Cool! I'll take a look at these. Is this the prefered way then? There's
probably a lot of need for this generally speaking. Thought about using
/proc for this too. And then sysfs is gaining favor so maybe in there?
Any preference?

2003-06-25 18:05:55

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, Jun 25, 2003 at 12:16:10PM -0600, Lou Langholtz wrote:
> >Hey, I didn't say changing the interface is wrong. But if possible
> >do it in a way that the new userspace can still support the old kernel
> >driver.
> >
> Agreed! Will do, but how???

if (detected_new_driver) {
new_code();
} else {
old_code();
}

in the userland app. if structures change in an incompatible way
you need _v1 and _v2 versions of them of course.

And what to use for detected_new_driver? Probably the new ABI version
ioctl..

> Cool! I'll take a look at these. Is this the prefered way then? There's
> probably a lot of need for this generally speaking.

Yeah. And please do like device-mapper with major and minor versions.
major as in completly incompatible and minor as in support old protocol
but has new featues in addition.

> Thought about using
> /proc for this too.

Bad idea :)

> And then sysfs is gaining favor so maybe in there?

Doesn't sound like a fit either. Maybe you could do a small own fs
to control nbd, but I'm not familar enough with the actual API to comment
on this more.

2003-06-25 18:07:43

by Lou Langholtz

[permalink] [raw]
Subject: Re: Anyone for NBD maintainer [was Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI]

Pavel Machek wrote:

>. . .
>BTW Anyone wanting to become nbd maintainer? I'm not much interested
>in nbd these days...
> Pavel
>
>
I'd be honoured too but not sure what exactly that entails. What other
stuff would I have to do besides actively developing it? I won't be able
to do anything on the Internet till Monday either.

2003-06-25 18:43:34

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Paul Clements wrote:

>. . .
>This patch introduces a locking hierarchy between the lo->tx_lock and
>lo->queue_lock. The existing driver does not have this limitation.
>I would feel a lot better about this patch if you were to recode it
>to avoid this.
>
Did Al's statements regarding this make this feel better? The code could
be redone so that the queuelist is added to before the down but then
more often it will have to be removed from since the lo->sock check
wouldn't have been done yet. On the other hand I've been thinking that I
might be able to take advantage of the irq locked condition imposed by
the q->queue_lock and just use nbd_lock to replace q->queue_lock then.
Al and Andrew seem to have a much deeper understanding though for
spinlocking though so I'll defer to there comments on this idea (of
replacing lo->queue_lock by use of nbd_lock). This has the added
attraction of already having nbd_lock locked when in do_nbd_request.

>Also, I noticed that you've removed the forcible shutdown of the
>socket at the end of NBD_DO_IT. Was there a particular reason for
>that?
>
Yes. The forcible shutdown that was put in place (while closing one race
condition) still left a panicable race condition with the nbd-client
tool. I forget exactly what this was at the moment. Suffice it to say
that since the user space tool opened the socket to begin with, it seems
a better design to have the user space tool do the close as well. When
nbd-client exits, it'd effect close of this socket anyway even if
killed. What still needs to be done though, is to have the
implementation of the NBD_DISCONNECT ioctl in the driver wait until
nbd_do_it sets lo->sock back to NULL. That way the NBD_CLEAR_QUE ioctl
can return -EBUSY if lo->sock, while not getting called by nbd-client
till lo->sock == NULL so that the que clear works.

>>... the new NBD_DO_IT style interface
>>could be introduced instead as another ioctl completely and these 3
>>ioctls could be maintained for backward compatibility for a while
>>longer.
>>
>>
>
>It would be really nice if the driver remained (as much as
>possible) compatible with the 2.4 version...unless you have
>a really good reason to break things... :)
>
>
So you'd prefer to have a new ioctl then to do this rolled together
NBD_DO_IT function? Say NBD_RUN or something that takes the sock
argument. That will require the nbd_device structure to maintain that
file pointer again and possibly leave open the races that seem inherent
to having the three seperate ioctl's. Someone else long ago commented in
the driver "possible FIXME: make set_sock / set_blksize / set_size /
do_it one syscall. Why not: would need verify_area and friends, would
share yet another structure with userland". So it seems someone was long
ago realizing there were problems having the three seperate ioctls for
set_sock, do_it, and clear_sock. Of course this doesn't address
set_size, set_blksize, but I don't see them as problematic w.r.t.
locking once set_sock, do_it, and clear_sock are rolled together.

Seems like we're better off deprecating at least the set_sock and
clear_sock ioctls to return some error code and having the nbd-client
tool runtime switch off of something like their return value (or a
release level value as was also suggested). It's maybe more painful in
requiring people to update their nbd-client's but we live with those
kinds of required updates all the time (example mod-utils insmod, rmmod,
from Rusty to boot 2.5 kernels). The benefit will be memory saving and
better kernel stability. I'm more torn between the idea of deprecating
all three ioctls and adding a NBD_RUN versus just deprecating
NBD_SET_SOCK and NBD_CLEAR_SOCK and changing the NBD_DO_IT ioctl to
require the socket descriptor.

Comments?

2003-06-25 19:05:36

by Pavel Machek

[permalink] [raw]
Subject: Re: Anyone for NBD maintainer [was Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI]

Hi!

> >BTW Anyone wanting to become nbd maintainer? I'm not much interested
> >in nbd these days...
> > Pavel
> >
> >
> I'd be honoured too but not sure what exactly that entails. What other
> stuff would I have to do besides actively developing it? I won't be able
> to do anything on the Internet till Monday either.

Well, maintainer gets patches from people and should decide which are
good and which are not...

Anyway if you give me your sf.net name, I can probably add you as a
developer to the nbd.sf.net... but: I'd like you to keep userland code
backward compatible. Ie. new userland code should still work with
2.4.x kernel.
Pavel
--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]

2003-06-25 19:26:52

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Lou Langholtz wrote:

> . . . On the other hand I've been thinking that I might be able to
> take advantage of the irq locked condition imposed by the
> q->queue_lock and just use nbd_lock to replace q->queue_lock then. Al
> and Andrew seem to have a much deeper understanding though for
> spinlocking though so I'll defer to there comments on this idea (of
> replacing lo->queue_lock by use of nbd_lock). This has the added
> attraction of already having nbd_lock locked when in do_nbd_request.. . .

Typo! Above should have read "just use nbd_lock to replace
lo->queue_lock" (another spinlock_t per nbd_device). Anyways... would
using the one nbd_lock to also protect the lo->queue_list work better
than using the queue_lock per nbd_device I'm wondering. According to the
prior discusions about spinlocks this should be better. I don't have a
picture right now of wether that even works or not. Gotta run though,
thanks!


2003-06-25 19:47:33

by Paul Clements

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, 25 Jun 2003, Lou Langholtz wrote:

> Did Al's statements regarding this make this feel better? The code could
> be redone so that the queuelist is added to before the down but then
> more often it will have to be removed from since the lo->sock check

It wasn't that I was worried about someone calling down() under
spinlock, obviously that'd be a problem. However, if the queue_lock were
to be changed back to a semaphore (as it used to be, pre-2.4.16 or so)
this could lead to problems. I guess I was just wondering why the
tx_lock was pulled out of nbd_send_req(). It just seems to make the
code harder to follow with the overlapping locking, the duplicated
checks for sock == NULL, and it also means the tx_lock gets held
(a little bit) longer...


> that since the user space tool opened the socket to begin with, it seems
> a better design to have the user space tool do the close as well.

I agree, but I thought that the shutdown was causing different behavior...

> When
> nbd-client exits, it'd effect close of this socket anyway even if
> killed.

Does the socket close at exit have the same effect as a socket shutdown?
If so, then I guess the shutdown is unnecessary...


> So you'd prefer to have a new ioctl then to do this rolled together
> NBD_DO_IT function? Say NBD_RUN or something that takes the sock

I do like the combined ioctl, as it seems to make the code a little bit
cleaner and safer. But, it would also be nice to maintain compatibility
with the existing userland tools. Maybe if the driver could support both
new and old interfaces (at least for now), then users could gradually
move over to the new interface(s)?

--
Paul

2003-06-25 21:21:48

by Lou Langholtz

[permalink] [raw]
Subject: Re: Anyone for NBD maintainer [was Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI]

Pavel Machek wrote:

>> . . .I'd be honoured too but not sure what exactly that entails. What other
>>stuff would I have to do besides actively developing it? I won't be able
>>to do anything on the Internet till Monday either.
>>
>>
>
>Well, maintainer gets patches from people and should decide which are
>good and which are not...
>
>Anyway if you give me your sf.net name, I can probably add you as a
>developer to the nbd.sf.net... but: I'd like you to keep userland code
>backward compatible. Ie. new userland code should still work with
>2.4.x kernel.
> Pavel
>
>
Okay. Haven't heard anyone say lose the compatibility in favor of the
cleanup so I'll work on the back compatibility more in the driver. Same
with userland tools. I'd better get a sf.net name then apparantly.
Thanks ;-)

2003-06-25 22:03:30

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Paul Clements wrote:

>. . . I guess I was just wondering why the
>tx_lock was pulled out of nbd_send_req(). It just seems to make the
>code harder to follow with the overlapping locking, the duplicated
>checks for sock == NULL, and it also means the tx_lock gets held
>(a little bit) longer...
>
Reviewing the code some more, I'm not sure why I moved the tx_lock out
from nbd_send_req(). Some possible explanations are:

1. nbd_send_req() was big enough without the locking in it.
2. keeps the locking at the same level as the spin_unlock_irq() which
makes lock analysis a little easier. it's also a more consistant
level to have at for consistantly locking accross all the ioctl
handling.
3. the next patch I had done (patch 7) implements a default blocking
strategy and having the lock be outside nbd_send_req made analysis
a little easier again. as in the last reason, the locking then
fell into place more consistantly level wise.
4. in order to have the locking inside nbd_send_req it would seem
it'd help to make nbd_send_req return a value that could be checked.

I'm not convinced by any of my own reasons though. Maybe it should be
inside. There aren't duplicated checks for sock == null except in
different execution paths so it doesn't get checked twice in a row if
that's what you mean. Just larger executable size as any inlining causes.

>>that since the user space tool opened the socket to begin with, it seems
>>a better design to have the user space tool do the close as well.
>>
>>
>
>I agree, but I thought that the shutdown was causing different behavior...
>
>
What behavior would the shutdown cause that close doesn't also cause?

>>When
>>nbd-client exits, it'd effect close of this socket anyway even if
>>killed.
>>
>>
>
>Does the socket close at exit have the same effect as a socket shutdown?
>If so, then I guess the shutdown is unnecessary...
>
I believe so. I thought someone from steeleye put the call in to begin
with just in order to close up a race condition gap and that's better
handled by not having the three seperate ioctls. My understanding is
that the shutdown is analogous to calling the shutdown() system call on
the socket which is just a way to individually shutdown the read side or
write side of the socket no? In anycase, I believe close (once really
finished) has the same net effect.

>>So you'd prefer to have a new ioctl then to do this rolled together
>>NBD_DO_IT function? Say NBD_RUN or something that takes the sock
>>
>>
>
>I do like the combined ioctl, as it seems to make the code a little bit
>cleaner and safer. But, it would also be nice to maintain compatibility
>with the existing userland tools. Maybe if the driver could support both
>new and old interfaces (at least for now), then users could gradually
>move over to the new interface(s)?
>
Agreed then. That's what I'd done in my original jumbo nbd patch
(maintain suport for old and new ioctls). So I'll work on that next
week. In the meantime, the nbd-client tool currently can't correctly set
the size of the device and either that needs to be worked around in the
driver (I'd done that in the original jumbo patch), or the nbd-client
tool needs to be updated (the patch I'd mailed out for nbd-client works
around the sizing issue by re-opening the nbd). To be clear, that's not
something any of the changes that have gone in so far broke nor address.
It's a consequence of how bd_set_size() is called in fs/block_dev.c
do_open(). One of the other drivers (in drivers/block I believe) works
around the problem by updating the info in the inode but it seems kind
of like a hack. Will someone who has worked on the fs/block_dev.c file
recently chime in on this issue of how to properly effect size?

Thanks for your input on this!!!

2003-06-28 16:59:16

by Paul Clements

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

On Wed, 25 Jun 2003, Lou Langholtz wrote:

> Reviewing the code some more, I'm not sure why I moved the tx_lock out
> from nbd_send_req(). Some possible explanations are:
>
> 2. keeps the locking at the same level as the spin_unlock_irq() which
> makes lock analysis a little easier. it's also a more consistant
> level to have at for consistantly locking accross all the ioctl
> handling.

But, unless the locks are overlapping (which they did not used to be) there
really is nothing to analyze in the first place...it's just obviously
correct, right?


> 4. in order to have the locking inside nbd_send_req it would seem
> it'd help to make nbd_send_req return a value that could be checked.

That's not an unreasonable thing to do...it'd be a much more minor change.
I guess I'm just having a hard time seeing the benefit of rearranging
all this code. Unless there's some substantial benefit, it seems wiser
to keep the changes as minimal as possible...after all, the current code
does work.


> week. In the meantime, the nbd-client tool currently can't correctly set
> the size of the device and either that needs to be worked around in the
> driver (I'd done that in the original jumbo patch), or the nbd-client
> tool needs to be updated (the patch I'd mailed out for nbd-client works
> around the sizing issue by re-opening the nbd). To be clear, that's not
> something any of the changes that have gone in so far broke nor address.

I'm in favor of doing the minor change in the driver to maintain
compatibility with existing userland tools. It's a pretty simple fix...

The patch will be coming shortly...

--
Paul

2003-06-28 17:06:43

by Paul Clements

[permalink] [raw]
Subject: [PATCH] nbd: maintain compatibility with existing nbd tools

On Wed, 25 Jun 2003, Lou Langholtz wrote:

> [ ... ] In the meantime, the nbd-client tool currently can't correctly set
> the size of the device and either that needs to be worked around in the
> driver (I'd done that in the original jumbo patch), or the nbd-client
> tool needs to be updated (the patch I'd mailed out for nbd-client works
> around the sizing issue by re-opening the nbd). To be clear, that's not
> something any of the changes that have gone in so far broke nor address.
> It's a consequence of how bd_set_size() is called in fs/block_dev.c
> do_open().

And here's the (tiny) patch for nbd to maintain compatibility with the
existing nbd-client tool. Compiled and tested on a couple machines.
Please apply.

Thanks,
Paul


Attachments:
nbd_2_5_compat.diff (795.00 B)
nbd patch

2003-06-29 18:28:59

by Pavel Machek

[permalink] [raw]
Subject: Re: [PATCH] nbd: maintain compatibility with existing nbd tools

Hi!


> > [ ... ] In the meantime, the nbd-client tool currently can't correctly set
> > the size of the device and either that needs to be worked around in the
> > driver (I'd done that in the original jumbo patch), or the nbd-client
> > tool needs to be updated (the patch I'd mailed out for nbd-client works
> > around the sizing issue by re-opening the nbd). To be clear, that's not
> > something any of the changes that have gone in so far broke nor address.
> > It's a consequence of how bd_set_size() is called in fs/block_dev.c
> > do_open().
>
> And here's the (tiny) patch for nbd to maintain compatibility with the
> existing nbd-client tool. Compiled and tested on a couple machines.
> Please apply.

You are the maintainer, you can go to the linus directly. (I hope
Linus took that MAINTAINERS patch). Or you can send this to Rusty
'trivial patch monkey' russel. It seems easy enough.

Pavel

[Aha, if you wanted *Andrew* to apply it... I guess it is better to
say directly who do you want to take it.]

> Thanks,
> Paul

Content-Description: nbd patch
> --- nbd.c.ORIG 2003-06-26 10:35:43.000000000 -0400
> +++ nbd.c 2003-06-26 17:03:08.000000000 -0400
> @@ -465,15 +468,18 @@
> lo->blksize_bits++;
> temp >>= 1;
> }
> - lo->bytesize &= ~(lo->blksize-1);
> + lo->bytesize &= ~(lo->blksize-1);
> + inode->i_bdev->bd_inode->i_size = lo->bytesize;
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> case NBD_SET_SIZE:
> - lo->bytesize = arg & ~(lo->blksize-1);
> + lo->bytesize = arg & ~(lo->blksize-1);
> + inode->i_bdev->bd_inode->i_size = lo->bytesize;
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> case NBD_SET_SIZE_BLOCKS:
> lo->bytesize = ((u64) arg) << lo->blksize_bits;
> + inode->i_bdev->bd_inode->i_size = lo->bytesize;
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> case NBD_DO_IT:


--
When do you have a heart between your knees?
[Johanka's followup: and *two* hearts?]

2003-06-29 20:51:16

by Paul Clements

[permalink] [raw]
Subject: Re: [PATCH 2.5.73] nbd: maintain compatibility with existing nbd tools

On Sun, 29 Jun 2003, Pavel Machek wrote:

> > > [ ... ] In the meantime, the nbd-client tool currently can't correctly set
> > > the size of the device and either that needs to be worked around in the
> > > driver (I'd done that in the original jumbo patch), or the nbd-client
> > > tool needs to be updated (the patch I'd mailed out for nbd-client works
> > > around the sizing issue by re-opening the nbd). To be clear, that's not
> > > something any of the changes that have gone in so far broke nor address.
> > > It's a consequence of how bd_set_size() is called in fs/block_dev.c
> > > do_open().
> >
> > And here's the (tiny) patch for nbd to maintain compatibility with the
> > existing nbd-client tool. Compiled and tested on a couple machines.
> > Please apply.
>
> You are the maintainer, you can go to the linus directly. (I hope
> Linus took that MAINTAINERS patch).

Not yet, I don't think...

> [Aha, if you wanted *Andrew* to apply it... I guess it is better to
> say directly who do you want to take it.]

Well, it's just that Andrew seems particularly willing to help on this,
so that's why I sent to him...

At any rate, Andrew, here's a cleaned up version of the patch... please apply.

Thanks,
Paul


Attachments:
nbd_2_5_compat.diff (825.00 B)

2003-06-30 15:56:20

by Lou Langholtz

[permalink] [raw]
Subject: Re: [RFC][PATCH] nbd driver for 2.5+: fix locking issues with ioctl UI

Paul Clements wrote:

>. . .
>That's not an unreasonable thing to do...it'd be a much more minor change.
>I guess I'm just having a hard time seeing the benefit of rearranging
>all this code. Unless there's some substantial benefit, it seems wiser
>to keep the changes as minimal as possible...after all, the current code
>does work.
>. . .
>
Sorry for the confusion. I think you're looking at the nbd code more
from the standpoint of fixing problem areas and integration with the
changes from 2.5. I've had my eye meanwhile on a more robust
implemenation and enhanced design. The difference being that what I've
submitted so far are just the changes I've needed along that path which
also achieve the former target. But the changes clearly aren't ideal for
the former perspective - in not being minimalistic changes. Just nobody
else has completely fixed problematic areas of the driver liking locking
races so I figure if the code also fixes these problematic areas it's
worth sharing with this audience. YMMV of course.