2009-01-26 17:31:59

by Pavel Machek

[permalink] [raw]
Subject: nbd: add locking to nbd_ioctl


The code was written to rely on big kernel lock to protect it from
races. It mostly works when interface is not abused.

So this uses tx_lock to protect data structures from concurrent use
between ioctl and worker threads.

Next step will be moving from ioctl to unlocked_ioctl.

Signed-off-by: Pavel Machek <[email protected]>
Acked-by: Paul Clements <[email protected]>

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 34f80fa..81c94a4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -559,74 +557,68 @@ static void do_nbd_request(struct reques
}
}

-static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct nbd_device *lo = bdev->bd_disk->private_data;
- struct file *file;
- int error;
- struct request sreq ;
- struct task_struct *thread;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- BUG_ON(lo->magic != LO_MAGIC);
-
- /* Anyone capable of this syscall can do *real bad* things */
- dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
- lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+/* Must be called with tx_lock held */

+static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
+ unsigned int cmd, unsigned long arg)
+{
switch (cmd) {
case NBD_DISCONNECT:
printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
- blk_rq_init(NULL, &sreq);
- sreq.cmd_type = REQ_TYPE_SPECIAL;
- nbd_cmd(&sreq) = NBD_CMD_DISC;
- /*
- * Set these to sane values in case server implementation
- * fails to check the request type first and also to keep
- * debugging output cleaner.
- */
- sreq.sector = 0;
- sreq.nr_sectors = 0;
- if (!lo->sock)
- return -EINVAL;
- mutex_lock(&lo->tx_lock);
- nbd_send_req(lo, &sreq);
- mutex_unlock(&lo->tx_lock);
+ {
+ struct request sreq;
+
+ blk_rq_init(NULL, &sreq);
+ sreq.cmd_type = REQ_TYPE_SPECIAL;
+ nbd_cmd(&sreq) = NBD_CMD_DISC;
+ /*
+ * Set these to sane values in case server implementation
+ * fails to check the request type first and also to keep
+ * debugging output cleaner.
+ */
+ sreq.sector = 0;
+ sreq.nr_sectors = 0;
+ if (!lo->sock)
+ return -EINVAL;
+ nbd_send_req(lo, &sreq);
+ }
return 0;

case NBD_CLEAR_SOCK:
- error = 0;
- mutex_lock(&lo->tx_lock);
- lo->sock = NULL;
- mutex_unlock(&lo->tx_lock);
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- BUG_ON(!list_empty(&lo->queue_head));
- if (file)
- fput(file);
- return error;
- case NBD_SET_SOCK:
- if (lo->file)
- return -EBUSY;
- error = -EINVAL;
- file = fget(arg);
- if (file) {
- struct inode *inode = file->f_path.dentry->d_inode;
- if (S_ISSOCK(inode->i_mode)) {
- lo->file = file;
- lo->sock = SOCKET_I(inode);
- if (max_part > 0)
- bdev->bd_invalidated = 1;
- error = 0;
- } else {
+ {
+ struct file *file;
+
+ lo->sock = NULL;
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ BUG_ON(!list_empty(&lo->queue_head));
+ if (file)
fput(file);
+ }
+ return 0;
+
+ case NBD_SET_SOCK:
+ {
+ struct file *file;
+ if (lo->file)
+ return -EBUSY;
+ file = fget(arg);
+ if (file) {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ if (S_ISSOCK(inode->i_mode)) {
+ lo->file = file;
+ lo->sock = SOCKET_I(inode);
+ if (max_part > 0)
+ bdev->bd_invalidated = 1;
+ return 0;
+ } else {
+ fput(file);
+ }
}
}
- return error;
+ return -EINVAL;
+
case NBD_SET_BLKSIZE:
lo->blksize = arg;
lo->bytesize &= ~(lo->blksize-1);
@@ -634,47 +626,65 @@ static int nbd_ioctl(struct block_device
set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
+
case NBD_SET_SIZE:
lo->bytesize = arg & ~(lo->blksize-1);
bdev->bd_inode->i_size = lo->bytesize;
set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
+
case NBD_SET_TIMEOUT:
lo->xmit_timeout = arg * HZ;
return 0;
+
case NBD_SET_SIZE_BLOCKS:
lo->bytesize = ((u64) arg) * lo->blksize;
bdev->bd_inode->i_size = lo->bytesize;
set_blocksize(bdev, lo->blksize);
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;
+
case NBD_DO_IT:
- if (lo->pid)
- return -EBUSY;
- if (!lo->file)
- return -EINVAL;
- thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
- if (IS_ERR(thread))
- return PTR_ERR(thread);
- wake_up_process(thread);
- error = nbd_do_it(lo);
- kthread_stop(thread);
- if (error)
- return error;
- sock_shutdown(lo, 1);
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
- if (file)
- fput(file);
- lo->bytesize = 0;
- bdev->bd_inode->i_size = 0;
- set_capacity(lo->disk, 0);
- if (max_part > 0)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
- return lo->harderror;
+ {
+ struct task_struct *thread;
+ struct file *file;
+ int error;
+
+ if (lo->pid)
+ return -EBUSY;
+ if (!lo->file)
+ return -EINVAL;
+
+ mutex_unlock(&lo->tx_lock);
+
+ thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+ if (IS_ERR(thread)) {
+ mutex_lock(&lo->tx_lock);
+ return PTR_ERR(thread);
+ }
+ wake_up_process(thread);
+ error = nbd_do_it(lo);
+ kthread_stop(thread);
+
+ mutex_lock(&lo->tx_lock);
+ if (error)
+ return error;
+ sock_shutdown(lo, 0);
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+ if (file)
+ fput(file);
+ lo->bytesize = 0;
+ bdev->bd_inode->i_size = 0;
+ set_capacity(lo->disk, 0);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ return lo->harderror;
+ }
+
case NBD_CLEAR_QUE:
/*
* This is for compatibility only. The queue is always cleared
@@ -682,6 +692,7 @@ static int nbd_ioctl(struct block_device
*/
BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
return 0;
+
case NBD_PRINT_DEBUG:
printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
bdev->bd_disk->disk_name,
@@ -689,7 +700,29 @@ static int nbd_ioctl(struct block_device
&lo->queue_head);
return 0;
}
- return -EINVAL;
+}
+
+
+static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
+{
+ struct nbd_device *lo = bdev->bd_disk->private_data;
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ BUG_ON(lo->magic != LO_MAGIC);
+
+ /* Anyone capable of this syscall can do *real bad* things */
+ dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
+ lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
+
+ mutex_lock(&lo->tx_lock);
+ error = __nbd_ioctl(bdev, lo, cmd, arg);
+ mutex_unlock(&lo->tx_lock);
+
+ return error;
}

static struct block_device_operations nbd_fops =


--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html


2009-01-29 01:14:51

by Andrew Morton

[permalink] [raw]
Subject: Re: nbd: add locking to nbd_ioctl

On Mon, 26 Jan 2009 18:31:33 +0100
Pavel Machek <[email protected]> wrote:

>
> The code was written to rely on big kernel lock to protect it from
> races. It mostly works when interface is not abused.
>
> So this uses tx_lock to protect data structures from concurrent use
> between ioctl and worker threads.
>
> Next step will be moving from ioctl to unlocked_ioctl.

I hope I got the latest version of this...

The patch adds new trailing whitespace. Either a) you wanted to add
new trailing whitespace or b) you're not running checkpatch. Either
way: bad Pavel!

>
> ...
>
> + case NBD_SET_SOCK:
> + {
> + struct file *file;
> + if (lo->file)
> + return -EBUSY;
> + file = fget(arg);
> + if (file) {
> + struct inode *inode = file->f_path.dentry->d_inode;
> + if (S_ISSOCK(inode->i_mode)) {
> + lo->file = file;
> + lo->sock = SOCKET_I(inode);
> + if (max_part > 0)
> + bdev->bd_invalidated = 1;
> + return 0;
> + } else {
> + fput(file);
> + }
> }
> }
> - return error;
> + return -EINVAL;

This isn't terribly readable - the -EINVAL is in fact specifically a
response to receiving a !S_ISSOCK fd, but this is presented in a
rather obfuscated way.

> case NBD_SET_BLKSIZE:
> lo->blksize = arg;
> lo->bytesize &= ~(lo->blksize-1);
> @@ -634,47 +626,65 @@ static int nbd_ioctl(struct block_device
> set_blocksize(bdev, lo->blksize);
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> +
> case NBD_SET_SIZE:
> lo->bytesize = arg & ~(lo->blksize-1);
> bdev->bd_inode->i_size = lo->bytesize;
> set_blocksize(bdev, lo->blksize);
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> +
> case NBD_SET_TIMEOUT:
> lo->xmit_timeout = arg * HZ;
> return 0;
> +
> case NBD_SET_SIZE_BLOCKS:
> lo->bytesize = ((u64) arg) * lo->blksize;
> bdev->bd_inode->i_size = lo->bytesize;
> set_blocksize(bdev, lo->blksize);
> set_capacity(lo->disk, lo->bytesize >> 9);
> return 0;
> +
> case NBD_DO_IT:
> - if (lo->pid)
> - return -EBUSY;
> - if (!lo->file)
> - return -EINVAL;
> - thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
> - if (IS_ERR(thread))
> - return PTR_ERR(thread);
> - wake_up_process(thread);
> - error = nbd_do_it(lo);
> - kthread_stop(thread);
> - if (error)
> - return error;
> - sock_shutdown(lo, 1);
> - file = lo->file;
> - lo->file = NULL;
> - nbd_clear_que(lo);
> - printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
> - if (file)
> - fput(file);
> - lo->bytesize = 0;
> - bdev->bd_inode->i_size = 0;
> - set_capacity(lo->disk, 0);
> - if (max_part > 0)
> - ioctl_by_bdev(bdev, BLKRRPART, 0);
> - return lo->harderror;
> + {
> + struct task_struct *thread;
> + struct file *file;
> + int error;
> +
> + if (lo->pid)
> + return -EBUSY;
> + if (!lo->file)
> + return -EINVAL;
> +
> + mutex_unlock(&lo->tx_lock);
> +
> + thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
> + if (IS_ERR(thread)) {
> + mutex_lock(&lo->tx_lock);
> + return PTR_ERR(thread);
> + }
> + wake_up_process(thread);

We could/should use kthread_run() here.

> + error = nbd_do_it(lo);
> + kthread_stop(thread);
> +
> + mutex_lock(&lo->tx_lock);
> + if (error)
> + return error;
> + sock_shutdown(lo, 0);
> + file = lo->file;
> + lo->file = NULL;
> + nbd_clear_que(lo);
> + printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);

I don't get it - why are we warning about what appears to be an
expected operation?

> + if (file)
> + fput(file);
> + lo->bytesize = 0;
> + bdev->bd_inode->i_size = 0;
> + set_capacity(lo->disk, 0);
> + if (max_part > 0)
> + ioctl_by_bdev(bdev, BLKRRPART, 0);
> + return lo->harderror;
> + }
> +

2009-01-29 01:19:42

by Andrew Morton

[permalink] [raw]
Subject: Re: nbd: add locking to nbd_ioctl

On Mon, 26 Jan 2009 18:31:33 +0100
Pavel Machek <[email protected]> wrote:

> The code was written to rely on big kernel lock to protect it from
> races. It mostly works when interface is not abused.
>
> So this uses tx_lock to protect data structures from concurrent use
> between ioctl and worker threads.
>
> Next step will be moving from ioctl to unlocked_ioctl.

I dinked with this rather a lot. Moved all those case statements in a
tabstop (makes it all fit into 80 cols without adding weird
linebreaks), moved a few `return' statements to more logical places,
etc.

Please apply and double-check?

--- a/drivers/block/nbd.c~nbd-add-locking-to-nbd_ioctl-checkpatch-fixes
+++ a/drivers/block/nbd.c
@@ -565,61 +565,59 @@ static int __nbd_ioctl(struct block_devi
unsigned int cmd, unsigned long arg)
{
switch (cmd) {
- case NBD_DISCONNECT:
+ case NBD_DISCONNECT: {
+ struct request sreq;
+
printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
- {
- struct request sreq;

- blk_rq_init(NULL, &sreq);
- sreq.cmd_type = REQ_TYPE_SPECIAL;
- nbd_cmd(&sreq) = NBD_CMD_DISC;
- /*
- * Set these to sane values in case server implementation
- * fails to check the request type first and also to keep
- * debugging output cleaner.
- */
- sreq.sector = 0;
- sreq.nr_sectors = 0;
- if (!lo->sock)
- return -EINVAL;
- nbd_send_req(lo, &sreq);
- }
+ blk_rq_init(NULL, &sreq);
+ sreq.cmd_type = REQ_TYPE_SPECIAL;
+ nbd_cmd(&sreq) = NBD_CMD_DISC;
+ /*
+ * Set these to sane values in case server implementation
+ * fails to check the request type first and also to keep
+ * debugging output cleaner.
+ */
+ sreq.sector = 0;
+ sreq.nr_sectors = 0;
+ if (!lo->sock)
+ return -EINVAL;
+ nbd_send_req(lo, &sreq);
return 0;
+ }

- case NBD_CLEAR_SOCK:
- {
- struct file *file;
-
- lo->sock = NULL;
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- BUG_ON(!list_empty(&lo->queue_head));
- if (file)
- fput(file);
- }
- return 0;
+ case NBD_CLEAR_SOCK: {
+ struct file *file;

- case NBD_SET_SOCK:
- {
- struct file *file;
- if (lo->file)
- return -EBUSY;
- file = fget(arg);
- if (file) {
- struct inode *inode = file->f_path.dentry->d_inode;
- if (S_ISSOCK(inode->i_mode)) {
- lo->file = file;
- lo->sock = SOCKET_I(inode);
- if (max_part > 0)
- bdev->bd_invalidated = 1;
- return 0;
- } else {
- fput(file);
- }
+ lo->sock = NULL;
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ BUG_ON(!list_empty(&lo->queue_head));
+ if (file)
+ fput(file);
+ return 0;
+ }
+
+ case NBD_SET_SOCK: {
+ struct file *file;
+ if (lo->file)
+ return -EBUSY;
+ file = fget(arg);
+ if (file) {
+ struct inode *inode = file->f_path.dentry->d_inode;
+ if (S_ISSOCK(inode->i_mode)) {
+ lo->file = file;
+ lo->sock = SOCKET_I(inode);
+ if (max_part > 0)
+ bdev->bd_invalidated = 1;
+ return 0;
+ } else {
+ fput(file);
}
}
return -EINVAL;
+ }

case NBD_SET_BLKSIZE:
lo->blksize = arg;
@@ -647,45 +645,44 @@ static int __nbd_ioctl(struct block_devi
set_capacity(lo->disk, lo->bytesize >> 9);
return 0;

- case NBD_DO_IT:
- {
- struct task_struct *thread;
- struct file *file;
- int error;
-
- if (lo->pid)
- return -EBUSY;
- if (!lo->file)
- return -EINVAL;
-
- mutex_unlock(&lo->tx_lock);
-
- thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
- if (IS_ERR(thread)) {
- mutex_lock(&lo->tx_lock);
- return PTR_ERR(thread);
- }
- wake_up_process(thread);
- error = nbd_do_it(lo);
- kthread_stop(thread);
+ case NBD_DO_IT: {
+ struct task_struct *thread;
+ struct file *file;
+ int error;
+
+ if (lo->pid)
+ return -EBUSY;
+ if (!lo->file)
+ return -EINVAL;
+
+ mutex_unlock(&lo->tx_lock);

+ thread = kthread_create(nbd_thread, lo, lo->disk->disk_name);
+ if (IS_ERR(thread)) {
mutex_lock(&lo->tx_lock);
- if (error)
- return error;
- sock_shutdown(lo, 0);
- file = lo->file;
- lo->file = NULL;
- nbd_clear_que(lo);
- printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
- if (file)
- fput(file);
- lo->bytesize = 0;
- bdev->bd_inode->i_size = 0;
- set_capacity(lo->disk, 0);
- if (max_part > 0)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
- return lo->harderror;
+ return PTR_ERR(thread);
}
+ wake_up_process(thread);
+ error = nbd_do_it(lo);
+ kthread_stop(thread);
+
+ mutex_lock(&lo->tx_lock);
+ if (error)
+ return error;
+ sock_shutdown(lo, 0);
+ file = lo->file;
+ lo->file = NULL;
+ nbd_clear_que(lo);
+ printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
+ if (file)
+ fput(file);
+ lo->bytesize = 0;
+ bdev->bd_inode->i_size = 0;
+ set_capacity(lo->disk, 0);
+ if (max_part > 0)
+ ioctl_by_bdev(bdev, BLKRRPART, 0);
+ return lo->harderror;
+ }

case NBD_CLEAR_QUE:
/*
_